summary | refs | log | tree | commit | diff
path: root/libavcodec/hapdec.c
diff options
context:
space:
mode:
author: Luca Barbato <lu_zero@gentoo.org> 2015-07-17 03:07:07 +0200
committer: Luca Barbato <lu_zero@gentoo.org> 2015-07-21 12:14:25 +0200
commit: 977105407cae55876041dddbf4ce0934cdd4cd6c (patch)
tree: 306fb9cc7fa9c055347193456a8e0e2128a84afd /libavcodec/hapdec.c
parent: 219b39a71a5694b1c14a07b86477f665a5b6849b (diff)
hap: Decode using optimal slice sizes
Enjoy some cache locality and use fewer threads. About 5x speedup (from 60 ms to 12 ms to decode a 4k frame). Signed-off-by: Luca Barbato <lu_zero@gentoo.org>
Diffstat (limited to 'libavcodec/hapdec.c')
-rw-r--r-- libavcodec/hapdec.c 33
1 file changed, 25 insertions, 8 deletions
diff --git a/libavcodec/hapdec.c b/libavcodec/hapdec.c
index 72db9f4702..5133a51323 100644
--- a/libavcodec/hapdec.c
+++ b/libavcodec/hapdec.c
@@ -137,16 +137,30 @@ static int setup_texture(AVCodecContext *avctx, size_t length)
}
static int decompress_texture_thread(AVCodecContext *avctx, void *arg,
- int block_nb, int thread_nb)
+ int slice, int thread_nb)
{
HapContext *ctx = avctx->priv_data;
AVFrame *frame = arg;
- int x = (TEXTURE_BLOCK_W * block_nb) % avctx->coded_width;
- int y = TEXTURE_BLOCK_H * (TEXTURE_BLOCK_W * block_nb / avctx->coded_width);
- uint8_t *p = frame->data[0] + x * 4 + y * frame->linesize[0];
- const uint8_t *d = ctx->tex_data + block_nb * ctx->tex_rat;
+ const uint8_t *d = ctx->tex_data;
+ int w_block = avctx->coded_width / TEXTURE_BLOCK_W;
+ int x, y;
+ int start_slice, end_slice;
+
+ start_slice = slice * ctx->slice_size;
+ end_slice = FFMIN(start_slice + ctx->slice_size, avctx->coded_height);
+
+ start_slice /= TEXTURE_BLOCK_H;
+ end_slice /= TEXTURE_BLOCK_H;
+
+ for (y = start_slice; y < end_slice; y++) {
+ uint8_t *p = frame->data[0] + y * frame->linesize[0] * TEXTURE_BLOCK_H;
+ int off = y * w_block;
+ for (x = 0; x < w_block; x++) {
+ ctx->tex_fun(p + x * 16, frame->linesize[0],
+ d + (off + x) * ctx->tex_rat);
+ }
+ }
- ctx->tex_fun(p, frame->linesize[0], d);
return 0;
}
@@ -156,7 +170,10 @@ static int hap_decode(AVCodecContext *avctx, void *data,
HapContext *ctx = avctx->priv_data;
ThreadFrame tframe;
int ret, length;
- int blocks = avctx->coded_width * avctx->coded_height / (TEXTURE_BLOCK_W * TEXTURE_BLOCK_H);
+ int slices = FFMIN(avctx->thread_count,
+ avctx->coded_height / TEXTURE_BLOCK_H);
+
+ ctx->slice_size = avctx->coded_height / slices;
bytestream2_init(&ctx->gbc, avpkt->data, avpkt->size);
@@ -180,7 +197,7 @@ static int hap_decode(AVCodecContext *avctx, void *data,
ff_thread_finish_setup(avctx);
/* Use the decompress function on the texture, one block per thread */
- avctx->execute2(avctx, decompress_texture_thread, tframe.f, NULL, blocks);
+ avctx->execute2(avctx, decompress_texture_thread, tframe.f, NULL, slices);
/* Frame is ready to be output */
tframe.f->pict_type = AV_PICTURE_TYPE_I;