From 6b2b26e7af3ede0abfb46eb5725c26d1083f50bc Mon Sep 17 00:00:00 2001 From: Luca Barbato Date: Fri, 17 Jul 2015 03:07:08 +0200 Subject: dds: Decode using optimal slice sizes Enjoy some cache locality and use fewer threads. The speedup is about the same as the one obtained for HAP. Signed-off-by: Luca Barbato --- libavcodec/dds.c | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) (limited to 'libavcodec/dds.c') diff --git a/libavcodec/dds.c b/libavcodec/dds.c index 501057e2a2..e1090eedf0 100644 --- a/libavcodec/dds.c +++ b/libavcodec/dds.c @@ -105,6 +105,7 @@ typedef struct DDSContext { const uint8_t *tex_data; // Compressed texture int tex_ratio; // Compression ratio + int slice_size; // Optimal slice size /* Pointer to the selected compress or decompress function. */ int (*tex_funct)(uint8_t *dst, ptrdiff_t stride, const uint8_t *block); @@ -414,16 +415,30 @@ static int parse_pixel_format(AVCodecContext *avctx) } static int decompress_texture_thread(AVCodecContext *avctx, void *arg, - int block_nb, int thread_nb) + int slice, int thread_nb) { DDSContext *ctx = avctx->priv_data; AVFrame *frame = arg; - int x = (TEXTURE_BLOCK_W * block_nb) % avctx->coded_width; - int y = TEXTURE_BLOCK_H * (TEXTURE_BLOCK_W * block_nb / avctx->coded_width); - uint8_t *p = frame->data[0] + x * 4 + y * frame->linesize[0]; - const uint8_t *d = ctx->tex_data + block_nb * ctx->tex_ratio; + const uint8_t *d = ctx->tex_data; + int w_block = avctx->coded_width / TEXTURE_BLOCK_W; + int x, y; + int start_slice, end_slice; + + start_slice = slice * ctx->slice_size; + end_slice = FFMIN(start_slice + ctx->slice_size, avctx->coded_height); + + start_slice /= TEXTURE_BLOCK_H; + end_slice /= TEXTURE_BLOCK_H; + + for (y = start_slice; y < end_slice; y++) { + uint8_t *p = frame->data[0] + y * frame->linesize[0] * TEXTURE_BLOCK_H; + int off = y * w_block; + for (x = 0; x < w_block; x++) { + ctx->tex_funct(p + x * 16, frame->linesize[0], + d + (off + x) * ctx->tex_ratio); + } + } - 
ctx->tex_funct(p, frame->linesize[0], d); return 0; } @@ -568,7 +583,7 @@ static int dds_decode(AVCodecContext *avctx, void *data, DDSContext *ctx = avctx->priv_data; GetByteContext *gbc = &ctx->gbc; AVFrame *frame = data; - int blocks, mipmap; + int mipmap; int ret; ff_texturedsp_init(&ctx->texdsp); @@ -618,10 +633,13 @@ static int dds_decode(AVCodecContext *avctx, void *data, return ret; if (ctx->compressed) { + int slices = FFMIN(avctx->thread_count, + avctx->coded_height / TEXTURE_BLOCK_H); + ctx->slice_size = avctx->coded_height / slices; + /* Use the decompress function on the texture, one block per thread. */ ctx->tex_data = gbc->buffer; - blocks = avctx->coded_width * avctx->coded_height / (TEXTURE_BLOCK_W * TEXTURE_BLOCK_H); - avctx->execute2(avctx, decompress_texture_thread, frame, NULL, blocks); + avctx->execute2(avctx, decompress_texture_thread, frame, NULL, slices); } else { int linesize = av_image_get_linesize(avctx->pix_fmt, frame->width, 0); -- cgit v1.2.3