From 337ade52de0e71607b16ffc99ea7696fb5bfef51 Mon Sep 17 00:00:00 2001 From: Daniel Kang Date: Wed, 11 Jul 2012 11:34:04 -0700 Subject: vp8: refactor decoding a single mb_row This is in preparation for sliced threading. Signed-off-by: Luca Barbato --- libavcodec/vp8.c | 164 +++++++++++++++++++++++++++++-------------------------- 1 file changed, 86 insertions(+), 78 deletions(-) (limited to 'libavcodec/vp8.c') diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c index 94200f68ac..8ebc445422 100644 --- a/libavcodec/vp8.c +++ b/libavcodec/vp8.c @@ -1574,11 +1574,95 @@ static void release_queued_segmaps(VP8Context *s, int is_close) s->maps_are_invalid = 0; } +#define MARGIN (16 << 2) +static void vp8_decode_mb_row(AVCodecContext *avctx, AVFrame *curframe, + AVFrame *prev_frame, int mb_y) +{ + VP8Context *s = avctx->priv_data; + VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)]; + VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1)*2; + int i, y, mb_x, mb_xy = mb_y*s->mb_width; + uint8_t *dst[3] = { + curframe->data[0] + 16*mb_y*s->linesize, + curframe->data[1] + 8*mb_y*s->uvlinesize, + curframe->data[2] + 8*mb_y*s->uvlinesize + }; + + memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock + memset(s->left_nnz, 0, sizeof(s->left_nnz)); + AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101); + + // left edge of 129 for intra prediction + if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) { + for (i = 0; i < 3; i++) + for (y = 0; y < 16>>!!i; y++) + dst[i][y*curframe->linesize[i]-1] = 129; + if (mb_y == 1) // top left edge is also 129 + s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129; + } + + s->mv_min.x = -MARGIN; + s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN; + + for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) { + /* Prefetch the current frame, 4 MBs ahead */ + s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4); + s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 
2); + + decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy, + prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL); + + prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS); + + if (!mb->skip) + decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz); + + if (mb->mode <= MODE_I4x4) + intra_predict(s, dst, mb, mb_x, mb_y); + else + inter_predict(s, dst, mb, mb_x, mb_y); + + prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN); + + if (!mb->skip) { + idct_mb(s, dst, mb); + } else { + AV_ZERO64(s->left_nnz); + AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned + + // Reset DC block predictors if they would exist if the mb had coefficients + if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) { + s->left_nnz[8] = 0; + s->top_nnz[mb_x][8] = 0; + } + } + + if (s->deblock_filter) + filter_level_for_mb(s, mb, &s->filter_strength[mb_x]); + + prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2); + + dst[0] += 16; + dst[1] += 8; + dst[2] += 8; + s->mv_min.x -= 64; + s->mv_max.x -= 64; + } + if (s->deblock_filter) { + if (s->filter.simple) + filter_mb_row_simple(s, curframe, mb_y); + else + filter_mb_row(s, curframe, mb_y); + } + s->mv_min.y -= 64; + s->mv_max.y -= 64; +} + static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt) { VP8Context *s = avctx->priv_data; - int ret, mb_x, mb_y, i, y, referenced; + int ret, mb_y, i, referenced; enum AVDiscard skip_thresh; AVFrame *av_uninit(curframe), *prev_frame; @@ -1686,90 +1770,14 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, if (s->keyframe) memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4); -#define MARGIN (16 << 2) s->mv_min.y = -MARGIN; s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN; for (mb_y = 0; mb_y < s->mb_height; mb_y++) { - VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)]; - VP8Macroblock *mb = s->macroblocks + (s->mb_height 
- mb_y - 1)*2; - int mb_xy = mb_y*s->mb_width; - uint8_t *dst[3] = { - curframe->data[0] + 16*mb_y*s->linesize, - curframe->data[1] + 8*mb_y*s->uvlinesize, - curframe->data[2] + 8*mb_y*s->uvlinesize - }; - - memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock - memset(s->left_nnz, 0, sizeof(s->left_nnz)); - AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101); - - // left edge of 129 for intra prediction - if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) { - for (i = 0; i < 3; i++) - for (y = 0; y < 16>>!!i; y++) - dst[i][y*curframe->linesize[i]-1] = 129; - if (mb_y == 1) // top left edge is also 129 - s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129; - } - - s->mv_min.x = -MARGIN; - s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN; if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map) ff_thread_await_progress(prev_frame, mb_y, 0); - for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) { - /* Prefetch the current frame, 4 MBs ahead */ - s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4); - s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2); - - decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy, - prev_frame && prev_frame->ref_index[0] ? 
prev_frame->ref_index[0] + mb_xy : NULL); - - prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS); - - if (!mb->skip) - decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz); - - if (mb->mode <= MODE_I4x4) - intra_predict(s, dst, mb, mb_x, mb_y); - else - inter_predict(s, dst, mb, mb_x, mb_y); - - prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN); - - if (!mb->skip) { - idct_mb(s, dst, mb); - } else { - AV_ZERO64(s->left_nnz); - AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned - - // Reset DC block predictors if they would exist if the mb had coefficients - if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) { - s->left_nnz[8] = 0; - s->top_nnz[mb_x][8] = 0; - } - } - - if (s->deblock_filter) - filter_level_for_mb(s, mb, &s->filter_strength[mb_x]); - - prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2); - - dst[0] += 16; - dst[1] += 8; - dst[2] += 8; - s->mv_min.x -= 64; - s->mv_max.x -= 64; - } - if (s->deblock_filter) { - if (s->filter.simple) - filter_mb_row_simple(s, curframe, mb_y); - else - filter_mb_row(s, curframe, mb_y); - } - s->mv_min.y -= 64; - s->mv_max.y -= 64; + vp8_decode_mb_row(avctx, curframe, prev_frame, mb_y); ff_thread_report_progress(curframe, mb_y, 0); } -- cgit v1.2.3