summary | refs | log | tree | commit | diff
path: root/libavcodec/h264.c
diff options
context:
space:
mode:
author: Mans Rullgard <mans@mansr.com> 2012-07-03 23:16:11 +0100
committer: Mans Rullgard <mans@mansr.com> 2012-07-05 11:50:18 +0100
commit: 28fff0d9740e00c2ee82f72a4be55bdbb5e0c8c6 (patch)
tree: 9097035020bc4443e85b9f71f3579d3116dc2c6e /libavcodec/h264.c
parent: 983db9b2b4c753507d1cf8427675fca80d598b4c (diff)
h264: use templates to avoid excessive inlining
Instead of inlining everything into ff_h264_hl_decode_mb(), use explicit templating to create versions of the called functions with constant parameters filled in.

This greatly speeds up compilation of h264.c and reduces the code size without any measurable impact on performance.

Compilation time for h264.c on an i7 goes from 30s to 5.5s. Code size is reduced by 430kB.

Signed-off-by: Mans Rullgard <mans@mansr.com>
Diffstat (limited to 'libavcodec/h264.c')
-rw-r--r-- libavcodec/h264.c 543
1 files changed, 10 insertions, 533 deletions
diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 2d6a08e032..a4afcc870e 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -714,33 +714,6 @@ static av_always_inline void mc_part_weighted(H264Context *h, int n, int square,
}
}
-static av_always_inline void mc_part(H264Context *h, int n, int square,
- int height, int delta,
- uint8_t *dest_y, uint8_t *dest_cb,
- uint8_t *dest_cr,
- int x_offset, int y_offset,
- qpel_mc_func *qpix_put,
- h264_chroma_mc_func chroma_put,
- qpel_mc_func *qpix_avg,
- h264_chroma_mc_func chroma_avg,
- h264_weight_func *weight_op,
- h264_biweight_func *weight_avg,
- int list0, int list1,
- int pixel_shift, int chroma_idc)
-{
- if ((h->use_weight == 2 && list0 && list1 &&
- (h->implicit_weight[h->ref_cache[0][scan8[n]]][h->ref_cache[1][scan8[n]]][h->s.mb_y & 1] != 32)) ||
- h->use_weight == 1)
- mc_part_weighted(h, n, square, height, delta, dest_y, dest_cb, dest_cr,
- x_offset, y_offset, qpix_put, chroma_put,
- weight_op[0], weight_op[1], weight_avg[0],
- weight_avg[1], list0, list1, pixel_shift, chroma_idc);
- else
- mc_part_std(h, n, square, height, delta, dest_y, dest_cb, dest_cr,
- x_offset, y_offset, qpix_put, chroma_put, qpix_avg,
- chroma_avg, list0, list1, pixel_shift, chroma_idc);
-}
-
static av_always_inline void prefetch_motion(H264Context *h, int list,
int pixel_shift, int chroma_idc)
{
@@ -768,146 +741,6 @@ static av_always_inline void prefetch_motion(H264Context *h, int list,
}
}
-static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y,
- uint8_t *dest_cb, uint8_t *dest_cr,
- qpel_mc_func(*qpix_put)[16],
- h264_chroma_mc_func(*chroma_put),
- qpel_mc_func(*qpix_avg)[16],
- h264_chroma_mc_func(*chroma_avg),
- h264_weight_func *weight_op,
- h264_biweight_func *weight_avg,
- int pixel_shift, int chroma_idc)
-{
- MpegEncContext *const s = &h->s;
- const int mb_xy = h->mb_xy;
- const int mb_type = s->current_picture.f.mb_type[mb_xy];
-
- assert(IS_INTER(mb_type));
-
- if (HAVE_THREADS && (s->avctx->active_thread_type & FF_THREAD_FRAME))
- await_references(h);
- prefetch_motion(h, 0, pixel_shift, chroma_idc);
-
- if (IS_16X16(mb_type)) {
- mc_part(h, 0, 1, 16, 0, dest_y, dest_cb, dest_cr, 0, 0,
- qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
- weight_op, weight_avg,
- IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
- pixel_shift, chroma_idc);
- } else if (IS_16X8(mb_type)) {
- mc_part(h, 0, 0, 8, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 0,
- qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
- weight_op, weight_avg,
- IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
- pixel_shift, chroma_idc);
- mc_part(h, 8, 0, 8, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 4,
- qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
- weight_op, weight_avg,
- IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
- pixel_shift, chroma_idc);
- } else if (IS_8X16(mb_type)) {
- mc_part(h, 0, 0, 16, 8 * h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
- qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
- &weight_op[1], &weight_avg[1],
- IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
- pixel_shift, chroma_idc);
- mc_part(h, 4, 0, 16, 8 * h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
- qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
- &weight_op[1], &weight_avg[1],
- IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
- pixel_shift, chroma_idc);
- } else {
- int i;
-
- assert(IS_8X8(mb_type));
-
- for (i = 0; i < 4; i++) {
- const int sub_mb_type = h->sub_mb_type[i];
- const int n = 4 * i;
- int x_offset = (i & 1) << 2;
- int y_offset = (i & 2) << 1;
-
- if (IS_SUB_8X8(sub_mb_type)) {
- mc_part(h, n, 1, 8, 0, dest_y, dest_cb, dest_cr,
- x_offset, y_offset,
- qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
- &weight_op[1], &weight_avg[1],
- IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
- pixel_shift, chroma_idc);
- } else if (IS_SUB_8X4(sub_mb_type)) {
- mc_part(h, n, 0, 4, 4 << pixel_shift, dest_y, dest_cb, dest_cr,
- x_offset, y_offset,
- qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
- &weight_op[1], &weight_avg[1],
- IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
- pixel_shift, chroma_idc);
- mc_part(h, n + 2, 0, 4, 4 << pixel_shift,
- dest_y, dest_cb, dest_cr, x_offset, y_offset + 2,
- qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
- &weight_op[1], &weight_avg[1],
- IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
- pixel_shift, chroma_idc);
- } else if (IS_SUB_4X8(sub_mb_type)) {
- mc_part(h, n, 0, 8, 4 * h->mb_linesize,
- dest_y, dest_cb, dest_cr, x_offset, y_offset,
- qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
- &weight_op[2], &weight_avg[2],
- IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
- pixel_shift, chroma_idc);
- mc_part(h, n + 1, 0, 8, 4 * h->mb_linesize,
- dest_y, dest_cb, dest_cr, x_offset + 2, y_offset,
- qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
- &weight_op[2], &weight_avg[2],
- IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
- pixel_shift, chroma_idc);
- } else {
- int j;
- assert(IS_SUB_4X4(sub_mb_type));
- for (j = 0; j < 4; j++) {
- int sub_x_offset = x_offset + 2 * (j & 1);
- int sub_y_offset = y_offset + (j & 2);
- mc_part(h, n + j, 1, 4, 0,
- dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
- qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
- &weight_op[2], &weight_avg[2],
- IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
- pixel_shift, chroma_idc);
- }
- }
- }
- }
-
- prefetch_motion(h, 1, pixel_shift, chroma_idc);
-}
-
-static av_always_inline void hl_motion_420(H264Context *h, uint8_t *dest_y,
- uint8_t *dest_cb, uint8_t *dest_cr,
- qpel_mc_func(*qpix_put)[16],
- h264_chroma_mc_func(*chroma_put),
- qpel_mc_func(*qpix_avg)[16],
- h264_chroma_mc_func(*chroma_avg),
- h264_weight_func *weight_op,
- h264_biweight_func *weight_avg,
- int pixel_shift)
-{
- hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put,
- qpix_avg, chroma_avg, weight_op, weight_avg, pixel_shift, 1);
-}
-
-static av_always_inline void hl_motion_422(H264Context *h, uint8_t *dest_y,
- uint8_t *dest_cb, uint8_t *dest_cr,
- qpel_mc_func(*qpix_put)[16],
- h264_chroma_mc_func(*chroma_put),
- qpel_mc_func(*qpix_avg)[16],
- h264_chroma_mc_func(*chroma_avg),
- h264_weight_func *weight_op,
- h264_biweight_func *weight_avg,
- int pixel_shift)
-{
- hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put,
- qpix_avg, chroma_avg, weight_op, weight_avg, pixel_shift, 2);
-}
-
static void free_tables(H264Context *h, int free_rbsp)
{
int i;
@@ -2077,373 +1910,17 @@ static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type,
}
}
-static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple,
- int pixel_shift)
-{
- MpegEncContext *const s = &h->s;
- const int mb_x = s->mb_x;
- const int mb_y = s->mb_y;
- const int mb_xy = h->mb_xy;
- const int mb_type = s->current_picture.f.mb_type[mb_xy];
- uint8_t *dest_y, *dest_cb, *dest_cr;
- int linesize, uvlinesize /*dct_offset*/;
- int i, j;
- int *block_offset = &h->block_offset[0];
- const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
- /* is_h264 should always be true if SVQ3 is disabled. */
- const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
- void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
- const int block_h = 16 >> s->chroma_y_shift;
- const int chroma422 = CHROMA422;
-
- dest_y = s->current_picture.f.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize) * 16;
- dest_cb = s->current_picture.f.data[1] + (mb_x << pixel_shift) * 8 + mb_y * s->uvlinesize * block_h;
- dest_cr = s->current_picture.f.data[2] + (mb_x << pixel_shift) * 8 + mb_y * s->uvlinesize * block_h;
-
- s->dsp.prefetch(dest_y + (s->mb_x & 3) * 4 * s->linesize + (64 << pixel_shift), s->linesize, 4);
- s->dsp.prefetch(dest_cb + (s->mb_x & 7) * s->uvlinesize + (64 << pixel_shift), dest_cr - dest_cb, 2);
-
- h->list_counts[mb_xy] = h->list_count;
-
- if (!simple && MB_FIELD) {
- linesize = h->mb_linesize = s->linesize * 2;
- uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
- block_offset = &h->block_offset[48];
- if (mb_y & 1) { // FIXME move out of this function?
- dest_y -= s->linesize * 15;
- dest_cb -= s->uvlinesize * (block_h - 1);
- dest_cr -= s->uvlinesize * (block_h - 1);
- }
- if (FRAME_MBAFF) {
- int list;
- for (list = 0; list < h->list_count; list++) {
- if (!USES_LIST(mb_type, list))
- continue;
- if (IS_16X16(mb_type)) {
- int8_t *ref = &h->ref_cache[list][scan8[0]];
- fill_rectangle(ref, 4, 4, 8, (16 + *ref) ^ (s->mb_y & 1), 1);
- } else {
- for (i = 0; i < 16; i += 4) {
- int ref = h->ref_cache[list][scan8[i]];
- if (ref >= 0)
- fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2,
- 8, (16 + ref) ^ (s->mb_y & 1), 1);
- }
- }
- }
- }
- } else {
- linesize = h->mb_linesize = s->linesize;
- uvlinesize = h->mb_uvlinesize = s->uvlinesize;
- // dct_offset = s->linesize * 16;
- }
-
- if (!simple && IS_INTRA_PCM(mb_type)) {
- if (pixel_shift) {
- const int bit_depth = h->sps.bit_depth_luma;
- int j;
- GetBitContext gb;
- init_get_bits(&gb, (uint8_t *)h->mb,
- ff_h264_mb_sizes[h->sps.chroma_format_idc] * bit_depth);
-
- for (i = 0; i < 16; i++) {
- uint16_t *tmp_y = (uint16_t *)(dest_y + i * linesize);
- for (j = 0; j < 16; j++)
- tmp_y[j] = get_bits(&gb, bit_depth);
- }
- if (simple || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) {
- if (!h->sps.chroma_format_idc) {
- for (i = 0; i < block_h; i++) {
- uint16_t *tmp_cb = (uint16_t *)(dest_cb + i * uvlinesize);
- for (j = 0; j < 8; j++)
- tmp_cb[j] = 1 << (bit_depth - 1);
- }
- for (i = 0; i < block_h; i++) {
- uint16_t *tmp_cr = (uint16_t *)(dest_cr + i * uvlinesize);
- for (j = 0; j < 8; j++)
- tmp_cr[j] = 1 << (bit_depth - 1);
- }
- } else {
- for (i = 0; i < block_h; i++) {
- uint16_t *tmp_cb = (uint16_t *)(dest_cb + i * uvlinesize);
- for (j = 0; j < 8; j++)
- tmp_cb[j] = get_bits(&gb, bit_depth);
- }
- for (i = 0; i < block_h; i++) {
- uint16_t *tmp_cr = (uint16_t *)(dest_cr + i * uvlinesize);
- for (j = 0; j < 8; j++)
- tmp_cr[j] = get_bits(&gb, bit_depth);
- }
- }
- }
- } else {
- for (i = 0; i < 16; i++)
- memcpy(dest_y + i * linesize, (uint8_t *)h->mb + i * 16, 16);
- if (simple || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) {
- if (!h->sps.chroma_format_idc) {
- for (i = 0; i < block_h; i++) {
- memset(dest_cb + i * uvlinesize, 128, 8);
- memset(dest_cr + i * uvlinesize, 128, 8);
- }
- } else {
- uint8_t *src_cb = (uint8_t *)h->mb + 256;
- uint8_t *src_cr = (uint8_t *)h->mb + 256 + block_h * 8;
- for (i = 0; i < block_h; i++) {
- memcpy(dest_cb + i * uvlinesize, src_cb + i * 8, 8);
- memcpy(dest_cr + i * uvlinesize, src_cr + i * 8, 8);
- }
- }
- }
- }
- } else {
- if (IS_INTRA(mb_type)) {
- if (h->deblocking_filter)
- xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize,
- uvlinesize, 1, 0, simple, pixel_shift);
-
- if (simple || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) {
- h->hpc.pred8x8[h->chroma_pred_mode](dest_cb, uvlinesize);
- h->hpc.pred8x8[h->chroma_pred_mode](dest_cr, uvlinesize);
- }
-
- hl_decode_mb_predict_luma(h, mb_type, is_h264, simple,
- transform_bypass, pixel_shift,
- block_offset, linesize, dest_y, 0);
-
- if (h->deblocking_filter)
- xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize,
- uvlinesize, 0, 0, simple, pixel_shift);
- } else if (is_h264) {
- if (chroma422) {
- hl_motion_422(h, dest_y, dest_cb, dest_cr,
- s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
- s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
- h->h264dsp.weight_h264_pixels_tab,
- h->h264dsp.biweight_h264_pixels_tab,
- pixel_shift);
- } else {
- hl_motion_420(h, dest_y, dest_cb, dest_cr,
- s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
- s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
- h->h264dsp.weight_h264_pixels_tab,
- h->h264dsp.biweight_h264_pixels_tab,
- pixel_shift);
- }
- }
-
- hl_decode_mb_idct_luma(h, mb_type, is_h264, simple, transform_bypass,
- pixel_shift, block_offset, linesize, dest_y, 0);
-
- if ((simple || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) &&
- (h->cbp & 0x30)) {
- uint8_t *dest[2] = { dest_cb, dest_cr };
- if (transform_bypass) {
- if (IS_INTRA(mb_type) && h->sps.profile_idc == 244 &&
- (h->chroma_pred_mode == VERT_PRED8x8 ||
- h->chroma_pred_mode == HOR_PRED8x8)) {
- h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0],
- block_offset + 16,
- h->mb + (16 * 16 * 1 << pixel_shift),
- uvlinesize);
- h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1],
- block_offset + 32,
- h->mb + (16 * 16 * 2 << pixel_shift),
- uvlinesize);
- } else {
- idct_add = s->dsp.add_pixels4;
- for (j = 1; j < 3; j++) {
- for (i = j * 16; i < j * 16 + 4; i++)
- if (h->non_zero_count_cache[scan8[i]] ||
- dctcoef_get(h->mb, pixel_shift, i * 16))
- idct_add(dest[j - 1] + block_offset[i],
- h->mb + (i * 16 << pixel_shift),
- uvlinesize);
- if (chroma422) {
- for (i = j * 16 + 4; i < j * 16 + 8; i++)
- if (h->non_zero_count_cache[scan8[i + 4]] ||
- dctcoef_get(h->mb, pixel_shift, i * 16))
- idct_add(dest[j - 1] + block_offset[i + 4],
- h->mb + (i * 16 << pixel_shift),
- uvlinesize);
- }
- }
- }
- } else {
- if (is_h264) {
- int qp[2];
- if (chroma422) {
- qp[0] = h->chroma_qp[0] + 3;
- qp[1] = h->chroma_qp[1] + 3;
- } else {
- qp[0] = h->chroma_qp[0];
- qp[1] = h->chroma_qp[1];
- }
- if (h->non_zero_count_cache[scan8[CHROMA_DC_BLOCK_INDEX + 0]])
- h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16 * 16 * 1 << pixel_shift),
- h->dequant4_coeff[IS_INTRA(mb_type) ? 1 : 4][qp[0]][0]);
- if (h->non_zero_count_cache[scan8[CHROMA_DC_BLOCK_INDEX + 1]])
- h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16 * 16 * 2 << pixel_shift),
- h->dequant4_coeff[IS_INTRA(mb_type) ? 2 : 5][qp[1]][0]);
- h->h264dsp.h264_idct_add8(dest, block_offset,
- h->mb, uvlinesize,
- h->non_zero_count_cache);
- } else if (CONFIG_SVQ3_DECODER) {
- h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16 * 16 * 1,
- h->dequant4_coeff[IS_INTRA(mb_type) ? 1 : 4][h->chroma_qp[0]][0]);
- h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16 * 16 * 2,
- h->dequant4_coeff[IS_INTRA(mb_type) ? 2 : 5][h->chroma_qp[1]][0]);
- for (j = 1; j < 3; j++) {
- for (i = j * 16; i < j * 16 + 4; i++)
- if (h->non_zero_count_cache[scan8[i]] || h->mb[i * 16]) {
- uint8_t *const ptr = dest[j - 1] + block_offset[i];
- ff_svq3_add_idct_c(ptr, h->mb + i * 16,
- uvlinesize,
- ff_h264_chroma_qp[0][s->qscale + 12] - 12, 2);
- }
- }
- }
- }
- }
- }
- if (h->cbp || IS_INTRA(mb_type)) {
- s->dsp.clear_blocks(h->mb);
- s->dsp.clear_blocks(h->mb + (24 * 16 << pixel_shift));
- }
-}
-
-static av_always_inline void hl_decode_mb_444_internal(H264Context *h,
- int simple,
- int pixel_shift)
-{
- MpegEncContext *const s = &h->s;
- const int mb_x = s->mb_x;
- const int mb_y = s->mb_y;
- const int mb_xy = h->mb_xy;
- const int mb_type = s->current_picture.f.mb_type[mb_xy];
- uint8_t *dest[3];
- int linesize;
- int i, j, p;
- int *block_offset = &h->block_offset[0];
- const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
- const int plane_count = (simple || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) ? 3 : 1;
-
- for (p = 0; p < plane_count; p++) {
- dest[p] = s->current_picture.f.data[p] +
- ((mb_x << pixel_shift) + mb_y * s->linesize) * 16;
- s->dsp.prefetch(dest[p] + (s->mb_x & 3) * 4 * s->linesize + (64 << pixel_shift),
- s->linesize, 4);
- }
-
- h->list_counts[mb_xy] = h->list_count;
-
- if (!simple && MB_FIELD) {
- linesize = h->mb_linesize = h->mb_uvlinesize = s->linesize * 2;
- block_offset = &h->block_offset[48];
- if (mb_y & 1) // FIXME move out of this function?
- for (p = 0; p < 3; p++)
- dest[p] -= s->linesize * 15;
- if (FRAME_MBAFF) {
- int list;
- for (list = 0; list < h->list_count; list++) {
- if (!USES_LIST(mb_type, list))
- continue;
- if (IS_16X16(mb_type)) {
- int8_t *ref = &h->ref_cache[list][scan8[0]];
- fill_rectangle(ref, 4, 4, 8, (16 + *ref) ^ (s->mb_y & 1), 1);
- } else {
- for (i = 0; i < 16; i += 4) {
- int ref = h->ref_cache[list][scan8[i]];
- if (ref >= 0)
- fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2,
- 8, (16 + ref) ^ (s->mb_y & 1), 1);
- }
- }
- }
- }
- } else {
- linesize = h->mb_linesize = h->mb_uvlinesize = s->linesize;
- }
-
- if (!simple && IS_INTRA_PCM(mb_type)) {
- if (pixel_shift) {
- const int bit_depth = h->sps.bit_depth_luma;
- GetBitContext gb;
- init_get_bits(&gb, (uint8_t *)h->mb, 768 * bit_depth);
-
- for (p = 0; p < plane_count; p++)
- for (i = 0; i < 16; i++) {
- uint16_t *tmp = (uint16_t *)(dest[p] + i * linesize);
- for (j = 0; j < 16; j++)
- tmp[j] = get_bits(&gb, bit_depth);
- }
- } else {
- for (p = 0; p < plane_count; p++)
- for (i = 0; i < 16; i++)
- memcpy(dest[p] + i * linesize,
- (uint8_t *)h->mb + p * 256 + i * 16, 16);
- }
- } else {
- if (IS_INTRA(mb_type)) {
- if (h->deblocking_filter)
- xchg_mb_border(h, dest[0], dest[1], dest[2], linesize,
- linesize, 1, 1, simple, pixel_shift);
-
- for (p = 0; p < plane_count; p++)
- hl_decode_mb_predict_luma(h, mb_type, 1, simple,
- transform_bypass, pixel_shift,
- block_offset, linesize, dest[p], p);
-
- if (h->deblocking_filter)
- xchg_mb_border(h, dest[0], dest[1], dest[2], linesize,
- linesize, 0, 1, simple, pixel_shift);
- } else {
- hl_motion(h, dest[0], dest[1], dest[2],
- s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
- s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
- h->h264dsp.weight_h264_pixels_tab,
- h->h264dsp.biweight_h264_pixels_tab, pixel_shift, 3);
- }
-
- for (p = 0; p < plane_count; p++)
- hl_decode_mb_idct_luma(h, mb_type, 1, simple, transform_bypass,
- pixel_shift, block_offset, linesize,
- dest[p], p);
- }
- if (h->cbp || IS_INTRA(mb_type)) {
- s->dsp.clear_blocks(h->mb);
- s->dsp.clear_blocks(h->mb + (24 * 16 << pixel_shift));
- }
-}
-
-/**
- * Process a macroblock; this case avoids checks for expensive uncommon cases.
- */
-#define hl_decode_mb_simple(sh, bits) \
-static void hl_decode_mb_simple_ ## bits(H264Context *h) \
-{ \
- hl_decode_mb_internal(h, 1, sh); \
-}
+#define BITS 8
+#define SIMPLE 1
+#include "h264_mb_template.c"
-hl_decode_mb_simple(0, 8)
-hl_decode_mb_simple(1, 16)
+#undef BITS
+#define BITS 16
+#include "h264_mb_template.c"
-/**
- * Process a macroblock; this handles edge cases, such as interlacing.
- */
-static av_noinline void hl_decode_mb_complex(H264Context *h)
-{
- hl_decode_mb_internal(h, 0, h->pixel_shift);
-}
-
-static av_noinline void hl_decode_mb_444_complex(H264Context *h)
-{
- hl_decode_mb_444_internal(h, 0, h->pixel_shift);
-}
-
-static av_noinline void hl_decode_mb_444_simple(H264Context *h)
-{
- hl_decode_mb_444_internal(h, 1, 0);
-}
+#undef SIMPLE
+#define SIMPLE 0
+#include "h264_mb_template.c"
void ff_h264_hl_decode_mb(H264Context *h)
{
@@ -2456,7 +1933,7 @@ void ff_h264_hl_decode_mb(H264Context *h)
if (is_complex || h->pixel_shift)
hl_decode_mb_444_complex(h);
else
- hl_decode_mb_444_simple(h);
+ hl_decode_mb_444_simple_8(h);
} else if (is_complex) {
hl_decode_mb_complex(h);
} else if (h->pixel_shift) {