From 358b5b1a5958fa12b841aefe09e5e81c7d8bd346 Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Thu, 25 Feb 2010 23:44:42 +0000 Subject: Get rid of mb2b8_xy and b8_stride, change arrays organized based on b8_stride to ones based on mb_stride in h264. about 20 cpu cycles faster overall per MB Originally committed as revision 22065 to svn://svn.ffmpeg.org/ffmpeg/trunk --- libavcodec/h264_direct.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) (limited to 'libavcodec/h264_direct.c') diff --git a/libavcodec/h264_direct.c b/libavcodec/h264_direct.c index 8977be114f..4d716796cd 100644 --- a/libavcodec/h264_direct.c +++ b/libavcodec/h264_direct.c @@ -142,7 +142,7 @@ void ff_h264_direct_ref_list_init(H264Context * const h){ static void pred_spatial_direct_motion(H264Context * const h, int *mb_type){ MpegEncContext * const s = &h->s; - int b8_stride = h->b8_stride; + int b8_stride = 2; int b4_stride = h->b_stride; int mb_xy = h->mb_xy; int mb_type_col[2]; @@ -228,7 +228,7 @@ static void pred_spatial_direct_motion(H264Context * const h, int *mb_type){ mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride; mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy]; mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride]; - b8_stride *= 3; + b8_stride = 2+4*s->mb_stride; b4_stride *= 6; sub_mb_type |= MB_TYPE_16x16|MB_TYPE_DIRECT2; /* B_SUB_8x8 */ @@ -262,12 +262,12 @@ single_col: l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]]; l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]]; - l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]]; - l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]]; + l1ref0 = &h->ref_list[1][0].ref_index [0][4*mb_xy]; + l1ref1 = &h->ref_list[1][0].ref_index [1][4*mb_xy]; if(!b8_stride){ if(s->mb_y&1){ - l1ref0 += h->b8_stride; - l1ref1 += h->b8_stride; + l1ref0 += 2; + l1ref1 += 2; l1mv0 += 2*b4_stride; l1mv1 += 2*b4_stride; } @@ -342,11 +342,12 @@ single_col: fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1); fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1); + assert(b8_stride==2); /* col_zero_flag */ - if(!IS_INTRA(mb_type_col[0]) && !h->ref_list[1][0].long_ref && ( l1ref0[x8 + y8*b8_stride] == 0 - || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0 + if(!IS_INTRA(mb_type_col[0]) && !h->ref_list[1][0].long_ref && ( l1ref0[i8] == 0 + || (l1ref0[i8] < 0 && l1ref1[i8] == 0 && h->x264_build>33U))){ - const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1; + const int16_t (*l1mv)[2]= l1ref0[i8] == 0 ? l1mv0 : l1mv1; if(IS_SUB_8X8(sub_mb_type)){ const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride]; if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){ @@ -381,7 +382,7 @@ single_col: static void pred_temp_direct_motion(H264Context * const h, int *mb_type){ MpegEncContext * const s = &h->s; - int b8_stride = h->b8_stride; + int b8_stride = 2; int b4_stride = h->b_stride; int mb_xy = h->mb_xy; int mb_type_col[2]; @@ -406,7 +407,7 @@ static void pred_temp_direct_motion(H264Context * const h, int *mb_type){ mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride; mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy]; mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride]; - b8_stride *= 3; + b8_stride = 2+4*s->mb_stride; b4_stride *= 6; sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */ @@ -441,12 +442,12 @@ single_col: l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]]; l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]]; - l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]]; - l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]]; + l1ref0 = &h->ref_list[1][0].ref_index [0][4*mb_xy]; + l1ref1 = &h->ref_list[1][0].ref_index [1][4*mb_xy]; if(!b8_stride){ if(s->mb_y&1){ - l1ref0 += h->b8_stride; - l1ref1 += h->b8_stride; + l1ref0 += 2; + l1ref1 += 2; l1mv0 += 2*b4_stride; l1mv1 += 2*b4_stride; } @@ -549,11 +550,12 @@ single_col: continue; } - ref0 = l1ref0[x8 + y8*b8_stride]; + assert(b8_stride == 2); + ref0 = l1ref0[i8]; if(ref0 >= 0) ref0 = map_col_to_list0[0][ref0 + ref_offset]; else{ - ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset]; + ref0 = map_col_to_list0[1][l1ref1[i8] + ref_offset]; l1mv= l1mv1; } scale = dist_scale_factor[ref0]; -- cgit v1.2.3