summary | refs | log | tree | commit | diff
path: root/libavcodec/h264_direct.c
diff options
context:
space:
mode:
author: Michael Niedermayer <michaelni@gmx.at> 2010-02-25 23:44:42 +0000
committer: Michael Niedermayer <michaelni@gmx.at> 2010-02-25 23:44:42 +0000
commit: 358b5b1a5958fa12b841aefe09e5e81c7d8bd346 (patch)
tree: c6210b5aa5107d8e34d04494ac13880e687c6c4c /libavcodec/h264_direct.c
parent: f3a572ca146555e68c8100d27f948c7fcd4a4ce5 (diff)
Get rid of mb2b8_xy and b8_stride; change arrays organized based on b8_stride to
ones based on mb_stride in h264. About 20 CPU cycles faster overall per MB.

Originally committed as revision 22065 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/h264_direct.c')
-rw-r--r-- libavcodec/h264_direct.c 36
1 files changed, 19 insertions, 17 deletions
diff --git a/libavcodec/h264_direct.c b/libavcodec/h264_direct.c
index 8977be114f..4d716796cd 100644
--- a/libavcodec/h264_direct.c
+++ b/libavcodec/h264_direct.c
@@ -142,7 +142,7 @@ void ff_h264_direct_ref_list_init(H264Context * const h){
static void pred_spatial_direct_motion(H264Context * const h, int *mb_type){
MpegEncContext * const s = &h->s;
- int b8_stride = h->b8_stride;
+ int b8_stride = 2;
int b4_stride = h->b_stride;
int mb_xy = h->mb_xy;
int mb_type_col[2];
@@ -228,7 +228,7 @@ static void pred_spatial_direct_motion(H264Context * const h, int *mb_type){
mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
- b8_stride *= 3;
+ b8_stride = 2+4*s->mb_stride;
b4_stride *= 6;
sub_mb_type |= MB_TYPE_16x16|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
@@ -262,12 +262,12 @@ single_col:
l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
- l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
- l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
+ l1ref0 = &h->ref_list[1][0].ref_index [0][4*mb_xy];
+ l1ref1 = &h->ref_list[1][0].ref_index [1][4*mb_xy];
if(!b8_stride){
if(s->mb_y&1){
- l1ref0 += h->b8_stride;
- l1ref1 += h->b8_stride;
+ l1ref0 += 2;
+ l1ref1 += 2;
l1mv0 += 2*b4_stride;
l1mv1 += 2*b4_stride;
}
@@ -342,11 +342,12 @@ single_col:
fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
+ assert(b8_stride==2);
/* col_zero_flag */
- if(!IS_INTRA(mb_type_col[0]) && !h->ref_list[1][0].long_ref && ( l1ref0[x8 + y8*b8_stride] == 0
- || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
+ if(!IS_INTRA(mb_type_col[0]) && !h->ref_list[1][0].long_ref && ( l1ref0[i8] == 0
+ || (l1ref0[i8] < 0 && l1ref1[i8] == 0
&& h->x264_build>33U))){
- const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
+ const int16_t (*l1mv)[2]= l1ref0[i8] == 0 ? l1mv0 : l1mv1;
if(IS_SUB_8X8(sub_mb_type)){
const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
@@ -381,7 +382,7 @@ single_col:
static void pred_temp_direct_motion(H264Context * const h, int *mb_type){
MpegEncContext * const s = &h->s;
- int b8_stride = h->b8_stride;
+ int b8_stride = 2;
int b4_stride = h->b_stride;
int mb_xy = h->mb_xy;
int mb_type_col[2];
@@ -406,7 +407,7 @@ static void pred_temp_direct_motion(H264Context * const h, int *mb_type){
mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
- b8_stride *= 3;
+ b8_stride = 2+4*s->mb_stride;
b4_stride *= 6;
sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
@@ -441,12 +442,12 @@ single_col:
l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
- l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
- l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
+ l1ref0 = &h->ref_list[1][0].ref_index [0][4*mb_xy];
+ l1ref1 = &h->ref_list[1][0].ref_index [1][4*mb_xy];
if(!b8_stride){
if(s->mb_y&1){
- l1ref0 += h->b8_stride;
- l1ref1 += h->b8_stride;
+ l1ref0 += 2;
+ l1ref1 += 2;
l1mv0 += 2*b4_stride;
l1mv1 += 2*b4_stride;
}
@@ -549,11 +550,12 @@ single_col:
continue;
}
- ref0 = l1ref0[x8 + y8*b8_stride];
+ assert(b8_stride == 2);
+ ref0 = l1ref0[i8];
if(ref0 >= 0)
ref0 = map_col_to_list0[0][ref0 + ref_offset];
else{
- ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
+ ref0 = map_col_to_list0[1][l1ref1[i8] + ref_offset];
l1mv= l1mv1;
}
scale = dist_scale_factor[ref0];