From f24a51593145dd471ce5bdbb0694caae1fbead61 Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Sun, 15 Oct 2006 20:40:50 +0000 Subject: shift CABACContext.range right; this reduces the number of shifts needed in get_cabac() and is slightly faster on P3 (and should be much faster on P4, as the P4, except for the more recent variants, lacks an integer shifter and so shifts have ~10 times longer latency than simple operations like adds) Originally committed as revision 6702 to svn://svn.ffmpeg.org/ffmpeg/trunk --- libavcodec/cabac.c | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) (limited to 'libavcodec/cabac.c') diff --git a/libavcodec/cabac.c b/libavcodec/cabac.c index 0cd5144801..91db6c33ea 100644 --- a/libavcodec/cabac.c +++ b/libavcodec/cabac.c @@ -51,7 +51,7 @@ static const uint8_t lps_range[64][4]= { }; uint8_t ff_h264_mlps_state[4*64]; -uint8_t ff_h264_lps_range[2*65][4]; +uint8_t ff_h264_lps_range[4][2*64]; uint8_t ff_h264_lps_state[2*64]; uint8_t ff_h264_mps_state[2*64]; @@ -76,8 +76,8 @@ static const uint8_t lps_state[64]= { 33,33,34,34,35,35,35,36, 36,36,37,37,37,38,38,63, }; - -const uint8_t ff_h264_norm_shift[128]= { +#if 0 +const uint8_t ff_h264_norm_shift_old[128]= { 7,6,5,5,4,4,4,4,3,3,3,3,3,3,3,3, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, @@ -87,6 +87,29 @@ const uint8_t ff_h264_norm_shift[128]= { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, }; +#endif +const uint8_t ff_h264_norm_shift[512]= { + 9,8,7,7,6,6,6,6,5,5,5,5,5,5,5,5, + 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +}; /** * @@ -121,7 +144,7 @@ void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size){ c->low = (*c->bytestream++)<<10; #endif c->low+= ((*c->bytestream++)<<2) + 2; - c->range= 0x1FE<<(CABAC_BITS + 1); + c->range= 0x1FE; } void ff_init_cabac_states(CABACContext *c){ @@ -129,8 +152,8 @@ void ff_init_cabac_states(CABACContext *c){ for(i=0; i<64; i++){ for(j=0; j<4; j++){ //FIXME check if this is worth the 1 shift we save - ff_h264_lps_range[2*i+0][j+4]= - ff_h264_lps_range[2*i+1][j+4]= lps_range[i][j]; + ff_h264_lps_range[j][2*i+0]= + ff_h264_lps_range[j][2*i+1]= lps_range[i][j]; } ff_h264_mlps_state[128+2*i+0]= -- cgit v1.2.3