From f5a719287de251df7bac3800f8411c434b18b7ca Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Sun, 10 Apr 2005 14:21:53 +0000 Subject: rescale coefficients during IDWT, that way the lifting steps are much simpler and faster Originally committed as revision 4119 to svn://svn.ffmpeg.org/ffmpeg/trunk --- libavcodec/snow.c | 54 +++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 45 insertions(+), 9 deletions(-) (limited to 'libavcodec') diff --git a/libavcodec/snow.c b/libavcodec/snow.c index f96d9343a7..26efe249df 100644 --- a/libavcodec/snow.c +++ b/libavcodec/snow.c @@ -756,6 +756,29 @@ static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int ds } } +static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){ + const int mirror_left= !highpass; + const int mirror_right= (width&1) ^ highpass; + const int w= (width>>1) - 1 + (highpass & width); + int i; + + assert(shift == 4); +#define LIFTS(src, ref, inv) ((inv) ? (src) - (((ref) - 4*(src))>>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23)) + if(mirror_left){ + dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse); + dst += dst_step; + src += src_step; + } + + for(i=0; i>1; lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0); - lift (temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0); + liftS(temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0); lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0); lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0); } @@ -1177,7 +1201,11 @@ static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int w int i; for(i=0; i>W_BS; +#else + b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23); +#endif } } @@ -1373,7 +1401,7 @@ static void horizontal_compose97i(DWTELEM *b, int width){ lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1); lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1); - lift (b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1); + liftS(b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1); lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1); } @@ -1404,7 +1432,11 @@ static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int wid int i; for(i=0; i>W_BS; +#else + b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS; +#endif } } @@ -1430,7 +1462,11 @@ static void vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM * r+= r>>8; b3[i] -= (r+W_CO)>>W_CS; #endif +#ifdef liftS b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS; +#else + b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS; +#endif b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; } } -- cgit v1.2.3