From 015821229f96bf7e677f2a711a58dbea3009f574 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Tue, 12 Mar 2013 07:28:12 -0700 Subject: vp3: Use full transpose for all IDCTs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This way, the special IDCT permutations are no longer needed. This is similar to how H264 does it, and removes the dsputil dependency imposed by the scantable code. Also remove the unused type == 0 cases from the plain C version of the idct. Signed-off-by: Martin Storsjö --- libavcodec/vp3dsp.c | 92 ++++++++++++++++++++--------------------------------- 1 file changed, 35 insertions(+), 57 deletions(-) (limited to 'libavcodec/vp3dsp.c') diff --git a/libavcodec/vp3dsp.c b/libavcodec/vp3dsp.c index d1a7db957d..94de0e5b96 100644 --- a/libavcodec/vp3dsp.c +++ b/libavcodec/vp3dsp.c @@ -54,11 +54,12 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int /* Inverse DCT on the rows now */ for (i = 0; i < 8; i++) { /* Check for non-zero values */ - if ( ip[0] | ip[1] | ip[2] | ip[3] | ip[4] | ip[5] | ip[6] | ip[7] ) { - A = M(xC1S7, ip[1]) + M(xC7S1, ip[7]); - B = M(xC7S1, ip[1]) - M(xC1S7, ip[7]); - C = M(xC3S5, ip[3]) + M(xC5S3, ip[5]); - D = M(xC3S5, ip[5]) - M(xC5S3, ip[3]); + if ( ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] | + ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] ) { + A = M(xC1S7, ip[1 * 8]) + M(xC7S1, ip[7 * 8]); + B = M(xC7S1, ip[1 * 8]) - M(xC1S7, ip[7 * 8]); + C = M(xC3S5, ip[3 * 8]) + M(xC5S3, ip[5 * 8]); + D = M(xC3S5, ip[5 * 8]) - M(xC5S3, ip[3 * 8]); Ad = M(xC4S4, (A - C)); Bd = M(xC4S4, (B - D)); @@ -66,11 +67,11 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int Cd = A + C; Dd = B + D; - E = M(xC4S4, (ip[0] + ip[4])); - F = M(xC4S4, (ip[0] - ip[4])); + E = M(xC4S4, (ip[0 * 8] + ip[4 * 8])); + F = M(xC4S4, (ip[0 * 8] - ip[4 * 8])); - G = M(xC2S6, ip[2]) + M(xC6S2, ip[6]); - H = M(xC6S2, ip[2]) - M(xC2S6, ip[6]); + G = M(xC2S6, ip[2 * 8]) + M(xC6S2, ip[6 * 8]); + H = M(xC6S2, ip[2 * 8]) - M(xC2S6, ip[6 * 8]); Ed = E - G; Gd = E + G; @@ -82,33 +83,33 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int Hd = Bd + H; /* Final sequence of operations over-write original inputs. */ - ip[0] = Gd + Cd ; - ip[7] = Gd - Cd ; + ip[0 * 8] = Gd + Cd ; + ip[7 * 8] = Gd - Cd ; - ip[1] = Add + Hd; - ip[2] = Add - Hd; + ip[1 * 8] = Add + Hd; + ip[2 * 8] = Add - Hd; - ip[3] = Ed + Dd ; - ip[4] = Ed - Dd ; + ip[3 * 8] = Ed + Dd ; + ip[4 * 8] = Ed - Dd ; - ip[5] = Fd + Bdd; - ip[6] = Fd - Bdd; + ip[5 * 8] = Fd + Bdd; + ip[6 * 8] = Fd - Bdd; } - ip += 8; /* next row */ + ip += 1; /* next row */ } ip = input; for ( i = 0; i < 8; i++) { /* Check for non-zero values (bitwise or faster than ||) */ - if ( ip[1 * 8] | ip[2 * 8] | ip[3 * 8] | - ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] ) { + if ( ip[1] | ip[2] | ip[3] | + ip[4] | ip[5] | ip[6] | ip[7] ) { - A = M(xC1S7, ip[1*8]) + M(xC7S1, ip[7*8]); - B = M(xC7S1, ip[1*8]) - M(xC1S7, ip[7*8]); - C = M(xC3S5, ip[3*8]) + M(xC5S3, ip[5*8]); - D = M(xC3S5, ip[5*8]) - M(xC5S3, ip[3*8]); + A = M(xC1S7, ip[1]) + M(xC7S1, ip[7]); + B = M(xC7S1, ip[1]) - M(xC1S7, ip[7]); + C = M(xC3S5, ip[3]) + M(xC5S3, ip[5]); + D = M(xC3S5, ip[5]) - M(xC5S3, ip[3]); Ad = M(xC4S4, (A - C)); Bd = M(xC4S4, (B - D)); @@ -116,16 +117,16 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int Cd = A + C; Dd = B + D; - E = M(xC4S4, (ip[0*8] + ip[4*8])) + 8; - F = M(xC4S4, (ip[0*8] - ip[4*8])) + 8; + E = M(xC4S4, (ip[0] + ip[4])) + 8; + F = M(xC4S4, (ip[0] - ip[4])) + 8; if(type==1){ //HACK E += 16*128; F += 16*128; } - G = M(xC2S6, ip[2*8]) + M(xC6S2, ip[6*8]); - H = M(xC6S2, ip[2*8]) - M(xC2S6, ip[6*8]); + G = M(xC2S6, ip[2]) + M(xC6S2, ip[6]); + H = M(xC6S2, ip[2]) - M(xC2S6, ip[6]); Ed = E - G; Gd = E + G; @@ -137,19 +138,7 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int Hd = Bd + H; /* Final sequence of operations over-write original inputs. */ - if(type==0){ - ip[0*8] = (Gd + Cd ) >> 4; - ip[7*8] = (Gd - Cd ) >> 4; - - ip[1*8] = (Add + Hd ) >> 4; - ip[2*8] = (Add - Hd ) >> 4; - - ip[3*8] = (Ed + Dd ) >> 4; - ip[4*8] = (Ed - Dd ) >> 4; - - ip[5*8] = (Fd + Bdd ) >> 4; - ip[6*8] = (Fd - Bdd ) >> 4; - }else if(type==1){ + if (type == 1) { dst[0*stride] = av_clip_uint8((Gd + Cd ) >> 4); dst[7*stride] = av_clip_uint8((Gd - Cd ) >> 4); @@ -176,16 +165,7 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int } } else { - if(type==0){ - ip[0*8] = - ip[1*8] = - ip[2*8] = - ip[3*8] = - ip[4*8] = - ip[5*8] = - ip[6*8] = - ip[7*8] = ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20); - }else if(type==1){ + if (type == 1) { dst[0*stride]= dst[1*stride]= dst[2*stride]= @@ -193,10 +173,10 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int dst[4*stride]= dst[5*stride]= dst[6*stride]= - dst[7*stride]= av_clip_uint8(128 + ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20)); + dst[7*stride]= av_clip_uint8(128 + ((xC4S4 * ip[0] + (IdctAdjustBeforeShift<<16))>>20)); }else{ - if(ip[0*8]){ - int v= ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20); + if(ip[0]){ + int v= ((xC4S4 * ip[0] + (IdctAdjustBeforeShift<<16))>>20); dst[0*stride] = av_clip_uint8(dst[0*stride] + v); dst[1*stride] = av_clip_uint8(dst[1*stride] + v); dst[2*stride] = av_clip_uint8(dst[2*stride] + v); @@ -209,7 +189,7 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int } } - ip++; /* next column */ + ip += 8; /* next column */ dst++; } } @@ -307,8 +287,6 @@ av_cold void ff_vp3dsp_init(VP3DSPContext *c, int flags) c->v_loop_filter = vp3_v_loop_filter_c; c->h_loop_filter = vp3_h_loop_filter_c; - c->idct_perm = FF_NO_IDCT_PERM; - if (ARCH_ARM) ff_vp3dsp_init_arm(c, flags); if (ARCH_BFIN) -- cgit v1.2.3