summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Michael Niedermayer <michaelni@gmx.at> 2007-08-21 16:29:40 +0000
committer: Michael Niedermayer <michaelni@gmx.at> 2007-08-21 16:29:40 +0000
commit: ce611a27be7c9201b5920d8232e68209529065c4 (patch)
tree: 7d02fb42432300f1ca74277ab0aa77abab7b9fd8
parent: 7506d47aa320beb00369325b2e868f4a84c36af0 (diff)
Change rounding of the horizontal DWT to match the vertical one.
This allows some simplifications and optimizations and should not have any effect on quality.

Originally committed as revision 10172 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r-- libavcodec/i386/snowdsp_mmx.c 44
-rw-r--r-- libavcodec/snow.c 12
-rw-r--r-- libavcodec/snow.h 4
-rw-r--r-- tests/ffmpeg.regression.ref 6
-rw-r--r-- tests/rotozoom.regression.ref 6
-rw-r--r-- tests/seek.regression.ref 40
6 files changed, 51 insertions, 61 deletions
diff --git a/libavcodec/i386/snowdsp_mmx.c b/libavcodec/i386/snowdsp_mmx.c
index f2eb14b2b3..03f622b756 100644
--- a/libavcodec/i386/snowdsp_mmx.c
+++ b/libavcodec/i386/snowdsp_mmx.c
@@ -111,8 +111,7 @@ void ff_snow_horizontal_compose97i_sse2(DWTELEM *b, int width){
i = 0;
asm volatile(
- "pcmpeqd %%xmm7, %%xmm7 \n\t"
- "psrad $29, %%xmm7 \n\t"
+ "pslld $1, %%xmm7 \n\t"
::);
for(; i<w_l-7; i+=8){
asm volatile(
@@ -157,25 +156,21 @@ void ff_snow_horizontal_compose97i_sse2(DWTELEM *b, int width){
"movdqu 20(%1), %%xmm6 \n\t"
"paddd (%1), %%xmm2 \n\t"
"paddd 16(%1), %%xmm6 \n\t"
- "movdqa %%xmm2, %%xmm0 \n\t"
- "movdqa %%xmm6, %%xmm4 \n\t"
- "pslld $2, %%xmm2 \n\t"
- "pslld $2, %%xmm6 \n\t"
- "psubd %%xmm2, %%xmm0 \n\t"
- "psubd %%xmm6, %%xmm4 \n\t"
- "psrad $1, %%xmm0 \n\t"
- "psrad $1, %%xmm4 \n\t"
- "movdqu (%0), %%xmm2 \n\t"
- "movdqu 16(%0), %%xmm6 \n\t"
- "psubd %%xmm0, %%xmm2 \n\t"
- "psubd %%xmm4, %%xmm6 \n\t"
+ "movdqu (%0), %%xmm0 \n\t"
+ "movdqu 16(%0), %%xmm4 \n\t"
+ "paddd %%xmm2, %%xmm0 \n\t"
+ "paddd %%xmm6, %%xmm4 \n\t"
+ "psrad $1, %%xmm2 \n\t"
+ "psrad $1, %%xmm6 \n\t"
+ "paddd %%xmm0, %%xmm2 \n\t"
+ "paddd %%xmm4, %%xmm6 \n\t"
"movdqa %%xmm2, (%2) \n\t"
"movdqa %%xmm6, 16(%2) \n\t"
:: "r"(&src[i]), "r"(&b[i]), "r"(&temp[i])
: "memory"
);
}
- snow_horizontal_compose_lift_lead_out(i, temp, src, b, width, w_r, 1, -W_AM, W_AO, W_AS);
+ snow_horizontal_compose_lift_lead_out(i, temp, src, b, width, w_r, 1, -W_AM, W_AO+1, W_AS);
}
{
@@ -291,10 +286,9 @@ void ff_snow_horizontal_compose97i_mmx(DWTELEM *b, int width){
DWTELEM * const ref = b+w2 - 1;
i = 1;
- b[0] = b[0] + (((2 * ref[1] + W_BO-1) + 4 * b[0]) >> W_BS);
+ b[0] = b[0] + (((2 * ref[1] + W_BO) + 4 * b[0]) >> W_BS);
asm volatile(
- "pcmpeqd %%mm7, %%mm7 \n\t"
- "psrld $29, %%mm7 \n\t"
+ "pslld $1, %%mm7 \n\t"
::);
for(; i<w_l-3; i+=4){
asm volatile(
@@ -333,16 +327,12 @@ void ff_snow_horizontal_compose97i_mmx(DWTELEM *b, int width){
"movq 12(%1), %%mm6 \n\t"
"paddd (%1), %%mm2 \n\t"
"paddd 8(%1), %%mm6 \n\t"
- "pxor %%mm0, %%mm0 \n\t" //note: the 2 xor could be avoided if we would flip the rounding direction
- "pxor %%mm4, %%mm4 \n\t"
- "psubd %%mm2, %%mm0 \n\t"
- "psubd %%mm6, %%mm4 \n\t"
- "psrad $1, %%mm0 \n\t"
- "psrad $1, %%mm4 \n\t"
- "psubd %%mm0, %%mm2 \n\t"
- "psubd %%mm4, %%mm6 \n\t"
"movq (%0), %%mm0 \n\t"
"movq 8(%0), %%mm4 \n\t"
+ "paddd %%mm2, %%mm0 \n\t"
+ "paddd %%mm6, %%mm4 \n\t"
+ "psrad $1, %%mm2 \n\t"
+ "psrad $1, %%mm6 \n\t"
"paddd %%mm0, %%mm2 \n\t"
"paddd %%mm4, %%mm6 \n\t"
"movq %%mm2, (%2) \n\t"
@@ -351,7 +341,7 @@ void ff_snow_horizontal_compose97i_mmx(DWTELEM *b, int width){
: "memory"
);
}
- snow_horizontal_compose_lift_lead_out(i, temp, src, b, width, w_r, 1, -W_AM, W_AO, W_AS);
+ snow_horizontal_compose_lift_lead_out(i, temp, src, b, width, w_r, 1, -W_AM, W_AO+1, W_AS);
}
{
diff --git a/libavcodec/snow.c b/libavcodec/snow.c
index 8f2f4c0944..71049dfae9 100644
--- a/libavcodec/snow.c
+++ b/libavcodec/snow.c
@@ -775,7 +775,7 @@ static av_always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int
int i;
assert(shift == 4);
-#define LIFTS(src, ref, inv) ((inv) ? (src) + (((ref) + 4*(src))>>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23))
+#define LIFTS(src, ref, inv) ((inv) ? (src) + (((ref) + 4*(src))>>shift): -((-16*4*(src) + 4*(ref) + add + 5 + (5<<27))/(5*16) - (1<<23)))
if(mirror_left){
dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
dst += dst_step;
@@ -1113,8 +1113,8 @@ static void horizontal_decompose97i(DWTELEM *b, int width){
DWTELEM temp[width];
const int w2= (width+1)>>1;
- lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
- liftS(temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
+ lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1);
+ liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0);
lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
}
@@ -1150,7 +1150,7 @@ static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int w
#ifdef liftS
b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
#else
- b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23);
+ b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
#endif
}
}
@@ -1344,8 +1344,8 @@ void ff_snow_horizontal_compose97i(DWTELEM *b, int width){
lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
- liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO-1, W_BS, 0, 1);
- lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
+ liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1);
+ lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0);
}
static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
diff --git a/libavcodec/snow.h b/libavcodec/snow.h
index 19df4ad791..9dd66031cb 100644
--- a/libavcodec/snow.h
+++ b/libavcodec/snow.h
@@ -165,11 +165,11 @@ static av_always_inline void snow_horizontal_compose_lift_lead_out(int i, DWTELE
static av_always_inline void snow_horizontal_compose_liftS_lead_out(int i, DWTELEM * dst, DWTELEM * src, DWTELEM * ref, int width, int w){
for(; i<w; i++){
- dst[i] = src[i] + ((ref[i] + ref[(i+1)]+W_BO-1 + 4 * src[i]) >> W_BS);
+ dst[i] = src[i] + ((ref[i] + ref[(i+1)]+W_BO + 4 * src[i]) >> W_BS);
}
if(width&1){
- dst[w] = src[w] + ((2 * ref[w] + W_BO-1 + 4 * src[w]) >> W_BS);
+ dst[w] = src[w] + ((2 * ref[w] + W_BO + 4 * src[w]) >> W_BS);
}
}
diff --git a/tests/ffmpeg.regression.ref b/tests/ffmpeg.regression.ref
index 94ad71085e..ddca176149 100644
--- a/tests/ffmpeg.regression.ref
+++ b/tests/ffmpeg.regression.ref
@@ -141,9 +141,9 @@ f8f51fa737add17f7fecaefa118b57ed *./tests/data/a-ffv1.avi
2654678 ./tests/data/a-ffv1.avi
799d3db687f6cdd7a837ec156efc171f *./tests/data/out.yuv
stddev: 0.00 PSNR:99.99 bytes:7602176
-9078723c943de5d79490f54b99e6ea9e *./tests/data/a-snow.avi
-156656 ./tests/data/a-snow.avi
-f2932084b52e2ede167c9ba21eae0656 *./tests/data/out.yuv
+958d649d09b7361d5f00b5b3fcccbcd2 *./tests/data/a-snow.avi
+156606 ./tests/data/a-snow.avi
+b19cb7f9134f922326028c6bb44e96de *./tests/data/out.yuv
stddev: 23.14 PSNR:20.83 bytes:7602176
ba999e86070aa971376e7f317a022c37 *./tests/data/a-snow53.avi
3519486 ./tests/data/a-snow53.avi
diff --git a/tests/rotozoom.regression.ref b/tests/rotozoom.regression.ref
index c113e106dc..21fe28bb64 100644
--- a/tests/rotozoom.regression.ref
+++ b/tests/rotozoom.regression.ref
@@ -141,9 +141,9 @@ d72b0960e162d4998b9acbabb07e99ab *./tests/data/a-ffv1.avi
3525804 ./tests/data/a-ffv1.avi
dde5895817ad9d219f79a52d0bdfb001 *./tests/data/out.yuv
stddev: 0.00 PSNR:99.99 bytes:7602176
-40a6e938ac2bd92ee12cd57925e86454 *./tests/data/a-snow.avi
-68758 ./tests/data/a-snow.avi
-1e356854142898c7c4aab4bfedadf235 *./tests/data/out.yuv
+2cfa1bdb443d04a890208a83fd239461 *./tests/data/a-snow.avi
+68872 ./tests/data/a-snow.avi
+64a0495b7ab53509d3b791465262795c *./tests/data/out.yuv
stddev: 10.86 PSNR:27.40 bytes:7602176
3d0da6aeec9b80c6ee0ff4b747bdd0f0 *./tests/data/a-snow53.avi
2721980 ./tests/data/a-snow53.avi
diff --git a/tests/seek.regression.ref b/tests/seek.regression.ref
index 798632b239..d9d4b05ff5 100644
--- a/tests/seek.regression.ref
+++ b/tests/seek.regression.ref
@@ -2046,51 +2046,51 @@ ret: 0 st:-1 ts:-0.645825 flags:1
ret: 0 st: 0 dts:0.040000 pts:0.040000 pos:9610 size:1075 flags:0
----------------
tests/data/a-snow.avi
-ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2986 flags:1
+ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1
ret: 0 st:-1 ts:-1.000000 flags:0
-ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2986 flags:1
+ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1
ret: 0 st:-1 ts:1.894167 flags:1
-ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46794 size:3663 flags:1
+ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46908 size:3663 flags:1
ret: 0 st: 0 ts:0.800000 flags:0
-ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31726 size:3478 flags:1
+ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31690 size:3478 flags:1
ret:-1 st: 0 ts:-0.320000 flags:1
ret:-1 st:-1 ts:2.576668 flags:0
ret: 0 st:-1 ts:1.470835 flags:1
-ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46794 size:3663 flags:1
+ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46908 size:3663 flags:1
ret: 0 st: 0 ts:0.360000 flags:0
-ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:18006 size:3229 flags:1
+ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:17990 size:3229 flags:1
ret:-1 st: 0 ts:-0.760000 flags:1
ret:-1 st:-1 ts:2.153336 flags:0
ret: 0 st:-1 ts:1.047503 flags:1
-ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31726 size:3478 flags:1
+ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31690 size:3478 flags:1
ret: 0 st: 0 ts:-0.040000 flags:0
-ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2986 flags:1
+ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1
ret: 0 st: 0 ts:2.840000 flags:1
-ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63240 size:3635 flags:1
+ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63350 size:3635 flags:1
ret: 0 st:-1 ts:1.730004 flags:0
-ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63240 size:3635 flags:1
+ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63350 size:3635 flags:1
ret: 0 st:-1 ts:0.624171 flags:1
-ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:18006 size:3229 flags:1
+ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:17990 size:3229 flags:1
ret: 0 st: 0 ts:-0.480000 flags:0
-ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2986 flags:1
+ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1
ret: 0 st: 0 ts:2.400000 flags:1
-ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63240 size:3635 flags:1
+ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63350 size:3635 flags:1
ret: 0 st:-1 ts:1.306672 flags:0
-ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46794 size:3663 flags:1
+ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46908 size:3663 flags:1
ret: 0 st:-1 ts:0.200839 flags:1
-ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2986 flags:1
+ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1
ret: 0 st: 0 ts:-0.920000 flags:0
-ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2986 flags:1
+ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1
ret: 0 st: 0 ts:2.000000 flags:1
-ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63240 size:3635 flags:1
+ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63350 size:3635 flags:1
ret: 0 st:-1 ts:0.883340 flags:0
-ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31726 size:3478 flags:1
+ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31690 size:3478 flags:1
ret:-1 st:-1 ts:-0.222493 flags:1
ret:-1 st: 0 ts:2.680000 flags:0
ret: 0 st: 0 ts:1.560000 flags:1
-ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46794 size:3663 flags:1
+ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46908 size:3663 flags:1
ret: 0 st:-1 ts:0.460008 flags:0
-ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:18006 size:3229 flags:1
+ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:17990 size:3229 flags:1
ret:-1 st:-1 ts:-0.645825 flags:1
----------------
tests/data/a-snow53.avi