summary refs log tree commit diff
diff options
context:
space:
mode:
author Michael Niedermayer <michaelni@gmx.at> 2011-11-01 21:41:01 +0100
committer Michael Niedermayer <michaelni@gmx.at> 2011-11-01 22:01:11 +0100
commit 754539a4095a40b111c40c169ba079c3e0018e74 (patch)
tree f1588e7acfc3e16205eedd42797079a3bcdbdc51
parent 0dc22e92f464283c82f0b0b9dd2d8a2d3fd1674f (diff)
dirac: Fix mmx/sse haar wavelet compose
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
-rw-r--r-- libavcodec/x86/dwt.c 53
-rw-r--r-- libavcodec/x86/dwt_yasm.asm 20
2 files changed, 37 insertions, 36 deletions
diff --git a/libavcodec/x86/dwt.c b/libavcodec/x86/dwt.c
index cc0a71186e..1d04c7dcc9 100644
--- a/libavcodec/x86/dwt.c
+++ b/libavcodec/x86/dwt.c
@@ -30,6 +30,8 @@ void ff_vertical_compose_dirac53iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b
void ff_vertical_compose_dd137iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, int width); \
void ff_vertical_compose_dd97iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, int width); \
void ff_vertical_compose_haar##ext(IDWTELEM *b0, IDWTELEM *b1, int width); \
+void ff_horizontal_compose_haar0i##ext(IDWTELEM *b, IDWTELEM *tmp, int w);\
+void ff_horizontal_compose_haar1i##ext(IDWTELEM *b, IDWTELEM *tmp, int w);\
\
static void vertical_compose53iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width) \
{ \
@@ -83,6 +85,28 @@ static void vertical_compose_haar##ext(IDWTELEM *b0, IDWTELEM *b1, int width) \
\
ff_vertical_compose_haar##ext(b0, b1, width_align); \
} \
+static void horizontal_compose_haar0i##ext(IDWTELEM *b, IDWTELEM *tmp, int w)\
+{ /* Wrapper: calls the ext-specific routine, then finishes indices x..w2-1 with a scalar loop. */\
+ int w2= w>>1; /* half of w; upper bound of the scalar loop below */\
+ int x= w2 - (w2&(align-1)); /* w2 rounded down to a multiple of align (assumes align is a power of two) */\
+ ff_horizontal_compose_haar0i##ext(b, tmp, w); /* external routine declared earlier in this macro; given the full width w */\
+\
+ for (; x < w2; x++) { /* scalar pass over the remaining indices */\
+ b[2*x ] = tmp[x]; /* even output sample is copied from tmp */\
+ b[2*x+1] = COMPOSE_HAARiH0(b[x+w2], tmp[x]); /* odd output sample combines b[x+w2] with tmp[x] */\
+ }\
+}\
+static void horizontal_compose_haar1i##ext(IDWTELEM *b, IDWTELEM *tmp, int w)\
+{ /* Wrapper: calls the ext-specific routine, then finishes indices x..w2-1 with a scalar loop. */\
+ int w2= w>>1; /* half of w; upper bound of the scalar loop below */\
+ int x= w2 - (w2&(align-1)); /* w2 rounded down to a multiple of align (assumes align is a power of two) */\
+ ff_horizontal_compose_haar1i##ext(b, tmp, w); /* external routine declared earlier in this macro; given the full width w */\
+\
+ for (; x < w2; x++) { /* scalar pass over the remaining indices */\
+ b[2*x ] = (tmp[x] + 1)>>1; /* even output sample: tmp[x] plus 1, shifted right by one */\
+ b[2*x+1] = (COMPOSE_HAARiH0(b[x+w2], tmp[x]) + 1)>>1; /* odd output sample: same +1 and shift applied to the COMPOSE_HAARiH0 result */\
+ }\
+}\
\
#if HAVE_YASM
@@ -95,11 +119,6 @@ COMPOSE_VERTICAL(_sse2, 8)
void ff_horizontal_compose_dd97i_ssse3(IDWTELEM *b, IDWTELEM *tmp, int w);
-void ff_horizontal_compose_haar0i_mmx(IDWTELEM *b, IDWTELEM *tmp, int w);
-void ff_horizontal_compose_haar1i_mmx(IDWTELEM *b, IDWTELEM *tmp, int w);
-void ff_horizontal_compose_haar0i_sse2(IDWTELEM *b, IDWTELEM *tmp, int w);
-void ff_horizontal_compose_haar1i_sse2(IDWTELEM *b, IDWTELEM *tmp, int w);
-
void ff_horizontal_compose_dd97i_end_c(IDWTELEM *b, IDWTELEM *tmp, int w2, int x)
{
for (; x < w2; x++) {
@@ -108,22 +127,6 @@ void ff_horizontal_compose_dd97i_end_c(IDWTELEM *b, IDWTELEM *tmp, int w2, int x
}
}
-void ff_horizontal_compose_haar0i_end_c(IDWTELEM *b, IDWTELEM *tmp, int w2, int x)
-{
- for (; x < w2; x++) {
- b[2*x ] = tmp[x];
- b[2*x+1] = COMPOSE_HAARiH0(b[x+w2], tmp[x]);
- }
-}
-
-void ff_horizontal_compose_haar1i_end_c(IDWTELEM *b, IDWTELEM *tmp, int w2, int x)
-{
- for (; x < w2; x++) {
- b[2*x ] = (tmp[x] + 1)>>1;
- b[2*x+1] = (COMPOSE_HAARiH0(b[x+w2], tmp[x]) + 1)>>1;
- }
-}
-
void ff_spatial_idwt_init_mmx(DWTContext *d, enum dwt_type type)
{
#if HAVE_YASM
@@ -148,11 +151,11 @@ void ff_spatial_idwt_init_mmx(DWTContext *d, enum dwt_type type)
break;
case DWT_DIRAC_HAAR0:
d->vertical_compose = vertical_compose_haar_mmx;
- d->horizontal_compose = ff_horizontal_compose_haar0i_mmx;
+ d->horizontal_compose = horizontal_compose_haar0i_mmx;
break;
case DWT_DIRAC_HAAR1:
d->vertical_compose = vertical_compose_haar_mmx;
- d->horizontal_compose = ff_horizontal_compose_haar1i_mmx;
+ d->horizontal_compose = horizontal_compose_haar1i_mmx;
break;
}
#endif
@@ -175,11 +178,11 @@ void ff_spatial_idwt_init_mmx(DWTContext *d, enum dwt_type type)
break;
case DWT_DIRAC_HAAR0:
d->vertical_compose = vertical_compose_haar_sse2;
-//MMXDISABLED d->horizontal_compose = ff_horizontal_compose_haar0i_sse2;
+ d->horizontal_compose = horizontal_compose_haar0i_sse2;
break;
case DWT_DIRAC_HAAR1:
d->vertical_compose = vertical_compose_haar_sse2;
- d->horizontal_compose = ff_horizontal_compose_haar1i_sse2;
+ d->horizontal_compose = horizontal_compose_haar1i_sse2;
break;
}
diff --git a/libavcodec/x86/dwt_yasm.asm b/libavcodec/x86/dwt_yasm.asm
index b008906278..7d7471c6e2 100644
--- a/libavcodec/x86/dwt_yasm.asm
+++ b/libavcodec/x86/dwt_yasm.asm
@@ -22,8 +22,6 @@
%include "x86inc.asm"
cextern horizontal_compose_dd97i_end_c
-cextern horizontal_compose_haar0i_end_c
-cextern horizontal_compose_haar1i_end_c
SECTION_RODATA
pw_1: times 8 dw 1
@@ -188,7 +186,7 @@ cglobal vertical_compose_haar_%1, 3,4,3, b0, b1, width
; void horizontal_compose_haari(IDWTELEM *b, IDWTELEM *tmp, int width)
cglobal horizontal_compose_haar%2i_%1, 3,6,4, b, tmp, w, x, w2, b_w2
mov w2d, wd
- xor xd, xd
+ xor xq, xq
shr w2d, 1
lea b_w2q, [bq+wq]
mova m3, [pw_1]
@@ -199,13 +197,13 @@ cglobal horizontal_compose_haar%2i_%1, 3,6,4, b, tmp, w, x, w2, b_w2
psraw m1, 1
psubw m0, m1
mova [tmpq + 2*xq], m0
- add xd, mmsize/2
- cmp xd, w2d
+ add xq, mmsize/2
+ cmp xq, w2q
jl .lowpass_loop
- xor xd, xd
- and w2d, ~(mmsize/2 - 1)
- cmp w2d, mmsize/2
+ xor xq, xq
+ and w2q, ~(mmsize/2 - 1)
+ cmp w2q, mmsize/2
jl .end
.highpass_loop:
@@ -226,11 +224,11 @@ cglobal horizontal_compose_haar%2i_%1, 3,6,4, b, tmp, w, x, w2, b_w2
mova [bq+4*xq], m0
mova [bq+4*xq+mmsize], m2
- add xd, mmsize/2
- cmp xd, w2d
+ add xq, mmsize/2
+ cmp xq, w2q
jl .highpass_loop
.end:
- END_HORIZONTAL horizontal_compose_haar%2i_end_c
+ REP_RET
%endmacro