summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Christophe Gisquet <christophe.gisquet@gmail.com> 2014-05-22 17:48:20 +0000
committer: Michael Niedermayer <michaelni@gmx.at> 2014-05-24 03:45:17 +0200
commit: 9722a6a3f35c824d6809a54964900f2490cc82dd (patch)
tree: c1cd33a5a9942320a839ef58733a8b7c6682a6a8
parent: f0aca50e0b21d7c97b091f8e551719e0da574e12 (diff)
x86: hpeldsp: implement SSE2 put_pixels16_xy2
This is obviously equivalent to the avg version, without the avg.

3223(mmx) -> 2006(sse2)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
-rw-r--r-- libavcodec/x86/hpeldsp.asm 21
-rw-r--r-- libavcodec/x86/hpeldsp_init.c 3
2 files changed, 18 insertions, 6 deletions
diff --git a/libavcodec/x86/hpeldsp.asm b/libavcodec/x86/hpeldsp.asm
index 1d26c4516e..4af423aee5 100644
--- a/libavcodec/x86/hpeldsp.asm
+++ b/libavcodec/x86/hpeldsp.asm
@@ -551,11 +551,11 @@ AVG_APPROX_PIXELS8_XY2
; void ff_avg_pixels16_xy2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
-%macro AVG_PIXELS_XY2 0
+%macro SET_PIXELS_XY2 1
%if cpuflag(sse2)
-cglobal avg_pixels16_xy2, 4,5,8
+cglobal %1_pixels16_xy2, 4,5,8
%else
-cglobal avg_pixels8_xy2, 4,5
+cglobal %1_pixels8_xy2, 4,5
%endif
pxor m7, m7
mova m6, [pw_2]
@@ -588,9 +588,13 @@ cglobal avg_pixels8_xy2, 4,5
paddusw m5, m1
psrlw m4, 2
psrlw m5, 2
+%ifidn %1, avg
mova m3, [r0+r4]
packuswb m4, m5
PAVGB m4, m3
+%else
+ packuswb m4, m5
+%endif
mova [r0+r4], m4
add r4, r2
@@ -610,9 +614,13 @@ cglobal avg_pixels8_xy2, 4,5
paddusw m1, m5
psrlw m0, 2
psrlw m1, 2
+%ifidn %1, avg
mova m3, [r0+r4]
packuswb m0, m1
PAVGB m0, m3
+%else
+ packuswb m0, m1
+%endif
mova [r0+r4], m0
add r4, r2
sub r3d, 2
@@ -621,8 +629,9 @@ cglobal avg_pixels8_xy2, 4,5
%endmacro
INIT_MMX mmxext
-AVG_PIXELS_XY2
+SET_PIXELS_XY2 avg
INIT_MMX 3dnow
-AVG_PIXELS_XY2
+SET_PIXELS_XY2 avg
INIT_XMM sse2
-AVG_PIXELS_XY2
+SET_PIXELS_XY2 put
+SET_PIXELS_XY2 avg
diff --git a/libavcodec/x86/hpeldsp_init.c b/libavcodec/x86/hpeldsp_init.c
index 05bd561f59..cda16dc722 100644
--- a/libavcodec/x86/hpeldsp_init.c
+++ b/libavcodec/x86/hpeldsp_init.c
@@ -48,6 +48,8 @@ void ff_put_pixels16_y2_sse2(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_avg_pixels16_y2_sse2(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
+void ff_put_pixels16_xy2_sse2(uint8_t *block, const uint8_t *pixels,
+ ptrdiff_t line_size, int h);
void ff_avg_pixels16_xy2_sse2(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_put_no_rnd_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
@@ -296,6 +298,7 @@ static void hpeldsp_init_sse2(HpelDSPContext *c, int flags, int cpu_flags)
c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_sse2;
c->put_pixels_tab[0][1] = ff_put_pixels16_x2_sse2;
c->put_pixels_tab[0][2] = ff_put_pixels16_y2_sse2;
+ c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_sse2;
c->avg_pixels_tab[0][0] = ff_avg_pixels16_sse2;
c->avg_pixels_tab[0][1] = ff_avg_pixels16_x2_sse2;
c->avg_pixels_tab[0][2] = ff_avg_pixels16_y2_sse2;