summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author James Almer <jamrial@gmail.com> 2014-05-26 03:49:35 -0300
committer Michael Niedermayer <michaelni@gmx.at> 2014-05-27 05:55:11 +0200
commit e64e079ece7d037686c4c0f97eac9c62af6300b1 (patch)
tree 96227a1b784bbf67a6d3b6e9d4b9640b98ae6db6
parent a0c5cd3475fd93930604e4ec5ac1336f5732c04b (diff)
x86/dsputilenc: implement SSE2 version of diff_pixels
Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
-rw-r--r-- libavcodec/x86/dsputilenc.asm 25
-rw-r--r-- libavcodec/x86/dsputilenc_mmx.c 3
2 files changed, 28 insertions, 0 deletions
diff --git a/libavcodec/x86/dsputilenc.asm b/libavcodec/x86/dsputilenc.asm
index 7426c01dbb..ba8a1773c2 100644
--- a/libavcodec/x86/dsputilenc.asm
+++ b/libavcodec/x86/dsputilenc.asm
@@ -419,6 +419,31 @@ cglobal diff_pixels, 4,5
jne .loop
REP_RET
+INIT_XMM sse2 ; SSE2 variant: mN below are 128-bit XMM registers
+cglobal diff_pixels, 4, 5, 5 ; r0=block, r1=s1, r2=s2, r3=stride (order per the C prototype); x86inc: 4 args, 5 GPRs, 5 XMM regs
+ movsxdifnidn r3, r3d ; sign-extend the 32-bit stride to pointer width where needed
+ pxor m4, m4 ; m4 = 0, used to zero-extend bytes to words
+ add r0, 128 ; point r0 at the end of the 128-byte output (8 rows x 8 int16)
+ mov r4, -128 ; negative byte offset that counts up to 0, doubling as the loop counter
+.loop: ; each iteration handles two rows of 8 pixels
+ movh m0, [r1] ; 8 bytes of s1, row n
+ movh m2, [r2] ; 8 bytes of s2, row n
+ movh m1, [r1+r3] ; 8 bytes of s1, row n+1
+ movh m3, [r2+r3] ; 8 bytes of s2, row n+1
+ punpcklbw m0, m4 ; widen unsigned bytes to 16-bit words by interleaving with zero
+ punpcklbw m1, m4
+ punpcklbw m2, m4
+ punpcklbw m3, m4
+ psubw m0, m2 ; row n:   s1 - s2 as 16-bit words
+ psubw m1, m3 ; row n+1: s1 - s2 as 16-bit words
+ mova [r0+r4+0 ], m0 ; aligned 16-byte store of row n (presumably block is 16-byte aligned; confirm against callers)
+ mova [r0+r4+16], m1 ; aligned 16-byte store of row n+1
+ lea r1, [r1+r3*2] ; advance s1 by two rows
+ lea r2, [r2+r3*2] ; advance s2 by two rows
+ add r4, 32 ; two rows of output written (2 x 16 bytes); sets ZF when offset reaches 0
+ jne .loop ; 128 / 32 = 4 iterations in total
+ RET
+
INIT_MMX mmx
; int ff_pix_sum16_mmx(uint8_t *pix, int line_size)
cglobal pix_sum16, 2, 3
diff --git a/libavcodec/x86/dsputilenc_mmx.c b/libavcodec/x86/dsputilenc_mmx.c
index e63d510ab9..acff94702f 100644
--- a/libavcodec/x86/dsputilenc_mmx.c
+++ b/libavcodec/x86/dsputilenc_mmx.c
@@ -36,6 +36,8 @@ void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size);
void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, int line_size);
void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
int stride);
+void ff_diff_pixels_sse2(int16_t *block, const uint8_t *s1, const uint8_t *s2,
+ int stride);
int ff_pix_sum16_mmx(uint8_t *pix, int line_size);
int ff_pix_norm1_mmx(uint8_t *pix, int line_size);
int ff_sum_abs_dctelem_mmx(int16_t *block);
@@ -971,6 +973,7 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx,
if (EXTERNAL_SSE2(cpu_flags)) {
c->sse[0] = ff_sse16_sse2;
c->sum_abs_dctelem = ff_sum_abs_dctelem_sse2;
+ c->diff_pixels = ff_diff_pixels_sse2;
#if HAVE_ALIGNED_STACK
c->hadamard8_diff[0] = ff_hadamard8_diff16_sse2;