author     Timothy Gu <timothygu99@gmail.com>  2015-11-01 03:12:45 -0800
committer  Timothy Gu <timothygu99@gmail.com>  2015-11-07 14:31:34 -0800
commit     4b80b895a9db60fb6ab2fb64d8c7c9faabbdda79 (patch)
tree       e0c29e4c2cb500cf00f0df8c45e55c67b9657ce8 /libavcodec/x86
parent     7cea3430a56fb0ff6ef60f08620fd3875e7bfeb6 (diff)
pixblockdsp: x86: Condense diff_pixels_* to a shared macro
Reviewed-by: Ronald S. Bultje <rsbultje@gmail.com>
Reviewed-by: James Almer <jamrial@gmail.com>
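For context, diff_pixels writes the element-wise difference of two 8x8 byte blocks as 64 int16_t values; the patch below merges the MMX and SSE2 assembly versions of it into one macro. A minimal C sketch of that behaviour, matching the prototype quoted in the diff (the name diff_pixels_ref is ours, not part of the patch):

/* Reference sketch only: subtract two 8x8 byte blocks and store the
 * result as 64 int16_t values, which is what the assembly below computes.
 * The name diff_pixels_ref is hypothetical, not part of this patch. */
#include <stdint.h>

static void diff_pixels_ref(int16_t *block, const uint8_t *s1,
                            const uint8_t *s2, int stride)
{
    for (int i = 0; i < 8; i++) {
        for (int j = 0; j < 8; j++)
            block[j] = s1[j] - s2[j];   /* per-pixel difference, widened to 16 bit */
        s1    += stride;
        s2    += stride;
        block += 8;                     /* output block is densely packed */
    }
}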
Diffstat (limited to 'libavcodec/x86')
-rw-r--r--  libavcodec/x86/pixblockdsp.asm  64
1 file changed, 30 insertions(+), 34 deletions(-)
diff --git a/libavcodec/x86/pixblockdsp.asm b/libavcodec/x86/pixblockdsp.asm
index 7c5377b2bb..2864d0c977 100644
--- a/libavcodec/x86/pixblockdsp.asm
+++ b/libavcodec/x86/pixblockdsp.asm
@@ -80,54 +80,50 @@ cglobal get_pixels, 3, 4, 5
mova [r0+0x70], m3
RET
-INIT_MMX mmx
; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
; int stride);
-cglobal diff_pixels, 4,5
- movsxdifnidn r3, r3d
- pxor m7, m7
- add r0, 128
- mov r4, -128
-.loop:
- mova m0, [r1]
- mova m2, [r2]
- mova m1, m0
- mova m3, m2
- punpcklbw m0, m7
- punpckhbw m1, m7
- punpcklbw m2, m7
- punpckhbw m3, m7
- psubw m0, m2
- psubw m1, m3
- mova [r0+r4+0], m0
- mova [r0+r4+8], m1
- add r1, r3
- add r2, r3
- add r4, 16
- jne .loop
- REP_RET
-
-INIT_XMM sse2
-cglobal diff_pixels, 4, 5, 5
+%macro DIFF_PIXELS 0
+cglobal diff_pixels, 4,5,5
movsxdifnidn r3, r3d
pxor m4, m4
add r0, 128
mov r4, -128
.loop:
- movh m0, [r1]
- movh m2, [r2]
- movh m1, [r1+r3]
- movh m3, [r2+r3]
+ movq m0, [r1]
+ movq m2, [r2]
+%if mmsize == 8
+ movq m1, m0
+ movq m3, m2
+ punpcklbw m0, m4
+ punpckhbw m1, m4
+ punpcklbw m2, m4
+ punpckhbw m3, m4
+%else
+ movq m1, [r1+r3]
+ movq m3, [r2+r3]
punpcklbw m0, m4
punpcklbw m1, m4
punpcklbw m2, m4
punpcklbw m3, m4
+%endif
psubw m0, m2
psubw m1, m3
- mova [r0+r4+0 ], m0
- mova [r0+r4+16], m1
+ mova [r0+r4+0], m0
+ mova [r0+r4+mmsize], m1
+%if mmsize == 8
+ add r1, r3
+ add r2, r3
+%else
lea r1, [r1+r3*2]
lea r2, [r2+r3*2]
- add r4, 32
+%endif
+ add r4, 2 * mmsize
jne .loop
RET
+%endmacro
+
+INIT_MMX mmx
+DIFF_PIXELS
+
+INIT_XMM sse2
+DIFF_PIXELS
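With x86inc.asm, INIT_MMX mmx sets mmsize to 8 and maps m0-m7 to MMX registers, while INIT_XMM sse2 sets mmsize to 16 and uses XMM registers, so the two DIFF_PIXELS invocations expand the single body above into ff_diff_pixels_mmx and ff_diff_pixels_sse2. A hedged sketch of how such per-CPU versions are typically selected at runtime (select_diff_pixels and diff_pixels_fn are our names; the real wiring lives in the pixblockdsp init code and may differ):

/* Sketch only: choose the widest generated version based on CPU flags,
 * e.g. select_diff_pixels(av_get_cpu_flags()). */
#include <stdint.h>
#include "libavutil/cpu.h"

typedef void (*diff_pixels_fn)(int16_t *block, const uint8_t *s1,
                               const uint8_t *s2, int stride);

void ff_diff_pixels_mmx (int16_t *block, const uint8_t *s1,
                         const uint8_t *s2, int stride);
void ff_diff_pixels_sse2(int16_t *block, const uint8_t *s1,
                         const uint8_t *s2, int stride);

static diff_pixels_fn select_diff_pixels(int cpu_flags)
{
    if (cpu_flags & AV_CPU_FLAG_SSE2)
        return ff_diff_pixels_sse2;     /* prefer the 16-byte SSE2 version */
    if (cpu_flags & AV_CPU_FLAG_MMX)
        return ff_diff_pixels_mmx;
    return NULL;                        /* caller falls back to the C version */
}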