author    Michael Niedermayer <michaelni@gmx.at>  2014-01-21 02:53:43 +0100
committer Michael Niedermayer <michaelni@gmx.at>  2014-01-21 02:55:41 +0100
commit    83b67ca056093d3b8fffc5e0b37f84177113a556 (patch)
tree      24789c49a29d2d891ff82ba1df349e3698247a7e /libavcodec/x86/lossless_videodsp.asm
parent    63d2be7533b7406f3fb58f2e3b7e3954dd1fcc6d (diff)
avcodec/x86/lossless_videodsp: Port Loren's add_hfyu_left_prediction_ssse3/sse4 to 16bit
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/lossless_videodsp.asm')
-rw-r--r--  libavcodec/x86/lossless_videodsp.asm | 84
1 file changed, 84 insertions(+), 0 deletions(-)
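For reference, the function being added is a running left prediction over 16-bit samples: each output is the sum of all inputs so far (plus the seed "left"), wrapped to the stream's bit depth by "mask", and the final accumulator is returned so the caller can carry it into the next call. A minimal C sketch of that scalar behaviour, following the prototype quoted in the diff below (an illustration, not necessarily FFmpeg's exact C reference):

    #include <stdint.h>

    /* Scalar model of add_hfyu_left_prediction_int16(): left prediction with
     * a running accumulator, masked to the sample bit depth (e.g. 0x03FF for
     * 10-bit input). Returns the final accumulator for the next call. */
    static int add_hfyu_left_prediction_int16_ref(uint16_t *dst, const uint16_t *src,
                                                  unsigned mask, int w, int left)
    {
        unsigned acc = left;
        for (int i = 0; i < w; i++) {
            acc    = (acc + src[i]) & mask;
            dst[i] = acc;
        }
        return acc;
    }

The SIMD versions below compute the same sums 8 (SSE4) or 4 (MMX/SSSE3) samples at a time; see the note after the diff for how they break the serial dependency.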
diff --git a/libavcodec/x86/lossless_videodsp.asm b/libavcodec/x86/lossless_videodsp.asm
index 8c429fa4d7..e71de76874 100644
--- a/libavcodec/x86/lossless_videodsp.asm
+++ b/libavcodec/x86/lossless_videodsp.asm
@@ -1,5 +1,6 @@
;******************************************************************************
;* SIMD lossless video DSP utils
+;* Copyright (c) 2008 Loren Merritt
;* Copyright (c) 2014 Michael Niedermayer
;*
;* This file is part of FFmpeg.
@@ -21,6 +22,13 @@
%include "libavutil/x86/x86util.asm"
+SECTION_RODATA
+
+pb_ef:               times 8 db 14,15 ; splat word 7 (last word of an xmm register)
+pb_67:               times 8 db  6, 7 ; splat word 3 (last word of an mm register)
+pb_zzzz2323zzzzabab: db -1,-1,-1,-1, 2, 3, 2, 3,-1,-1,-1,-1,10,11,10,11 ; word 1 -> words 2-3, word 5 -> words 6-7
+pb_zzzzzzzz67676767: db -1,-1,-1,-1,-1,-1,-1,-1, 6, 7, 6, 7, 6, 7, 6, 7 ; word 3 -> words 4-7
+
SECTION_TEXT
%macro ADD_INT16_LOOP 1 ; %1 = is_aligned
@@ -84,3 +92,79 @@ cglobal add_int16, 4,4,5, dst, src, mask, w
ADD_INT16_LOOP 1
.unaligned:
ADD_INT16_LOOP 0
+
+%macro ADD_HFYU_LEFT_LOOP_INT16 2 ; %1 = dst_is_aligned, %2 = src_is_aligned
+    add     wq, wq                  ; width in bytes (2 bytes per sample)
+    add     srcq, wq                ; point at the buffer ends and count
+    add     dstq, wq                ; wq up from -2*w towards 0
+    neg     wq
+%%.loop:
+%if %2
+    mova    m1, [srcq+wq]
+%else
+    movu    m1, [srcq+wq]
+%endif
+    mova    m2, m1
+    pslld   m1, 16                  ; shift each dword's low word into its high word
+    paddw   m1, m2                  ; step 1: word i += word i-1 within each dword
+    mova    m2, m1
+
+    pshufb  m1, m3                  ; step 2: gather word 1 into words 2-3, word 5
+    paddw   m1, m2                  ; into words 6-7, and add: 4-word prefix sums
+    pshufb  m0, m5                  ; splat the previous iteration's last sum
+%if mmsize == 16
+    mova    m2, m1
+    pshufb  m1, m4                  ; step 3: add the low half's total into the
+    paddw   m1, m2                  ; high 4 words: full 8-word prefix sums
+%endif
+    paddw   m0, m1                  ; add the carried-in "left" to every sum
+    pand    m0, m7                  ; wrap to the sample bit depth
+%if %1
+    mova    [dstq+wq], m0
+%else
+    movq    [dstq+wq], m0
+    movhps  [dstq+wq+8], m0
+%endif
+    add     wq, mmsize
+    jl %%.loop
+    mov     eax, mmsize-1
+    sub     eax, wd                 ; byte index of the last sample's high byte
+    mov     wd, eax
+    shl     wd, 8
+    lea     eax, [wd+eax-1]         ; pack pshufb indices for that word's two bytes
+    movd    m1, eax
+    pshufb  m0, m1                  ; move the last output word into lane 0
+    movd    eax, m0                 ; return it as the next "left"
+    RET
+%endmacro
+
+; int add_hfyu_left_prediction_int16(uint16_t *dst, const uint16_t *src, unsigned mask, int w, int left)
+INIT_MMX ssse3
+cglobal add_hfyu_left_prediction_int16, 4,4,8, dst, src, mask, w, left
+.skip_prologue:
+    mova    m5, [pb_67]
+    mova    m3, [pb_zzzz2323zzzzabab]
+    movd    m0, leftm
+    psllq   m0, 48                  ; place "left" in the top word of m0
+    movd    m7, maskm
+    SPLATW  m7, m7                  ; broadcast the bit-depth mask to all words
+    ADD_HFYU_LEFT_LOOP_INT16 1, 1
+
+INIT_XMM sse4
+cglobal add_hfyu_left_prediction_int16, 4,4,8, dst, src, mask, w, left
+    mova    m5, [pb_ef]
+    mova    m4, [pb_zzzzzzzz67676767]
+    mova    m3, [pb_zzzz2323zzzzabab]
+    movd    m0, leftm
+    pslldq  m0, 14                  ; place "left" in the top word of m0
+    movd    m7, maskm
+    SPLATW  m7, m7                  ; broadcast the bit-depth mask to all words
+    test    srcq, 15
+    jnz .src_unaligned
+    test    dstq, 15
+    jnz .dst_unaligned
+    ADD_HFYU_LEFT_LOOP_INT16 1, 1
+.dst_unaligned:
+    ADD_HFYU_LEFT_LOOP_INT16 0, 1
+.src_unaligned:
+    ADD_HFYU_LEFT_LOOP_INT16 0, 0
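A note on the vector loop above: a running sum has a serial dependency from each sample to the next, so the macro instead builds a prefix sum inside each register in log2(n) shuffle+add steps: pslld+paddw adds each word's left neighbour within its dword, pshufb with pb_zzzz2323zzzzabab propagates each pair total into the following two words, and (on the 16-byte SSE4 path) pshufb with pb_zzzzzzzz67676767 carries the low half's total into the high four words. pb_67/pb_ef then splat the previous register's last sum into all lanes as the carry. A scalar C sketch of the prefix-sum-by-doubling idea (the asm's shuffle pattern uses sparser intermediates but yields the same sums; illustrative only):

    #include <stdint.h>

    /* Prefix sum by doubling over the 8 words of one xmm register: pass d
     * adds the value d lanes to the left, so after d = 1, 2, 4 every lane
     * holds the sum of all lanes up to and including itself. Iterating i
     * downward keeps v[i - d] at its pre-pass value within a pass. */
    static void prefix_sum_words(uint16_t v[8])
    {
        for (int d = 1; d < 8; d <<= 1)
            for (int i = 7; i >= d; i--)
                v[i] = (uint16_t)(v[i] + v[i - d]);
    }

After the loop, wq has overshot zero by the distance to the next register boundary; mmsize-1-wd converts that overshoot into the byte index of the last valid output word, from which a two-byte pshufb control is packed so that movd can return that word in eax as the next "left".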