summaryrefslogtreecommitdiff
path: root/libavcodec/x86
diff options
context:
space:
mode:
authorMartin Vignali <martin.vignali@gmail.com>2018-01-14 14:23:05 +0100
committerMartin Vignali <martin.vignali@gmail.com>2018-01-28 20:23:11 +0100
commit8f9c38b19629838066def1207703cfcdc19fcbc9 (patch)
tree014bb7a09a155e2227e35c2a20ade7b566ec054a /libavcodec/x86
parent3a230ce5fa10b21312236b362df9eeddd99e7ac2 (diff)
avcodec/utvideoenc : add SIMD (avx) for sub_left_prediction
asm code by Henrik Gramner
Diffstat (limited to 'libavcodec/x86')
-rw-r--r--libavcodec/x86/lossless_videoencdsp.asm43
-rw-r--r--libavcodec/x86/lossless_videoencdsp_init.c7
2 files changed, 50 insertions, 0 deletions
diff --git a/libavcodec/x86/lossless_videoencdsp.asm b/libavcodec/x86/lossless_videoencdsp.asm
index 4d79eee36b..fb1204f0f1 100644
--- a/libavcodec/x86/lossless_videoencdsp.asm
+++ b/libavcodec/x86/lossless_videoencdsp.asm
@@ -25,6 +25,8 @@
%include "libavutil/x86/x86util.asm"
+cextern pb_80
+
SECTION .text
; void ff_diff_bytes(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
@@ -149,3 +151,44 @@ DIFF_BYTES_PROLOGUE
DIFF_BYTES_BODY u, u
%undef i
%endif
+
+
+;--------------------------------------------------------------------------------------------------
+;void sub_left_predict(uint8_t *dst, uint8_t *src, ptrdiff_t stride, ptrdiff_t width, int height)
+;--------------------------------------------------------------------------------------------------
+
+INIT_XMM avx
+cglobal sub_left_predict, 5,6,5, dst, src, stride, width, height, x
+ mova m1, [pb_80] ; prev initial
+ add dstq, widthq
+ add srcq, widthq
+ lea xd, [widthq-1]
+ neg widthq
+ and xd, 15
+ pinsrb m4, m1, xd, 15
+ mov xq, widthq
+
+ .loop:
+ movu m0, [srcq + widthq]
+ palignr m2, m0, m1, 15
+ movu m1, [srcq + widthq + 16]
+ palignr m3, m1, m0, 15
+ psubb m2, m0, m2
+ psubb m3, m1, m3
+ movu [dstq + widthq], m2
+ movu [dstq + widthq + 16], m3
+ add widthq, 2 * 16
+ jl .loop
+
+ add srcq, strideq
+ sub dstq, xq ; dst + width
+ test xd, 16
+ jz .mod32
+ mova m1, m0
+
+.mod32:
+ pshufb m1, m4
+ mov widthq, xq
+ dec heightd
+ jg .loop
+ RET
diff --git a/libavcodec/x86/lossless_videoencdsp_init.c b/libavcodec/x86/lossless_videoencdsp_init.c
index fc728c9fd1..40407add52 100644
--- a/libavcodec/x86/lossless_videoencdsp_init.c
+++ b/libavcodec/x86/lossless_videoencdsp_init.c
@@ -36,6 +36,9 @@ void ff_diff_bytes_sse2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
void ff_diff_bytes_avx2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
intptr_t w);
+void ff_sub_left_predict_avx(uint8_t *dst, uint8_t *src,
+ ptrdiff_t stride, ptrdiff_t width, int height);
+
#if HAVE_INLINE_ASM
static void sub_median_pred_mmxext(uint8_t *dst, const uint8_t *src1,
@@ -98,6 +101,10 @@ av_cold void ff_llvidencdsp_init_x86(LLVidEncDSPContext *c)
c->diff_bytes = ff_diff_bytes_sse2;
}
+ if (EXTERNAL_AVX(cpu_flags)) {
+ c->sub_left_predict = ff_sub_left_predict_avx;
+ }
+
if (EXTERNAL_AVX2_FAST(cpu_flags)) {
c->diff_bytes = ff_diff_bytes_avx2;
}