author    Martin Vignali <martin.vignali@gmail.com>  2017-10-01 21:37:15 +0200
committer James Almer <jamrial@gmail.com>            2017-10-01 17:35:30 -0300
commit    ac5908b13f16cbda396730c35f5f3125ca24577a (patch)
tree      01f49a554ed8199b7aaa5d095c859796c6c10c18 /libavcodec/x86
parent    59924d5eb11646f82f70c206be8a867468f102b9 (diff)
libavcodec/exr : add x86 SIMD for predictor
Signed-off-by: James Almer <jamrial@gmail.com>
Diffstat (limited to 'libavcodec/x86')
-rw-r--r--  libavcodec/x86/exrdsp.asm     62
-rw-r--r--  libavcodec/x86/exrdsp_init.c  13
2 files changed, 74 insertions(+), 1 deletion(-)
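
For context, the predictor this commit accelerates undoes EXR's byte-wise delta coding: every byte after the first stores the difference from the previous decoded byte, biased by 128, so decoding reduces to a running byte-wise sum. A minimal scalar sketch of that behaviour follows; the function name and loop form are illustrative, not the exact code from libavcodec/exrdsp.c.

#include <stdint.h>
#include <stddef.h>

/* Scalar sketch: each byte after the first is a delta from the previous
 * reconstructed byte, offset by 128, so decoding is a running sum (mod 256). */
static void predictor_scalar(uint8_t *src, ptrdiff_t size)
{
    for (ptrdiff_t i = 1; i < size; i++)
        src[i] = (uint8_t)(src[i - 1] + src[i] - 128);
}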
diff --git a/libavcodec/x86/exrdsp.asm b/libavcodec/x86/exrdsp.asm
index 06c629e59e..23c9397ef8 100644
--- a/libavcodec/x86/exrdsp.asm
+++ b/libavcodec/x86/exrdsp.asm
@@ -2,9 +2,11 @@
;* X86 Optimized functions for Open Exr Decoder
;* Copyright (c) 2006 Industrial Light & Magic, a division of Lucas Digital Ltd. LLC
;*
-;* reorder_pixels based on patch by John Loy
+;* reorder_pixels, predictor based on patch by John Loy
;* port to ASM by Jokyo Images support by CNC - French National Center for Cinema
;*
+;* predictor AVX/AVX2 by Henrik Gramner
+;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
@@ -24,6 +26,9 @@

%include "libavutil/x86/x86util.asm"

+cextern pb_15
+cextern pb_80
+
SECTION .text

;------------------------------------------------------------------------------
@@ -60,3 +65,58 @@ REORDER_PIXELS
INIT_YMM avx2
REORDER_PIXELS
%endif
+
+
+;------------------------------------------------------------------------------
+; void ff_predictor(uint8_t *src, ptrdiff_t size);
+;------------------------------------------------------------------------------
+
+%macro PREDICTOR 0
+cglobal predictor, 2,2,5, src, size
+%if mmsize == 32
+ vbroadcasti128 m0, [pb_80]
+%else
+ mova xm0, [pb_80]
+%endif
+ mova xm1, [pb_15]
+ mova xm2, xm0
+ add srcq, sizeq
+ neg sizeq
+.loop:
+ pxor m3, m0, [srcq + sizeq]
+ pslldq m4, m3, 1
+ paddb m3, m4
+ pslldq m4, m3, 2
+ paddb m3, m4
+ pslldq m4, m3, 4
+ paddb m3, m4
+ pslldq m4, m3, 8
+%if mmsize == 32
+ paddb m3, m4
+ paddb xm2, xm3
+ vextracti128 xm4, m3, 1
+ mova [srcq + sizeq], xm2
+ pshufb xm2, xm1
+ paddb xm2, xm4
+ mova [srcq + sizeq + 16], xm2
+%else
+ paddb m2, m3
+ paddb m2, m4
+ mova [srcq + sizeq], m2
+%endif
+ pshufb xm2, xm1
+ add sizeq, mmsize
+ jl .loop
+ RET
+%endmacro
+
+INIT_XMM ssse3
+PREDICTOR
+
+INIT_XMM avx
+PREDICTOR
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+PREDICTOR
+%endif
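+
The asm above replaces that serial dependency with a per-register prefix sum: the load is un-biased with an XOR against pb_80 (for bytes, XOR 0x80 equals adding 128 mod 256), the block is summed in log2 steps with byte shifts (pslldq) and byte adds (paddb), and pshufb with pb_15 broadcasts the last byte of the finished block so it carries into the next iteration. A hedged SSSE3 intrinsics sketch of the same idea, assuming size is a multiple of 16 (the asm also has an AVX2 path that handles two 16-byte lanes per iteration):

#include <stdint.h>
#include <stddef.h>
#include <tmmintrin.h>  /* SSSE3: _mm_shuffle_epi8 */

/* Sketch only: 16 bytes per iteration, unaligned loads/stores for simplicity. */
static void predictor_prefix_sum_sketch(uint8_t *src, ptrdiff_t size)
{
    const __m128i bias = _mm_set1_epi8((char)0x80);  /* pb_80 */
    const __m128i b15  = _mm_set1_epi8(15);          /* pb_15: broadcast byte 15 */
    __m128i carry = bias;                            /* running sum, pre-biased  */

    for (ptrdiff_t i = 0; i < size; i += 16) {
        __m128i v = _mm_xor_si128(_mm_loadu_si128((const __m128i *)(src + i)), bias);
        v = _mm_add_epi8(v, _mm_slli_si128(v, 1));   /* prefix sum within the   */
        v = _mm_add_epi8(v, _mm_slli_si128(v, 2));   /* register in log2(16)    */
        v = _mm_add_epi8(v, _mm_slli_si128(v, 4));   /* shift-and-add steps     */
        v = _mm_add_epi8(v, _mm_slli_si128(v, 8));
        carry = _mm_add_epi8(carry, v);              /* add carry from previous block */
        _mm_storeu_si128((__m128i *)(src + i), carry);
        carry = _mm_shuffle_epi8(carry, b15);        /* last byte becomes next carry  */
    }
}

The log-step sum turns 16 serially dependent byte additions into four vector adds per block, which is where the speedup over the scalar loop comes from.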
diff --git a/libavcodec/x86/exrdsp_init.c b/libavcodec/x86/exrdsp_init.c
index 5669be3d97..63b3480d8f 100644
--- a/libavcodec/x86/exrdsp_init.c
+++ b/libavcodec/x86/exrdsp_init.c
@@ -26,6 +26,12 @@ void ff_reorder_pixels_sse2(uint8_t *dst, const uint8_t *src, ptrdiff_t size);
void ff_reorder_pixels_avx2(uint8_t *dst, const uint8_t *src, ptrdiff_t size);
+void ff_predictor_ssse3(uint8_t *src, ptrdiff_t size);
+
+void ff_predictor_avx(uint8_t *src, ptrdiff_t size);
+
+void ff_predictor_avx2(uint8_t *src, ptrdiff_t size);
+
av_cold void ff_exrdsp_init_x86(ExrDSPContext *dsp)
{
int cpu_flags = av_get_cpu_flags();
@@ -33,7 +39,14 @@ av_cold void ff_exrdsp_init_x86(ExrDSPContext *dsp)
if (EXTERNAL_SSE2(cpu_flags)) {
dsp->reorder_pixels = ff_reorder_pixels_sse2;
}
+ if (EXTERNAL_SSSE3(cpu_flags)) {
+ dsp->predictor = ff_predictor_ssse3;
+ }
+ if (EXTERNAL_AVX(cpu_flags)) {
+ dsp->predictor = ff_predictor_avx;
+ }
if (EXTERNAL_AVX2_FAST(cpu_flags)) {
dsp->reorder_pixels = ff_reorder_pixels_avx2;
+ dsp->predictor = ff_predictor_avx2;
}
}
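
For completeness, the decoder never calls a specific variant directly: it goes through the ExrDSPContext function pointer, and the checks above override it in increasing order of preference (SSSE3, then AVX, then AVX2 where EXTERNAL_AVX2_FAST holds). A hedged usage sketch, assuming the ff_exrdsp_init() entry point from libavcodec/exrdsp.h and an illustrative caller:

#include <stdint.h>
#include <stddef.h>
#include "libavcodec/exrdsp.h"

/* Illustrative only: whichever predictor the init code selected is used here. */
static void decode_predictor(uint8_t *buf, ptrdiff_t size)
{
    ExrDSPContext dsp;
    ff_exrdsp_init(&dsp);      /* installs the C version, then the x86 overrides */
    dsp.predictor(buf, size);  /* in-place delta decoding */
}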