summary | refs | log | tree | commit | diff
path: root/libavfilter/x86
diff options
context:
space:
mode:
author: Martin Vignali <martin.vignali@gmail.com> 2017-12-07 22:01:54 +0100
committer: Martin Vignali <martin.vignali@gmail.com> 2017-12-09 14:47:09 +0100
commit: 869efbf971208faccfdd88680178afaf5b1d4e77 (patch)
tree: 5fd74b9eadc7f110be8596cd8f9dc7769aa2066b /libavfilter/x86
parent: 713f9c5b5d646c4be55b04d691bac21ecbd74089 (diff)
avfilter/x86/vf_threshold : add threshold16 SIMD (SSE4 and AVX2)
Diffstat (limited to 'libavfilter/x86')
-rw-r--r-- libavfilter/x86/vf_threshold.asm 21
-rw-r--r-- libavfilter/x86/vf_threshold_init.c 34
2 files changed, 34 insertions, 21 deletions
diff --git a/libavfilter/x86/vf_threshold.asm b/libavfilter/x86/vf_threshold.asm
index 56a6c242d8..098069b083 100644
--- a/libavfilter/x86/vf_threshold.asm
+++ b/libavfilter/x86/vf_threshold.asm
@@ -25,16 +25,18 @@
SECTION_RODATA
pb_128: times 16 db 128
+pb_128_0 : times 8 db 0, 128
SECTION .text
-%macro THRESHOLD_8 0
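+; %1 = sample depth in bits (8 or 16), appended to the function name; %2 = pcmpgt element-size suffix (b or w); %3 = label of the constant XORed into both operands before the compare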
+%macro THRESHOLD 3
%if ARCH_X86_64
-cglobal threshold8, 10, 13, 5, in, threshold, min, max, out, ilinesize, tlinesize, flinesize, slinesize, olinesize, w, h, x
+cglobal threshold%1, 10, 13, 5, in, threshold, min, max, out, ilinesize, tlinesize, flinesize, slinesize, olinesize, w, h, x
mov wd, dword wm
mov hd, dword hm
%else
-cglobal threshold8, 5, 7, 5, in, threshold, min, max, out, w, x
+cglobal threshold%1, 5, 7, 5, in, threshold, min, max, out, w, x
mov wd, r10m
%define ilinesizeq r5mp
%define tlinesizeq r6mp
@@ -43,7 +45,10 @@ cglobal threshold8, 5, 7, 5, in, threshold, min, max, out, w, x
%define olinesizeq r9mp
%define hd r11mp
%endif
- VBROADCASTI128 m4, [pb_128]
+ VBROADCASTI128 m4, [%3]
+%if %1 == 16
+ add wq, wq ; w *= 2 (16 bits instead of 8)
+%endif
add inq, wq
add thresholdq, wq
add minq, wq
@@ -60,7 +65,7 @@ cglobal threshold8, 5, 7, 5, in, threshold, min, max, out, w, x
movu m3, [maxq + xq]
pxor m0, m4
pxor m1, m4
- pcmpgtb m0, m1
+ pcmpgt%2 m0, m1
PBLENDVB m3, m2, m0
movu [outq + xq], m3
add xq, mmsize
@@ -77,9 +82,11 @@ RET
%endmacro
INIT_XMM sse4
-THRESHOLD_8
+THRESHOLD 8, b, pb_128
+THRESHOLD 16, w, pb_128_0
%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
-THRESHOLD_8
+THRESHOLD 8, b, pb_128
+THRESHOLD 16, w, pb_128_0
%endif
diff --git a/libavfilter/x86/vf_threshold_init.c b/libavfilter/x86/vf_threshold_init.c
index db0559533d..8e42296791 100644
--- a/libavfilter/x86/vf_threshold_init.c
+++ b/libavfilter/x86/vf_threshold_init.c
@@ -23,20 +23,19 @@
#include "libavutil/x86/cpu.h"
#include "libavfilter/threshold.h"
-void ff_threshold8_sse4(const uint8_t *in, const uint8_t *threshold,
- const uint8_t *min, const uint8_t *max,
- uint8_t *out,
- ptrdiff_t ilinesize, ptrdiff_t tlinesize,
- ptrdiff_t flinesize, ptrdiff_t slinesize,
- ptrdiff_t olinesize,
- int w, int h);
-void ff_threshold8_avx2(const uint8_t *in, const uint8_t *threshold,
- const uint8_t *min, const uint8_t *max,
- uint8_t *out,
- ptrdiff_t ilinesize, ptrdiff_t tlinesize,
- ptrdiff_t flinesize, ptrdiff_t slinesize,
- ptrdiff_t olinesize,
- int w, int h);
+#define THRESHOLD_FUNC(depth, opt) \
+void ff_threshold##depth##_##opt(const uint8_t *in, const uint8_t *threshold,\
+ const uint8_t *min, const uint8_t *max, \
+ uint8_t *out, \
+ ptrdiff_t ilinesize, ptrdiff_t tlinesize, \
+ ptrdiff_t flinesize, ptrdiff_t slinesize, \
+ ptrdiff_t olinesize, \
+ int w, int h);
+
+THRESHOLD_FUNC(8, sse4)
+THRESHOLD_FUNC(8, avx2)
+THRESHOLD_FUNC(16, sse4)
+THRESHOLD_FUNC(16, avx2)
av_cold void ff_threshold_init_x86(ThresholdContext *s)
{
@@ -49,5 +48,12 @@ av_cold void ff_threshold_init_x86(ThresholdContext *s)
if (EXTERNAL_AVX2_FAST(cpu_flags)) {
s->threshold = ff_threshold8_avx2;
}
+ } else if (s->depth == 16) {
+ if (EXTERNAL_SSE4(cpu_flags)) {
+ s->threshold = ff_threshold16_sse4;
+ }
+ if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+ s->threshold = ff_threshold16_avx2;
+ }
}
}