summaryrefslogtreecommitdiff
path: root/libavfilter
diff options
context:
space:
mode:
author Paul B Mahol <onemda@gmail.com> 2023-09-10 19:25:20 +0200
committer Paul B Mahol <onemda@gmail.com> 2023-09-17 11:11:24 +0200
commit c5effe7d3db6610e9ec5a1efbe11d2b87bb34d61 (patch)
tree 9ddb647d78e4df4b2c6ca2a89edd41c1f97b6deb /libavfilter
parent 5f810435c2a6d985fabd9e6c025e0da0c99c39a9 (diff)
avfilter/x86/af_afir: add FMA3 SIMD
Diffstat (limited to 'libavfilter')
-rw-r--r-- libavfilter/x86/af_afir.asm 27
-rw-r--r-- libavfilter/x86/af_afir_init.c 5
2 files changed, 32 insertions, 0 deletions
diff --git a/libavfilter/x86/af_afir.asm b/libavfilter/x86/af_afir.asm
index 2cc09709a2..ed0276c7b9 100644
--- a/libavfilter/x86/af_afir.asm
+++ b/libavfilter/x86/af_afir.asm
@@ -67,3 +67,30 @@ INIT_XMM sse3
FCMUL_ADD
INIT_YMM avx
FCMUL_ADD
+
+%if HAVE_FMA3_EXTERNAL
+INIT_YMM fma3
+cglobal fcmul_add, 4,4,4, sum, t, c, len ; FMA3/YMM kernel: sum[] += t[] * c[] over interleaved (re, im) float pairs, then one scalar float
+ shl lend, 3 ; len *= 8 -> byte count (8 bytes = one (re, im) float pair)
+ add tq, lenq ; move all three pointers to the end of the vector range ...
+ add cq, lenq
+ add sumq, lenq
+ neg lenq ; ... and index them with a negative byte offset that counts up towards 0
+.loop: ; NOTE(review): body runs at least once, even for len == 0; presumably callers guarantee padded buffers - confirm
+ movaps m0, [tq + lenq] ; m0 = t, aligned load of mmsize bytes
+ movaps m1, [cq + lenq] ; m1 = c, aligned load of mmsize bytes
+ vpermilps m3, m0, 177 ; 0xB1: swap the two floats of each pair -> (t.im, t.re)
+ vpermilps m2, m1, 160 ; 0xA0: duplicate even lanes -> (c.re, c.re)
+ vpermilps m1, m1, 245 ; 0xF5: duplicate odd lanes -> (c.im, c.im)
+ mulps m1, m1, m3 ; m1 = (c.im*t.im, c.im*t.re)
+ vfmaddsub132ps m0, m1, m2 ; m0 = m0*m2 -/+ m1: even lanes t.re*c.re - t.im*c.im, odd lanes t.im*c.re + t.re*c.im
+ addps m0, m0, [sumq + lenq] ; add the product onto the existing contents of sum
+ movaps [sumq + lenq], m0 ; aligned store back to sum
+ add lenq, mmsize ; advance one vector; keep looping while the offset is still negative
+ jl .loop
+ movss xm0, [tq + lenq] ; scalar tail: one more float, plain real multiply-add sum += t * c
+ mulss xm0, [cq + lenq] ; NOTE(review): offset equals len*8 only if len*8 is a multiple of mmsize - confirm with callers
+ addss xm0, [sumq + lenq]
+ movss [sumq + lenq], xm0
+ RET
+%endif
diff --git a/libavfilter/x86/af_afir_init.c b/libavfilter/x86/af_afir_init.c
index e53817b9c0..d573acf10b 100644
--- a/libavfilter/x86/af_afir_init.c
+++ b/libavfilter/x86/af_afir_init.c
@@ -26,6 +26,8 @@ void ff_fcmul_add_sse3(float *sum, const float *t, const float *c,
ptrdiff_t len);
void ff_fcmul_add_avx(float *sum, const float *t, const float *c,
ptrdiff_t len);
+void ff_fcmul_add_fma3(float *sum, const float *t, const float *c,
+ ptrdiff_t len);
av_cold void ff_afir_init_x86(AudioFIRDSPContext *s)
{
@@ -37,4 +39,7 @@ av_cold void ff_afir_init_x86(AudioFIRDSPContext *s)
if (EXTERNAL_AVX_FAST(cpu_flags)) {
s->fcmul_add = ff_fcmul_add_avx;
}
+ if (EXTERNAL_FMA3_FAST(cpu_flags)) {
+ s->fcmul_add = ff_fcmul_add_fma3;
+ }
}