summary | refs | log | tree | commit | diff
path: root/libavcodec/x86/dcadsp.asm
diff options
context:
space:
mode:
author James Almer <jamrial@gmail.com> 2014-04-03 01:46:18 -0300
committer Michael Niedermayer <michaelni@gmx.at> 2014-04-05 13:55:59 +0200
commit a1ac12bddd8ddb184281eadf5431dce29c02f27a (patch)
tree 7a2ff6bd64e7db685fcf6f36e63f6b1d6ea820ab /libavcodec/x86/dcadsp.asm
parent 3c728ceec66d4e26a3edbf136a459c271c8452cd (diff)
x86/dcadsp: add ff_dca_lfe_fir0_fma3
~10% faster than the SSE version.

Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/dcadsp.asm')
-rw-r--r-- libavcodec/x86/dcadsp.asm 9
1 files changed, 9 insertions, 0 deletions
diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm
index e49d63d8ad..2758daa6e4 100644
--- a/libavcodec/x86/dcadsp.asm
+++ b/libavcodec/x86/dcadsp.asm
@@ -132,11 +132,16 @@ DECODE_HF
mulps va, %2
mulps vb, %2
%if %0 == 3
+%if cpuflag(fma3)
+ fmaddps va, m4, %3, va
+ fmaddps vb, m0, %3, vb
+%else
mulps m4, %3
mulps m0, %3
addps va, m4
addps vb, m0
%endif
+%endif
; va = va1 va2 va3 va4
; vb = vb1 vb2 vb3 vb4
%if %1
@@ -198,6 +203,10 @@ cglobal dca_lfe_fir%1, 3,3,6-%1, out, in, cf0
INIT_XMM sse
DCA_LFE_FIR 0
DCA_LFE_FIR 1
+%if HAVE_FMA3_EXTERNAL
+INIT_XMM fma3
+DCA_LFE_FIR 0
+%endif
%macro SETZERO 1
%if cpuflag(sse2) && notcpuflag(avx)