summaryrefslogtreecommitdiff
path: root/libavutil/x86/float_dsp.asm
diff options
context:
space:
mode:
author: Christophe Gisquet <christophe.gisquet@gmail.com>, 2013-04-12 21:07:01 +0200
committer: Michael Niedermayer <michaelni@gmx.at>, 2013-04-17 00:03:25 +0200
commit: 1a4007964c106d01f46a5a7f03c1c41fd869b35c (patch)
tree: 61e4cfb2b459089c084b358947ad6588b01fd14b /libavutil/x86/float_dsp.asm
parent: 295ce83e2f06c352e11ac1918c3f1119f8b276ab (diff)
x86: float dsp: butterflies_float SSE
97c -> 49c. Some codecs could benefit from more unrolling, but AAC doesn't.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavutil/x86/float_dsp.asm')
-rw-r--r-- libavutil/x86/float_dsp.asm | 23
1 files changed, 23 insertions, 0 deletions
diff --git a/libavutil/x86/float_dsp.asm b/libavutil/x86/float_dsp.asm
index 004e6cf1fe..f0310ef1b8 100644
--- a/libavutil/x86/float_dsp.asm
+++ b/libavutil/x86/float_dsp.asm
@@ -263,3 +263,26 @@ cglobal scalarproduct_float, 3,3,2, v1, v2, offset
%endif
RET
+;-----------------------------------------------------------------------------
+; void ff_butterflies_float(float *src0, float *src1, int len);
+;
+; In-place butterfly over two float arrays. For every element i:
+;     sum        = src0[i] + src1[i]
+;     difference = src0[i] - src1[i]
+;     src0[i]    = sum
+;     src1[i]    = difference
+;
+; len is a count of floats. One whole vector (mmsize bytes) is processed per
+; iteration, so a len that is not a whole number of vectors makes the final
+; iteration touch elements past len. len == 0 returns without touching memory.
+; NOTE(review): all loads/stores use mova, so both pointers are presumably
+; required to be vector-aligned and len a multiple of the vector width in
+; floats -- confirm against the C-side documentation of this function.
+;-----------------------------------------------------------------------------
+INIT_XMM sse
+cglobal butterflies_float, 3,3,3, src0, src1, len
+    movsxdifnidn lenq, lend         ; x86inc helper; presumably sign-extends lend into lenq where they differ
+    test      lenq, lenq
+    jz        .done                 ; empty input: nothing to do
+    shl       lenq, 2               ; float count -> byte count
+    add       src0q, lenq           ; advance both pointers by the byte count ...
+    add       src1q, lenq
+    neg       lenq                  ; ... and index backwards from there: lenq = -bytes
+.next_vector:
+    mova      m1, [src0q + lenq]    ; a = current src0 vector
+    mova      m2, [src1q + lenq]    ; b = current src1 vector
+    subps     m0, m1, m2            ; m0 = a - b
+    addps     m1, m2                ; m1 = a + b
+    mova      [src1q + lenq], m0    ; src1 receives the difference
+    mova      [src0q + lenq], m1    ; src0 receives the sum
+    add       lenq, mmsize          ; step to the next vector
+    jl        .next_vector          ; keep going while the offset is still negative
+.done:
+    REP_RET