summaryrefslogtreecommitdiff
path: root/libavutil/x86/float_dsp.asm
diff options
context:
space:
mode:
authorChristophe Gisquet <christophe.gisquet@gmail.com>2013-04-12 21:07:01 +0200
committerAnton Khirnov <anton@khirnov.net>2013-05-03 08:08:02 +0200
commit566b7a20fd0cab44d344329538d314454a0bcc2f (patch)
treef2ca45002f7479bdaa9e1cb4abc92c794b42e338 /libavutil/x86/float_dsp.asm
parentb333f3a22a4db4cf65d6a0457ac82ecbe7c7ac44 (diff)
x86: float dsp: butterflies_float SSE
97c -> 49c. Some codecs could benefit from more unrolling, but AAC doesn't.
Diffstat (limited to 'libavutil/x86/float_dsp.asm')
-rw-r--r--libavutil/x86/float_dsp.asm26
1 files changed, 26 insertions, 0 deletions
diff --git a/libavutil/x86/float_dsp.asm b/libavutil/x86/float_dsp.asm
index 779339c575..10330ff336 100644
--- a/libavutil/x86/float_dsp.asm
+++ b/libavutil/x86/float_dsp.asm
@@ -252,3 +252,29 @@ cglobal scalarproduct_float, 3,3,2, v1, v2, offset
fld dword r0m
%endif
RET
+
+;-----------------------------------------------------------------------------
+; void ff_butterflies_float(float *src0, float *src1, int len);
+;-----------------------------------------------------------------------------
+INIT_XMM sse ; x86inc: build the following code with XMM registers / SSE
+cglobal butterflies_float, 3,3,3, src0, src1, len ; in-place butterfly: src0[i] += src1[i], src1[i] = old src0[i] - src1[i]
+%if ARCH_X86_64
+ movsxd lenq, lend ; len is a 32-bit int: sign-extend before using it as a 64-bit offset
+%endif
+ test lenq, lenq
+ jz .end ; len == 0: nothing to process
+ shl lenq, 2 ; element count -> byte count (4 bytes per float)
+ lea src0q, [src0q + lenq] ; src0q = one past the end of src0
+ lea src1q, [src1q + lenq] ; src1q = one past the end of src1
+ neg lenq ; lenq = negative byte offset, counts up towards 0
+.loop: ; one mmsize-byte vector of each array per iteration
+ mova m0, [src0q + lenq] ; m0 = src0 vector; aligned access, presumably requires mmsize-aligned buffers (confirm with callers)
+ mova m1, [src1q + lenq] ; m1 = src1 vector
+ subps m2, m0, m1 ; m2 = src0 - src1 (3-operand form, handled by x86inc for SSE)
+ addps m0, m0, m1 ; m0 = src0 + src1
+ mova [src1q + lenq], m2 ; src1 <- difference
+ mova [src0q + lenq], m0 ; src0 <- sum
+ add lenq, mmsize ; step to the next vector; sets the flags tested by jl
+ jl .loop ; repeat while the offset is still negative; presumably len is a multiple of mmsize/4 floats (confirm with callers)
+.end:
+ REP_RET ; x86inc return macro, used because this return is a branch target