From 566b7a20fd0cab44d344329538d314454a0bcc2f Mon Sep 17 00:00:00 2001
From: Christophe Gisquet
Date: Fri, 12 Apr 2013 21:07:01 +0200
Subject: x86: float dsp: butterflies_float SSE

97c -> 49c

Some codecs could benefit from more unrolling, but AAC doesn't.
---
 libavutil/x86/float_dsp.asm | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

(limited to 'libavutil/x86/float_dsp.asm')

diff --git a/libavutil/x86/float_dsp.asm b/libavutil/x86/float_dsp.asm
index 779339c575..10330ff336 100644
--- a/libavutil/x86/float_dsp.asm
+++ b/libavutil/x86/float_dsp.asm
@@ -252,3 +252,35 @@ cglobal scalarproduct_float, 3,3,2, v1, v2, offset
     fld     dword r0m
 %endif
     RET
+
+;-----------------------------------------------------------------------------
+; void ff_butterflies_float(float *src0, float *src1, int len);
+;
+; In-place butterfly: for each i, src0[i] becomes src0[i] + src1[i] and
+; src1[i] becomes (old src0[i]) - src1[i].
+; Each iteration moves mmsize bytes (16 under INIT_XMM) with aligned moves
+; (mova), so both buffers must be mmsize-aligned and len must be a multiple
+; of mmsize/4 floats. len <= 0 is a no-op.
+;-----------------------------------------------------------------------------
+INIT_XMM sse
+cglobal butterflies_float, 3,3,3, src0, src1, len
+%if ARCH_X86_64
+    movsxd    lenq, lend              ; widen the int argument to a 64-bit index
+%endif
+    test      lenq, lenq
+    jle       .end                    ; skip zero AND negative lengths
+    shl       lenq, 2                 ; element count -> byte count (4 per float)
+    lea      src0q, [src0q + lenq]    ; point both registers at the buffer ends
+    lea      src1q, [src1q + lenq]
+    neg       lenq                    ; negative byte offset that counts up to 0
+.loop:
+    mova        m0, [src0q + lenq]
+    mova        m1, [src1q + lenq]
+    subps       m2, m0, m1            ; m2 = src0 - src1
+    addps       m0, m0, m1            ; m0 = src0 + src1
+    mova        [src1q + lenq], m2
+    mova        [src0q + lenq], m0
+    add       lenq, mmsize
+    jl .loop                          ; loop while the offset is still negative
+.end:
+    REP_RET
-- 
cgit v1.2.3