From dcaf9660b610154a89495b3dd94bde2705746fc7 Mon Sep 17 00:00:00 2001 From: James Almer Date: Sun, 8 Jun 2014 04:05:16 -0300 Subject: x86/float_dsp: port vector_fmul_window to yasm Signed-off-by: James Almer Signed-off-by: Michael Niedermayer --- libavutil/x86/float_dsp.asm | 55 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) (limited to 'libavutil/x86/float_dsp.asm') diff --git a/libavutil/x86/float_dsp.asm b/libavutil/x86/float_dsp.asm index a5fcffb30b..aec0273235 100644 --- a/libavutil/x86/float_dsp.asm +++ b/libavutil/x86/float_dsp.asm @@ -191,6 +191,61 @@ INIT_YMM avx VECTOR_DMUL_SCALAR %endif +;----------------------------------------------------------------------------- +; vector_fmul_window(float *dst, const float *src0, +; const float *src1, const float *win, int len); +;----------------------------------------------------------------------------- +%macro VECTOR_FMUL_WINDOW 0 +cglobal vector_fmul_window, 5, 6, 6, dst, src0, src1, win, len, len1 + shl lend, 2 + lea len1q, [lenq - mmsize] + add src0q, lenq + add dstq, lenq + add winq, lenq + neg lenq +.loop + mova m0, [winq + lenq] + mova m4, [src0q + lenq] +%if cpuflag(sse) + mova m1, [winq + len1q] + mova m5, [src1q + len1q] + shufps m1, m1, 0x1b + shufps m5, m5, 0x1b + mova m2, m0 + mova m3, m1 + mulps m2, m4 + mulps m3, m5 + mulps m1, m4 + mulps m0, m5 + addps m2, m3 + subps m1, m0 + shufps m2, m2, 0x1b +%else + pswapd m1, [winq + len1q] + pswapd m5, [src1q + len1q] + mova m2, m0 + mova m3, m1 + pfmul m2, m4 + pfmul m3, m5 + pfmul m1, m4 + pfmul m0, m5 + pfadd m2, m3 + pfsub m1, m0 + pswapd m2, m2 +%endif + mova [dstq + lenq], m1 + mova [dstq + len1q], m2 + sub len1q, mmsize + add lenq, mmsize + jl .loop + REP_RET +%endmacro + +INIT_MMX 3dnowext +VECTOR_FMUL_WINDOW +INIT_XMM sse +VECTOR_FMUL_WINDOW + ;----------------------------------------------------------------------------- ; vector_fmul_add(float *dst, const float *src0, const float *src1, ; const float *src2, int len) -- cgit v1.2.3