From 947f933687b9fd4d80b6cad468ddc2b5b20a9c38 Mon Sep 17 00:00:00 2001
From: Justin Ruggles
Date: Sat, 22 Sep 2012 18:41:25 -0400
Subject: x86: float_dsp: add SSE version of vector_fmul_scalar()

---
 libavutil/x86/float_dsp.asm    | 29 +++++++++++++++++++++++++++++
 libavutil/x86/float_dsp_init.c |  4 ++++
 2 files changed, 33 insertions(+)

(limited to 'libavutil')

diff --git a/libavutil/x86/float_dsp.asm b/libavutil/x86/float_dsp.asm
index a8857b937c..317df9c3c1 100644
--- a/libavutil/x86/float_dsp.asm
+++ b/libavutil/x86/float_dsp.asm
@@ -85,3 +85,32 @@ INIT_XMM sse
 VECTOR_FMAC_SCALAR
 INIT_YMM avx
 VECTOR_FMAC_SCALAR
+
+;------------------------------------------------------------------------------
+; void ff_vector_fmul_scalar(float *dst, const float *src, float mul, int len)
+;------------------------------------------------------------------------------
+
+%macro VECTOR_FMUL_SCALAR 0
+%if UNIX64
+cglobal vector_fmul_scalar, 3,3,2, dst, src, len
+%else
+cglobal vector_fmul_scalar, 4,4,3, dst, src, mul, len
+%endif
+%if ARCH_X86_32
+    movss    m0, mulm                ; x86_32: load mul through its named-argument memory operand
+%elif WIN64
+    SWAP 0, 2                        ; Win64: mul is the 3rd argument (xmm2 in that ABI); make it m0
+%endif
+    shufps   m0, m0, 0               ; broadcast lane 0 (mul) to every lane of m0
+    lea    lenq, [lend*4-mmsize]     ; byte offset of the last vector: len*4 - mmsize
+.loop:
+    mova     m1, [srcq+lenq]         ; load one vector from src (mova: aligned access per x86inc - TODO confirm)
+    mulps    m1, m0                  ; multiply each element by mul
+    mova  [dstq+lenq], m1            ; store the result at the same offset in dst
+    sub    lenq, mmsize              ; step back one vector; sets the flags tested by jge
+    jge .loop                        ; body runs before the test, so len must cover >= 1 whole vector
+    REP_RET
+%endmacro
+
+INIT_XMM sse
+VECTOR_FMUL_SCALAR
diff --git a/libavutil/x86/float_dsp_init.c b/libavutil/x86/float_dsp_init.c
index d1b0b8c622..d14ec6a377 100644
--- a/libavutil/x86/float_dsp_init.c
+++ b/libavutil/x86/float_dsp_init.c
@@ -32,6 +32,9 @@ extern void ff_vector_fmac_scalar_sse(float *dst, const float *src, float mul,
 extern void ff_vector_fmac_scalar_avx(float *dst, const float *src, float mul,
                                       int len);
 
+extern void ff_vector_fmul_scalar_sse(float *dst, const float *src, float mul,
+                                      int len);
+
 void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
 {
     int mm_flags = av_get_cpu_flags();
@@ -39,6 +42,7 @@ void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
     if (EXTERNAL_SSE(mm_flags)) {
         fdsp->vector_fmul = ff_vector_fmul_sse;
         fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_sse;
+        fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_sse;
     }
     if (EXTERNAL_AVX(mm_flags)) {
         fdsp->vector_fmul = ff_vector_fmul_avx;
-- 
cgit v1.2.3