From 7d7487e85c066bf3f4e5821a49081f520b6bc1e7 Mon Sep 17 00:00:00 2001
From: James Almer
Date: Mon, 10 Mar 2014 17:09:20 -0300
Subject: x86/float_dsp: add ff_vector_{fmul_add, fmac_scalar}_fma3

~7% faster than AVX

Signed-off-by: James Almer
Signed-off-by: Michael Niedermayer
---
 libavutil/x86/float_dsp_init.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'libavutil/x86/float_dsp_init.c')

diff --git a/libavutil/x86/float_dsp_init.c b/libavutil/x86/float_dsp_init.c
index 97f7b7c7ca..88ffbc11b5 100644
--- a/libavutil/x86/float_dsp_init.c
+++ b/libavutil/x86/float_dsp_init.c
@@ -33,6 +33,8 @@ void ff_vector_fmac_scalar_sse(float *dst, const float *src, float mul,
                                int len);
 void ff_vector_fmac_scalar_avx(float *dst, const float *src, float mul,
                                int len);
+void ff_vector_fmac_scalar_fma3(float *dst, const float *src, float mul,
+                                int len);
 
 void ff_vector_fmul_scalar_sse(float *dst, const float *src, float mul,
                                int len);
@@ -46,6 +48,8 @@ void ff_vector_fmul_add_sse(float *dst, const float *src0, const float *src1,
                             const float *src2, int len);
 void ff_vector_fmul_add_avx(float *dst, const float *src0, const float *src1,
                             const float *src2, int len);
+void ff_vector_fmul_add_fma3(float *dst, const float *src0, const float *src1,
+                             const float *src2, int len);
 
 void ff_vector_fmul_reverse_sse(float *dst, const float *src0,
                                 const float *src1, int len);
@@ -153,4 +157,8 @@ av_cold void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
         fdsp->vector_fmul_add = ff_vector_fmul_add_avx;
         fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_avx;
     }
+    if (EXTERNAL_FMA3(cpu_flags)) {
+        fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_fma3;
+        fdsp->vector_fmul_add = ff_vector_fmul_add_fma3;
+    }
 }
-- 
cgit v1.2.3