From f1d80bc6305221506ad96f0ab82088f9229881ab Mon Sep 17 00:00:00 2001
From: James Almer
Date: Tue, 11 Apr 2017 21:29:09 -0300
Subject: x86/float_dsp: add ff_vector_fmul_reverse_avx2

~20% faster than AVX.

Signed-off-by: James Almer
---
 libavutil/x86/float_dsp_init.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'libavutil/x86/float_dsp_init.c')

diff --git a/libavutil/x86/float_dsp_init.c b/libavutil/x86/float_dsp_init.c
index 09c7a4d3b2..122087a196 100644
--- a/libavutil/x86/float_dsp_init.c
+++ b/libavutil/x86/float_dsp_init.c
@@ -67,6 +67,8 @@ void ff_vector_fmul_reverse_sse(float *dst, const float *src0,
                                 const float *src1, int len);
 void ff_vector_fmul_reverse_avx(float *dst, const float *src0,
                                 const float *src1, int len);
+void ff_vector_fmul_reverse_avx2(float *dst, const float *src0,
+                                 const float *src1, int len);
 
 float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
 
@@ -101,6 +103,9 @@ av_cold void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
         fdsp->vector_fmul_add = ff_vector_fmul_add_avx;
         fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_avx;
     }
+    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+        fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_avx2;
+    }
     if (EXTERNAL_FMA3_FAST(cpu_flags)) {
         fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_fma3;
         fdsp->vector_fmul_add = ff_vector_fmul_add_fma3;
-- 
cgit v1.2.3