summaryrefslogtreecommitdiff
path: root/libavutil/x86
diff options
context:
space:
mode:
authorRonald S. Bultje <rsbultje@gmail.com>2013-01-20 13:20:30 -0800
committerRonald S. Bultje <rsbultje@gmail.com>2013-01-22 11:55:42 -0800
commit42d324694883cdf1fff1612ac70fa403692a1ad4 (patch)
tree99b6a8629388614ec2395c847b79a2c448583341 /libavutil/x86
parent55aa03b9f8f11ebb7535424cc0e5635558590f49 (diff)
floatdsp: move vector_fmul_reverse from dsputil to avfloatdsp.
Now, nellymoserenc and aacenc no longer depends on dsputil. Independent of this patch, wmaprodec also does not depend on dsputil, so I removed it from there also.
Diffstat (limited to 'libavutil/x86')
-rw-r--r--libavutil/x86/float_dsp.asm37
-rw-r--r--libavutil/x86/float_dsp_init.c7
2 files changed, 44 insertions, 0 deletions
diff --git a/libavutil/x86/float_dsp.asm b/libavutil/x86/float_dsp.asm
index 70fc1d0310..126f3495c4 100644
--- a/libavutil/x86/float_dsp.asm
+++ b/libavutil/x86/float_dsp.asm
@@ -190,3 +190,40 @@ INIT_XMM sse
VECTOR_FMUL_ADD
INIT_YMM avx
VECTOR_FMUL_ADD
+
+;-----------------------------------------------------------------------------
+; void vector_fmul_reverse(float *dst, const float *src0, const float *src1,
+; int len)
+;-----------------------------------------------------------------------------
+%macro VECTOR_FMUL_REVERSE 0
+cglobal vector_fmul_reverse, 4,4,2, dst, src0, src1, len
+ lea lenq, [lend*4 - 2*mmsize]
+ALIGN 16
+.loop:
+%if cpuflag(avx)
+ vmovaps xmm0, [src1q + 16]
+ vinsertf128 m0, m0, [src1q], 1
+ vshufps m0, m0, m0, q0123
+ vmovaps xmm1, [src1q + mmsize + 16]
+ vinsertf128 m1, m1, [src1q + mmsize], 1
+ vshufps m1, m1, m1, q0123
+%else
+ mova m0, [src1q]
+ mova m1, [src1q + mmsize]
+ shufps m0, m0, q0123
+ shufps m1, m1, q0123
+%endif
+ mulps m0, m0, [src0q + lenq + mmsize]
+ mulps m1, m1, [src0q + lenq]
+ mova [dstq + lenq + mmsize], m0
+ mova [dstq + lenq], m1
+ add src1q, 2*mmsize
+ sub lenq, 2*mmsize
+ jge .loop
+ REP_RET
+%endmacro
+
+INIT_XMM sse
+VECTOR_FMUL_REVERSE
+INIT_YMM avx
+VECTOR_FMUL_REVERSE
diff --git a/libavutil/x86/float_dsp_init.c b/libavutil/x86/float_dsp_init.c
index 0a2ad5ba0e..9f63b4c057 100644
--- a/libavutil/x86/float_dsp_init.c
+++ b/libavutil/x86/float_dsp_init.c
@@ -46,6 +46,11 @@ void ff_vector_fmul_add_sse(float *dst, const float *src0, const float *src1,
void ff_vector_fmul_add_avx(float *dst, const float *src0, const float *src1,
const float *src2, int len);
+void ff_vector_fmul_reverse_sse(float *dst, const float *src0,
+ const float *src1, int len);
+void ff_vector_fmul_reverse_avx(float *dst, const float *src0,
+ const float *src1, int len);
+
#if HAVE_6REGS && HAVE_INLINE_ASM
static void vector_fmul_window_3dnowext(float *dst, const float *src0,
const float *src1, const float *win,
@@ -129,6 +134,7 @@ void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_sse;
fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_sse;
fdsp->vector_fmul_add = ff_vector_fmul_add_sse;
+ fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_sse;
}
if (EXTERNAL_SSE2(mm_flags)) {
fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2;
@@ -138,5 +144,6 @@ void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_avx;
fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_avx;
fdsp->vector_fmul_add = ff_vector_fmul_add_avx;
+ fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_avx;
}
}