summaryrefslogtreecommitdiff
path: root/libavutil/x86/float_dsp.asm
diff options
context:
space:
mode:
author Michael Niedermayer <michaelni@gmx.at> 2013-01-23 14:04:50 +0100
committer Michael Niedermayer <michaelni@gmx.at> 2013-01-23 14:04:50 +0100
commit 6e6e1708984e45881b9a5d4e26c3e7de852c54d5 (patch)
tree 5e04d38f8e152faf98921843ca5e4530cbdc46a4 /libavutil/x86/float_dsp.asm
parent b1b870fbd7185bffbe27c5918001b40a8ff8b920 (diff)
parent 42d324694883cdf1fff1612ac70fa403692a1ad4 (diff)
Merge commit '42d324694883cdf1fff1612ac70fa403692a1ad4'
* commit '42d324694883cdf1fff1612ac70fa403692a1ad4':
  floatdsp: move vector_fmul_reverse from dsputil to avfloatdsp.

Conflicts:
  libavcodec/arm/dsputil_init_vfp.c
  libavcodec/arm/dsputil_vfp.S
  libavcodec/dsputil.c
  libavcodec/ppc/float_altivec.c
  libavcodec/x86/dsputil.asm
  libavutil/x86/float_dsp.asm

Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavutil/x86/float_dsp.asm')
-rw-r--r-- libavutil/x86/float_dsp.asm 39
1 files changed, 39 insertions, 0 deletions
diff --git a/libavutil/x86/float_dsp.asm b/libavutil/x86/float_dsp.asm
index f69fc6b00a..3e5e91ad07 100644
--- a/libavutil/x86/float_dsp.asm
+++ b/libavutil/x86/float_dsp.asm
@@ -198,3 +198,42 @@ VECTOR_FMUL_ADD
INIT_YMM avx
VECTOR_FMUL_ADD
%endif
+
+;-----------------------------------------------------------------------------
+; void vector_fmul_reverse(float *dst, const float *src0, const float *src1,
+; int len)
+;-----------------------------------------------------------------------------
+%macro VECTOR_FMUL_REVERSE 0 ; emits vector_fmul_reverse: dst/src0 are walked from the end, src1 from the start with its floats reversed, so each src0 float is multiplied by the mirrored src1 float
+cglobal vector_fmul_reverse, 4,4,2, dst, src0, src1, len ; named args dst, src0, src1, len; body uses vector regs m0 and m1
+ lea lenq, [lend*4 - 2*mmsize] ; lenq = byte offset of the last 2*mmsize-byte block (len floats * 4 bytes each)
+ALIGN 16 ; align the loop entry
+.loop: ; each pass handles 2*mmsize bytes; NOTE(review): presumably len must be a positive multiple of 2*mmsize/4 floats -- confirm with callers
+%if cpuflag(avx) ; AVX path: build each reversed 256-bit vector from two 128-bit halves
+ vmovaps xmm0, [src1q + 16] ; low lane of m0 = second 16 bytes at src1
+ vinsertf128 m0, m0, [src1q], 1 ; high lane of m0 = first 16 bytes at src1 (swaps the two halves)
+ vshufps m0, m0, m0, q0123 ; q0123 is defined outside this view; presumably reverses the 4 floats within each lane -- confirm
+ vmovaps xmm1, [src1q + mmsize + 16] ; same half-swap for the next mmsize bytes of src1
+ vinsertf128 m1, m1, [src1q + mmsize], 1 ; high lane of m1 = lower 16 bytes of that second chunk
+ vshufps m1, m1, m1, q0123 ; per-lane shuffle as for m0
+%else ; non-AVX path: one register-wide load per vector, then shuffle
+ mova m0, [src1q] ; first mmsize bytes at src1
+ mova m1, [src1q + mmsize] ; next mmsize bytes at src1
+ shufps m0, m0, q0123 ; same q0123 selector as the AVX path
+ shufps m1, m1, q0123 ; likewise for the second vector
+%endif
+ mulps m0, m0, [src0q + lenq + mmsize] ; shuffled first src1 vector * upper half of the current src0 block
+ mulps m1, m1, [src0q + lenq] ; shuffled second src1 vector * lower half of the current src0 block
+ mova [dstq + lenq + mmsize], m0 ; store at the same offsets in dst that were read from src0
+ mova [dstq + lenq], m1 ; lower half of the current dst block
+ add src1q, 2*mmsize ; src1 advances forward
+ sub lenq, 2*mmsize ; src0/dst offset moves backward by one block
+ jge .loop ; continue while the offset is still >= 0 (signed test on the sub result)
+ REP_RET ; return; REP_RET is defined outside this view
+%endmacro
+
+INIT_XMM sse ; select the sse configuration for the expansion below (takes the %else branch of the macro)
+VECTOR_FMUL_REVERSE ; expand the macro once for sse
+%if HAVE_AVX_EXTERNAL ; AVX variant is only assembled when this build flag is set
+INIT_YMM avx ; select the avx configuration (takes the cpuflag(avx) branch of the macro)
+VECTOR_FMUL_REVERSE ; expand the macro a second time for avx
+%endif \ No newline at end of file