summaryrefslogtreecommitdiff
path: root/libavutil/arm/float_dsp_vfp.S
diff options
context:
space:
mode:
authorMichael Niedermayer <michaelni@gmx.at>2013-01-23 14:04:50 +0100
committerMichael Niedermayer <michaelni@gmx.at>2013-01-23 14:04:50 +0100
commit6e6e1708984e45881b9a5d4e26c3e7de852c54d5 (patch)
tree5e04d38f8e152faf98921843ca5e4530cbdc46a4 /libavutil/arm/float_dsp_vfp.S
parentb1b870fbd7185bffbe27c5918001b40a8ff8b920 (diff)
parent42d324694883cdf1fff1612ac70fa403692a1ad4 (diff)
Merge commit '42d324694883cdf1fff1612ac70fa403692a1ad4'
* commit '42d324694883cdf1fff1612ac70fa403692a1ad4': floatdsp: move vector_fmul_reverse from dsputil to avfloatdsp. Conflicts: libavcodec/arm/dsputil_init_vfp.c libavcodec/arm/dsputil_vfp.S libavcodec/dsputil.c libavcodec/ppc/float_altivec.c libavcodec/x86/dsputil.asm libavutil/x86/float_dsp.asm Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavutil/arm/float_dsp_vfp.S')
-rw-r--r--libavutil/arm/float_dsp_vfp.S69
1 files changed, 69 insertions, 0 deletions
diff --git a/libavutil/arm/float_dsp_vfp.S b/libavutil/arm/float_dsp_vfp.S
index db63e5a675..8695fbd981 100644
--- a/libavutil/arm/float_dsp_vfp.S
+++ b/libavutil/arm/float_dsp_vfp.S
@@ -66,3 +66,72 @@ function ff_vector_fmul_vfp, export=1
vpop {d8-d15}
bx lr
endfunc
+
+/**
+ * ARM VFP optimized implementation of 'vector_fmul_reverse_c' function.
+ * Assume that len is a positive number and is multiple of 8
+ */
+@ void ff_vector_fmul_reverse_vfp(float *dst, const float *src0,
+@ const float *src1, int len)
+function ff_vector_fmul_reverse_vfp, export=1
+ vpush {d8-d15}
+ add r2, r2, r3, lsl #2
+ vldmdb r2!, {s0-s3}
+ vldmia r1!, {s8-s11}
+ vldmdb r2!, {s4-s7}
+ vldmia r1!, {s12-s15}
+ vmul.f32 s8, s3, s8
+ vmul.f32 s9, s2, s9
+ vmul.f32 s10, s1, s10
+ vmul.f32 s11, s0, s11
+1:
+ subs r3, r3, #16
+ it ge
+ vldmdbge r2!, {s16-s19}
+ vmul.f32 s12, s7, s12
+ it ge
+ vldmiage r1!, {s24-s27}
+ vmul.f32 s13, s6, s13
+ it ge
+ vldmdbge r2!, {s20-s23}
+ vmul.f32 s14, s5, s14
+ it ge
+ vldmiage r1!, {s28-s31}
+ vmul.f32 s15, s4, s15
+ it ge
+ vmulge.f32 s24, s19, s24
+ it gt
+ vldmdbgt r2!, {s0-s3}
+ it ge
+ vmulge.f32 s25, s18, s25
+ vstmia r0!, {s8-s13}
+ it ge
+ vmulge.f32 s26, s17, s26
+ it gt
+ vldmiagt r1!, {s8-s11}
+ itt ge
+ vmulge.f32 s27, s16, s27
+ vmulge.f32 s28, s23, s28
+ it gt
+ vldmdbgt r2!, {s4-s7}
+ it ge
+ vmulge.f32 s29, s22, s29
+ vstmia r0!, {s14-s15}
+ ittt ge
+ vmulge.f32 s30, s21, s30
+ vmulge.f32 s31, s20, s31
+ vmulge.f32 s8, s3, s8
+ it gt
+ vldmiagt r1!, {s12-s15}
+ itttt ge
+ vmulge.f32 s9, s2, s9
+ vmulge.f32 s10, s1, s10
+ vstmiage r0!, {s24-s27}
+ vmulge.f32 s11, s0, s11
+ it ge
+ vstmiage r0!, {s28-s31}
+ bgt 1b
+
+ vpop {d8-d15}
+ bx lr
+endfunc