summary | refs | log | tree | commit | diff
path: root/libavutil/arm
diff options
context:
space:
mode:
author: Michael Niedermayer <michaelni@gmx.at> 2012-11-27 13:39:52 +0100
committer: Michael Niedermayer <michaelni@gmx.at> 2012-11-27 13:39:52 +0100
commit: 2684d2e3ea8a1a2863ae9842d072341b44b09829 (patch)
tree: 3d116023d439f92f473480c8f68f6d27f9f62344 /libavutil/arm
parent: 257196209fe7d27ad22e18bf5757ffcad47dce6b (diff)
parent: 284ea790d89441fa1e6b2d72d3c1ed6d61972f0b (diff)
Merge commit '284ea790d89441fa1e6b2d72d3c1ed6d61972f0b'
* commit '284ea790d89441fa1e6b2d72d3c1ed6d61972f0b':
  dsputil: move vector_fmul_scalar() to AVFloatDSPContext in libavutil
  aacenc: use the correct output buffer
  aacdec: fix signed overflows in lcg_random()
  base64: fix signed overflow in shift

Conflicts:
  libavcodec/dsputil.c
  libavutil/base64.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavutil/arm')
-rw-r--r-- libavutil/arm/float_dsp_init_neon.c | 4
-rw-r--r-- libavutil/arm/float_dsp_neon.S | 38
2 files changed, 42 insertions, 0 deletions
diff --git a/libavutil/arm/float_dsp_init_neon.c b/libavutil/arm/float_dsp_init_neon.c
index 3ca0288b31..88eb4b3d2a 100644
--- a/libavutil/arm/float_dsp_init_neon.c
+++ b/libavutil/arm/float_dsp_init_neon.c
@@ -29,8 +29,12 @@ void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int l
void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul,
int len);
+void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul,
+ int len);
+
void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp)
{
fdsp->vector_fmul = ff_vector_fmul_neon;
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_neon;
+ fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_neon; /* install the NEON scalar-multiply routine that this commit adds to float_dsp_neon.S */
}
diff --git a/libavutil/arm/float_dsp_neon.S b/libavutil/arm/float_dsp_neon.S
index 4aa6f838dd..6d7bd5236e 100644
--- a/libavutil/arm/float_dsp_neon.S
+++ b/libavutil/arm/float_dsp_neon.S
@@ -108,3 +108,41 @@ NOVFP vdup.32 q15, r2
bx lr
.unreq len
endfunc
+
+function ff_vector_fmul_scalar_neon, export=1 @ dst[i] = src[i] * mul; C prototype is (float *dst, const float *src, float mul, int len)
+VFP len .req r2 @ variant where mul is taken from d0 (presumably the hard-float ABI): len is in r2
+NOVFP len .req r3 @ variant where mul is taken from r2 (presumably soft-float): len is in r3
+VFP vdup.32 q8, d0[0] @ broadcast mul from the low lane of d0 into all four lanes of q8
+NOVFP vdup.32 q8, r2 @ same broadcast, with the bits of mul coming from r2
+ bics r12, len, #15 @ r12 = len with the low four bits cleared (count handled 16 at a time); sets flags
+ beq 3f @ no full group of 16: go straight to the 4-wide loop
+ vld1.32 {q0},[r1,:128]! @ preload the first 8 floats; :128 tells the CPU the address is 16-byte aligned
+ vld1.32 {q1},[r1,:128]!
+1: vmul.f32 q0, q0, q8 @ main loop: 16 floats per pass, loads and stores interleaved with the multiplies
+ vld1.32 {q2},[r1,:128]!
+ vmul.f32 q1, q1, q8
+ vld1.32 {q3},[r1,:128]!
+ vmul.f32 q2, q2, q8
+ vst1.32 {q0},[r0,:128]!
+ vmul.f32 q3, q3, q8
+ vst1.32 {q1},[r0,:128]!
+ subs r12, r12, #16 @ one group of 16 consumed
+ beq 2f @ that was the last group: skip the preload for a next pass
+ vld1.32 {q0},[r1,:128]! @ preload the next q0/q1 while the q2/q3 results are written out
+ vst1.32 {q2},[r0,:128]!
+ vld1.32 {q1},[r1,:128]!
+ vst1.32 {q3},[r0,:128]!
+ b 1b
+2: vst1.32 {q2},[r0,:128]! @ write out the two result vectors still pending from the last pass
+ vst1.32 {q3},[r0,:128]!
+ ands len, len, #15 @ len = low four bits of len, the leftover count after the 16-wide groups
+ it eq @ IT prefix so the conditional bx below is valid when assembled as Thumb-2
+ bxeq lr @ nothing left over: return
+3: vld1.32 {q0},[r1,:128]! @ tail loop: one vector of 4 floats per pass
+ vmul.f32 q0, q0, q8
+ vst1.32 {q0},[r0,:128]!
+ subs len, len, #4
+ bgt 3b @ body runs before this test, so len == 0 or a non-multiple of 4 still handles a full vector; NOTE(review): callers presumably pass len > 0 and a multiple of 4, confirm
+ bx lr
+ .unreq len
+endfunc