summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--libavcodec/acelp_pitch_delay.c2
-rw-r--r--libavcodec/arm/dsputil_init_neon.c3
-rw-r--r--libavcodec/arm/int_neon.S28
-rw-r--r--libavcodec/dsputil.c4
-rw-r--r--libavcodec/dsputil.h3
-rw-r--r--libavcodec/x86/dsputil_yasm.asm7
6 files changed, 9 insertions, 38 deletions
diff --git a/libavcodec/acelp_pitch_delay.c b/libavcodec/acelp_pitch_delay.c
index 395247dd2a..214a272c32 100644
--- a/libavcodec/acelp_pitch_delay.c
+++ b/libavcodec/acelp_pitch_delay.c
@@ -106,7 +106,7 @@ int16_t ff_acelp_decode_gain_code(
mr_energy += quant_energy[i] * ma_prediction_coeff[i];
mr_energy = gain_corr_factor * exp(M_LN10 / (20 << 23) * mr_energy) /
- sqrt(dsp->scalarproduct_int16(fc_v, fc_v, subframe_size, 0));
+ sqrt(dsp->scalarproduct_int16(fc_v, fc_v, subframe_size));
return mr_energy >> 12;
}
diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c
index 68e5b3ed42..b2931fe525 100644
--- a/libavcodec/arm/dsputil_init_neon.c
+++ b/libavcodec/arm/dsputil_init_neon.c
@@ -171,8 +171,7 @@ void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min,
void ff_vorbis_inverse_coupling_neon(float *mag, float *ang, int blocksize);
-int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int len,
- int shift);
+int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int len);
int32_t ff_scalarproduct_and_madd_int16_neon(int16_t *v1, const int16_t *v2,
const int16_t *v3, int len, int mul);
diff --git a/libavcodec/arm/int_neon.S b/libavcodec/arm/int_neon.S
index 8bb58afb18..ea479bb580 100644
--- a/libavcodec/arm/int_neon.S
+++ b/libavcodec/arm/int_neon.S
@@ -29,32 +29,8 @@ function ff_scalarproduct_int16_neon, export=1
vmov.i16 q1, #0
vmov.i16 q2, #0
vmov.i16 q3, #0
- negs r3, r3
- beq 2f
-
- vdup.s32 q12, r3
1: vld1.16 {d16-d17}, [r0]!
vld1.16 {d20-d21}, [r1,:128]!
- vmull.s16 q12, d16, d20
- vld1.16 {d18-d19}, [r0]!
- vmull.s16 q13, d17, d21
- vld1.16 {d22-d23}, [r1,:128]!
- vmull.s16 q14, d18, d22
- vmull.s16 q15, d19, d23
- vshl.s32 q8, q12, q12
- vshl.s32 q9, q13, q12
- vadd.s32 q0, q0, q8
- vshl.s32 q10, q14, q12
- vadd.s32 q1, q1, q9
- vshl.s32 q11, q15, q12
- vadd.s32 q2, q2, q10
- vadd.s32 q3, q3, q11
- subs r2, r2, #16
- bne 1b
- b 3f
-
-2: vld1.16 {d16-d17}, [r0]!
- vld1.16 {d20-d21}, [r1,:128]!
vmlal.s16 q0, d16, d20
vld1.16 {d18-d19}, [r0]!
vmlal.s16 q1, d17, d21
@@ -62,9 +38,9 @@ function ff_scalarproduct_int16_neon, export=1
vmlal.s16 q2, d18, d22
vmlal.s16 q3, d19, d23
subs r2, r2, #16
- bne 2b
+ bne 1b
-3: vpadd.s32 d16, d0, d1
+ vpadd.s32 d16, d0, d1
vpadd.s32 d17, d2, d3
vpadd.s32 d10, d4, d5
vpadd.s32 d11, d6, d7
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 29c5976596..f5b7d076d1 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -2559,12 +2559,12 @@ static void vector_clipf_c(float *dst, const float *src, float min, float max, i
}
}
-static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order, int shift)
+static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order)
{
int res = 0;
while (order--)
- res += (*v1++ * *v2++) >> shift;
+ res += *v1++ * *v2++;
return res;
}
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 0a6165685e..aa026e15f5 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -536,9 +536,8 @@ typedef struct DSPContext {
/**
* Calculate scalar product of two vectors.
* @param len length of vectors, should be multiple of 16
- * @param shift number of bits to discard from product
*/
- int32_t (*scalarproduct_int16)(const int16_t *v1, const int16_t *v2/*align 16*/, int len, int shift);
+ int32_t (*scalarproduct_int16)(const int16_t *v1, const int16_t *v2/*align 16*/, int len);
/* ape functions */
/**
* Calculate scalar product of v1 and v2,
diff --git a/libavcodec/x86/dsputil_yasm.asm b/libavcodec/x86/dsputil_yasm.asm
index 09940d147d..da08bdab50 100644
--- a/libavcodec/x86/dsputil_yasm.asm
+++ b/libavcodec/x86/dsputil_yasm.asm
@@ -35,13 +35,12 @@ pb_bswap32: db 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
SECTION_TEXT
%macro SCALARPRODUCT 1
-; int scalarproduct_int16(int16_t *v1, int16_t *v2, int order, int shift)
-cglobal scalarproduct_int16_%1, 3,3,4, v1, v2, order, shift
+; int scalarproduct_int16(int16_t *v1, int16_t *v2, int order)
+cglobal scalarproduct_int16_%1, 3,3,3, v1, v2, order
shl orderq, 1
add v1q, orderq
add v2q, orderq
neg orderq
- movd m3, shiftm
pxor m2, m2
.loop:
movu m0, [v1q + orderq]
@@ -55,10 +54,8 @@ cglobal scalarproduct_int16_%1, 3,3,4, v1, v2, order, shift
%if mmsize == 16
movhlps m0, m2
paddd m2, m0
- psrad m2, m3
pshuflw m0, m2, 0x4e
%else
- psrad m2, m3
pshufw m0, m2, 0x4e
%endif
paddd m2, m0