summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- libavcodec/arm/dsputil_init_neon.c | 3
-rw-r--r-- libavcodec/arm/dsputil_neon.S | 48
-rw-r--r-- libavcodec/dca.c | 9
-rw-r--r-- libavcodec/dsputil.c | 9
-rw-r--r-- libavcodec/dsputil.h | 11
-rw-r--r-- libavutil/arm/float_dsp_init_neon.c | 4
-rw-r--r-- libavutil/arm/float_dsp_neon.S | 48
-rw-r--r-- libavutil/float_dsp.c | 9
-rw-r--r-- libavutil/float_dsp.h | 16
9 files changed, 82 insertions, 75 deletions
diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c
index 398326c8c8..65db20d2b3 100644
--- a/libavcodec/arm/dsputil_init_neon.c
+++ b/libavcodec/arm/dsputil_init_neon.c
@@ -154,8 +154,6 @@ void ff_vector_fmul_window_neon(float *dst, const float *src0,
const float *src1, const float *win, int len);
void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul,
int len);
-void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul,
- int len);
void ff_butterflies_float_neon(float *v1, float *v2, int len);
float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len);
void ff_vector_fmul_reverse_neon(float *dst, const float *src0,
@@ -329,7 +327,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
c->vector_fmul_window = ff_vector_fmul_window_neon;
c->vector_fmul_scalar = ff_vector_fmul_scalar_neon;
- c->vector_fmac_scalar = ff_vector_fmac_scalar_neon;
c->butterflies_float = ff_butterflies_float_neon;
c->scalarproduct_float = ff_scalarproduct_float_neon;
c->vector_fmul_reverse = ff_vector_fmul_reverse_neon;
diff --git a/libavcodec/arm/dsputil_neon.S b/libavcodec/arm/dsputil_neon.S
index 9a5a40d6ac..358ed61299 100644
--- a/libavcodec/arm/dsputil_neon.S
+++ b/libavcodec/arm/dsputil_neon.S
@@ -682,54 +682,6 @@ NOVFP vdup.32 q8, r2
.unreq len
endfunc
-function ff_vector_fmac_scalar_neon, export=1
-VFP len .req r2
-VFP acc .req r3
-NOVFP len .req r3
-NOVFP acc .req r2
-VFP vdup.32 q15, d0[0]
-NOVFP vdup.32 q15, r2
- bics r12, len, #15
- mov acc, r0
- beq 3f
- vld1.32 {q0}, [r1,:128]!
- vld1.32 {q8}, [acc,:128]!
- vld1.32 {q1}, [r1,:128]!
- vld1.32 {q9}, [acc,:128]!
-1: vmla.f32 q8, q0, q15
- vld1.32 {q2}, [r1,:128]!
- vld1.32 {q10}, [acc,:128]!
- vmla.f32 q9, q1, q15
- vld1.32 {q3}, [r1,:128]!
- vld1.32 {q11}, [acc,:128]!
- vmla.f32 q10, q2, q15
- vst1.32 {q8}, [r0,:128]!
- vmla.f32 q11, q3, q15
- vst1.32 {q9}, [r0,:128]!
- subs r12, r12, #16
- beq 2f
- vld1.32 {q0}, [r1,:128]!
- vld1.32 {q8}, [acc,:128]!
- vst1.32 {q10}, [r0,:128]!
- vld1.32 {q1}, [r1,:128]!
- vld1.32 {q9}, [acc,:128]!
- vst1.32 {q11}, [r0,:128]!
- b 1b
-2: vst1.32 {q10}, [r0,:128]!
- vst1.32 {q11}, [r0,:128]!
- ands len, len, #15
- it eq
- bxeq lr
-3: vld1.32 {q0}, [r1,:128]!
- vld1.32 {q8}, [acc,:128]!
- vmla.f32 q8, q0, q15
- vst1.32 {q8}, [r0,:128]!
- subs len, len, #4
- bgt 3b
- bx lr
- .unreq len
-endfunc
-
function ff_butterflies_float_neon, export=1
1: vld1.32 {q0},[r0,:128]
vld1.32 {q1},[r1,:128]
diff --git a/libavcodec/dca.c b/libavcodec/dca.c
index 103f0588e3..b37dc49d3f 100644
--- a/libavcodec/dca.c
+++ b/libavcodec/dca.c
@@ -27,6 +27,7 @@
#include <stdio.h>
#include "libavutil/common.h"
+#include "libavutil/float_dsp.h"
#include "libavutil/intmath.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/mathematics.h"
@@ -383,7 +384,7 @@ typedef struct {
int profile;
int debug_flag; ///< used for suppressing repeated error messages output
- DSPContext dsp;
+ AVFloatDSPContext fdsp;
FFTContext imdct;
SynthFilterContext synth;
DCADSPContext dcadsp;
@@ -1865,8 +1866,8 @@ static int dca_decode_frame(AVCodecContext *avctx, void *data,
float *back_chan = s->samples + s->channel_order_tab[s->xch_base_channel] * 256;
float *lt_chan = s->samples + s->channel_order_tab[s->xch_base_channel - 2] * 256;
float *rt_chan = s->samples + s->channel_order_tab[s->xch_base_channel - 1] * 256;
- s->dsp.vector_fmac_scalar(lt_chan, back_chan, -M_SQRT1_2, 256);
- s->dsp.vector_fmac_scalar(rt_chan, back_chan, -M_SQRT1_2, 256);
+ s->fdsp.vector_fmac_scalar(lt_chan, back_chan, -M_SQRT1_2, 256);
+ s->fdsp.vector_fmac_scalar(rt_chan, back_chan, -M_SQRT1_2, 256);
}
if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT) {
@@ -1908,7 +1909,7 @@ static av_cold int dca_decode_init(AVCodecContext *avctx)
s->avctx = avctx;
dca_init_vlcs();
- ff_dsputil_init(&s->dsp, avctx);
+ avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
ff_mdct_init(&s->imdct, 6, 1, 1.0);
ff_synth_filter_init(&s->synth);
ff_dcadsp_init(&s->dcadsp);
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 942f606ea8..15f184e406 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -2401,14 +2401,6 @@ static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
dst[i] = src[i] * mul;
}
-static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
- int len)
-{
- int i;
- for (i = 0; i < len; i++)
- dst[i] += src[i] * mul;
-}
-
static void butterflies_float_c(float *restrict v1, float *restrict v2,
int len)
{
@@ -2904,7 +2896,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->butterflies_float = butterflies_float_c;
c->butterflies_float_interleave = butterflies_float_interleave_c;
c->vector_fmul_scalar = vector_fmul_scalar_c;
- c->vector_fmac_scalar = vector_fmac_scalar_c;
c->shrink[0]= av_image_copy_plane;
c->shrink[1]= ff_shrink22;
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index ec3d7ee007..e54ae69831 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -417,17 +417,6 @@ typedef struct DSPContext {
void (*vector_fmul_scalar)(float *dst, const float *src, float mul,
int len);
/**
- * Multiply a vector of floats by a scalar float and add to
- * destination vector. Source and destination vectors must
- * overlap exactly or not at all.
- * @param dst result vector, 16-byte aligned
- * @param src input vector, 16-byte aligned
- * @param mul scalar value
- * @param len length of vector, multiple of 4
- */
- void (*vector_fmac_scalar)(float *dst, const float *src, float mul,
- int len);
- /**
* Calculate the scalar product of two vectors of floats.
* @param v1 first vector, 16-byte aligned
* @param v2 second vector, 16-byte aligned
diff --git a/libavutil/arm/float_dsp_init_neon.c b/libavutil/arm/float_dsp_init_neon.c
index fa6d0d7d15..3ca0288b31 100644
--- a/libavutil/arm/float_dsp_init_neon.c
+++ b/libavutil/arm/float_dsp_init_neon.c
@@ -26,7 +26,11 @@
void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int len);
+void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul,
+ int len);
+
void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp)
{
fdsp->vector_fmul = ff_vector_fmul_neon;
+ fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_neon;
}
diff --git a/libavutil/arm/float_dsp_neon.S b/libavutil/arm/float_dsp_neon.S
index d66fa09424..03b164388f 100644
--- a/libavutil/arm/float_dsp_neon.S
+++ b/libavutil/arm/float_dsp_neon.S
@@ -62,3 +62,51 @@ function ff_vector_fmul_neon, export=1
3: vst1.32 {d16-d19},[r0,:128]!
bx lr
endfunc
+
+function ff_vector_fmac_scalar_neon, export=1
+VFP len .req r2
+VFP acc .req r3
+NOVFP len .req r3
+NOVFP acc .req r2
+VFP vdup.32 q15, d0[0]
+NOVFP vdup.32 q15, r2
+ bics r12, len, #15
+ mov acc, r0
+ beq 3f
+ vld1.32 {q0}, [r1,:128]!
+ vld1.32 {q8}, [acc,:128]!
+ vld1.32 {q1}, [r1,:128]!
+ vld1.32 {q9}, [acc,:128]!
+1: vmla.f32 q8, q0, q15
+ vld1.32 {q2}, [r1,:128]!
+ vld1.32 {q10}, [acc,:128]!
+ vmla.f32 q9, q1, q15
+ vld1.32 {q3}, [r1,:128]!
+ vld1.32 {q11}, [acc,:128]!
+ vmla.f32 q10, q2, q15
+ vst1.32 {q8}, [r0,:128]!
+ vmla.f32 q11, q3, q15
+ vst1.32 {q9}, [r0,:128]!
+ subs r12, r12, #16
+ beq 2f
+ vld1.32 {q0}, [r1,:128]!
+ vld1.32 {q8}, [acc,:128]!
+ vst1.32 {q10}, [r0,:128]!
+ vld1.32 {q1}, [r1,:128]!
+ vld1.32 {q9}, [acc,:128]!
+ vst1.32 {q11}, [r0,:128]!
+ b 1b
+2: vst1.32 {q10}, [r0,:128]!
+ vst1.32 {q11}, [r0,:128]!
+ ands len, len, #15
+ it eq
+ bxeq lr
+3: vld1.32 {q0}, [r1,:128]!
+ vld1.32 {q8}, [acc,:128]!
+ vmla.f32 q8, q0, q15
+ vst1.32 {q8}, [r0,:128]!
+ subs len, len, #4
+ bgt 3b
+ bx lr
+ .unreq len
+endfunc
diff --git a/libavutil/float_dsp.c b/libavutil/float_dsp.c
index 039dd07d36..2e90939090 100644
--- a/libavutil/float_dsp.c
+++ b/libavutil/float_dsp.c
@@ -28,9 +28,18 @@ static void vector_fmul_c(float *dst, const float *src0, const float *src1,
dst[i] = src0[i] * src1[i];
}
+static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
+ int len)
+{
+ int i;
+ for (i = 0; i < len; i++)
+ dst[i] += src[i] * mul;
+}
+
void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact)
{
fdsp->vector_fmul = vector_fmul_c;
+ fdsp->vector_fmac_scalar = vector_fmac_scalar_c;
#if ARCH_ARM
ff_float_dsp_init_arm(fdsp);
diff --git a/libavutil/float_dsp.h b/libavutil/float_dsp.h
index 30161a252b..4e266304da 100644
--- a/libavutil/float_dsp.h
+++ b/libavutil/float_dsp.h
@@ -35,6 +35,22 @@ typedef struct AVFloatDSPContext {
*/
void (*vector_fmul)(float *dst, const float *src0, const float *src1,
int len);
+
+ /**
+ * Multiply a vector of floats by a scalar float and add to
+ * destination vector. Source and destination vectors must
+ * overlap exactly or not at all.
+ *
+ * @param dst result vector
+ * constraints: 16-byte aligned
+ * @param src input vector
+ * constraints: 16-byte aligned
+ * @param mul scalar value
+ * @param len length of vector
+ * constraints: multiple of 4
+ */
+ void (*vector_fmac_scalar)(float *dst, const float *src, float mul,
+ int len);
} AVFloatDSPContext;
/**