summary | refs | log | tree | commit | diff
path: root/libavcodec/arm
diff options
context:
space:
mode:
author: Ronald S. Bultje <rsbultje@gmail.com> 2013-01-19 22:21:10 -0800
committer: Ronald S. Bultje <rsbultje@gmail.com> 2013-01-19 22:21:10 -0800
commit: fef906c77c09940a2fdad155b2adc05080e17eda (patch)
tree: 04fe0b67be6917b07bfb94a6af45b669f3a66107 /libavcodec/arm
parent: aeaf268e52fc11c1f64914a319e0edddf1346d6a (diff)
Move vorbis_inverse_coupling from dsputil to vorbisdspcontext.
Conveniently (together with Justin's earlier patches), this makes our vorbis decoder entirely independent of dsputil.
Diffstat (limited to 'libavcodec/arm')
-rw-r--r-- libavcodec/arm/Makefile 3
-rw-r--r-- libavcodec/arm/dsputil_init_neon.c 5
-rw-r--r-- libavcodec/arm/dsputil_neon.S 64
-rw-r--r-- libavcodec/arm/vorbisdsp_init_arm.c 36
-rw-r--r-- libavcodec/arm/vorbisdsp_neon.S 83
5 files changed, 122 insertions, 69 deletions
diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
index ac486f47dc..71048f9c4c 100644
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@@ -15,6 +15,7 @@ OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_init_arm.o
ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o
OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_arm.o
+OBJS-$(CONFIG_VORBIS_DECODER) += arm/vorbisdsp_init_arm.o
OBJS-$(CONFIG_VP3DSP) += arm/vp3dsp_init_arm.o
OBJS-$(CONFIG_VP5_DECODER) += arm/vp56dsp_init_arm.o
OBJS-$(CONFIG_VP6_DECODER) += arm/vp56dsp_init_arm.o
@@ -86,6 +87,8 @@ NEON-OBJS-$(CONFIG_RV40_DECODER) += arm/rv34dsp_neon.o \
arm/rv40dsp_neon.o \
arm/h264cmc_neon.o \
+NEON-OBJS-$(CONFIG_VORBIS_DECODER) += arm/vorbisdsp_neon.o
+
NEON-OBJS-$(CONFIG_VP3DSP) += arm/vp3dsp_neon.o
NEON-OBJS-$(CONFIG_VP5_DECODER) += arm/vp56dsp_neon.o \
diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c
index 34bb6191f1..ee0e9afa88 100644
--- a/libavcodec/arm/dsputil_init_neon.c
+++ b/libavcodec/arm/dsputil_init_neon.c
@@ -154,8 +154,6 @@ void ff_vector_clipf_neon(float *dst, const float *src, float min, float max,
void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min,
int32_t max, unsigned int len);
-void ff_vorbis_inverse_coupling_neon(float *mag, float *ang, int blocksize);
-
int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int len);
int32_t ff_scalarproduct_and_madd_int16_neon(int16_t *v1, const int16_t *v2,
const int16_t *v3, int len, int mul);
@@ -307,9 +305,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
c->vector_clipf = ff_vector_clipf_neon;
c->vector_clip_int32 = ff_vector_clip_int32_neon;
- if (CONFIG_VORBIS_DECODER)
- c->vorbis_inverse_coupling = ff_vorbis_inverse_coupling_neon;
-
c->scalarproduct_int16 = ff_scalarproduct_int16_neon;
c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_neon;
diff --git a/libavcodec/arm/dsputil_neon.S b/libavcodec/arm/dsputil_neon.S
index a0d201cd85..ebc70ac109 100644
--- a/libavcodec/arm/dsputil_neon.S
+++ b/libavcodec/arm/dsputil_neon.S
@@ -19,7 +19,6 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#include "config.h"
#include "libavutil/arm/asm.S"
function ff_clear_block_neon, export=1
@@ -532,69 +531,6 @@ function ff_add_pixels_clamped_neon, export=1
bx lr
endfunc
-#if CONFIG_VORBIS_DECODER
-function ff_vorbis_inverse_coupling_neon, export=1
- vmov.i32 q10, #1<<31
- subs r2, r2, #4
- mov r3, r0
- mov r12, r1
- beq 3f
-
- vld1.32 {d24-d25},[r1,:128]!
- vld1.32 {d22-d23},[r0,:128]!
- vcle.s32 q8, q12, #0
- vand q9, q11, q10
- veor q12, q12, q9
- vand q2, q12, q8
- vbic q3, q12, q8
- vadd.f32 q12, q11, q2
- vsub.f32 q11, q11, q3
-1: vld1.32 {d2-d3}, [r1,:128]!
- vld1.32 {d0-d1}, [r0,:128]!
- vcle.s32 q8, q1, #0
- vand q9, q0, q10
- veor q1, q1, q9
- vst1.32 {d24-d25},[r3, :128]!
- vst1.32 {d22-d23},[r12,:128]!
- vand q2, q1, q8
- vbic q3, q1, q8
- vadd.f32 q1, q0, q2
- vsub.f32 q0, q0, q3
- subs r2, r2, #8
- ble 2f
- vld1.32 {d24-d25},[r1,:128]!
- vld1.32 {d22-d23},[r0,:128]!
- vcle.s32 q8, q12, #0
- vand q9, q11, q10
- veor q12, q12, q9
- vst1.32 {d2-d3}, [r3, :128]!
- vst1.32 {d0-d1}, [r12,:128]!
- vand q2, q12, q8
- vbic q3, q12, q8
- vadd.f32 q12, q11, q2
- vsub.f32 q11, q11, q3
- b 1b
-
-2: vst1.32 {d2-d3}, [r3, :128]!
- vst1.32 {d0-d1}, [r12,:128]!
- it lt
- bxlt lr
-
-3: vld1.32 {d2-d3}, [r1,:128]
- vld1.32 {d0-d1}, [r0,:128]
- vcle.s32 q8, q1, #0
- vand q9, q0, q10
- veor q1, q1, q9
- vand q2, q1, q8
- vbic q3, q1, q8
- vadd.f32 q1, q0, q2
- vsub.f32 q0, q0, q3
- vst1.32 {d2-d3}, [r0,:128]!
- vst1.32 {d0-d1}, [r1,:128]!
- bx lr
-endfunc
-#endif
-
function ff_butterflies_float_neon, export=1
1: vld1.32 {q0},[r0,:128]
vld1.32 {q1},[r1,:128]
diff --git a/libavcodec/arm/vorbisdsp_init_arm.c b/libavcodec/arm/vorbisdsp_init_arm.c
new file mode 100644
index 0000000000..ec8fbd5bc9
--- /dev/null
+++ b/libavcodec/arm/vorbisdsp_init_arm.c
@@ -0,0 +1,36 @@
+/*
+ * ARM NEON optimised DSP functions
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/arm/cpu.h"
+#include "libavcodec/vorbisdsp.h"
+
+void ff_vorbis_inverse_coupling_neon(float *mag, float *ang, int blocksize);
+
+void ff_vorbisdsp_init_arm(VorbisDSPContext *c) /* Install ARM-specific implementations into the function pointers of *c. */
+{ /* NOTE(review): libavutil/attributes.h is included by this file but nothing from it appears to be used here - confirm whether an attribute such as av_cold was intended. */
+ int cpu_flags = av_get_cpu_flags(); /* fetched once; every capability test below uses this value */
+
+ if (have_neon(cpu_flags)) { /* override only when have_neon() accepts the flags; otherwise *c is left as the caller set it */
+ c->vorbis_inverse_coupling = ff_vorbis_inverse_coupling_neon; /* routine is implemented in assembly (vorbisdsp_neon.S) */
+ }
+}
diff --git a/libavcodec/arm/vorbisdsp_neon.S b/libavcodec/arm/vorbisdsp_neon.S
new file mode 100644
index 0000000000..7df876c2bc
--- /dev/null
+++ b/libavcodec/arm/vorbisdsp_neon.S
@@ -0,0 +1,83 @@
+/*
+ * ARM NEON optimised DSP functions
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/arm/asm.S"
+
+function ff_vorbis_inverse_coupling_neon, export=1 /* In-place update of two float arrays, 4 lanes per step, software-pipelined 8 elements per loop pass. Presumably r0 = mag, r1 = ang, r2 = blocksize per the C prototype - TODO confirm ABI mapping. */
+ vmov.i32 q10, #1<<31 /* q10 = 0x80000000 in every 32-bit lane (single-precision sign bit) */
+ subs r2, r2, #4 /* r2 -= 4 and set flags. NOTE(review): appears to assume the count is a multiple of 4 and >= 4 - confirm with callers. */
+ mov r3, r0 /* r3 = store cursor for the r0 array; r0 itself advances as the load cursor */
+ mov r12, r1 /* r12 = store cursor for the r1 array; r1 itself advances as the load cursor */
+ beq 3f /* exactly 4 elements: skip the pipelined loop and use the tail */
+
+ vld1.32 {d24-d25},[r1,:128]! /* prologue: q12 = 4 values from [r1], post-increment; :128 asserts 16-byte alignment */
+ vld1.32 {d22-d23},[r0,:128]! /* q11 = 4 values from [r0], post-increment */
+ vcle.s32 q8, q12, #0 /* q8 = all-ones in lanes where the raw bits of q12, read as signed int, are <= 0 */
+ vand q9, q11, q10 /* q9 = sign bit of each q11 lane */
+ veor q12, q12, q9 /* negate q12 lanes whose q11 partner has its sign bit set */
+ vand q2, q12, q8 /* q2 = adjusted q12 where the mask is set, 0 elsewhere */
+ vbic q3, q12, q8 /* q3 = adjusted q12 where the mask is clear, 0 elsewhere */
+ vadd.f32 q12, q11, q2 /* q12 = q11 + q2: result that will be stored through r3 */
+ vsub.f32 q11, q11, q3 /* q11 = q11 - q3: result that will be stored through r12 */
+1: vld1.32 {d2-d3}, [r1,:128]! /* loop top: q1 = next 4 values from [r1] while the q12/q11 results are still pending */
+ vld1.32 {d0-d1}, [r0,:128]! /* q0 = next 4 values from [r0] */
+ vcle.s32 q8, q1, #0 /* same mask computation as the prologue, now for q1 */
+ vand q9, q0, q10 /* sign bits of q0 */
+ veor q1, q1, q9 /* conditional negate of q1 */
+ vst1.32 {d24-d25},[r3, :128]! /* write back the pending q12 result, interleaved with the arithmetic */
+ vst1.32 {d22-d23},[r12,:128]! /* write back the pending q11 result */
+ vand q2, q1, q8 /* masked-in part of q1 */
+ vbic q3, q1, q8 /* masked-out part of q1 */
+ vadd.f32 q1, q0, q2 /* q1 = result to be stored through r3 */
+ vsub.f32 q0, q0, q3 /* q0 = result to be stored through r12 */
+ subs r2, r2, #8 /* one full loop pass covers two groups of 4 */
+ ble 2f /* r2 <= 0: stop loading, go flush q1/q0 */
+ vld1.32 {d24-d25},[r1,:128]! /* second half of the pass: reload q12 from [r1] */
+ vld1.32 {d22-d23},[r0,:128]! /* reload q11 from [r0] */
+ vcle.s32 q8, q12, #0 /* mask for q12 */
+ vand q9, q11, q10 /* sign bits of q11 */
+ veor q12, q12, q9 /* conditional negate of q12 */
+ vst1.32 {d2-d3}, [r3, :128]! /* write back the pending q1 result */
+ vst1.32 {d0-d1}, [r12,:128]! /* write back the pending q0 result */
+ vand q2, q12, q8 /* masked-in part of q12 */
+ vbic q3, q12, q8 /* masked-out part of q12 */
+ vadd.f32 q12, q11, q2 /* q12 = result to be stored through r3 on the next pass */
+ vsub.f32 q11, q11, q3 /* q11 = result to be stored through r12 on the next pass */
+ b 1b /* back to loop top with q12/q11 pending */
+
+2: vst1.32 {d2-d3}, [r3, :128]! /* loop exit: flush the last computed q1 result */
+ vst1.32 {d0-d1}, [r12,:128]! /* flush the last computed q0 result; the conditional return below still tests the flags from the subs above */
+ it lt /* makes the following bxlt conditional in Thumb-2 builds */
+ bxlt lr /* r2 < 0: nothing left, return; r2 == 0 falls through to handle one more group */
+
+3: vld1.32 {d2-d3}, [r1,:128] /* tail: last 4 values from [r1], no writeback so r1 can be reused as the store address */
+ vld1.32 {d0-d1}, [r0,:128] /* last 4 values from [r0], no writeback */
+ vcle.s32 q8, q1, #0 /* mask for q1 */
+ vand q9, q0, q10 /* sign bits of q0 */
+ veor q1, q1, q9 /* conditional negate of q1 */
+ vand q2, q1, q8 /* masked-in part of q1 */
+ vbic q3, q1, q8 /* masked-out part of q1 */
+ vadd.f32 q1, q0, q2 /* result for the r0 array */
+ vsub.f32 q0, q0, q3 /* result for the r1 array */
+ vst1.32 {d2-d3}, [r0,:128]! /* store directly through r0 (same address the tail loaded from) */
+ vst1.32 {d0-d1}, [r1,:128]! /* store directly through r1 */
+ bx lr /* return */
+endfunc