summaryrefslogtreecommitdiff
path: root/libavcodec/x86
diff options
context:
space:
mode:
authorRonald S. Bultje <rsbultje@gmail.com>2013-01-19 22:21:10 -0800
committerRonald S. Bultje <rsbultje@gmail.com>2013-01-19 22:21:10 -0800
commitfef906c77c09940a2fdad155b2adc05080e17eda (patch)
tree04fe0b67be6917b07bfb94a6af45b669f3a66107 /libavcodec/x86
parentaeaf268e52fc11c1f64914a319e0edddf1346d6a (diff)
Move vorbis_inverse_coupling from dsputil to vorbisdspcontext.
Conveniently (together with Justin's earlier patches), this makes our vorbis decoder entirely independent of dsputil.
Diffstat (limited to 'libavcodec/x86')
-rw-r--r--libavcodec/x86/Makefile1
-rw-r--r--libavcodec/x86/dsputil_mmx.c63
-rw-r--r--libavcodec/x86/vorbisdsp_init.c101
3 files changed, 102 insertions, 63 deletions
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index b5a7694bcf..6069968a09 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -20,6 +20,7 @@ OBJS-$(CONFIG_RV40_DECODER) += x86/rv34dsp_init.o \
OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o
OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_init.o
OBJS-$(CONFIG_VIDEODSP) += x86/videodsp_init.o
+OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp_init.o
OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp_init.o
OBJS-$(CONFIG_VP5_DECODER) += x86/vp56dsp_init.o
OBJS-$(CONFIG_VP6_DECODER) += x86/vp56dsp_init.o
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index 13f215135a..74f7df5002 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -1829,65 +1829,6 @@ void ff_avg_vc1_mspel_mc00_mmxext(uint8_t *dst, const uint8_t *src,
avg_pixels8_mmxext(dst, src, stride, 8);
}
-static void vorbis_inverse_coupling_3dnow(float *mag, float *ang, int blocksize)
-{
- int i;
- __asm__ volatile ("pxor %%mm7, %%mm7":);
- for (i = 0; i < blocksize; i += 2) {
- __asm__ volatile (
- "movq %0, %%mm0 \n\t"
- "movq %1, %%mm1 \n\t"
- "movq %%mm0, %%mm2 \n\t"
- "movq %%mm1, %%mm3 \n\t"
- "pfcmpge %%mm7, %%mm2 \n\t" // m <= 0.0
- "pfcmpge %%mm7, %%mm3 \n\t" // a <= 0.0
- "pslld $31, %%mm2 \n\t" // keep only the sign bit
- "pxor %%mm2, %%mm1 \n\t"
- "movq %%mm3, %%mm4 \n\t"
- "pand %%mm1, %%mm3 \n\t"
- "pandn %%mm1, %%mm4 \n\t"
- "pfadd %%mm0, %%mm3 \n\t" // a = m + ((a < 0) & (a ^ sign(m)))
- "pfsub %%mm4, %%mm0 \n\t" // m = m + ((a > 0) & (a ^ sign(m)))
- "movq %%mm3, %1 \n\t"
- "movq %%mm0, %0 \n\t"
- : "+m"(mag[i]), "+m"(ang[i])
- :: "memory"
- );
- }
- __asm__ volatile ("femms");
-}
-
-static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize)
-{
- int i;
-
- __asm__ volatile (
- "movaps %0, %%xmm5 \n\t"
- :: "m"(ff_pdw_80000000[0])
- );
- for (i = 0; i < blocksize; i += 4) {
- __asm__ volatile (
- "movaps %0, %%xmm0 \n\t"
- "movaps %1, %%xmm1 \n\t"
- "xorps %%xmm2, %%xmm2 \n\t"
- "xorps %%xmm3, %%xmm3 \n\t"
- "cmpleps %%xmm0, %%xmm2 \n\t" // m <= 0.0
- "cmpleps %%xmm1, %%xmm3 \n\t" // a <= 0.0
- "andps %%xmm5, %%xmm2 \n\t" // keep only the sign bit
- "xorps %%xmm2, %%xmm1 \n\t"
- "movaps %%xmm3, %%xmm4 \n\t"
- "andps %%xmm1, %%xmm3 \n\t"
- "andnps %%xmm1, %%xmm4 \n\t"
- "addps %%xmm0, %%xmm3 \n\t" // a = m + ((a < 0) & (a ^ sign(m)))
- "subps %%xmm4, %%xmm0 \n\t" // m = m + ((a > 0) & (a ^ sign(m)))
- "movaps %%xmm3, %1 \n\t"
- "movaps %%xmm0, %0 \n\t"
- : "+m"(mag[i]), "+m"(ang[i])
- :: "memory"
- );
- }
-}
-
static void vector_clipf_sse(float *dst, const float *src,
float min, float max, int len)
{
@@ -2238,8 +2179,6 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx,
c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_exact_3dnow;
c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_3dnow;
}
-
- c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow;
#endif /* HAVE_INLINE_ASM */
#if HAVE_YASM
@@ -2263,8 +2202,6 @@ static void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, int mm_flags)
}
}
- c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse;
-
c->vector_clipf = vector_clipf_sse;
#endif /* HAVE_INLINE_ASM */
diff --git a/libavcodec/x86/vorbisdsp_init.c b/libavcodec/x86/vorbisdsp_init.c
new file mode 100644
index 0000000000..5243095003
--- /dev/null
+++ b/libavcodec/x86/vorbisdsp_init.c
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2006 Loren Merritt <lorenm@u.washington.edu>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/cpu.h"
+#include "libavcodec/vorbisdsp.h"
+#include "dsputil_mmx.h" // for ff_pdw_80000000
+
+#if HAVE_INLINE_ASM
+#if ARCH_X86_32
+static void vorbis_inverse_coupling_3dnow(float *mag, float *ang, int blocksize)
+{
+ int i;
+ __asm__ volatile ("pxor %%mm7, %%mm7":);
+ for (i = 0; i < blocksize; i += 2) {
+ __asm__ volatile (
+ "movq %0, %%mm0 \n\t"
+ "movq %1, %%mm1 \n\t"
+ "movq %%mm0, %%mm2 \n\t"
+ "movq %%mm1, %%mm3 \n\t"
+ "pfcmpge %%mm7, %%mm2 \n\t" // m <= 0.0
+ "pfcmpge %%mm7, %%mm3 \n\t" // a <= 0.0
+ "pslld $31, %%mm2 \n\t" // keep only the sign bit
+ "pxor %%mm2, %%mm1 \n\t"
+ "movq %%mm3, %%mm4 \n\t"
+ "pand %%mm1, %%mm3 \n\t"
+ "pandn %%mm1, %%mm4 \n\t"
+ "pfadd %%mm0, %%mm3 \n\t" // a = m + ((a < 0) & (a ^ sign(m)))
+ "pfsub %%mm4, %%mm0 \n\t" // m = m + ((a > 0) & (a ^ sign(m)))
+ "movq %%mm3, %1 \n\t"
+ "movq %%mm0, %0 \n\t"
+ : "+m"(mag[i]), "+m"(ang[i])
+ :: "memory"
+ );
+ }
+ __asm__ volatile ("femms");
+}
+#endif
+
+static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize)
+{
+ int i;
+
+ __asm__ volatile (
+ "movaps %0, %%xmm5 \n\t"
+ :: "m"(ff_pdw_80000000[0])
+ );
+ for (i = 0; i < blocksize; i += 4) {
+ __asm__ volatile (
+ "movaps %0, %%xmm0 \n\t"
+ "movaps %1, %%xmm1 \n\t"
+ "xorps %%xmm2, %%xmm2 \n\t"
+ "xorps %%xmm3, %%xmm3 \n\t"
+ "cmpleps %%xmm0, %%xmm2 \n\t" // m <= 0.0
+ "cmpleps %%xmm1, %%xmm3 \n\t" // a <= 0.0
+ "andps %%xmm5, %%xmm2 \n\t" // keep only the sign bit
+ "xorps %%xmm2, %%xmm1 \n\t"
+ "movaps %%xmm3, %%xmm4 \n\t"
+ "andps %%xmm1, %%xmm3 \n\t"
+ "andnps %%xmm1, %%xmm4 \n\t"
+ "addps %%xmm0, %%xmm3 \n\t" // a = m + ((a < 0) & (a ^ sign(m)))
+ "subps %%xmm4, %%xmm0 \n\t" // m = m + ((a > 0) & (a ^ sign(m)))
+ "movaps %%xmm3, %1 \n\t"
+ "movaps %%xmm0, %0 \n\t"
+ : "+m"(mag[i]), "+m"(ang[i])
+ :: "memory"
+ );
+ }
+}
+#endif
+
+void ff_vorbisdsp_init_x86(VorbisDSPContext *dsp)
+{
+#if HAVE_INLINE_ASM
+ int mm_flags = av_get_cpu_flags();
+
+#if ARCH_X86_32
+ if (mm_flags & AV_CPU_FLAG_3DNOW)
+ dsp->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow;
+#endif /* ARCH_X86_32 */
+ if (mm_flags & AV_CPU_FLAG_SSE)
+ dsp->vorbis_inverse_coupling = vorbis_inverse_coupling_sse;
+#endif /* HAVE_INLINE_ASM */
+}