Merge remote-tracking branch 'qatar/master'

* qatar/master:
  vble: remove vble_error_close
  VBLE Decoder
  tta: use an integer instead of a pointer to iterate output samples
  shorten: do not modify samples pointer when interleaving
  mpc7: only support stereo input.
  dpcm: do not try to decode empty packets
  dpcm: remove unneeded buf_size==0 check.
  twinvq: add SSE/AVX optimized sum/difference stereo interleaving
  vqf/twinvq: pass vqf COMM chunk info in extradata
  vqf: do not set bits_per_coded_sample for TwinVQ.
  twinvq: check for allocation failure in init_mdct_win()
  swscale: add padding to conversion buffer.
  rtpdec: Simplify finalize_packet
  http: Handle proxy authentication
  http: Print an error message for Authorization Required, too
  AVOptions: don't return an invalid option when option list is empty
  AIFF: add 'twos' FourCC for the mux/demuxer (big endian PCM audio)

Conflicts:
  libavcodec/avcodec.h
  libavcodec/tta.c
  libavcodec/vble.c
  libavcodec/version.h
  libavutil/opt.c
  libswscale/utils.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
author: Michael Niedermayer <michaelni@gmx.at> 2011-11-12 02:50:25 +0100
committer: Michael Niedermayer <michaelni@gmx.at> 2011-11-12 02:50:25 +0100
commit: 29582df797745fa6c5eec22b007e4fd3a47e7dd9 (patch)
tree: 59c487e218e4f750b48151d548db9091236d096f /libavcodec/x86
parent: 6761b6b825c4aafff311a180a09c7013288480aa (diff)
parent: 29ae0565d98bb41e54fc74f74c330d3214825f47 (diff)
2 files changed, 55 insertions, 0 deletions
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index 844066200b..83b9219135 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -2407,6 +2407,11 @@ void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src, int32_t min
 void ff_vector_clip_int32_sse4    (int32_t *dst, const int32_t *src, int32_t min,
                                    int32_t max, unsigned int len);
 
+extern void ff_butterflies_float_interleave_sse(float *dst, const float *src0,
+                                                const float *src1, int len);
+extern void ff_butterflies_float_interleave_avx(float *dst, const float *src0,
+                                                const float *src1, int len);
+
 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
 {
     int mm_flags = av_get_cpu_flags();
@@ -2849,6 +2854,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
             c->vector_clipf = vector_clipf_sse;
 #if HAVE_YASM
             c->scalarproduct_float = ff_scalarproduct_float_sse;
+            c->butterflies_float_interleave = ff_butterflies_float_interleave_sse;
 #endif
         }
         if (HAVE_AMD3DNOW && (mm_flags & AV_CPU_FLAG_3DNOW))
@@ -2906,6 +2912,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
                 c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_10_avx;
                 c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_10_avx;
             }
+            c->butterflies_float_interleave = ff_butterflies_float_interleave_avx;
         }
 #endif
     }
diff --git a/libavcodec/x86/dsputil_yasm.asm b/libavcodec/x86/dsputil_yasm.asm
index 82678f1880..0e497c9cda 100644
--- a/libavcodec/x86/dsputil_yasm.asm
+++ b/libavcodec/x86/dsputil_yasm.asm
@@ -1129,3 +1129,51 @@ VECTOR_CLIP_INT32 11, 1, 1, 0
 %else
 VECTOR_CLIP_INT32 6, 1, 0, 0
 %endif
+
+;-----------------------------------------------------------------------------
+; void ff_butterflies_float_interleave(float *dst, const float *src0,
+;                                      const float *src1, int len);
+;-----------------------------------------------------------------------------
+
+%macro BUTTERFLIES_FLOAT_INTERLEAVE 0
+cglobal butterflies_float_interleave, 4,4,3, dst, src0, src1, len
+%ifdef ARCH_X86_64
+    movsxd    lenq, lend              ; len is a C int: sign-extend it to the full 64-bit register
+%endif
+    test      lenq, lenq
+    jz .end                           ; len == 0: nothing to write, skip the loop entirely
+    shl       lenq, 2                 ; element count -> byte count (4-byte floats)
+    lea      src0q, [src0q +   lenq]  ; src0 now points just past its last input element
+    lea      src1q, [src1q +   lenq]  ; likewise for src1
+    lea       dstq, [ dstq + 2*lenq]  ; dst end is twice as far: two outputs per input pair
+    neg       lenq                    ; lenq becomes a negative byte offset counting up to zero
+.loop:
+    mova        m0, [src0q + lenq]    ; m0 = mmsize bytes of src0
+    mova        m1, [src1q + lenq]    ; m1 = mmsize bytes of src1
+    subps       m2, m0, m1            ; m2 = src0 - src1
+    addps       m0, m0, m1            ; m0 = src0 + src1
+    unpcklps    m1, m0, m2            ; m1 = (sum, diff) pairs from the low half of each 128-bit lane
+    unpckhps    m0, m0, m2            ; m0 = (sum, diff) pairs from the high half of each 128-bit lane
+%if cpuflag(avx)
+    vextractf128 [dstq + 2*lenq     ], m1, 0   ; 256-bit unpck* work per 128-bit lane, so output
+    vextractf128 [dstq + 2*lenq + 16], m0, 0   ; order is m1 low, m0 low, m1 high, m0 high:
+    vextractf128 [dstq + 2*lenq + 32], m1, 1   ; each store writes one 16-byte lane
+    vextractf128 [dstq + 2*lenq + 48], m0, 1   ; (4 stores = 64 bytes = 8 sum/diff pairs)
+%else
+    mova [dstq + 2*lenq         ], m1 ; first mmsize bytes of interleaved output
+    mova [dstq + 2*lenq + mmsize], m0 ; next mmsize bytes of interleaved output
+%endif
+    add       lenq, mmsize            ; advance by one vector of input; sets flags for jl
+    jl .loop                          ; keep going while the offset is still negative
+%if mmsize == 32
+    vzeroupper                        ; YMM build only: clear upper register halves before returning
+    RET                               ; NOTE(review): RET/REP_RET are macros defined elsewhere (presumably x86inc) - confirm
+%endif
+.end:
+    REP_RET                           ; reached via the len == 0 jump, and by fall-through when mmsize != 32
+%endmacro
+
+INIT_XMM sse
+BUTTERFLIES_FLOAT_INTERLEAVE
+INIT_YMM avx
+BUTTERFLIES_FLOAT_INTERLEAVE
author	Michael Niedermayer <michaelni@gmx.at>	2011-11-12 02:50:25 +0100
committer	Michael Niedermayer <michaelni@gmx.at>	2011-11-12 02:50:25 +0100
commit	29582df797745fa6c5eec22b007e4fd3a47e7dd9 (patch)
tree	59c487e218e4f750b48151d548db9091236d096f /libavcodec/x86
parent	6761b6b825c4aafff311a180a09c7013288480aa (diff)
parent	29ae0565d98bb41e54fc74f74c330d3214825f47 (diff)