Merge remote-tracking branch 'qatar/master'

* qatar/master: (35 commits)
  flvdec: Do not call parse_keyframes_index with a NULL stream
  libspeexdec: include system headers before local headers
  libspeexdec: return meaningful error codes
  libspeexdec: cosmetics: reindent
  libspeexdec: decode one frame at a time.
  swscale: fix signed shift overflows in ff_yuv2rgb_c_init_tables()
  Move timefilter code from lavf to lavd.
  mov: add support for hdvd and pgapmetadata atoms
  mov: rename function _stik, some indentation cosmetics
  mov: rename function _int8 to remove ambiguity, some indentation cosmetics
  mov: parse the gnre atom
  mp3on4: check for allocation failures in decode_init_mp3on4()
  mp3on4: create a separate flush function for MP3onMP4.
  mp3on4: ensure that the frame channel count does not exceed the codec channel count.
  mp3on4: set channel layout
  mp3on4: fix the output channel order
  mp3on4: allocate temp buffer with av_malloc() instead of on the stack.
  mp3on4: copy MPADSPContext from first context to all contexts.
  fmtconvert: port float_to_int16_interleave() 2-channel x86 inline asm to yasm
  fmtconvert: port int32_to_float_fmul_scalar() x86 inline asm to yasm
  ...

Conflicts:
	libavcodec/arm/h264dsp_init_arm.c
	libavcodec/h264.c
	libavcodec/h264.h
	libavcodec/h264_cabac.c
	libavcodec/h264_cavlc.c
	libavcodec/h264_ps.c
	libavcodec/h264dsp_template.c
	libavcodec/h264idct_template.c
	libavcodec/h264pred.c
	libavcodec/h264pred_template.c
	libavcodec/x86/h264dsp_mmx.c
	libavdevice/Makefile
	libavdevice/jack_audio.c
	libavformat/Makefile
	libavformat/flvdec.c
	libavformat/flvenc.c
	libavutil/pixfmt.h
	libswscale/utils.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
author: Michael Niedermayer <michaelni@gmx.at> 2011-10-22 01:03:27 +0200
committer: Michael Niedermayer <michaelni@gmx.at> 2011-10-22 01:16:41 +0200
commit: aedc908601de7396751a9a4504e064782d9f6a0b (patch)
tree: 8f04b899142439893bac426ac83d05c4068b099c /libavcodec/x86/fmtconvert.asm
parent: 1a7090bfafe986d4470ba8059c815939171ddb74 (diff)
parent: f4b51d061f0f34e36be876b562b8abe47f4b9c1c (diff)
1 files changed, 140 insertions, 0 deletions
diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm
index 2deb577ca6..37e7a094ce 100644
--- a/libavcodec/x86/fmtconvert.asm
+++ b/libavcodec/x86/fmtconvert.asm
@@ -24,6 +24,146 @@
 
 SECTION_TEXT
 
+;---------------------------------------------------------------------------------
+; void int32_to_float_fmul_scalar(float *dst, const int *src, float mul, int len);
+;
+; dst[i] = (float)src[i] * mul
+;   %1: instruction-set suffix appended to the symbol name (sse or sse2)
+;   %2: XMM register count handed to cglobal
+; Each iteration handles 8 elements (32 bytes) and the loop body always runs at
+; least once; results are stored with mova. Presumably callers guarantee that
+; len is a positive multiple of 8 and that the buffers are suitably aligned
+; -- TODO confirm against the C callers.
+;---------------------------------------------------------------------------------
+%macro INT32_TO_FLOAT_FMUL_SCALAR 2
+%ifdef WIN64
+; Win64 argument slots are positional: mul (argument 2) arrives in xmm2 and
+; len (argument 3) in the fourth integer register, so all four slots are named.
+cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len
+    ; bring mul into m0; the raw register name keeps this independent of any
+    ; register permutation that might be in effect
+    movaps  m0, xmm2
+%elifdef ARCH_X86_64
+; SysV x86-64: mul is the first float argument and is already in xmm0,
+; len is the third integer argument.
+cglobal int32_to_float_fmul_scalar_%1, 3,3,%2, dst, src, len
+%else
+; x86-32: mul is passed on the stack, load it from its argument slot.
+cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len
+    movss   m0, mulm
+%endif
+    SPLATD  m0                          ; broadcast mul to all four lanes
+    ; len is a 32-bit int: shifting the 32-bit register zeroes the upper half
+    ; of lenq on x86-64 instead of trusting whatever the caller left there
+    shl     lend, 2                     ; lenq = len * 4 = byte count
+    add     srcq, lenq                  ; srcq -> end of src
+    add     dstq, lenq                  ; dstq -> end of dst
+    neg     lenq                        ; negative offset, counts up towards 0
+.loop:
+%ifidn %1, sse2
+    cvtdq2ps  m1, [srcq+lenq   ]        ; 4 ints -> 4 floats
+    cvtdq2ps  m2, [srcq+lenq+16]
+%else
+    ; SSE1 can only convert two ints at a time into the low half of a register
+    cvtpi2ps  m1, [srcq+lenq   ]
+    cvtpi2ps  m3, [srcq+lenq+ 8]
+    cvtpi2ps  m2, [srcq+lenq+16]
+    cvtpi2ps  m4, [srcq+lenq+24]
+    movlhps   m1, m3                    ; m1 = floats 0..3
+    movlhps   m2, m4                    ; m2 = floats 4..7
+%endif
+    mulps     m1, m0
+    mulps     m2, m0
+    mova  [dstq+lenq   ], m1
+    mova  [dstq+lenq+16], m2
+    add     lenq, 32
+    jl .loop                            ; continue while the offset is negative
+    REP_RET
+%endmacro
+
+; Instantiate int32_to_float_fmul_scalar_sse and _sse2.
+; SPLATD is pointed at the matching broadcast helper before each expansion.
+INIT_XMM
+%define SPLATD SPLATD_SSE
+; NOTE(review): presumably mova expands to movdqa under INIT_XMM, which SSE1
+; lacks, hence the temporary redirect to movaps -- confirm against x86inc.
+%define movdqa movaps
+; The SSE path of the macro uses m0-m4, i.e. 5 registers.
+INT32_TO_FLOAT_FMUL_SCALAR sse, 5
+%undef movdqa
+%define SPLATD SPLATD_SSE2
+; The SSE2 path of the macro uses m0-m2, i.e. 3 registers.
+INT32_TO_FLOAT_FMUL_SCALAR sse2, 3
+%undef SPLATD
+
+
+;------------------------------------------------------------------------------
+; void ff_float_to_int16(int16_t *dst, const float *src, long len);
+;
+; Converts 8 floats per iteration to int16; packssdw narrows the converted
+; dwords to words with signed saturation.
+;   %1: instruction-set suffix (sse2 selects the 128-bit path, anything else
+;       the 64-bit path)
+;   %2: XMM register count handed to cglobal
+;------------------------------------------------------------------------------
+%macro FLOAT_TO_INT16 2
+cglobal float_to_int16_%1, 3,3,%2, dst, src, len
+    add       lenq, lenq                ; lenq = 2*len = dst size in bytes
+    lea       srcq, [srcq+2*lenq]       ; srcq -> end of src (4 bytes/sample)
+    add       dstq, lenq                ; dstq -> end of dst (2 bytes/sample)
+    neg       lenq                      ; negative dst offset, counts up to 0
+.loop:
+%ifnidn %1, sse2
+    ; 64-bit path: four conversions of two floats each
+    cvtps2pi    m0, [srcq+2*lenq   ]
+    cvtps2pi    m1, [srcq+2*lenq+ 8]
+    cvtps2pi    m2, [srcq+2*lenq+16]
+    cvtps2pi    m3, [srcq+2*lenq+24]
+    packssdw    m0, m1                  ; samples 0..3
+    packssdw    m2, m3                  ; samples 4..7
+    mova  [dstq+lenq  ], m0
+    mova  [dstq+lenq+8], m2
+%else
+    ; 128-bit path: two conversions of four floats each
+    cvtps2dq    m0, [srcq+2*lenq   ]
+    cvtps2dq    m1, [srcq+2*lenq+16]
+    packssdw    m0, m1                  ; samples 0..7
+    mova  [dstq+lenq], m0
+%endif
+    add       lenq, 16                  ; 16 output bytes written
+    js .loop                            ; loop while the offset is negative
+%ifnidn %1, sse2
+    emms                                ; the 64-bit path wrote MMX registers
+%endif
+    REP_RET
+%endmacro
+
+; Instantiate float_to_int16_sse2 (128-bit path, m0-m1) and the two 64-bit
+; variants float_to_int16_sse and float_to_int16_3dnow.
+INIT_XMM
+FLOAT_TO_INT16 sse2, 2
+INIT_MMX
+FLOAT_TO_INT16 sse, 0
+; The 3dnow variant reuses the 64-bit path with cvtps2pi replaced by pf2id.
+; NOTE(review): pf2id presumably truncates whereas cvtps2pi follows the MXCSR
+; rounding mode, so results may differ by one LSB -- confirm this is intended.
+%define cvtps2pi pf2id
+FLOAT_TO_INT16 3dnow, 0
+%undef cvtps2pi
+
+
+;-------------------------------------------------------------------------------
+; void ff_float_to_int16_interleave2(int16_t *dst, const float **src, long len);
+;
+; Converts two float channels (src[0], src[1]) to int16 and writes them
+; interleaved: 4 samples per channel, 16 output bytes, per iteration.
+;   %1: instruction-set suffix (sse2 selects the 128-bit path, anything else
+;       the 64-bit path)
+; Three arguments are loaded but four registers are named: the second argument
+; register is called src0, the third (holding len on entry) is called src1,
+; and the extra fourth register is called len.
+;-------------------------------------------------------------------------------
+%macro FLOAT_TO_INT16_INTERLEAVE2 1
+cglobal float_to_int16_interleave2_%1, 3,4,2, dst, src0, src1, len
+    lea      lenq, [4*r2q]              ; r2 still holds the len argument here
+    mov     src1q, [src0q+gprsize]      ; src1q = src[1] (overwrites r2)
+    mov     src0q, [src0q]              ; src0q = src[0]
+    add      dstq, lenq                 ; dstq  -> end of dst  (4 bytes/pair)
+    add     src0q, lenq                 ; src0q -> end of src[0]
+    add     src1q, lenq                 ; src1q -> end of src[1]
+    neg      lenq                       ; negative offset, counts up to 0
+.loop:
+%ifnidn %1, sse2
+    ; 64-bit path
+    cvtps2pi   m0, [src0q+lenq  ]
+    cvtps2pi   m1, [src0q+lenq+8]
+    cvtps2pi   m2, [src1q+lenq  ]
+    cvtps2pi   m3, [src1q+lenq+8]
+    packssdw   m0, m1                   ; m0 = four src[0] samples
+    packssdw   m2, m3                   ; m2 = four src[1] samples
+    mova       m1, m0                   ; keep a copy for the high interleave
+    punpcklwd  m0, m2                   ; pairs 0..1
+    punpckhwd  m1, m2                   ; pairs 2..3
+    mova  [dstq+lenq  ], m0
+    mova  [dstq+lenq+8], m1
+%else
+    ; 128-bit path
+    cvtps2dq   m0, [src0q+lenq]
+    cvtps2dq   m1, [src1q+lenq]
+    packssdw   m0, m1                   ; low half = src[0], high half = src[1]
+    movhlps    m1, m0                   ; m1 low half = the src[1] samples
+    punpcklwd  m0, m1                   ; interleave into pairs 0..3
+    mova  [dstq+lenq], m0
+%endif
+    add      lenq, 16                   ; 16 bytes consumed per channel / written
+    js .loop                            ; loop while the offset is negative
+%ifnidn %1, sse2
+    emms                                ; the 64-bit path wrote MMX registers
+%endif
+    REP_RET
+%endmacro
+
+; Instantiate float_to_int16_interleave2_3dnow and _sse (64-bit path), then
+; _sse2 (128-bit path).
+INIT_MMX
+; The 3dnow variant reuses the 64-bit path with cvtps2pi replaced by pf2id.
+%define cvtps2pi pf2id
+FLOAT_TO_INT16_INTERLEAVE2 3dnow
+%undef cvtps2pi
+; NOTE(review): the macro body only stores through mova; under INIT_MMX that
+; presumably does not expand to movdqa, so this define looks redundant --
+; confirm against x86inc before removing.
+%define movdqa movaps
+FLOAT_TO_INT16_INTERLEAVE2 sse
+%undef movdqa
+INIT_XMM
+FLOAT_TO_INT16_INTERLEAVE2 sse2
+
+
 ; %1 = %2 with its low and high 32-bit halves exchanged.
 ; The pshufw selector 0x4e picks source words 2,3,0,1.
 %macro PSWAPD_SSE 2
     pshufw %1, %2, 0x4e
 %endmacro
author	Michael Niedermayer <michaelni@gmx.at>	2011-10-22 01:03:27 +0200
committer	Michael Niedermayer <michaelni@gmx.at>	2011-10-22 01:16:41 +0200
commit	aedc908601de7396751a9a4504e064782d9f6a0b (patch)
tree	8f04b899142439893bac426ac83d05c4068b099c /libavcodec/x86/fmtconvert.asm
parent	1a7090bfafe986d4470ba8059c815939171ddb74 (diff)
parent	f4b51d061f0f34e36be876b562b8abe47f4b9c1c (diff)