Merge remote-tracking branch 'qatar/master'

* qatar/master: dsputilenc_mmx: split assignment of ff_sse16_sse2 to SSE2 section. dnxhdenc: add space between function argument type and comment. x86: fmtconvert: add special asm for float_to_int16_interleave_misc_* attributes: Add a definition of av_always_inline for MSVC cmdutils: Pass the actual chosen encoder to filter_codec_opts os_support: Add fallback definitions for stat flags os_support: Rename the poll fallback function to ff_poll network: Check for struct pollfd os_support: Don't compare a negative number against socket descriptors os_support: Include all the necessary headers for the win32 open function x86: vc1: fix and enable optimised loop filter Conflicts: cmdutils.c cmdutils.h ffmpeg.c ffplay.c libavformat/os_support.c Merged-by: Michael Niedermayer <michaelni@gmx.at>
author: Michael Niedermayer <michaelni@gmx.at> 2012-06-30 22:44:18 +0200
committer: Michael Niedermayer <michaelni@gmx.at> 2012-06-30 22:44:18 +0200
commit: 64b25938e90253432d28ffd7d971f085c560a523 (patch)
tree: 56ba6a39d7e8f14ebccdcc4db935164b5da624a2 /libavcodec/x86/fmtconvert.asm
parent: be24f85176d8e46c3154f1f51013f235b273183e (diff)
parent: ceabc13f129cd6344b1eebdbe10119083fe5520e (diff)
1 files changed, 78 insertions, 0 deletions
diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm
index 72bee55669..9499a9e3a7 100644
--- a/libavcodec/x86/fmtconvert.asm
+++ b/libavcodec/x86/fmtconvert.asm
@@ -115,6 +115,84 @@ FLOAT_TO_INT16 sse, 0
 FLOAT_TO_INT16 3dnow, 0
 %undef cvtps2pi
 
+;------------------------------------------------------------------------------
+; void ff_float_to_int16_step(int16_t *dst, const float *src, long len, long step);
+;------------------------------------------------------------------------------
+%macro FLOAT_TO_INT16_STEP 2
+cglobal float_to_int16_step_%1, 4,7,%2, dst, src, len, step, step3, v1, v2
+    add       lenq, lenq
+    lea       srcq, [srcq+2*lenq]
+    lea     step3q, [stepq*3]
+    neg       lenq
+.loop:
+%ifidn %1, sse2
+    cvtps2dq    m0, [srcq+2*lenq   ]
+    cvtps2dq    m1, [srcq+2*lenq+16]
+    packssdw    m0, m1
+    movd       v1d, m0
+    psrldq      m0, 4
+    movd       v2d, m0
+    psrldq      m0, 4
+    mov     [dstq], v1w
+    mov  [dstq+stepq*4], v2w
+    shr        v1d, 16
+    shr        v2d, 16
+    mov  [dstq+stepq*2], v1w
+    mov  [dstq+step3q*2], v2w
+    lea       dstq, [dstq+stepq*8]
+    movd       v1d, m0
+    psrldq      m0, 4
+    movd       v2d, m0
+    mov     [dstq], v1w
+    mov  [dstq+stepq*4], v2w
+    shr        v1d, 16
+    shr        v2d, 16
+    mov  [dstq+stepq*2], v1w
+    mov  [dstq+step3q*2], v2w
+    lea       dstq, [dstq+stepq*8]
+%else
+    cvtps2pi    m0, [srcq+2*lenq   ]
+    cvtps2pi    m1, [srcq+2*lenq+ 8]
+    cvtps2pi    m2, [srcq+2*lenq+16]
+    cvtps2pi    m3, [srcq+2*lenq+24]
+    packssdw    m0, m1
+    packssdw    m2, m3
+    movd       v1d, m0
+    psrlq       m0, 32
+    movd       v2d, m0
+    mov     [dstq], v1w
+    mov  [dstq+stepq*4], v2w
+    shr        v1d, 16
+    shr        v2d, 16
+    mov  [dstq+stepq*2], v1w
+    mov  [dstq+step3q*2], v2w
+    lea       dstq, [dstq+stepq*8]
+    movd       v1d, m2
+    psrlq       m2, 32
+    movd       v2d, m2
+    mov     [dstq], v1w
+    mov  [dstq+stepq*4], v2w
+    shr        v1d, 16
+    shr        v2d, 16
+    mov  [dstq+stepq*2], v1w
+    mov  [dstq+step3q*2], v2w
+    lea       dstq, [dstq+stepq*8]
+%endif
+    add       lenq, 16
+    js .loop
+%ifnidn %1, sse2
+    emms
+%endif
+    REP_RET
+%endmacro
+
+INIT_XMM
+FLOAT_TO_INT16_STEP sse2, 2
+INIT_MMX
+FLOAT_TO_INT16_STEP sse, 0
+%define cvtps2pi pf2id
+FLOAT_TO_INT16_STEP 3dnow, 0
+%undef cvtps2pi
 
 ;-------------------------------------------------------------------------------
 ; void ff_float_to_int16_interleave2(int16_t *dst, const float **src, long len);
author	Michael Niedermayer <michaelni@gmx.at>	2012-06-30 22:44:18 +0200
committer	Michael Niedermayer <michaelni@gmx.at>	2012-06-30 22:44:18 +0200
commit	64b25938e90253432d28ffd7d971f085c560a523 (patch)
tree	56ba6a39d7e8f14ebccdcc4db935164b5da624a2 /libavcodec/x86/fmtconvert.asm
parent	be24f85176d8e46c3154f1f51013f235b273183e (diff)
parent	ceabc13f129cd6344b1eebdbe10119083fe5520e (diff)