summaryrefslogtreecommitdiff
path: root/libavcodec/x86/fmtconvert.asm
diff options
context:
space:
mode:
authorMichael Niedermayer <michaelni@gmx.at>2012-06-30 22:44:18 +0200
committerMichael Niedermayer <michaelni@gmx.at>2012-06-30 22:44:18 +0200
commit64b25938e90253432d28ffd7d971f085c560a523 (patch)
tree56ba6a39d7e8f14ebccdcc4db935164b5da624a2 /libavcodec/x86/fmtconvert.asm
parentbe24f85176d8e46c3154f1f51013f235b273183e (diff)
parentceabc13f129cd6344b1eebdbe10119083fe5520e (diff)
Merge remote-tracking branch 'qatar/master'
* qatar/master: dsputilenc_mmx: split assignment of ff_sse16_sse2 to SSE2 section. dnxhdenc: add space between function argument type and comment. x86: fmtconvert: add special asm for float_to_int16_interleave_misc_* attributes: Add a definition of av_always_inline for MSVC cmdutils: Pass the actual chosen encoder to filter_codec_opts os_support: Add fallback definitions for stat flags os_support: Rename the poll fallback function to ff_poll network: Check for struct pollfd os_support: Don't compare a negative number against socket descriptors os_support: Include all the necessary headers for the win32 open function x86: vc1: fix and enable optimised loop filter Conflicts: cmdutils.c cmdutils.h ffmpeg.c ffplay.c libavformat/os_support.c Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/fmtconvert.asm')
-rw-r--r--libavcodec/x86/fmtconvert.asm78
1 files changed, 78 insertions, 0 deletions
diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm
index 72bee55669..9499a9e3a7 100644
--- a/libavcodec/x86/fmtconvert.asm
+++ b/libavcodec/x86/fmtconvert.asm
@@ -115,6 +115,84 @@ FLOAT_TO_INT16 sse, 0
FLOAT_TO_INT16 3dnow, 0
%undef cvtps2pi
+;------------------------------------------------------------------------------
+; void ff_float_to_int16_step(int16_t *dst, const float *src, long len, long step);
+;------------------------------------------------------------------------------
+%macro FLOAT_TO_INT16_STEP 2
+cglobal float_to_int16_step_%1, 4,7,%2, dst, src, len, step, step3, v1, v2
+ add lenq, lenq
+ lea srcq, [srcq+2*lenq]
+ lea step3q, [stepq*3]
+ neg lenq
+.loop:
+%ifidn %1, sse2
+ cvtps2dq m0, [srcq+2*lenq ]
+ cvtps2dq m1, [srcq+2*lenq+16]
+ packssdw m0, m1
+ movd v1d, m0
+ psrldq m0, 4
+ movd v2d, m0
+ psrldq m0, 4
+ mov [dstq], v1w
+ mov [dstq+stepq*4], v2w
+ shr v1d, 16
+ shr v2d, 16
+ mov [dstq+stepq*2], v1w
+ mov [dstq+step3q*2], v2w
+ lea dstq, [dstq+stepq*8]
+ movd v1d, m0
+ psrldq m0, 4
+ movd v2d, m0
+ mov [dstq], v1w
+ mov [dstq+stepq*4], v2w
+ shr v1d, 16
+ shr v2d, 16
+ mov [dstq+stepq*2], v1w
+ mov [dstq+step3q*2], v2w
+ lea dstq, [dstq+stepq*8]
+%else
+ cvtps2pi m0, [srcq+2*lenq ]
+ cvtps2pi m1, [srcq+2*lenq+ 8]
+ cvtps2pi m2, [srcq+2*lenq+16]
+ cvtps2pi m3, [srcq+2*lenq+24]
+ packssdw m0, m1
+ packssdw m2, m3
+ movd v1d, m0
+ psrlq m0, 32
+ movd v2d, m0
+ mov [dstq], v1w
+ mov [dstq+stepq*4], v2w
+ shr v1d, 16
+ shr v2d, 16
+ mov [dstq+stepq*2], v1w
+ mov [dstq+step3q*2], v2w
+ lea dstq, [dstq+stepq*8]
+ movd v1d, m2
+ psrlq m2, 32
+ movd v2d, m2
+ mov [dstq], v1w
+ mov [dstq+stepq*4], v2w
+ shr v1d, 16
+ shr v2d, 16
+ mov [dstq+stepq*2], v1w
+ mov [dstq+step3q*2], v2w
+ lea dstq, [dstq+stepq*8]
+%endif
+ add lenq, 16
+ js .loop
+%ifnidn %1, sse2
+ emms
+%endif
+ REP_RET
+%endmacro
+
+INIT_XMM
+FLOAT_TO_INT16_STEP sse2, 2
+INIT_MMX
+FLOAT_TO_INT16_STEP sse, 0
+%define cvtps2pi pf2id
+FLOAT_TO_INT16_STEP 3dnow, 0
+%undef cvtps2pi
;-------------------------------------------------------------------------------
; void ff_float_to_int16_interleave2(int16_t *dst, const float **src, long len);