summaryrefslogtreecommitdiff
path: root/libavcodec/x86/fmtconvert.asm
diff options
context:
space:
mode:
authorMichael Niedermayer <michaelni@gmx.at>2011-05-19 05:12:45 +0200
committerMichael Niedermayer <michaelni@gmx.at>2011-05-19 06:00:31 +0200
commit75a37b57a59f6701d9443c5f7a0ceec108b27a18 (patch)
tree1eea866003f3d7385261dea40b5b8063e87f9b8a /libavcodec/x86/fmtconvert.asm
parent8529f9b36b7c1b8f2cb36ba2709983517c4b6458 (diff)
parent41e21e4db623ebd77f431a6f30cf21d62d9e1f33 (diff)
Merge remote-tracking branch 'qatar/master'
* qatar/master: APIchanges: fill in date and commit for request_sample_fmt Add floating-point sample format support to the ac3, eac3, dca, aac, and vorbis decoders. Add support for request_sample_format in ffmpeg and ffplay. Add APIchanges entry for request_sample_fmt. Add request_sample_fmt field to AVCodecContext. Add float_interleave() to FmtConvertContext with x86-optimized versions. Remove unused make variable SEEK_REFFILE fate: remove redundant aref and vref references fate: remove do_ffmpeg_nocheck function fate: do not collect -benchmark output mpegaudiodec: remove decode_end() function fate: run aref and vref as regular tests mpegaudio: sanitise compute_antialias_* names mpeg12: add slice-threading checks to slice-threading initializers. h264: copy pixel_shift between slice threading contexts. mdec: enable frame-level multithreading. mdec.c: fix overread. Conflicts: libavcodec/aacdec.c libavcodec/ac3dec.c libavcodec/avcodec.h libavcodec/dca.c libavcodec/h264.c libavcodec/mdec.c libavcodec/mpeg12.c libavcodec/options.c libavcodec/version.h libavcodec/vorbisdec.c Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/fmtconvert.asm')
-rw-r--r--libavcodec/x86/fmtconvert.asm141
1 files changed, 141 insertions, 0 deletions
diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm
index dc038dde73..171e52a165 100644
--- a/libavcodec/x86/fmtconvert.asm
+++ b/libavcodec/x86/fmtconvert.asm
@@ -20,6 +20,7 @@
;******************************************************************************
%include "x86inc.asm"
+%include "x86util.asm"
section .text align=16
@@ -89,3 +90,143 @@ FLOAT_TO_INT16_INTERLEAVE6 3dnow
%undef pswapd
FLOAT_TO_INT16_INTERLEAVE6 3dn2
%undef cvtps2pi
+
+;-----------------------------------------------------------------------------
+; void ff_float_interleave6(float *dst, const float **src, unsigned int len);
+;-----------------------------------------------------------------------------
+
+%macro BUTTERFLYPS 3 ; interleave the floats of two registers; args 1 and 2 = register numbers to interleave, arg 3 = scratch register number
+ movaps m%3, m%1 ; keep a copy of the first register, because unpcklps overwrites it
+ unpcklps m%1, m%2 ; first = a0 b0 a1 b1 (a = first register, b = second register)
+ unpckhps m%3, m%2 ; scratch = a2 b2 a3 b3
+ SWAP %2, %3 ; exchange the names of args 2 and 3 (SWAP comes from x86inc; presumably a rename that emits no instruction), so the a2 b2 a3 b3 result is reached through arg 2's number
+%endmacro
+
+%macro FLOAT_INTERLEAVE6 2 ; six-channel float interleave; arg 1 = variant suffix (sse selects the SSE body, otherwise the MMX body), arg 2 = forwarded as cglobal's 4th argument
+cglobal float_interleave6_%1, 2,7,%2, dst, src, src1, src2, src3, src4, src5
+%ifdef ARCH_X86_64
+ %define lend r10d
+ mov lend, r2d ; keep len in r10d; NOTE(review): presumably required because src1 is x86inc's name for r2 and is overwritten below - confirm
+%else ; otherwise lend is the dword at r2m (presumably len's stack slot - confirm against x86inc)
+ %define lend dword r2m
+%endif
+ mov src1q, [srcq+1*gprsize] ; src1q..src5q = channel pointers src[1]..src[5]
+ mov src2q, [srcq+2*gprsize]
+ mov src3q, [srcq+3*gprsize]
+ mov src4q, [srcq+4*gprsize]
+ mov src5q, [srcq+5*gprsize]
+ mov srcq, [srcq] ; srcq = src[0]; done last because it overwrites the base of the pointer array
+ sub src1q, srcq ; turn src1q..src5q into byte offsets from src[0], so only srcq has to advance in the loop
+ sub src2q, srcq
+ sub src3q, srcq
+ sub src4q, srcq
+ sub src5q, srcq
+.loop: ; one pass handles mmsize/4 samples of every channel; entered without a length check, so it runs at least once
+%ifidn %1, sse
+ movaps m0, [srcq] ; m0..m5 = 4 consecutive samples of channels 0..5 (movaps requires 16-byte-aligned addresses)
+ movaps m1, [srcq+src1q]
+ movaps m2, [srcq+src2q]
+ movaps m3, [srcq+src3q]
+ movaps m4, [srcq+src4q]
+ movaps m5, [srcq+src5q]
+
+ BUTTERFLYPS 0, 1, 6 ; m0 = a0 b0 a1 b1, m1 = a2 b2 a3 b3 (a..f = channels 0..5, digit = sample index)
+ BUTTERFLYPS 2, 3, 6 ; m2 = c0 d0 c1 d1, m3 = c2 d2 c3 d3
+ BUTTERFLYPS 4, 5, 6 ; m4 = e0 f0 e1 f1, m5 = e2 f2 e3 f3
+
+ movaps m6, m4 ; m6 = e0 f0 e1 f1
+ shufps m4, m0, 0xe4 ; m4 = e0 f0 a1 b1 (low two lanes kept from m4, high two lanes taken from m0)
+ movlhps m0, m2 ; m0 = a0 b0 c0 d0
+ movhlps m6, m2 ; m6 = c1 d1 e1 f1
+ movaps [dstq ], m0 ; these three stores write samples 0 and 1 of all six channels
+ movaps [dstq+16], m4
+ movaps [dstq+32], m6
+
+ movaps m6, m5 ; m6 = e2 f2 e3 f3
+ shufps m5, m1, 0xe4 ; m5 = e2 f2 a3 b3
+ movlhps m1, m3 ; m1 = a2 b2 c2 d2
+ movhlps m6, m3 ; m6 = c3 d3 e3 f3
+ movaps [dstq+48], m1 ; these three stores write samples 2 and 3 of all six channels
+ movaps [dstq+64], m5
+ movaps [dstq+80], m6
+%else ; mmx
+ movq m0, [srcq] ; m0..m5 = 2 consecutive samples of channels 0..5
+ movq m1, [srcq+src1q]
+ movq m2, [srcq+src2q]
+ movq m3, [srcq+src3q]
+ movq m4, [srcq+src4q]
+ movq m5, [srcq+src5q]
+
+ SBUTTERFLY dq, 0, 1, 6 ; SBUTTERFLY is defined outside this view (presumably x86util.asm); assumed to leave m0 = a0 b0, m1 = a1 b1 - confirm
+ SBUTTERFLY dq, 2, 3, 6 ; likewise for channels 2 and 3 in m2/m3
+ SBUTTERFLY dq, 4, 5, 6 ; likewise for channels 4 and 5 in m4/m5
+ movq [dstq ], m0 ; store order is m0, m2, m4 and then m1, m3, m5
+ movq [dstq+ 8], m2
+ movq [dstq+16], m4
+ movq [dstq+24], m1
+ movq [dstq+32], m3
+ movq [dstq+40], m5
+%endif
+ add srcq, mmsize ; advance the channel-0 pointer; the other channels follow through their offsets
+ add dstq, mmsize*6 ; six vectors of output were written
+ sub lend, mmsize/4 ; mmsize/4 floats per channel were consumed
+ jg .loop ; repeat while the remaining count is greater than zero (signed comparison)
+%ifidn %1, mmx
+ emms ; clear the MMX state before returning so that x87 code can run afterwards
+%endif
+ REP_RET
+%endmacro
+
+INIT_MMX ; x86inc setup macro run before the MMX expansion (presumably selects the 8-byte MMX register set - confirm against x86inc)
+FLOAT_INTERLEAVE6 mmx, 0 ; expand the float_interleave6_mmx variant; its body takes the MMX path
+INIT_XMM ; x86inc setup macro run before the SSE expansion (presumably selects the 16-byte XMM register set - confirm against x86inc)
+FLOAT_INTERLEAVE6 sse, 7 ; expand the float_interleave6_sse variant; the SSE path uses m0..m6, i.e. 7 registers
+
+;-----------------------------------------------------------------------------
+; void ff_float_interleave2(float *dst, const float **src, unsigned int len);
+;-----------------------------------------------------------------------------
+
+%macro FLOAT_INTERLEAVE2 2 ; two-channel float interleave; arg 1 = variant suffix (mmx adds the trailing emms), arg 2 = forwarded as cglobal's 4th argument
+cglobal float_interleave2_%1, 3,4,%2, dst, src, len, src1
+ mov src1q, [srcq+gprsize] ; src1q = src[1]
+ mov srcq, [srcq ] ; srcq = src[0]; done last because it overwrites the base of the pointer array
+ sub src1q, srcq ; src1q becomes the byte offset of channel 1 relative to channel 0
+.loop: ; colon-terminated so the label is unambiguous (a bare label line triggers the orphan-label warning); entered without a length check, so it runs at least once
+ MOVPS m0, [srcq ] ; m0 = first vector of channel 0 (MOVPS/PUNPCKLDQ/PUNPCKHDQ must be %defined before this macro is expanded)
+ MOVPS m1, [srcq+src1q ] ; m1 = first vector of channel 1
+ MOVPS m3, [srcq +mmsize] ; m3 = second vector of channel 0
+ MOVPS m4, [srcq+src1q+mmsize] ; m4 = second vector of channel 1
+
+ MOVPS m2, m0 ; copy, because the low unpack overwrites m0
+ PUNPCKLDQ m0, m1 ; m0 = low halves of the first vectors, interleaved channel 0 / channel 1
+ PUNPCKHDQ m2, m1 ; m2 = high halves of the first vectors, interleaved
+
+ MOVPS m1, m3 ; copy, because the low unpack overwrites m3 (m1 is free again here)
+ PUNPCKLDQ m3, m4 ; m3 = low halves of the second vectors, interleaved
+ PUNPCKHDQ m1, m4 ; m1 = high halves of the second vectors, interleaved
+
+ MOVPS [dstq ], m0 ; output order: low/high of the first vectors, then low/high of the second vectors
+ MOVPS [dstq+1*mmsize], m2
+ MOVPS [dstq+2*mmsize], m3
+ MOVPS [dstq+3*mmsize], m1
+
+ add srcq, mmsize*2 ; two vectors per channel were consumed
+ add dstq, mmsize*4 ; four vectors of output were written
+ sub lend, mmsize/2 ; 2*mmsize bytes per channel = mmsize/2 floats per channel
+ jg .loop ; repeat while the remaining count is greater than zero (signed comparison)
+%ifidn %1, mmx
+ emms ; clear the MMX state before returning so that x87 code can run afterwards
+%endif
+ REP_RET
+%endmacro
+
+INIT_MMX ; x86inc setup macro run before the MMX expansion; the three defines below map the macro's generic mnemonics to MMX instructions
+%define MOVPS movq
+%define PUNPCKLDQ punpckldq
+%define PUNPCKHDQ punpckhdq
+FLOAT_INTERLEAVE2 mmx, 0 ; expand the float_interleave2_mmx variant
+INIT_XMM ; x86inc setup macro run before the SSE expansion; the defines below remap the same names to SSE float instructions
+%define MOVPS movaps
+%define PUNPCKLDQ unpcklps
+%define PUNPCKHDQ unpckhps
+FLOAT_INTERLEAVE2 sse, 5 ; expand the float_interleave2_sse variant; the macro body uses m0..m4, i.e. 5 registers