diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2011-11-12 02:50:25 +0100 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2011-11-12 02:50:25 +0100 |
commit | 29582df797745fa6c5eec22b007e4fd3a47e7dd9 (patch) | |
tree | 59c487e218e4f750b48151d548db9091236d096f /libavcodec/x86 | |
parent | 6761b6b825c4aafff311a180a09c7013288480aa (diff) | |
parent | 29ae0565d98bb41e54fc74f74c330d3214825f47 (diff) |
Merge remote-tracking branch 'qatar/master'
* qatar/master:
vble: remove vble_error_close
VBLE Decoder
tta: use an integer instead of a pointer to iterate output samples
shorten: do not modify samples pointer when interleaving
mpc7: only support stereo input.
dpcm: do not try to decode empty packets
dpcm: remove unneeded buf_size==0 check.
twinvq: add SSE/AVX optimized sum/difference stereo interleaving
vqf/twinvq: pass vqf COMM chunk info in extradata
vqf: do not set bits_per_coded_sample for TwinVQ.
twinvq: check for allocation failure in init_mdct_win()
swscale: add padding to conversion buffer.
rtpdec: Simplify finalize_packet
http: Handle proxy authentication
http: Print an error message for Authorization Required, too
AVOptions: don't return an invalid option when option list is empty
AIFF: add 'twos' FourCC for the mux/demuxer (big endian PCM audio)
Conflicts:
libavcodec/avcodec.h
libavcodec/tta.c
libavcodec/vble.c
libavcodec/version.h
libavutil/opt.c
libswscale/utils.c
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- | libavcodec/x86/dsputil_mmx.c | 7 | ||||
-rw-r--r-- | libavcodec/x86/dsputil_yasm.asm | 48 |
2 files changed, 55 insertions, 0 deletions
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index 844066200b..83b9219135 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -2407,6 +2407,11 @@ void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src, int32_t min void ff_vector_clip_int32_sse4 (int32_t *dst, const int32_t *src, int32_t min, int32_t max, unsigned int len); +extern void ff_butterflies_float_interleave_sse(float *dst, const float *src0, + const float *src1, int len); +extern void ff_butterflies_float_interleave_avx(float *dst, const float *src0, + const float *src1, int len); + void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) { int mm_flags = av_get_cpu_flags(); @@ -2849,6 +2854,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->vector_clipf = vector_clipf_sse; #if HAVE_YASM c->scalarproduct_float = ff_scalarproduct_float_sse; + c->butterflies_float_interleave = ff_butterflies_float_interleave_sse; #endif } if (HAVE_AMD3DNOW && (mm_flags & AV_CPU_FLAG_3DNOW)) @@ -2906,6 +2912,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_10_avx; c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_10_avx; } + c->butterflies_float_interleave = ff_butterflies_float_interleave_avx; } #endif } diff --git a/libavcodec/x86/dsputil_yasm.asm b/libavcodec/x86/dsputil_yasm.asm index 82678f1880..0e497c9cda 100644 --- a/libavcodec/x86/dsputil_yasm.asm +++ b/libavcodec/x86/dsputil_yasm.asm @@ -1129,3 +1129,51 @@ VECTOR_CLIP_INT32 11, 1, 1, 0 %else VECTOR_CLIP_INT32 6, 1, 0, 0 %endif + +;----------------------------------------------------------------------------- +; void ff_butterflies_float_interleave(float *dst, const float *src0, +; const float *src1, int len); +;----------------------------------------------------------------------------- + +%macro BUTTERFLIES_FLOAT_INTERLEAVE 0 +cglobal butterflies_float_interleave, 4,4,3, dst, src0, src1, len +%ifdef ARCH_X86_64 + movsxd lenq, lend +%endif + test lenq, lenq + jz .end + shl lenq, 2 + lea src0q, [src0q + lenq] + lea src1q, [src1q + lenq] + lea dstq, [ dstq + 2*lenq] + neg lenq +.loop: + mova m0, [src0q + lenq] + mova m1, [src1q + lenq] + subps m2, m0, m1 + addps m0, m0, m1 + unpcklps m1, m0, m2 + unpckhps m0, m0, m2 +%if cpuflag(avx) + vextractf128 [dstq + 2*lenq ], m1, 0 + vextractf128 [dstq + 2*lenq + 16], m0, 0 + vextractf128 [dstq + 2*lenq + 32], m1, 1 + vextractf128 [dstq + 2*lenq + 48], m0, 1 +%else + mova [dstq + 2*lenq ], m1 + mova [dstq + 2*lenq + mmsize], m0 +%endif + add lenq, mmsize + jl .loop +%if mmsize == 32 + vzeroupper + RET +%endif +.end: + REP_RET +%endmacro + +INIT_XMM sse +BUTTERFLIES_FLOAT_INTERLEAVE +INIT_YMM avx +BUTTERFLIES_FLOAT_INTERLEAVE |