summary | refs | log | tree | commit | diff
path: root/libavcodec/x86/ac3dsp.asm
diff options
context:
space:
mode:
author: Michael Niedermayer <michaelni@gmx.at> 2011-05-29 02:55:19 +0200
committer: Michael Niedermayer <michaelni@gmx.at> 2011-05-29 03:34:35 +0200
commit: b8a43bc1b50f409414493a05f6c4b7895ca4ddf9 (patch)
tree: 95dda1b7289aac9bdb1f457417baf9515aa4383a /libavcodec/x86/ac3dsp.asm
parent: 39d607e5bbc25ad9629683702b510e865434ef21 (diff)
parent: 90da52f01f8b6c22af22a002eb226989b1cf7ef8 (diff)
Merge remote-tracking branch 'qatar/master' into master
* qatar/master: (27 commits)
  ac3enc: fix LOCAL_ALIGNED usage in count_mantissa_bits()
  ac3dsp: do not use the ff_* prefix when referencing ff_ac3_bap_bits.
  ac3dsp: fix loop condition in ac3_update_bap_counts_c()
  ARM: unbreak build
  ac3enc: modify mantissa bit counting to keep bap counts for all values of bap instead of just 0 to 4.
  ac3enc: split mantissa bit counting into a separate function.
  ac3enc: store per-block/channel bap pointers by reference block in a 2D array rather than in the AC3Block struct.
  get_bits: add av_unused tag to cache variable
  sws: replace all long with int.
  ARM: aacdec: fix constraints on inline asm
  ARM: remove unnecessary volatile from inline asm
  ARM: add "cc" clobbers to inline asm where needed
  ARM: improve FASTDIV asm
  ac3enc: use LOCAL_ALIGNED macro
  APIchanges: fill in git hash for av_get_pix_fmt_name (0420bd7).
  lavu: add av_get_pix_fmt_name() convenience function
  cmdutils: remove OPT_FUNC2
  swscale: fix crash in bilinear scaling.
  vpxenc: add VP8E_SET_STATIC_THRESHOLD mapping
  webm: support stereo videos in matroska/webm muxer
  ...

Conflicts:
  Changelog
  cmdutils.c
  cmdutils.h
  doc/APIchanges
  doc/muxers.texi
  ffmpeg.c
  ffplay.c
  libavcodec/ac3enc.c
  libavcodec/ac3enc_float.c
  libavcodec/avcodec.h
  libavcodec/get_bits.h
  libavcodec/libvpxenc.c
  libavcodec/version.h
  libavdevice/libdc1394.c
  libavformat/matroskaenc.c
  libavutil/avutil.h
  libswscale/rgb2rgb.c
  libswscale/swscale.c
  libswscale/swscale_template.c
  libswscale/x86/swscale_template.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/ac3dsp.asm')
-rw-r--r-- libavcodec/x86/ac3dsp.asm | 53
1 files changed, 53 insertions, 0 deletions
diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm
index b67f893f22..99c5df340e 100644
--- a/libavcodec/x86/ac3dsp.asm
+++ b/libavcodec/x86/ac3dsp.asm
@@ -27,6 +27,11 @@ SECTION_RODATA
; 16777216.0f - used in ff_float_to_fixed24()
pf_1_24: times 4 dd 0x4B800000
+; constants used by ac3_compute_mantissa_size_sse2 (ff_ac3_compute_mantissa_size())
+cextern ac3_bap_bits ; table defined outside this file; read below as 2 x 8 16-bit words by pmaddwd
+pw_bap_mul1: dw 21846, 21846, 0, 32768, 21846, 21846, 0, 32768 ; pmulhuw factors for words 1..4 of a block: (x*21846)>>16 == x/3 for x < 32768, 0 clears the lane, (x*32768)>>16 == x>>1
+pw_bap_mul2: dw 5, 7, 0, 7, 5, 7, 0, 7 ; pmaddwd weights applied to the scaled counts of words 1..4 (same lane order as pw_bap_mul1)
+
+
SECTION .text
;-----------------------------------------------------------------------------
@@ -293,3 +298,51 @@ cglobal float_to_fixed24_sse2, 3,3,9, dst, src, len
%endif
ja .loop
REP_RET
+
+;------------------------------------------------------------------------------
+; int ff_ac3_compute_mantissa_size(uint16_t mant_cnt[6][16])
+;------------------------------------------------------------------------------
+
+%macro PHADDD4 2 ; xmm src, xmm tmp -- horizontal add: dword 0 of src ends up as the sum of its 4 dwords; tmp is overwritten
+ movhlps %2, %1 ; low 64 bits of tmp = high 64 bits of src (dwords 2,3)
+ paddd %1, %2 ; src dword 0 = d0+d2, src dword 1 = d1+d3
+ pshufd %2, %1, 0x1 ; tmp dword 0 = src dword 1 (the d1+d3 partial sum)
+ paddd %1, %2 ; src dword 0 = d0+d1+d2+d3; upper dwords are not meaningful
+%endmacro
+
+INIT_XMM
+cglobal ac3_compute_mantissa_size_sse2, 1,2,4, mant_cnt, sum ; x86inc cglobal: 1 argument (mant_cnt), 2 GPRs (mant_cnt, sum), 4 XMM regs (m0-m3)
+ movdqa m0, [mant_cntq ] ; m0 = block 0, words 0-7 (each block is 16 uint16_t = 32 bytes)
+ movdqa m1, [mant_cntq+ 1*16] ; m1 = block 0, words 8-15
+ paddw m0, [mant_cntq+ 2*16] ; m0 += block 1, words 0-7
+ paddw m1, [mant_cntq+ 3*16] ; m1 += block 1, words 8-15
+ paddw m0, [mant_cntq+ 4*16] ; m0 += block 2, words 0-7
+ paddw m1, [mant_cntq+ 5*16] ; m1 += block 2, words 8-15
+ paddw m0, [mant_cntq+ 6*16] ; m0 += block 3, words 0-7
+ paddw m1, [mant_cntq+ 7*16] ; m1 += block 3, words 8-15
+ paddw m0, [mant_cntq+ 8*16] ; m0 += block 4, words 0-7
+ paddw m1, [mant_cntq+ 9*16] ; m1 += block 4, words 8-15
+ paddw m0, [mant_cntq+10*16] ; m0 += block 5, words 0-7
+ paddw m1, [mant_cntq+11*16] ; m1 += block 5, words 8-15 (paddw wraps at 16 bits; assumes per-index totals fit -- TODO confirm against caller)
+ pmaddwd m0, [ac3_bap_bits ] ; weight summed counts 0-7 by table words 0-7, adjacent pairs added into dwords
+ pmaddwd m1, [ac3_bap_bits+16] ; same for summed counts 8-15 with table words 8-15
+ paddd m0, m1 ; combine both halves of the table-weighted sum
+ PHADDD4 m0, m1 ; dword 0 of m0 = total of the four partial sums (m1 used as scratch)
+ movd sumd, m0 ; keep the table-weighted total in the sum GPR while m0 is reused
+ movdqa m3, [pw_bap_mul1] ; per-lane pmulhuw factors for words 1..4 of two blocks
+ movhpd m0, [mant_cntq +2] ; m0 high half = block 0, words 1-4 (byte offset 2 skips word 0)
+ movlpd m0, [mant_cntq+1*32+2] ; m0 low half = block 1, words 1-4
+ movhpd m1, [mant_cntq+2*32+2] ; m1 high half = block 2, words 1-4
+ movlpd m1, [mant_cntq+3*32+2] ; m1 low half = block 3, words 1-4
+ movhpd m2, [mant_cntq+4*32+2] ; m2 high half = block 4, words 1-4
+ movlpd m2, [mant_cntq+5*32+2] ; m2 low half = block 5, words 1-4
+ pmulhuw m0, m3 ; high 16 bits of unsigned product: scales each count per block, before any cross-block summing
+ pmulhuw m1, m3 ; same scaling for blocks 2 and 3
+ pmulhuw m2, m3 ; same scaling for blocks 4 and 5
+ paddusw m0, m1 ; accumulate scaled counts across blocks (unsigned saturating add)
+ paddusw m0, m2 ; m0 now holds scaled counts summed over all 6 blocks, two blocks' lanes side by side
+ pmaddwd m0, [pw_bap_mul2] ; weight the scaled counts lane by lane, adjacent pairs added into dwords
+ PHADDD4 m0, m1 ; dword 0 of m0 = total of the weighted scaled counts
+ movd eax, m0 ; eax = total for words 1..4 handled via pw_bap_mul1/pw_bap_mul2
+ add eax, sumd ; return value in eax = that total + table-weighted total saved earlier
+ RET