diff options
author | Justin Ruggles <justin.ruggles@gmail.com> | 2012-04-23 15:04:09 -0400 |
---|---|---|
committer | Justin Ruggles <justin.ruggles@gmail.com> | 2012-05-29 15:33:18 -0400 |
commit | c140fb2cbc8e95f2533e51c0e4acf51211cf45cf (patch) | |
tree | e7814d889e8880ced8583f901435e29612a7640f /libavresample/x86/audio_mix.asm | |
parent | 8dfc122719f3e1fa88fa9356a820843e6840df16 (diff) |
lavr: add x86-optimized functions for mixing 2 to 1 s16p with float coeffs
Diffstat (limited to 'libavresample/x86/audio_mix.asm')
-rw-r--r-- | libavresample/x86/audio_mix.asm | 45 |
1 files changed, 45 insertions, 0 deletions
diff --git a/libavresample/x86/audio_mix.asm b/libavresample/x86/audio_mix.asm
index dbc79e585d..c2e310b3d5 100644
--- a/libavresample/x86/audio_mix.asm
+++ b/libavresample/x86/audio_mix.asm
@@ -21,6 +21,7 @@
 
 %include "x86inc.asm"
 %include "x86util.asm"
+%include "util.asm"
 
 SECTION_TEXT
 
@@ -64,3 +65,47 @@ MIX_2_TO_1_FLTP_FLT
 INIT_YMM avx
 MIX_2_TO_1_FLTP_FLT
 %endif
+
+;-----------------------------------------------------------------------------
+; void ff_mix_2_to_1_s16p_flt(int16_t **src, float **matrix, int len,
+;                             int out_ch, int in_ch);
+;-----------------------------------------------------------------------------
+; In place, per sample i: src[0][i] = sat_s16(src[0][i]*matrix[0][0] + src[1][i]*matrix[0][1])
+%macro MIX_2_TO_1_S16P_FLT 0                 ; takes no macro parameters
+cglobal mix_2_to_1_s16p_flt, 3,4,6, src, matrix, len, src1 ; m0-m5 are used below; out_ch/in_ch are never read (counts presumably = args, GPRs, vector regs per x86inc)
+    mov       src1q, [srcq+gprsize]          ; src1 = src[1], the second channel pointer
+    mov        srcq, [srcq]                  ; src  = src[0], the first channel (also the destination)
+    sub       src1q, srcq                    ; src1 = byte offset from src[0] to src[1], so one pointer walks both
+    mov     matrixq, [matrixq  ]             ; matrix = matrix[0]
+    VBROADCASTSS m4, [matrixq  ]             ; m4 = matrix[0][0], coefficient for src[0] (macro not shown; presumably fills every lane)
+    VBROADCASTSS m5, [matrixq+4]             ; m5 = matrix[0][1], coefficient for src[1]
+    ALIGN 16
+.loop:                                       ; each pass consumes mmsize bytes = mmsize/2 int16 samples per channel
+    mova         m0, [srcq      ]            ; one vector of src[0] samples (NOTE(review): mova presumably needs mmsize-aligned buffers - confirm)
+    mova         m2, [srcq+src1q]            ; the matching vector of src[1] samples
+    S16_TO_S32_SX 0, 1                       ; macro not shown here; presumably sign-extends the int16 in m0 to int32 across m0/m1
+    S16_TO_S32_SX 2, 3                       ; likewise for m2 into m2/m3
+    cvtdq2ps     m0, m0                      ; int32 -> float for all four vectors
+    cvtdq2ps     m1, m1
+    cvtdq2ps     m2, m2
+    cvtdq2ps     m3, m3
+    mulps        m0, m4                      ; scale the src[0] samples by m4
+    mulps        m1, m4
+    mulps        m2, m5                      ; scale the src[1] samples by m5
+    mulps        m3, m5
+    addps        m0, m2                      ; mix: add the two scaled channels
+    addps        m1, m3
+    cvtps2dq     m0, m0                      ; float -> int32, rounded according to the current MXCSR mode
+    cvtps2dq     m1, m1
+    packssdw     m0, m1                      ; int32 -> int16 with signed saturation
+    mova     [srcq], m0                      ; store over the src[0] samples that were just read
+    add        srcq, mmsize                  ; advance one vector; src1q is an offset and needs no update
+    sub        lend, mmsize/2                ; that many samples are now done
+    jg .loop                                 ; no check before the first pass: the body always runs once, and a partial tail is processed as a whole vector
+    REP_RET
+%endmacro
+
+INIT_XMM sse2                                ; emit the SSE2 variant of the function
+MIX_2_TO_1_S16P_FLT
+INIT_XMM sse4                                ; emit the SSE4 variant (same macro body; any difference would come from macros it invokes)
+MIX_2_TO_1_S16P_FLT |