diff options
author | James Almer <jamrial@gmail.com> | 2014-02-08 02:54:51 -0300 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2014-02-13 22:14:59 +0100 |
commit | e87974bc00e997c5844300687a97a11e0dbf6f12 (patch) | |
tree | a2f0f1817660cc8f19aae39e579d5228c1f03d92 /libavcodec/x86/flacdsp.asm | |
parent | 23a8c63452009df21b3f184936b343593d4ccb04 (diff) |
flac/x86: add ff_flac_lpc_32_xop()
Tested on an AMD FX 6300
679081 decicycles in ff_flac_lpc_32_xop, 32768 runs
774425 decicycles in ff_flac_lpc_32_sse4, 32768 runs
Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/flacdsp.asm')
-rw-r--r-- | libavcodec/x86/flacdsp.asm | 21 |
1 files changed, 12 insertions, 9 deletions
```diff
diff --git a/libavcodec/x86/flacdsp.asm b/libavcodec/x86/flacdsp.asm
index e28f905c6f..1a83cd8f8f 100644
--- a/libavcodec/x86/flacdsp.asm
+++ b/libavcodec/x86/flacdsp.asm
@@ -24,7 +24,8 @@
 
 SECTION .text
 
-INIT_XMM sse4
+%macro LPC_32 1
+INIT_XMM %1
 cglobal flac_lpc_32, 5,6,5, decoded, coeffs, pred_order, qlevel, len, j
     sub    lend, pred_orderd
     jle .ret
@@ -43,25 +44,21 @@ ALIGN 16
     test   jq, jq
     jz .end_order
 .loop_order:
-    pmuldq m0, m1
-    paddq  m2, m0
+    pmacsdql m2, m0, m1, m2
     movd   m0, [decodedq+jq*4]
-    pmuldq m1, m0
-    paddq  m3, m1
+    pmacsdql m3, m1, m0, m3
     movd   m1, [coeffsq+jq*4]
     inc    jq
     jl .loop_order
 .end_order:
-    pmuldq m0, m1
-    paddq  m2, m0
+    pmacsdql m2, m0, m1, m2
     psrlq  m2, m4
     movd   m0, [decodedq]
     paddd  m0, m2
     movd   [decodedq], m0
     sub    lend, 2
     jl .ret
-    pmuldq m1, m0
-    paddq  m3, m1
+    pmacsdql m3, m1, m0, m3
     psrlq  m3, m4
     movd   m1, [decodedq+4]
     paddd  m1, m3
@@ -69,3 +66,9 @@ ALIGN 16
     jg .loop_sample
 .ret:
     REP_RET
+%endmacro
+
+%if HAVE_XOP_EXTERNAL
+LPC_32 xop
+%endif
+LPC_32 sse4
```