summary | refs | log | tree | commit | diff
path: root/libavcodec/x86/flacdsp.asm
diff options
context:
space:
mode:
author: James Almer <jamrial@gmail.com> 2014-02-08 02:54:51 -0300
committer: Michael Niedermayer <michaelni@gmx.at> 2014-02-13 22:14:59 +0100
commit: e87974bc00e997c5844300687a97a11e0dbf6f12 (patch)
tree: a2f0f1817660cc8f19aae39e579d5228c1f03d92 /libavcodec/x86/flacdsp.asm
parent: 23a8c63452009df21b3f184936b343593d4ccb04 (diff)
flac/x86: add ff_flac_lpc_32_xop()
Tested on an AMD FX 6300:

679081 decicycles in ff_flac_lpc_32_xop, 32768 runs
774425 decicycles in ff_flac_lpc_32_sse4, 32768 runs

Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/flacdsp.asm')
-rw-r--r-- libavcodec/x86/flacdsp.asm | 21
1 files changed, 12 insertions, 9 deletions
diff --git a/libavcodec/x86/flacdsp.asm b/libavcodec/x86/flacdsp.asm
index e28f905c6f..1a83cd8f8f 100644
--- a/libavcodec/x86/flacdsp.asm
+++ b/libavcodec/x86/flacdsp.asm
@@ -24,7 +24,8 @@
SECTION .text
-INIT_XMM sse4
+%macro LPC_32 1
+INIT_XMM %1
cglobal flac_lpc_32, 5,6,5, decoded, coeffs, pred_order, qlevel, len, j
sub lend, pred_orderd
jle .ret
@@ -43,25 +44,21 @@ ALIGN 16
test jq, jq
jz .end_order
.loop_order:
- pmuldq m0, m1
- paddq m2, m0
+ pmacsdql m2, m0, m1, m2
movd m0, [decodedq+jq*4]
- pmuldq m1, m0
- paddq m3, m1
+ pmacsdql m3, m1, m0, m3
movd m1, [coeffsq+jq*4]
inc jq
jl .loop_order
.end_order:
- pmuldq m0, m1
- paddq m2, m0
+ pmacsdql m2, m0, m1, m2
psrlq m2, m4
movd m0, [decodedq]
paddd m0, m2
movd [decodedq], m0
sub lend, 2
jl .ret
- pmuldq m1, m0
- paddq m3, m1
+ pmacsdql m3, m1, m0, m3
psrlq m3, m4
movd m1, [decodedq+4]
paddd m1, m3
@@ -69,3 +66,9 @@ ALIGN 16
jg .loop_sample
.ret:
REP_RET
+%endmacro
+
+%if HAVE_XOP_EXTERNAL
+LPC_32 xop
+%endif
+LPC_32 sse4