summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJames Almer <jamrial@gmail.com>2014-02-24 02:34:38 -0300
committerMichael Niedermayer <michaelni@gmx.at>2014-02-24 08:30:19 +0100
commit3f3d748cab3805dcc48599a8747976e18da3ab68 (patch)
treedecb3fc4b90870da4562337616d8764b862731f3
parent6c6e4dd139159a7dbf1b85f583804b6334ad88c1 (diff)
x86: Move XOP emulation to x86util
We need the emulation to support the cases where the first argument is the same as the fourth. To achieve this, a fifth argument working as a temporary may be needed. Emulation that doesn't obey the original instruction semantics can't be in x86inc. Signed-off-by: James Almer <jamrial@gmail.com> Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
-rw-r--r--libavcodec/x86/flacdsp.asm8
-rw-r--r--libavutil/x86/x86inc.asm19
-rw-r--r--libavutil/x86/x86util.asm19
3 files changed, 23 insertions, 23 deletions
diff --git a/libavcodec/x86/flacdsp.asm b/libavcodec/x86/flacdsp.asm
index 1a83cd8f8f..37ee87b163 100644
--- a/libavcodec/x86/flacdsp.asm
+++ b/libavcodec/x86/flacdsp.asm
@@ -44,21 +44,21 @@ ALIGN 16
test jq, jq
jz .end_order
.loop_order:
- pmacsdql m2, m0, m1, m2
+ PMACSDQL m2, m0, m1, m2, m0
movd m0, [decodedq+jq*4]
- pmacsdql m3, m1, m0, m3
+ PMACSDQL m3, m1, m0, m3, m1
movd m1, [coeffsq+jq*4]
inc jq
jl .loop_order
.end_order:
- pmacsdql m2, m0, m1, m2
+ PMACSDQL m2, m0, m1, m2, m0
psrlq m2, m4
movd m0, [decodedq]
paddd m0, m2
movd [decodedq], m0
sub lend, 2
jl .ret
- pmacsdql m3, m1, m0, m3
+ PMACSDQL m3, m1, m0, m3, m1
psrlq m3, m4
movd m1, [decodedq+4]
paddd m1, m3
diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm
index 88cae0cb48..a7f9f54367 100644
--- a/libavutil/x86/x86inc.asm
+++ b/libavutil/x86/x86inc.asm
@@ -1407,25 +1407,6 @@ AVX_INSTR pfmul, 1, 0, 1
%undef i
%undef j
-%macro FMA_INSTR 3
- %macro %1 4-7 %1, %2, %3
- %if cpuflag(xop)
- v%5 %1, %2, %3, %4
- %elifidn %1, %4
- %6 %2, %3
- %7 %1, %2
- %else
- %6 %1, %2, %3
- %7 %1, %4
- %endif
- %endmacro
-%endmacro
-
-FMA_INSTR pmacsdd, pmulld, paddd
-FMA_INSTR pmacsww, pmullw, paddw
-FMA_INSTR pmacsdql, pmuldq, paddq
-FMA_INSTR pmadcswd, pmaddwd, paddd
-
; tzcnt is equivalent to "rep bsf" and is backwards-compatible with bsf.
; This lets us use tzcnt without bumping the yasm version requirement yet.
%define tzcnt rep bsf
diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index 59e5df248e..df58cadf63 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -666,6 +666,25 @@
%endif
%endmacro
+%macro PMA_EMU 4 ; args: macro name to define, XOP instruction, multiply instruction, add instruction
+ %macro %1 5-8 %2, %3, %4 ; args: dst, src1, src2, addend, tmp (args 6-8 default to the three instruction names)
+ %if cpuflag(xop)
+ v%6 %1, %2, %3, %4 ; XOP build: emit the real 4-operand instruction; tmp is unused
+ %elifidn %1, %4
+ %7 %5, %2, %3 ; dst is also the addend, so the product goes to tmp
+ %8 %1, %4, %5 ; dst = addend + product
+ %else
+ %7 %1, %2, %3 ; dst differs from the addend: multiply straight into dst
+ %8 %1, %4 ; dst += addend
+ %endif
+ %endmacro
+%endmacro
+
+PMA_EMU PMACSWW, pmacsww, pmullw, paddw
+PMA_EMU PMACSDD, pmacsdd, pmulld, paddd ; sse4 emulation
+PMA_EMU PMACSDQL, pmacsdql, pmuldq, paddq ; sse4 emulation
+PMA_EMU PMADCSWD, pmadcswd, pmaddwd, paddd
+
; Wrapper for non-FMA version of fmaddps
%macro FMULADD_PS 5
%if cpuflag(fma3) || cpuflag(fma4)