summary refs log tree commit diff
diff options
context:
space:
mode:
author Justin Ruggles <justin.ruggles@gmail.com> 2012-06-18 23:39:14 -0400
committer Justin Ruggles <justin.ruggles@gmail.com> 2012-07-27 11:25:48 -0400
commit 79687079a97a039c325ab79d7a95920d800b791f (patch)
tree b9a7056666e923047c1f0722f4e0e6dafa35ad8d
parent 0cf7d849ffcd82ef4b8279b0a5aec306c29717f7 (diff)
x86: add support for fmaddps fma4 instruction with abstraction to avx/sse
-rwxr-xr-x configure 5
-rw-r--r-- libavutil/x86/x86inc.asm 16
2 files changed, 16 insertions, 5 deletions
diff --git a/configure b/configure
index fd90369212..715e49b020 100755
--- a/configure
+++ b/configure
@@ -242,6 +242,7 @@ Optimization options (experts only):
--disable-sse disable SSE optimizations
--disable-ssse3 disable SSSE3 optimizations
--disable-avx disable AVX optimizations
+ --disable-fma4 disable FMA4 optimizations
--disable-armv5te disable armv5te optimizations
--disable-armv6 disable armv6 optimizations
--disable-armv6t2 disable armv6t2 optimizations
@@ -1047,6 +1048,7 @@ ARCH_EXT_LIST='
armv6t2
armvfp
avx
+ fma4
mmi
mmx
mmx2
@@ -1295,6 +1297,7 @@ mmx2_deps="mmx"
sse_deps="mmx"
ssse3_deps="sse"
avx_deps="ssse3"
+fma4_deps="avx"
aligned_stack_if_any="ppc x86"
fast_64bit_if_any="alpha ia64 mips64 parisc64 ppc64 sparc64 x86_64"
@@ -2865,6 +2868,7 @@ EOF
check_yasm "pextrd [eax], xmm0, 1" && enable yasm ||
die "yasm not found, use --disable-yasm for a crippled build"
check_yasm "vextractf128 xmm0, ymm0, 0" || disable avx
+ check_yasm "vfmaddps ymm0, ymm1, ymm2, ymm3" || disable fma4
fi
case "$cpu" in
@@ -3292,6 +3296,7 @@ if enabled x86; then
echo "SSE enabled ${sse-no}"
echo "SSSE3 enabled ${ssse3-no}"
echo "AVX enabled ${avx-no}"
+ echo "FMA4 enabled ${fma4-no}"
echo "CMOV enabled ${cmov-no}"
echo "CMOV is fast ${fast_cmov-no}"
echo "EBX available ${ebx_available-no}"
diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm
index b76a10ca5e..4b4a19b208 100644
--- a/libavutil/x86/x86inc.asm
+++ b/libavutil/x86/x86inc.asm
@@ -1093,16 +1093,22 @@ AVX_INSTR pfmul, 1, 0, 1
%undef j
%macro FMA_INSTR 3
- %macro %1 4-7 %1, %2, %3
- %if cpuflag(xop)
- v%5 %1, %2, %3, %4
+ %macro %1 5-8 %1, %2, %3
+ %if cpuflag(xop) || cpuflag(fma4)
+ v%6 %1, %2, %3, %4
%else
- %6 %1, %2, %3
- %7 %1, %4
+ %ifidn %1, %4
+ %7 %5, %2, %3
+ %8 %1, %4, %5
+ %else
+ %7 %1, %2, %3
+ %8 %1, %4
+ %endif
%endif
%endmacro
%endmacro
+FMA_INSTR fmaddps, mulps, addps
FMA_INSTR pmacsdd, pmulld, paddd
FMA_INSTR pmacsww, pmullw, paddw
FMA_INSTR pmadcswd, pmaddwd, paddd