summary | refs | log | tree | commit | diff
path: root/libavutil
diff options
context:
space:
mode:
author: Michael Niedermayer <michaelni@gmx.at> 2012-07-27 23:42:19 +0200
committer: Michael Niedermayer <michaelni@gmx.at> 2012-07-27 23:42:19 +0200
commit: c6963a220d5849fd5399c056b21ec66de7a0df37 (patch)
tree: 142ce617e997fd542f0f2ccd460212b5a3dc6835 /libavutil
parent: 94c3e11a6f62bf13a7e6f1b9287c6112bf6ee445 (diff)
parent: 5361e10a5e8740146c09a115477310c77b927215 (diff)
Merge remote-tracking branch 'qatar/master'
* qatar/master:
  proresdsp: port x86 assembly to cpuflags.
  lavr: x86: improve non-SSE4 version of S16_TO_S32_SX macro
  lavfi: better channel layout negotiation
  alac: check for truncated packets
  alac: reverse lpc coeff order, simplify filter
  lavr: add x86-optimized mixing functions
  x86: add support for fmaddps fma4 instruction with abstraction to avx/sse
  tscc2: fix typo in array index
  build: use COMPILE template for HOSTOBJS
  build: do full flag handling for all compiler-type tools
  eval: fix printing of NaN in eval fate test.
  build: Rename aandct component to more descriptive aandcttables
  mpegaudio: bury inline asm under HAVE_INLINE_ASM.
  x86inc: automatically insert vzeroupper for YMM functions.
  rtmp: Check the buffer length of ping packets
  rtmp: Allow having more unknown data at the end of a chunk size packet without failing
  rtmp: Prevent reading outside of an allocate buffer when receiving server bandwidth packets

Conflicts:
  Makefile
  configure
  libavcodec/x86/proresdsp.asm
  libavutil/eval.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavutil')
-rw-r--r-- libavutil/eval.c | 5
-rw-r--r-- libavutil/x86/float_dsp.asm | 10
-rw-r--r-- libavutil/x86/x86inc.asm | 31
3 files changed, 25 insertions, 21 deletions
diff --git a/libavutil/eval.c b/libavutil/eval.c
index fa76c6c949..6aa257efc4 100644
--- a/libavutil/eval.c
+++ b/libavutil/eval.c
@@ -797,11 +797,10 @@ int main(int argc, char **argv)
av_expr_parse_and_eval(&d, *expr,
const_names, const_values,
NULL, NULL, NULL, NULL, NULL, 0, NULL);
- if(isnan(d)){
+ if (isnan(d))
printf("'%s' -> nan\n\n", *expr);
- }else{
+ else
printf("'%s' -> %f\n\n", *expr, d);
- }
}
av_expr_parse_and_eval(&d, "1+(5-2)^(3-1)+1/2+sin(PI)-max(-2.2,-3.1)",
diff --git a/libavutil/x86/float_dsp.asm b/libavutil/x86/float_dsp.asm
index f68e0bfe2d..7a18a20aca 100644
--- a/libavutil/x86/float_dsp.asm
+++ b/libavutil/x86/float_dsp.asm
@@ -42,12 +42,7 @@ ALIGN 16
sub lenq, 2*mmsize
jge .loop
-%if mmsize == 32
- vzeroupper
- RET
-%else
REP_RET
-%endif
%endmacro
INIT_XMM sse
@@ -88,12 +83,7 @@ cglobal vector_fmac_scalar, 4,4,3, dst, src, mul, len
mova [dstq+lenq+mmsize], m2
sub lenq, 2*mmsize
jge .loop
-%if mmsize == 32
- vzeroupper
- RET
-%else
REP_RET
-%endif
%endmacro
INIT_XMM sse
diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm
index 9a39df6ec8..c80e0a1c1a 100644
--- a/libavutil/x86/x86inc.asm
+++ b/libavutil/x86/x86inc.asm
@@ -392,11 +392,14 @@ DECLARE_REG 14, R15, R15D, R15W, R15B, 120
%macro RET 0
WIN64_RESTORE_XMM_INTERNAL rsp
POP_IF_USED 14, 13, 12, 11, 10, 9, 8, 7
+%if mmsize == 32
+ vzeroupper
+%endif
ret
%endmacro
%macro REP_RET 0
- %if regs_used > 7 || xmm_regs_used > 6
+ %if regs_used > 7 || xmm_regs_used > 6 || mmsize == 32
RET
%else
rep ret
@@ -433,11 +436,14 @@ DECLARE_REG 14, R15, R15D, R15W, R15B, 72
%macro RET 0
POP_IF_USED 14, 13, 12, 11, 10, 9
+%if mmsize == 32
+ vzeroupper
+%endif
ret
%endmacro
%macro REP_RET 0
- %if regs_used > 9
+ %if regs_used > 9 || mmsize == 32
RET
%else
rep ret
@@ -479,11 +485,14 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
%macro RET 0
POP_IF_USED 6, 5, 4, 3
+%if mmsize == 32
+ vzeroupper
+%endif
ret
%endmacro
%macro REP_RET 0
- %if regs_used > 3
+ %if regs_used > 3 || mmsize == 32
RET
%else
rep ret
@@ -1126,16 +1135,22 @@ AVX_INSTR pfmul, 1, 0, 1
%undef j
%macro FMA_INSTR 3
- %macro %1 4-7 %1, %2, %3
- %if cpuflag(xop)
- v%5 %1, %2, %3, %4
+ %macro %1 5-8 %1, %2, %3
+ %if cpuflag(xop) || cpuflag(fma4)
+ v%6 %1, %2, %3, %4
%else
- %6 %1, %2, %3
- %7 %1, %4
+ %ifidn %1, %4
+ %7 %5, %2, %3
+ %8 %1, %4, %5
+ %else
+ %7 %1, %2, %3
+ %8 %1, %4
+ %endif
%endif
%endmacro
%endmacro
+FMA_INSTR fmaddps, mulps, addps
FMA_INSTR pmacsdd, pmulld, paddd
FMA_INSTR pmacsww, pmullw, paddw
FMA_INSTR pmadcswd, pmaddwd, paddd