summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDiego Biurrun <diego@biurrun.de>2012-07-15 18:01:10 +0200
committerDiego Biurrun <diego@biurrun.de>2017-03-14 17:23:32 +0100
commit994c4bc10751e39c7ed9f67ffd0c0dea5223daf2 (patch)
tree733e21b2c239787e61d5a82fdb4326c3780dc2fb
parent522d850e68ec4b77d3477b3c8f55b1ba00a9d69a (diff)
x86util: Port all macros to cpuflags
Also do some small cosmetic changes: Drop pointless _MMX suffix from ABSD2 macro name, drop pointless check for MMX support, we always assume MMX is available in our SIMD code, fix spelling.
-rw-r--r--libavcodec/x86/audiodsp.asm5
-rw-r--r--libavutil/x86/x86util.asm52
-rw-r--r--libswscale/x86/scale.asm10
3 files changed, 33 insertions, 34 deletions
diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm
index e038c18bd8..0f183c5e1d 100644
--- a/libavcodec/x86/audiodsp.asm
+++ b/libavcodec/x86/audiodsp.asm
@@ -66,7 +66,7 @@ SCALARPRODUCT
; %1 = number of xmm registers used
; %2 = number of inline load/process/store loops per asm loop
; %3 = process 4*mmsize (%3=0) or 8*mmsize (%3=1) bytes per loop
-; %4 = CLIPD function takes min/max as float instead of int (CLIPD_SSE2)
+; %4 = CLIPD function takes min/max as float instead of int (SSE2 version)
; %5 = suffix
%macro VECTOR_CLIP_INT32 4-5
cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
@@ -122,14 +122,11 @@ cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
%endmacro
INIT_MMX mmx
-%define CLIPD CLIPD_MMX
VECTOR_CLIP_INT32 0, 1, 0, 0
INIT_XMM sse2
VECTOR_CLIP_INT32 6, 1, 0, 0, _int
-%define CLIPD CLIPD_SSE2
VECTOR_CLIP_INT32 6, 2, 0, 1
INIT_XMM sse4
-%define CLIPD CLIPD_SSE41
%ifdef m8
VECTOR_CLIP_INT32 11, 1, 1, 0
%else
diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index bba958ebfc..81a3ada6e6 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -174,13 +174,13 @@
%endif
%endmacro
-%macro PSIGNW_MMX 2
+%macro PSIGNW 2
+%if cpuflag(ssse3)
+ psignw %1, %2
+%else
pxor %1, %2
psubw %1, %2
-%endmacro
-
-%macro PSIGNW_SSSE3 2
- psignw %1, %2
+%endif
%endmacro
%macro ABS1 2
@@ -221,7 +221,7 @@
%endif
%endmacro
-%macro ABSB 2 ; source mmreg, temp mmreg (unused for ssse3)
+%macro ABSB 2 ; source mmreg, temp mmreg (unused for SSSE3)
%if cpuflag(ssse3)
pabsb %1, %1
%else
@@ -245,7 +245,7 @@
%endif
%endmacro
-%macro ABSD2_MMX 4
+%macro ABSD2 4
pxor %3, %3
pxor %4, %4
pcmpgtd %3, %1
@@ -290,7 +290,7 @@
%else
palignr %1, %2, %3
%endif
-%elif cpuflag(mmx) ; [dst,] src1, src2, imm, tmp
+%else ; [dst,] src1, src2, imm, tmp
%define %%dst %1
%if %0==5
%ifnidn %1, %2
@@ -606,37 +606,47 @@
pminsw %1, %3
%endmacro
-%macro PMINSD_MMX 3 ; dst, src, tmp
+%macro PMINSD 3 ; dst, src, tmp/unused
+%if cpuflag(sse4)
+ pminsd %1, %2
+%elif cpuflag(sse2)
+ cvtdq2ps %1, %1
+ minps %1, %2
+ cvtps2dq %1, %1
+%else
mova %3, %2
pcmpgtd %3, %1
pxor %1, %2
pand %1, %3
pxor %1, %2
+%endif
%endmacro
-%macro PMAXSD_MMX 3 ; dst, src, tmp
+%macro PMAXSD 3 ; dst, src, tmp/unused
+%if cpuflag(sse4)
+ pmaxsd %1, %2
+%else
mova %3, %1
pcmpgtd %3, %2
pand %1, %3
pandn %3, %2
por %1, %3
+%endif
%endmacro
-%macro CLIPD_MMX 3-4 ; src/dst, min, max, tmp
- PMINSD_MMX %1, %3, %4
- PMAXSD_MMX %1, %2, %4
-%endmacro
-
-%macro CLIPD_SSE2 3-4 ; src/dst, min (float), max (float), unused
+%macro CLIPD 3-4
+%if cpuflag(sse4); src/dst, min, max, unused
+ pminsd %1, %3
+ pmaxsd %1, %2
+%elif cpuflag(sse2) ; src/dst, min (float), max (float), unused
cvtdq2ps %1, %1
minps %1, %3
maxps %1, %2
cvtps2dq %1, %1
-%endmacro
-
-%macro CLIPD_SSE41 3-4 ; src/dst, min, max, unused
- pminsd %1, %3
- pmaxsd %1, %2
+%else ; src/dst, min, max, tmp
+ PMINSD %1, %3, %4
+ PMAXSD %1, %2, %4
+%endif
%endmacro
%macro VBROADCASTSS 2 ; dst xmm/ymm, src m32
diff --git a/libswscale/x86/scale.asm b/libswscale/x86/scale.asm
index 982b432047..61709f47cb 100644
--- a/libswscale/x86/scale.asm
+++ b/libswscale/x86/scale.asm
@@ -364,15 +364,7 @@ cglobal hscale%1to%2_%4, %5, 10, %6, pos0, dst, w, srcmem, filter, fltpos, fltsi
movd [dstq+wq*2], m0
%endif ; %3 ==/!= X
%else ; %2 == 19
-%if mmsize == 8
- PMINSD_MMX m0, m2, m4
-%elif cpuflag(sse4)
- pminsd m0, m2
-%else ; sse2/ssse3
- cvtdq2ps m0, m0
- minps m0, m2
- cvtps2dq m0, m0
-%endif ; mmx/sse2/ssse3/sse4
+ PMINSD m0, m2, m4
%ifnidn %3, X
mova [dstq+wq*(4>>wshr)], m0
%else ; %3 == X