From 994c4bc10751e39c7ed9f67ffd0c0dea5223daf2 Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Sun, 15 Jul 2012 18:01:10 +0200 Subject: x86util: Port all macros to cpuflags Also do some small cosmetic changes: Drop pointless _MMX suffix from ABSD2 macro name, drop pointless check for MMX support, we always assume MMX is available in our SIMD code, fix spelling. --- libavcodec/x86/audiodsp.asm | 5 +---- libavutil/x86/x86util.asm | 52 +++++++++++++++++++++++++++------------------ libswscale/x86/scale.asm | 10 +-------- 3 files changed, 33 insertions(+), 34 deletions(-) diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm index e038c18bd8..0f183c5e1d 100644 --- a/libavcodec/x86/audiodsp.asm +++ b/libavcodec/x86/audiodsp.asm @@ -66,7 +66,7 @@ SCALARPRODUCT ; %1 = number of xmm registers used ; %2 = number of inline load/process/store loops per asm loop ; %3 = process 4*mmsize (%3=0) or 8*mmsize (%3=1) bytes per loop -; %4 = CLIPD function takes min/max as float instead of int (CLIPD_SSE2) +; %4 = CLIPD function takes min/max as float instead of int (SSE2 version) ; %5 = suffix %macro VECTOR_CLIP_INT32 4-5 cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len @@ -122,14 +122,11 @@ cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len %endmacro INIT_MMX mmx -%define CLIPD CLIPD_MMX VECTOR_CLIP_INT32 0, 1, 0, 0 INIT_XMM sse2 VECTOR_CLIP_INT32 6, 1, 0, 0, _int -%define CLIPD CLIPD_SSE2 VECTOR_CLIP_INT32 6, 2, 0, 1 INIT_XMM sse4 -%define CLIPD CLIPD_SSE41 %ifdef m8 VECTOR_CLIP_INT32 11, 1, 1, 0 %else diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm index bba958ebfc..81a3ada6e6 100644 --- a/libavutil/x86/x86util.asm +++ b/libavutil/x86/x86util.asm @@ -174,13 +174,13 @@ %endif %endmacro -%macro PSIGNW_MMX 2 +%macro PSIGNW 2 +%if cpuflag(ssse3) + psignw %1, %2 +%else pxor %1, %2 psubw %1, %2 -%endmacro - -%macro PSIGNW_SSSE3 2 - psignw %1, %2 +%endif %endmacro %macro ABS1 2 @@ -221,7 +221,7 @@ %endif %endmacro -%macro ABSB 2 ; source mmreg, temp mmreg (unused for ssse3) +%macro ABSB 2 ; source mmreg, temp mmreg (unused for SSSE3) %if cpuflag(ssse3) pabsb %1, %1 %else @@ -245,7 +245,7 @@ %endif %endmacro -%macro ABSD2_MMX 4 +%macro ABSD2 4 pxor %3, %3 pxor %4, %4 pcmpgtd %3, %1 @@ -290,7 +290,7 @@ %else palignr %1, %2, %3 %endif -%elif cpuflag(mmx) ; [dst,] src1, src2, imm, tmp +%else ; [dst,] src1, src2, imm, tmp %define %%dst %1 %if %0==5 %ifnidn %1, %2 @@ -606,37 +606,47 @@ pminsw %1, %3 %endmacro -%macro PMINSD_MMX 3 ; dst, src, tmp +%macro PMINSD 3 ; dst, src, tmp/unused +%if cpuflag(sse4) + pminsd %1, %2 +%elif cpuflag(sse2) + cvtdq2ps %1, %1 + minps %1, %2 + cvtps2dq %1, %1 +%else mova %3, %2 pcmpgtd %3, %1 pxor %1, %2 pand %1, %3 pxor %1, %2 +%endif %endmacro -%macro PMAXSD_MMX 3 ; dst, src, tmp +%macro PMAXSD 3 ; dst, src, tmp/unused +%if cpuflag(sse4) + pmaxsd %1, %2 +%else mova %3, %1 pcmpgtd %3, %2 pand %1, %3 pandn %3, %2 por %1, %3 +%endif %endmacro -%macro CLIPD_MMX 3-4 ; src/dst, min, max, tmp - PMINSD_MMX %1, %3, %4 - PMAXSD_MMX %1, %2, %4 -%endmacro - -%macro CLIPD_SSE2 3-4 ; src/dst, min (float), max (float), unused +%macro CLIPD 3-4 +%if cpuflag(sse4); src/dst, min, max, unused + pminsd %1, %3 + pmaxsd %1, %2 +%elif cpuflag(sse2) ; src/dst, min (float), max (float), unused cvtdq2ps %1, %1 minps %1, %3 maxps %1, %2 cvtps2dq %1, %1 -%endmacro - -%macro CLIPD_SSE41 3-4 ; src/dst, min, max, unused - pminsd %1, %3 - pmaxsd %1, %2 +%else ; src/dst, min, max, tmp + PMINSD %1, %3, %4 + PMAXSD %1, %2, %4 +%endif %endmacro %macro VBROADCASTSS 2 ; dst xmm/ymm, src m32 diff --git a/libswscale/x86/scale.asm b/libswscale/x86/scale.asm index 982b432047..61709f47cb 100644 --- a/libswscale/x86/scale.asm +++ b/libswscale/x86/scale.asm @@ -364,15 +364,7 @@ cglobal hscale%1to%2_%4, %5, 10, %6, pos0, dst, w, srcmem, filter, fltpos, fltsi movd [dstq+wq*2], m0 %endif ; %3 ==/!= X %else ; %2 == 19 -%if mmsize == 8 - PMINSD_MMX m0, m2, m4 -%elif cpuflag(sse4) - pminsd m0, m2 -%else ; sse2/ssse3 - cvtdq2ps m0, m0 - minps m0, m2 - cvtps2dq m0, m0 -%endif ; mmx/sse2/ssse3/sse4 + PMINSD m0, m2, m4 %ifnidn %3, X mova [dstq+wq*(4>>wshr)], m0 %else ; %3 == X -- cgit v1.2.3