summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--libavcodec/x86/vp8dsp.asm85
1 files changed, 52 insertions, 33 deletions
diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm
index 4aa901e27f..3bdc8f7ed9 100644
--- a/libavcodec/x86/vp8dsp.asm
+++ b/libavcodec/x86/vp8dsp.asm
@@ -1360,22 +1360,29 @@ cglobal vp8_luma_dc_wht_mmx, 2,3
movd [%7+%9*2], m%4
%endmacro
-%macro SPLATB_REG 3-4
+%macro SPLATB_REG_MMX 2-3
movd %1, %2
-%ifidn %3, ssse3
- pshufb %1, %4
-%else
punpcklbw %1, %1
-%if mmsize == 16 ; sse2
- pshuflw %1, %1, 0x0
- punpcklqdq %1, %1
-%elifidn %3, mmx
punpcklwd %1, %1
punpckldq %1, %1
-%else ; mmxext
+%endmacro
+
+%macro SPLATB_REG_MMXEXT 2-3
+ movd %1, %2
+ punpcklbw %1, %1
pshufw %1, %1, 0x0
-%endif
-%endif
+%endmacro
+
+%macro SPLATB_REG_SSE2 2-3
+ movd %1, %2
+ punpcklbw %1, %1
+ pshuflw %1, %1, 0x0
+ punpcklqdq %1, %1
+%endmacro
+
+%macro SPLATB_REG_SSSE3 3
+ movd %1, %2
+ pshufb %1, %3
%endmacro
%macro SIMPLE_LOOPFILTER 3
@@ -1387,10 +1394,10 @@ cglobal vp8_%2_loop_filter_simple_%1, 3, %3
%if mmsize == 8 ; mmx/mmxext
mov r3, 2
%endif
-%ifidn %1, ssse3
+%ifnidn %1, sse2 && mmsize == 16
pxor m0, m0
%endif
- SPLATB_REG m7, r2, %1, m0 ; splat "flim" into register
+ SPLATB_REG m7, r2, m0 ; splat "flim" into register
; set up indexes to address 4 rows
mov r2, r1
@@ -1529,13 +1536,17 @@ cglobal vp8_%2_loop_filter_simple_%1, 3, %3
%endmacro
INIT_MMX
+%define SPLATB_REG SPLATB_REG_MMX
SIMPLE_LOOPFILTER mmx, v, 4
SIMPLE_LOOPFILTER mmx, h, 6
+%define SPLATB_REG SPLATB_REG_MMXEXT
SIMPLE_LOOPFILTER mmxext, v, 4
SIMPLE_LOOPFILTER mmxext, h, 6
INIT_XMM
+%define SPLATB_REG SPLATB_REG_SSE2
SIMPLE_LOOPFILTER sse2, v, 3
SIMPLE_LOOPFILTER sse2, h, 6
+%define SPLATB_REG SPLATB_REG_SSSE3
SIMPLE_LOOPFILTER ssse3, v, 3
SIMPLE_LOOPFILTER ssse3, h, 6
@@ -1573,15 +1584,15 @@ cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %5
%define stack_reg hev_thr_reg
%endif
-%ifidn %1, ssse3
+%ifnidn %1, sse2 && mmsize == 16
pxor m7, m7
%endif
%ifndef m8 ; mmx/mmxext or sse2 on x86-32
; splat function arguments
- SPLATB_REG m0, E_reg, %1, m7 ; E
- SPLATB_REG m1, I_reg, %1, m7 ; I
- SPLATB_REG m2, hev_thr_reg, %1, m7 ; hev_thresh
+ SPLATB_REG m0, E_reg, m7 ; E
+ SPLATB_REG m1, I_reg, m7 ; I
+ SPLATB_REG m2, hev_thr_reg, m7 ; hev_thresh
; align stack
mov stack_reg, rsp ; backup stack pointer
@@ -1614,9 +1625,9 @@ cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %5
%define q0backup m8
; splat function arguments
- SPLATB_REG flim_E, E_reg, %1, m7 ; E
- SPLATB_REG flim_I, I_reg, %1, m7 ; I
- SPLATB_REG hev_thr, hev_thr_reg, %1, m7 ; hev_thresh
+ SPLATB_REG flim_E, E_reg, m7 ; E
+ SPLATB_REG flim_I, I_reg, m7 ; I
+ SPLATB_REG hev_thr, hev_thr_reg, m7 ; hev_thresh
%endif
%if mmsize == 8 && %4 == 16 ; mmx/mmxext
@@ -2028,17 +2039,20 @@ cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %5
%endmacro
INIT_MMX
+%define SPLATB_REG SPLATB_REG_MMX
INNER_LOOPFILTER mmx, v, 6, 16, 0
INNER_LOOPFILTER mmx, h, 6, 16, 0
-INNER_LOOPFILTER mmxext, v, 6, 16, 0
-INNER_LOOPFILTER mmxext, h, 6, 16, 0
-
INNER_LOOPFILTER mmx, v, 6, 8, 0
INNER_LOOPFILTER mmx, h, 6, 8, 0
+
+%define SPLATB_REG SPLATB_REG_MMXEXT
+INNER_LOOPFILTER mmxext, v, 6, 16, 0
+INNER_LOOPFILTER mmxext, h, 6, 16, 0
INNER_LOOPFILTER mmxext, v, 6, 8, 0
INNER_LOOPFILTER mmxext, h, 6, 8, 0
INIT_XMM
+%define SPLATB_REG SPLATB_REG_SSE2
INNER_LOOPFILTER sse2, v, 5, 16, 13
%ifdef m8
INNER_LOOPFILTER sse2, h, 5, 16, 13
@@ -2048,6 +2062,7 @@ INNER_LOOPFILTER sse2, h, 6, 16, 13
INNER_LOOPFILTER sse2, v, 6, 8, 13
INNER_LOOPFILTER sse2, h, 6, 8, 13
+%define SPLATB_REG SPLATB_REG_SSSE3
INNER_LOOPFILTER ssse3, v, 5, 16, 13
%ifdef m8
INNER_LOOPFILTER ssse3, h, 5, 16, 13
@@ -2152,15 +2167,15 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5
%define stack_reg hev_thr_reg
%endif
-%ifidn %1, ssse3
+%ifnidn %1, sse2 && mmsize == 16
pxor m7, m7
%endif
%ifndef m8 ; mmx/mmxext or sse2 on x86-32
; splat function arguments
- SPLATB_REG m0, E_reg, %1, m7 ; E
- SPLATB_REG m1, I_reg, %1, m7 ; I
- SPLATB_REG m2, hev_thr_reg, %1, m7 ; hev_thresh
+ SPLATB_REG m0, E_reg, m7 ; E
+ SPLATB_REG m1, I_reg, m7 ; I
+ SPLATB_REG m2, hev_thr_reg, m7 ; hev_thresh
; align stack
mov stack_reg, rsp ; backup stack pointer
@@ -2200,9 +2215,9 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5
%define lim_sign m15
; splat function arguments
- SPLATB_REG flim_E, E_reg, %1, m7 ; E
- SPLATB_REG flim_I, I_reg, %1, m7 ; I
- SPLATB_REG hev_thr, hev_thr_reg, %1, m7 ; hev_thresh
+ SPLATB_REG flim_E, E_reg, m7 ; E
+ SPLATB_REG flim_I, I_reg, m7 ; I
+ SPLATB_REG hev_thr, hev_thr_reg, m7 ; hev_thresh
%endif
%if mmsize == 8 && %4 == 16 ; mmx/mmxext
@@ -2696,17 +2711,20 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5
%endmacro
INIT_MMX
+%define SPLATB_REG SPLATB_REG_MMX
MBEDGE_LOOPFILTER mmx, v, 6, 16, 0
MBEDGE_LOOPFILTER mmx, h, 6, 16, 0
-MBEDGE_LOOPFILTER mmxext, v, 6, 16, 0
-MBEDGE_LOOPFILTER mmxext, h, 6, 16, 0
-
MBEDGE_LOOPFILTER mmx, v, 6, 8, 0
MBEDGE_LOOPFILTER mmx, h, 6, 8, 0
+
+%define SPLATB_REG SPLATB_REG_MMXEXT
+MBEDGE_LOOPFILTER mmxext, v, 6, 16, 0
+MBEDGE_LOOPFILTER mmxext, h, 6, 16, 0
MBEDGE_LOOPFILTER mmxext, v, 6, 8, 0
MBEDGE_LOOPFILTER mmxext, h, 6, 8, 0
INIT_XMM
+%define SPLATB_REG SPLATB_REG_SSE2
MBEDGE_LOOPFILTER sse2, v, 5, 16, 16
%ifdef m8
MBEDGE_LOOPFILTER sse2, h, 5, 16, 16
@@ -2716,6 +2734,7 @@ MBEDGE_LOOPFILTER sse2, h, 6, 16, 16
MBEDGE_LOOPFILTER sse2, v, 6, 8, 16
MBEDGE_LOOPFILTER sse2, h, 6, 8, 16
+%define SPLATB_REG SPLATB_REG_SSSE3
MBEDGE_LOOPFILTER ssse3, v, 5, 16, 16
%ifdef m8
MBEDGE_LOOPFILTER ssse3, h, 5, 16, 16