summary | refs | log | tree | commit | diff
path: root/libavcodec
diff options
context:
space:
mode:
author Ronald S. Bultje <rsbultje@gmail.com> 2010-07-26 14:00:15 +0000
committer Ronald S. Bultje <rsbultje@gmail.com> 2010-07-26 14:00:15 +0000
commit 2a180c69eacdc6854957aabae3b0e3ee4d4fd774 (patch)
tree d2e11a41b1130f1dde5a4ff5e8d371fdf0ebe0c8 /libavcodec
parent bcd4aa6498d83f81e18eb70efa52969c110ac7c6 (diff)
Save a register (or one register's worth of stack space on x86-32) in the no-loop mbedge loopfilter functions by re-using the space that holds a variable that we no longer need.
Originally committed as revision 24510 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec')
-rw-r--r-- libavcodec/x86/vp8dsp.asm 40
1 file changed, 24 insertions, 16 deletions
diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm
index a67c5bcc79..b3070825f1 100644
--- a/libavcodec/x86/vp8dsp.asm
+++ b/libavcodec/x86/vp8dsp.asm
@@ -2200,11 +2200,15 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5
; align stack
mov stack_reg, rsp ; backup stack pointer
and rsp, ~(mmsize-1) ; align stack
+%if mmsize == 16 ; XMM versions: lim_sign shares slot [0] with flim_E, so 7 slots suffice
+ sub rsp, mmsize * 7
+%else ; MMX versions keep a dedicated slot [7] for lim_sign
sub rsp, mmsize * 8 ; stack layout: [0]=E, [1]=I, [2]=hev_thr
; [3]=hev() result
; [4]=filter tmp result
; [5]/[6] = p2/q2 backup
; [7]=lim_res sign result
+%endif
%define flim_E [rsp]
%define flim_I [rsp+mmsize]
@@ -2215,7 +2219,11 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5
%define q0backup [rsp+mmsize*4]
%define p2backup [rsp+mmsize*5]
%define q2backup [rsp+mmsize*6]
+%if mmsize == 16 ; must match the 7-slot allocation above (NOTE(review): relies on flim_E being dead before lim_sign is stored - confirm)
+%define lim_sign [rsp]
+%else
%define lim_sign [rsp+mmsize*7]
+%endif
mova flim_E, m0
mova flim_I, m1
@@ -2232,7 +2240,7 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5
%define q0backup m8
%define p2backup m13
%define q2backup m14
-%define lim_sign m15
+%define lim_sign m9
; splat function arguments
SPLATB_REG flim_E, E_reg, m7 ; E
@@ -2638,8 +2646,8 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5
pmullw m1, [pw_9]
paddw m6, m7
paddw m1, m7
-%ifdef m15
- SWAP 7, 15
+%ifdef m9
+ SWAP 7, 9
%else
mova m7, lim_sign
%endif
@@ -2749,29 +2757,29 @@ MBEDGE_LOOPFILTER mmxext, h, 6, 8, 0
INIT_XMM
%define SPLATB_REG SPLATB_REG_SSE2
%define WRITE_8W WRITE_8W_SSE2
-MBEDGE_LOOPFILTER sse2, v, 5, 16, 16
+MBEDGE_LOOPFILTER sse2, v, 5, 16, 15
%ifdef m8
-MBEDGE_LOOPFILTER sse2, h, 5, 16, 16
+MBEDGE_LOOPFILTER sse2, h, 5, 16, 15
%else
-MBEDGE_LOOPFILTER sse2, h, 6, 16, 16
+MBEDGE_LOOPFILTER sse2, h, 6, 16, 15
%endif
-MBEDGE_LOOPFILTER sse2, v, 6, 8, 16
-MBEDGE_LOOPFILTER sse2, h, 6, 8, 16
+MBEDGE_LOOPFILTER sse2, v, 6, 8, 15
+MBEDGE_LOOPFILTER sse2, h, 6, 8, 15
%define SPLATB_REG SPLATB_REG_SSSE3
-MBEDGE_LOOPFILTER ssse3, v, 5, 16, 16
+MBEDGE_LOOPFILTER ssse3, v, 5, 16, 15
%ifdef m8
-MBEDGE_LOOPFILTER ssse3, h, 5, 16, 16
+MBEDGE_LOOPFILTER ssse3, h, 5, 16, 15
%else
-MBEDGE_LOOPFILTER ssse3, h, 6, 16, 16
+MBEDGE_LOOPFILTER ssse3, h, 6, 16, 15
%endif
-MBEDGE_LOOPFILTER ssse3, v, 6, 8, 16
-MBEDGE_LOOPFILTER ssse3, h, 6, 8, 16
+MBEDGE_LOOPFILTER ssse3, v, 6, 8, 15
+MBEDGE_LOOPFILTER ssse3, h, 6, 8, 15
%define WRITE_8W WRITE_8W_SSE4
%ifdef m8
-MBEDGE_LOOPFILTER sse4, h, 5, 16, 16
+MBEDGE_LOOPFILTER sse4, h, 5, 16, 15
%else
-MBEDGE_LOOPFILTER sse4, h, 6, 16, 16
+MBEDGE_LOOPFILTER sse4, h, 6, 16, 15
%endif
-MBEDGE_LOOPFILTER sse4, h, 6, 8, 16
+MBEDGE_LOOPFILTER sse4, h, 6, 8, 15