summary | refs | log | tree | commit | diff
path: root/libavcodec/x86/h264_deblock.asm
diff options
context:
space:
mode:
author: Ronald S. Bultje <rsbultje@gmail.com> 2012-12-08 16:12:38 -0800
committer: Michael Niedermayer <michaelni@gmx.at> 2012-12-12 10:37:52 +0100
commit: ce58642ed0d6dade63ff1b2467aa573e97c0cc91 (patch)
tree: 5911ccf44159f032015e875a4bc7d11c1dd80381 /libavcodec/x86/h264_deblock.asm
parent: 82c0211213430a9aef0d6ef5b29de35043f5bcba (diff)
x86inc: support stack mem allocation and re-alignment in PROLOGUE.
Use this in VP8/H264-8bit loopfilter functions so they can be used if there is no aligned stack (e.g. MSVC 32bit or ICC 10.x).
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/h264_deblock.asm')
-rw-r--r-- libavcodec/x86/h264_deblock.asm 27
1 files changed, 6 insertions, 21 deletions
diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index cf7562141a..8ac38fcaa8 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -400,14 +400,12 @@ DEBLOCK_LUMA
;-----------------------------------------------------------------------------
; void deblock_v8_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
;-----------------------------------------------------------------------------
-cglobal deblock_%1_luma_8, 5,5
+cglobal deblock_%1_luma_8, 5,5,8,2*%2
lea r4, [r1*3]
dec r2 ; alpha-1
neg r4
dec r3 ; beta-1
add r4, r0 ; pix-3*stride
- %assign pad 2*%2+12-(stack_offset&15)
- SUB esp, pad
mova m0, [r4+r1] ; p1
mova m1, [r4+2*r1] ; p0
@@ -445,22 +443,19 @@ cglobal deblock_%1_luma_8, 5,5
DEBLOCK_P0_Q0
mova [r4+2*r1], m1
mova [r0], m2
- ADD esp, pad
RET
;-----------------------------------------------------------------------------
; void deblock_h_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
;-----------------------------------------------------------------------------
INIT_MMX cpuname
-cglobal deblock_h_luma_8, 0,5
+cglobal deblock_h_luma_8, 0,5,8,0x60+HAVE_ALIGNED_STACK*12
mov r0, r0mp
mov r3, r1m
lea r4, [r3*3]
sub r0, 4
lea r1, [r0+r4]
- %assign pad 0x78-(stack_offset&15)
- SUB esp, pad
-%define pix_tmp esp+12
+%define pix_tmp esp+12*HAVE_ALIGNED_STACK
; transpose 6x16 -> tmp space
TRANSPOSE6x8_MEM PASS8ROWS(r0, r1, r3, r4), pix_tmp
@@ -502,7 +497,6 @@ cglobal deblock_h_luma_8, 0,5
movq m3, [pix_tmp+0x48]
TRANSPOSE8x4B_STORE PASS8ROWS(r0, r1, r3, r4)
- ADD esp, pad
RET
%endmacro ; DEBLOCK_LUMA
@@ -635,7 +629,7 @@ DEBLOCK_LUMA v, 16
%define mpb_0 m14
%define mpb_1 m15
%else
- %define spill(x) [esp+16*x+((stack_offset+4)&15)]
+ %define spill(x) [esp+16*x]
%define p2 [r4+r1]
%define q2 [r0+2*r1]
%define t4 spill(0)
@@ -650,10 +644,7 @@ DEBLOCK_LUMA v, 16
;-----------------------------------------------------------------------------
; void deblock_v_luma_intra( uint8_t *pix, int stride, int alpha, int beta )
;-----------------------------------------------------------------------------
-cglobal deblock_%1_luma_intra_8, 4,6,16
-%if ARCH_X86_64 == 0
- sub esp, 0x60
-%endif
+cglobal deblock_%1_luma_intra_8, 4,6,16,ARCH_X86_64*0x50-0x50
lea r4, [r1*4]
lea r5, [r1*3] ; 3*stride
dec r2d ; alpha-1
@@ -702,9 +693,6 @@ cglobal deblock_%1_luma_intra_8, 4,6,16
LUMA_INTRA_SWAP_PQ
LUMA_INTRA_P012 [r0], [r0+r1], [r0+2*r1], [r0+r5]
.end:
-%if ARCH_X86_64 == 0
- add esp, 0x60
-%endif
RET
INIT_MMX cpuname
@@ -741,12 +729,10 @@ cglobal deblock_h_luma_intra_8, 4,9
add rsp, 0x88
RET
%else
-cglobal deblock_h_luma_intra_8, 2,4
+cglobal deblock_h_luma_intra_8, 2,4,8,0x80
lea r3, [r1*3]
sub r0, 4
lea r2, [r0+r3]
-%assign pad 0x8c-(stack_offset&15)
- SUB rsp, pad
%define pix_tmp rsp
; transpose 8x16 -> tmp space
@@ -777,7 +763,6 @@ cglobal deblock_h_luma_intra_8, 2,4
lea r0, [r0+r1*8]
lea r2, [r2+r1*8]
TRANSPOSE8x8_MEM PASS8ROWS(pix_tmp+8, pix_tmp+0x38, 0x10, 0x30), PASS8ROWS(r0, r2, r1, r3)
- ADD rsp, pad
RET
%endif ; ARCH_X86_64
%endmacro ; DEBLOCK_LUMA_INTRA