From ce58642ed0d6dade63ff1b2467aa573e97c0cc91 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Sat, 8 Dec 2012 16:12:38 -0800 Subject: x86inc: support stack mem allocation and re-alignment in PROLOGUE. Use this in VP8/H264-8bit loopfilter functions so they can be used if there is no aligned stack (e.g. MSVC 32bit or ICC 10.x). Signed-off-by: Michael Niedermayer --- libavcodec/x86/h264_deblock.asm | 27 ++++++--------------------- 1 file changed, 6 insertions(+), 21 deletions(-) (limited to 'libavcodec/x86/h264_deblock.asm') diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm index cf7562141a..8ac38fcaa8 100644 --- a/libavcodec/x86/h264_deblock.asm +++ b/libavcodec/x86/h264_deblock.asm @@ -400,14 +400,12 @@ DEBLOCK_LUMA ;----------------------------------------------------------------------------- ; void deblock_v8_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) ;----------------------------------------------------------------------------- -cglobal deblock_%1_luma_8, 5,5 +cglobal deblock_%1_luma_8, 5,5,8,2*%2 lea r4, [r1*3] dec r2 ; alpha-1 neg r4 dec r3 ; beta-1 add r4, r0 ; pix-3*stride - %assign pad 2*%2+12-(stack_offset&15) - SUB esp, pad mova m0, [r4+r1] ; p1 mova m1, [r4+2*r1] ; p0 @@ -445,22 +443,19 @@ cglobal deblock_%1_luma_8, 5,5 DEBLOCK_P0_Q0 mova [r4+2*r1], m1 mova [r0], m2 - ADD esp, pad RET ;----------------------------------------------------------------------------- ; void deblock_h_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) ;----------------------------------------------------------------------------- INIT_MMX cpuname -cglobal deblock_h_luma_8, 0,5 +cglobal deblock_h_luma_8, 0,5,8,0x60+HAVE_ALIGNED_STACK*12 mov r0, r0mp mov r3, r1m lea r4, [r3*3] sub r0, 4 lea r1, [r0+r4] - %assign pad 0x78-(stack_offset&15) - SUB esp, pad -%define pix_tmp esp+12 +%define pix_tmp esp+12*HAVE_ALIGNED_STACK ; transpose 6x16 -> tmp space TRANSPOSE6x8_MEM PASS8ROWS(r0, r1, r3, r4), pix_tmp @@ -502,7 +497,6 @@ cglobal deblock_h_luma_8, 0,5 movq m3, [pix_tmp+0x48] TRANSPOSE8x4B_STORE PASS8ROWS(r0, r1, r3, r4) - ADD esp, pad RET %endmacro ; DEBLOCK_LUMA @@ -635,7 +629,7 @@ DEBLOCK_LUMA v, 16 %define mpb_0 m14 %define mpb_1 m15 %else - %define spill(x) [esp+16*x+((stack_offset+4)&15)] + %define spill(x) [esp+16*x] %define p2 [r4+r1] %define q2 [r0+2*r1] %define t4 spill(0) @@ -650,10 +644,7 @@ DEBLOCK_LUMA v, 16 ;----------------------------------------------------------------------------- ; void deblock_v_luma_intra( uint8_t *pix, int stride, int alpha, int beta ) ;----------------------------------------------------------------------------- -cglobal deblock_%1_luma_intra_8, 4,6,16 -%if ARCH_X86_64 == 0 - sub esp, 0x60 -%endif +cglobal deblock_%1_luma_intra_8, 4,6,16,ARCH_X86_64*0x50-0x50 lea r4, [r1*4] lea r5, [r1*3] ; 3*stride dec r2d ; alpha-1 @@ -702,9 +693,6 @@ cglobal deblock_%1_luma_intra_8, 4,6,16 LUMA_INTRA_SWAP_PQ LUMA_INTRA_P012 [r0], [r0+r1], [r0+2*r1], [r0+r5] .end: -%if ARCH_X86_64 == 0 - add esp, 0x60 -%endif RET INIT_MMX cpuname @@ -741,12 +729,10 @@ cglobal deblock_h_luma_intra_8, 4,9 add rsp, 0x88 RET %else -cglobal deblock_h_luma_intra_8, 2,4 +cglobal deblock_h_luma_intra_8, 2,4,8,0x80 lea r3, [r1*3] sub r0, 4 lea r2, [r0+r3] -%assign pad 0x8c-(stack_offset&15) - SUB rsp, pad %define pix_tmp rsp ; transpose 8x16 -> tmp space @@ -777,7 +763,6 @@ cglobal deblock_h_luma_intra_8, 2,4 lea r0, [r0+r1*8] lea r2, [r2+r1*8] TRANSPOSE8x8_MEM PASS8ROWS(pix_tmp+8, pix_tmp+0x38, 0x10, 0x30), PASS8ROWS(r0, r2, r1, r3) - ADD rsp, pad RET %endif ; ARCH_X86_64 %endmacro ; DEBLOCK_LUMA_INTRA -- cgit v1.2.3