summaryrefslogtreecommitdiff
path: root/libavcodec/x86/h264_deblock.asm
diff options
context:
space:
mode:
Diffstat (limited to 'libavcodec/x86/h264_deblock.asm')
-rw-r--r--libavcodec/x86/h264_deblock.asm27
1 files changed, 6 insertions, 21 deletions
diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index c124c4daa0..1f1dbc6a2f 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -398,14 +398,12 @@ DEBLOCK_LUMA
;-----------------------------------------------------------------------------
; void deblock_v8_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
;-----------------------------------------------------------------------------
-cglobal deblock_%1_luma_8, 5,5
+cglobal deblock_%1_luma_8, 5,5,8,2*%2
lea r4, [r1*3]
dec r2 ; alpha-1
neg r4
dec r3 ; beta-1
add r4, r0 ; pix-3*stride
- %assign pad 2*%2+12-(stack_offset&15)
- SUB esp, pad
mova m0, [r4+r1] ; p1
mova m1, [r4+2*r1] ; p0
@@ -443,22 +441,19 @@ cglobal deblock_%1_luma_8, 5,5
DEBLOCK_P0_Q0
mova [r4+2*r1], m1
mova [r0], m2
- ADD esp, pad
RET
;-----------------------------------------------------------------------------
; void deblock_h_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
;-----------------------------------------------------------------------------
INIT_MMX cpuname
-cglobal deblock_h_luma_8, 0,5
+cglobal deblock_h_luma_8, 0,5,8,0x60+HAVE_ALIGNED_STACK*12
mov r0, r0mp
mov r3, r1m
lea r4, [r3*3]
sub r0, 4
lea r1, [r0+r4]
- %assign pad 0x78-(stack_offset&15)
- SUB esp, pad
-%define pix_tmp esp+12
+%define pix_tmp esp+12*HAVE_ALIGNED_STACK
; transpose 6x16 -> tmp space
TRANSPOSE6x8_MEM PASS8ROWS(r0, r1, r3, r4), pix_tmp
@@ -500,7 +495,6 @@ cglobal deblock_h_luma_8, 0,5
movq m3, [pix_tmp+0x48]
TRANSPOSE8x4B_STORE PASS8ROWS(r0, r1, r3, r4)
- ADD esp, pad
RET
%endmacro ; DEBLOCK_LUMA
@@ -631,7 +625,7 @@ DEBLOCK_LUMA v, 16
%define mpb_0 m14
%define mpb_1 m15
%else
- %define spill(x) [esp+16*x+((stack_offset+4)&15)]
+ %define spill(x) [esp+16*x]
%define p2 [r4+r1]
%define q2 [r0+2*r1]
%define t4 spill(0)
@@ -646,10 +640,7 @@ DEBLOCK_LUMA v, 16
;-----------------------------------------------------------------------------
; void deblock_v_luma_intra( uint8_t *pix, int stride, int alpha, int beta )
;-----------------------------------------------------------------------------
-cglobal deblock_%1_luma_intra_8, 4,6,16
-%if ARCH_X86_64 == 0
- sub esp, 0x60
-%endif
+cglobal deblock_%1_luma_intra_8, 4,6,16,ARCH_X86_64*0x50-0x50
lea r4, [r1*4]
lea r5, [r1*3] ; 3*stride
dec r2d ; alpha-1
@@ -698,9 +689,6 @@ cglobal deblock_%1_luma_intra_8, 4,6,16
LUMA_INTRA_SWAP_PQ
LUMA_INTRA_P012 [r0], [r0+r1], [r0+2*r1], [r0+r5]
.end:
-%if ARCH_X86_64 == 0
- add esp, 0x60
-%endif
RET
INIT_MMX cpuname
@@ -737,12 +725,10 @@ cglobal deblock_h_luma_intra_8, 4,9
add rsp, 0x88
RET
%else
-cglobal deblock_h_luma_intra_8, 2,4
+cglobal deblock_h_luma_intra_8, 2,4,8,0x80
lea r3, [r1*3]
sub r0, 4
lea r2, [r0+r3]
-%assign pad 0x8c-(stack_offset&15)
- SUB rsp, pad
%define pix_tmp rsp
; transpose 8x16 -> tmp space
@@ -773,7 +759,6 @@ cglobal deblock_h_luma_intra_8, 2,4
lea r0, [r0+r1*8]
lea r2, [r2+r1*8]
TRANSPOSE8x8_MEM PASS8ROWS(pix_tmp+8, pix_tmp+0x38, 0x10, 0x30), PASS8ROWS(r0, r2, r1, r3)
- ADD rsp, pad
RET
%endif ; ARCH_X86_64
%endmacro ; DEBLOCK_LUMA_INTRA