summaryrefslogtreecommitdiff
path: root/libavutil
diff options
context:
space:
mode:
authorMichael Niedermayer <michaelni@gmx.at>2013-10-08 11:22:54 +0200
committerMichael Niedermayer <michaelni@gmx.at>2013-10-08 11:23:00 +0200
commit1f17619fe4800dce32a81fda0cec9afad91ff095 (patch)
treed8da9d339ec07d6a0910dbe6563ad96d47cfa2b9 /libavutil
parent17d9c7c208915c6c090e10508056f68b93440839 (diff)
parentbbe4a6db44f0b55b424a5cc9d3e89cd88e250450 (diff)
Merge commit 'bbe4a6db44f0b55b424a5cc9d3e89cd88e250450'
* commit 'bbe4a6db44f0b55b424a5cc9d3e89cd88e250450': x86inc: Utilize the shadow space on 64-bit Windows Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavutil')
-rw-r--r--libavutil/x86/x86inc.asm83
1 files changed, 46 insertions, 37 deletions
diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm
index f31186642f..cb3f82b21a 100644
--- a/libavutil/x86/x86inc.asm
+++ b/libavutil/x86/x86inc.asm
@@ -353,14 +353,18 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
%if stack_size < 0
%assign stack_size -stack_size
%endif
- %if mmsize != 8
- %assign xmm_regs_used %2
+ %assign stack_size_padded stack_size
+ %if WIN64
+ %assign stack_size_padded stack_size_padded + 32 ; reserve 32 bytes for shadow space
+ %if mmsize != 8
+ %assign xmm_regs_used %2
+ %if xmm_regs_used > 8
+ %assign stack_size_padded stack_size_padded + (xmm_regs_used-8)*16
+ %endif
+ %endif
%endif
%if mmsize <= 16 && HAVE_ALIGNED_STACK
- %assign stack_size_padded stack_size + %%stack_alignment - gprsize - (stack_offset & (%%stack_alignment - 1))
- %if xmm_regs_used > 6
- %assign stack_size_padded stack_size_padded + (xmm_regs_used - 6) * 16
- %endif
+ %assign stack_size_padded stack_size_padded + %%stack_alignment - gprsize - (stack_offset & (%%stack_alignment - 1))
SUB rsp, stack_size_padded
%else
%assign %%reg_num (regs_used - 1)
@@ -370,14 +374,6 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
; stack in a single instruction (i.e. mov rsp, rstk or mov
; rsp, [rsp+stack_size_padded])
mov rstk, rsp
- %assign stack_size_padded stack_size
- %if xmm_regs_used > 6
- %assign stack_size_padded stack_size_padded + (xmm_regs_used - 6) * 16
- %if mmsize == 32 && xmm_regs_used & 1
- ; re-align to 32 bytes
- %assign stack_size_padded (stack_size_padded + 16)
- %endif
- %endif
%if %1 < 0 ; need to store rsp on stack
sub rsp, gprsize+stack_size_padded
and rsp, ~(%%stack_alignment-1)
@@ -389,9 +385,7 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
%xdefine rstkm rstk
%endif
%endif
- %if xmm_regs_used > 6
- WIN64_PUSH_XMM
- %endif
+ WIN64_PUSH_XMM
%endif
%endif
%endmacro
@@ -452,40 +446,55 @@ DECLARE_REG 14, R15, 120
%endmacro
%macro WIN64_PUSH_XMM 0
- %assign %%i xmm_regs_used
- %rep (xmm_regs_used-6)
- %assign %%i %%i-1
- movaps [rsp + (%%i-6)*16 + stack_size + (~stack_offset&8)], xmm %+ %%i
- %endrep
+ ; Use the shadow space to store XMM6 and XMM7, the rest needs stack space allocated.
+ %if xmm_regs_used > 6
+ movaps [rstk + stack_offset + 8], xmm6
+ %endif
+ %if xmm_regs_used > 7
+ movaps [rstk + stack_offset + 24], xmm7
+ %endif
+ %if xmm_regs_used > 8
+ %assign %%i 8
+ %rep xmm_regs_used-8
+ movaps [rsp + (%%i-8)*16 + stack_size + 32], xmm %+ %%i
+ %assign %%i %%i+1
+ %endrep
+ %endif
%endmacro
%macro WIN64_SPILL_XMM 1
%assign xmm_regs_used %1
ASSERT xmm_regs_used <= 16
- %if xmm_regs_used > 6
- SUB rsp, (xmm_regs_used-6)*16+16
- WIN64_PUSH_XMM
+ %if xmm_regs_used > 8
+ %assign stack_size_padded (xmm_regs_used-8)*16 + (~stack_offset&8) + 32
+ SUB rsp, stack_size_padded
%endif
+ WIN64_PUSH_XMM
%endmacro
%macro WIN64_RESTORE_XMM_INTERNAL 1
- %if xmm_regs_used > 6
+ %assign %%pad_size 0
+ %if xmm_regs_used > 8
%assign %%i xmm_regs_used
- %rep (xmm_regs_used-6)
+ %rep xmm_regs_used-8
%assign %%i %%i-1
- movaps xmm %+ %%i, [%1 + (%%i-6)*16+stack_size+(~stack_offset&8)]
+ movaps xmm %+ %%i, [%1 + (%%i-8)*16 + stack_size + 32]
%endrep
- %if stack_size_padded == 0
- add %1, (xmm_regs_used-6)*16+16
- %endif
%endif
%if stack_size_padded > 0
%if stack_size > 0 && (mmsize == 32 || HAVE_ALIGNED_STACK == 0)
mov rsp, rstkm
%else
add %1, stack_size_padded
+ %assign %%pad_size stack_size_padded
%endif
%endif
+ %if xmm_regs_used > 7
+ movaps xmm7, [%1 + stack_offset - %%pad_size + 24]
+ %endif
+ %if xmm_regs_used > 6
+ movaps xmm6, [%1 + stack_offset - %%pad_size + 8]
+ %endif
%endmacro
%macro WIN64_RESTORE_XMM 1
@@ -702,12 +711,12 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
%endif
align function_align
%2:
- RESET_MM_PERMUTATION ; not really needed, but makes disassembly somewhat nicer
- %xdefine rstk rsp
- %assign stack_offset 0
- %assign stack_size 0
- %assign stack_size_padded 0
- %assign xmm_regs_used 0
+ RESET_MM_PERMUTATION ; needed for x86-64, also makes disassembly somewhat nicer
+ %xdefine rstk rsp ; copy of the original stack pointer, used when greater alignment than the known stack alignment is required
+ %assign stack_offset 0 ; stack pointer offset relative to the return address
+ %assign stack_size 0 ; amount of stack space that can be freely used inside a function
+ %assign stack_size_padded 0 ; total amount of allocated stack space, including space for callee-saved xmm registers on WIN64 and alignment padding
+ %assign xmm_regs_used 0 ; number of XMM registers requested, used for dealing with callee-saved registers on WIN64
%ifnidn %3, ""
PROLOGUE %3
%endif