summaryrefslogtreecommitdiff
path: root/libavcodec/arm/vp9itxfm_neon.S
diff options
context:
space:
mode:
author Janne Grunau <janne-libav@jannau.net> 2017-01-10 00:15:09 +0200
committer Michael Niedermayer <michael@niedermayer.cc> 2017-01-14 21:13:12 +0100
commit a71cd8439fd32fd83b7a9b9ac8d6f861846770c7 (patch)
tree dca1d9c0ae8be675e348618bda9f5ebd191b9f41 /libavcodec/arm/vp9itxfm_neon.S
parent cb220eeef9bfe889769dc4e08248b0a59d24e2a9 (diff)
arm: vp9itxfm: Simplify the stack alignment code
This is one instruction fewer for Thumb, and leaves only one ARM-specific and two Thumb-specific instructions in each stack-alignment sequence. This is cherry-picked from libav commit e5b0fc170f85b00f7dd0ac514918fb5c95253d39. Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
Diffstat (limited to 'libavcodec/arm/vp9itxfm_neon.S')
-rw-r--r-- libavcodec/arm/vp9itxfm_neon.S 28
1 files changed, 12 insertions, 16 deletions
diff --git a/libavcodec/arm/vp9itxfm_neon.S b/libavcodec/arm/vp9itxfm_neon.S
index 06470a3997..d7a2654dbe 100644
--- a/libavcodec/arm/vp9itxfm_neon.S
+++ b/libavcodec/arm/vp9itxfm_neon.S
@@ -791,15 +791,13 @@ function ff_vp9_\txfm1\()_\txfm2\()_16x16_add_neon, export=1
.ifnc \txfm1\()_\txfm2,idct_idct
vpush {q4-q7}
.endif
- mov r7, sp
@ Align the stack, allocate a temp buffer
-T mov r12, sp
-T bic r12, r12, #15
-T sub r12, r12, #512
-T mov sp, r12
-A bic sp, sp, #15
-A sub sp, sp, #512
+T mov r7, sp
+T and r7, r7, #15
+A and r7, sp, #15
+ add r7, r7, #512
+ sub sp, sp, r7
mov r4, r0
mov r5, r1
@@ -828,7 +826,7 @@ A sub sp, sp, #512
bl \txfm2\()16_1d_4x16_pass2_neon
.endr
- mov sp, r7
+ add sp, sp, r7
.ifnc \txfm1\()_\txfm2,idct_idct
vpop {q4-q7}
.endif
@@ -1117,15 +1115,13 @@ function ff_vp9_idct_idct_32x32_add_neon, export=1
beq idct32x32_dc_add_neon
push {r4-r7,lr}
vpush {q4-q7}
- mov r7, sp
@ Align the stack, allocate a temp buffer
-T mov r12, sp
-T bic r12, r12, #15
-T sub r12, r12, #2048
-T mov sp, r12
-A bic sp, sp, #15
-A sub sp, sp, #2048
+T mov r7, sp
+T and r7, r7, #15
+A and r7, sp, #15
+ add r7, r7, #2048
+ sub sp, sp, r7
mov r4, r0
mov r5, r1
@@ -1143,7 +1139,7 @@ A sub sp, sp, #2048
bl idct32_1d_4x32_pass2_neon
.endr
- mov sp, r7
+ add sp, sp, r7
vpop {q4-q7}
pop {r4-r7,pc}
endfunc