summary | refs | log | tree | commit | diff
path: root/libavcodec/arm/h264dsp_neon.S
diff options
context:
space:
mode:
author: Mans Rullgard <mans@mansr.com> 2011-06-14 11:29:48 +0100
committer: Mans Rullgard <mans@mansr.com> 2011-06-23 07:31:54 +0100
commit: 8986fddc2bab92bd7d77a123ac70c4fb70c96c7c (patch)
tree: 73b8c4a57c98be10d4403dc69ec3019a1665b3f0 /libavcodec/arm/h264dsp_neon.S
parent: 9cd7b8549b71bcfced2062596fd9eecba092aeb1 (diff)
ARM: allow building in Thumb2 mode
Signed-off-by: Mans Rullgard <mans@mansr.com>
Diffstat (limited to 'libavcodec/arm/h264dsp_neon.S')
-rw-r--r-- libavcodec/arm/h264dsp_neon.S | 98
1 files changed, 54 insertions, 44 deletions
diff --git a/libavcodec/arm/h264dsp_neon.S b/libavcodec/arm/h264dsp_neon.S
index b76e4479b5..0fa4a6b0a5 100644
--- a/libavcodec/arm/h264dsp_neon.S
+++ b/libavcodec/arm/h264dsp_neon.S
@@ -71,7 +71,9 @@ function ff_\type\()_h264_chroma_mc8_neon, export=1
pld [r1]
pld [r1, r2]
- muls r7, r4, r5
+A muls r7, r4, r5
+T mul r7, r4, r5
+T cmp r7, #0
rsb r6, r7, r5, lsl #3
rsb ip, r7, r4, lsl #3
sub r4, r7, r4, lsl #3
@@ -197,7 +199,9 @@ function ff_\type\()_h264_chroma_mc4_neon, export=1
pld [r1]
pld [r1, r2]
- muls r7, r4, r5
+A muls r7, r4, r5
+T mul r7, r4, r5
+T cmp r7, #0
rsb r6, r7, r5, lsl #3
rsb ip, r7, r4, lsl #3
sub r4, r7, r4, lsl #3
@@ -368,10 +372,10 @@ function ff_\type\()_h264_chroma_mc2_neon, export=1
pop {r4-r6, pc}
2:
.ifc \type,put
- ldrh r5, [r1], r2
- strh r5, [r0], r2
- ldrh r6, [r1], r2
- strh r6, [r0], r2
+ ldrh_post r5, r1, r2
+ strh_post r5, r0, r2
+ ldrh_post r6, r1, r2
+ strh_post r6, r0, r2
.else
vld1.16 {d16[0]}, [r1], r2
vld1.16 {d16[1]}, [r1], r2
@@ -404,28 +408,17 @@ endfunc
ldr ip, [sp]
tst r2, r2
ldr ip, [ip]
+ it ne
tstne r3, r3
vmov.32 d24[0], ip
and ip, ip, ip, lsl #16
+ it eq
bxeq lr
ands ip, ip, ip, lsl #8
+ it lt
bxlt lr
.endm
- .macro align_push_regs
- and ip, sp, #15
- add ip, ip, #32
- sub sp, sp, ip
- vst1.64 {d12-d15}, [sp,:128]
- sub sp, sp, #32
- vst1.64 {d8-d11}, [sp,:128]
- .endm
-
- .macro align_pop_regs
- vld1.64 {d8-d11}, [sp,:128]!
- vld1.64 {d12-d15}, [sp,:128], ip
- .endm
-
.macro h264_loop_filter_luma
vdup.8 q11, r2 @ alpha
vmovl.u8 q12, d24
@@ -506,7 +499,7 @@ function ff_h264_v_loop_filter_luma_neon, export=1
vld1.64 {d18,d19}, [r0,:128], r1
vld1.64 {d16,d17}, [r0,:128], r1
- align_push_regs
+ vpush {d8-d15}
h264_loop_filter_luma
@@ -516,7 +509,7 @@ function ff_h264_v_loop_filter_luma_neon, export=1
vst1.64 {d0, d1}, [r0,:128], r1
vst1.64 {d10,d11}, [r0,:128]
- align_pop_regs
+ vpop {d8-d15}
bx lr
endfunc
@@ -543,7 +536,7 @@ function ff_h264_h_loop_filter_luma_neon, export=1
transpose_8x8 q3, q10, q9, q8, q0, q1, q2, q13
- align_push_regs
+ vpush {d8-d15}
h264_loop_filter_luma
@@ -568,7 +561,7 @@ function ff_h264_h_loop_filter_luma_neon, export=1
vst1.32 {d1[1]}, [r0], r1
vst1.32 {d11[1]}, [r0], r1
- align_pop_regs
+ vpop {d8-d15}
bx lr
endfunc
@@ -1116,6 +1109,7 @@ function \type\()_h264_qpel8_hv_lowpass_neon
vrhadd.u8 d11, d11, d7
sub r0, r0, r2, lsl #3
.endif
+
vst1.64 {d12}, [r0,:64], r2
vst1.64 {d13}, [r0,:64], r2
vst1.64 {d14}, [r0,:64], r2
@@ -1263,7 +1257,9 @@ function ff_\type\()_h264_qpel8_mc11_neon, export=1
\type\()_h264_qpel8_mc11:
lowpass_const r3
mov r11, sp
- bic sp, sp, #15
+A bic sp, sp, #15
+T bic r0, r11, #15
+T mov sp, r0
sub sp, sp, #64
mov r0, sp
sub r1, r1, #2
@@ -1271,14 +1267,14 @@ function ff_\type\()_h264_qpel8_mc11_neon, export=1
mov ip, #8
vpush {d8-d15}
bl put_h264_qpel8_h_lowpass_neon
- ldrd r0, [r11]
+ ldrd r0, [r11], #8
mov r3, r2
add ip, sp, #64
sub r1, r1, r2, lsl #1
mov r2, #8
bl \type\()_h264_qpel8_v_lowpass_l2_neon
vpop {d8-d15}
- add sp, r11, #8
+ mov sp, r11
pop {r11, pc}
endfunc
@@ -1287,7 +1283,9 @@ function ff_\type\()_h264_qpel8_mc21_neon, export=1
\type\()_h264_qpel8_mc21:
lowpass_const r3
mov r11, sp
- bic sp, sp, #15
+A bic sp, sp, #15
+T bic r0, r11, #15
+T mov sp, r0
sub sp, sp, #(8*8+16*12)
sub r1, r1, #2
mov r3, #8
@@ -1296,14 +1294,14 @@ function ff_\type\()_h264_qpel8_mc21_neon, export=1
vpush {d8-d15}
bl put_h264_qpel8_h_lowpass_neon
mov r4, r0
- ldrd r0, [r11]
+ ldrd r0, [r11], #8
sub r1, r1, r2, lsl #1
sub r1, r1, #2
mov r3, r2
sub r2, r4, #64
bl \type\()_h264_qpel8_hv_lowpass_l2_neon
vpop {d8-d15}
- add sp, r11, #8
+ mov sp, r11
pop {r4, r10, r11, pc}
endfunc
@@ -1330,7 +1328,9 @@ function ff_\type\()_h264_qpel8_mc12_neon, export=1
\type\()_h264_qpel8_mc12:
lowpass_const r3
mov r11, sp
- bic sp, sp, #15
+A bic sp, sp, #15
+T bic r0, r11, #15
+T mov sp, r0
sub sp, sp, #(8*8+16*12)
sub r1, r1, r2, lsl #1
mov r3, r2
@@ -1339,20 +1339,22 @@ function ff_\type\()_h264_qpel8_mc12_neon, export=1
vpush {d8-d15}
bl put_h264_qpel8_v_lowpass_neon
mov r4, r0
- ldrd r0, [r11]
+ ldrd r0, [r11], #8
sub r1, r1, r3, lsl #1
sub r1, r1, #2
sub r2, r4, #64
bl \type\()_h264_qpel8_hv_lowpass_l2_neon
vpop {d8-d15}
- add sp, r11, #8
+ mov sp, r11
pop {r4, r10, r11, pc}
endfunc
function ff_\type\()_h264_qpel8_mc22_neon, export=1
push {r4, r10, r11, lr}
mov r11, sp
- bic sp, sp, #15
+A bic sp, sp, #15
+T bic r4, r11, #15
+T mov sp, r4
sub r1, r1, r2, lsl #1
sub r1, r1, #2
mov r3, r2
@@ -1441,21 +1443,23 @@ function ff_\type\()_h264_qpel16_mc11_neon, export=1
\type\()_h264_qpel16_mc11:
lowpass_const r3
mov r11, sp
- bic sp, sp, #15
+A bic sp, sp, #15
+T bic r0, r11, #15
+T mov sp, r0
sub sp, sp, #256
mov r0, sp
sub r1, r1, #2
mov r3, #16
vpush {d8-d15}
bl put_h264_qpel16_h_lowpass_neon
- ldrd r0, [r11]
+ ldrd r0, [r11], #8
mov r3, r2
add ip, sp, #64
sub r1, r1, r2, lsl #1
mov r2, #16
bl \type\()_h264_qpel16_v_lowpass_l2_neon
vpop {d8-d15}
- add sp, r11, #8
+ mov sp, r11
pop {r4, r11, pc}
endfunc
@@ -1464,20 +1468,22 @@ function ff_\type\()_h264_qpel16_mc21_neon, export=1
\type\()_h264_qpel16_mc21:
lowpass_const r3
mov r11, sp
- bic sp, sp, #15
+A bic sp, sp, #15
+T bic r0, r11, #15
+T mov sp, r0
sub sp, sp, #(16*16+16*12)
sub r1, r1, #2
mov r0, sp
vpush {d8-d15}
bl put_h264_qpel16_h_lowpass_neon_packed
mov r4, r0
- ldrd r0, [r11]
+ ldrd r0, [r11], #8
sub r1, r1, r2, lsl #1
sub r1, r1, #2
mov r3, r2
bl \type\()_h264_qpel16_hv_lowpass_l2_neon
vpop {d8-d15}
- add sp, r11, #8
+ mov sp, r11
pop {r4-r5, r9-r11, pc}
endfunc
@@ -1504,7 +1510,9 @@ function ff_\type\()_h264_qpel16_mc12_neon, export=1
\type\()_h264_qpel16_mc12:
lowpass_const r3
mov r11, sp
- bic sp, sp, #15
+A bic sp, sp, #15
+T bic r0, r11, #15
+T mov sp, r0
sub sp, sp, #(16*16+16*12)
sub r1, r1, r2, lsl #1
mov r0, sp
@@ -1512,13 +1520,13 @@ function ff_\type\()_h264_qpel16_mc12_neon, export=1
vpush {d8-d15}
bl put_h264_qpel16_v_lowpass_neon_packed
mov r4, r0
- ldrd r0, [r11]
+ ldrd r0, [r11], #8
sub r1, r1, r3, lsl #1
sub r1, r1, #2
mov r2, r3
bl \type\()_h264_qpel16_hv_lowpass_l2_neon
vpop {d8-d15}
- add sp, r11, #8
+ mov sp, r11
pop {r4-r5, r9-r11, pc}
endfunc
@@ -1526,7 +1534,9 @@ function ff_\type\()_h264_qpel16_mc22_neon, export=1
push {r4, r9-r11, lr}
lowpass_const r3
mov r11, sp
- bic sp, sp, #15
+A bic sp, sp, #15
+T bic r4, r11, #15
+T mov sp, r4
sub r1, r1, r2, lsl #1
sub r1, r1, #2
mov r3, r2