summaryrefslogtreecommitdiff
path: root/libavcodec/arm/vp3dsp_neon.S
diff options
context:
space:
mode:
authorRonald S. Bultje <rsbultje@gmail.com>2013-01-18 16:43:04 +0100
committerRonald S. Bultje <rsbultje@gmail.com>2013-01-19 22:04:55 -0800
commitaeaf268e52fc11c1f64914a319e0edddf1346d6a (patch)
tree4596586d2adfda684defde76992d6fb4426b6089 /libavcodec/arm/vp3dsp_neon.S
parent992b03183819553a73b4f870a710ef500b4eb6d0 (diff)
vp3: integrate clear_blocks with idct of previous block.
This is identical to what e.g. vp8 does, and prevents the function call overhead (plus dependency on dsputil for this particular function). Arm asm updated by Janne Grunau <janne-libav@jannau.net>. Signed-off-by: Janne Grunau <janne-libav@jannau.net>
Diffstat (limited to 'libavcodec/arm/vp3dsp_neon.S')
-rw-r--r--libavcodec/arm/vp3dsp_neon.S22
1 files changed, 15 insertions, 7 deletions
diff --git a/libavcodec/arm/vp3dsp_neon.S b/libavcodec/arm/vp3dsp_neon.S
index e09de57281..e5ecfc337e 100644
--- a/libavcodec/arm/vp3dsp_neon.S
+++ b/libavcodec/arm/vp3dsp_neon.S
@@ -108,14 +108,20 @@ endfunc
function vp3_idct_start_neon
vpush {d8-d15}
+ vmov.i16 q4, #0
+ vmov.i16 q5, #0
movrel r3, vp3_idct_constants
vld1.64 {d0-d1}, [r3,:128]
- vld1.64 {d16-d19}, [r2,:128]!
- vld1.64 {d20-d23}, [r2,:128]!
- vld1.64 {d24-d27}, [r2,:128]!
+ vld1.64 {d16-d19}, [r2,:128]
+ vst1.64 {q4-q5}, [r2,:128]!
+ vld1.64 {d20-d23}, [r2,:128]
+ vst1.64 {q4-q5}, [r2,:128]!
+ vld1.64 {d24-d27}, [r2,:128]
+ vst1.64 {q4-q5}, [r2,:128]!
vadd.s16 q1, q8, q12
vsub.s16 q8, q8, q12
- vld1.64 {d28-d31}, [r2,:128]!
+ vld1.64 {d28-d31}, [r2,:128]
+ vst1.64 {q4-q5}, [r2,:128]!
vp3_idct_core_neon:
vmull.s16 q2, d18, xC1S7 // (ip[1] * C1) << 16
@@ -345,10 +351,12 @@ function ff_vp3_idct_add_neon, export=1
endfunc
function ff_vp3_idct_dc_add_neon, export=1
- ldrsh r2, [r2]
+ ldrsh r12, [r2]
mov r3, r0
- add r2, r2, #15
- vdup.16 q15, r2
+ add r12, r12, #15
+ vdup.16 q15, r12
+ mov r12, 0
+ strh r12, [r2]
vshr.s16 q15, q15, #5
vld1.8 {d0}, [r0,:64], r1