From 4722a03c75d17d88312b91cd1006776844237349 Mon Sep 17 00:00:00 2001
From: Mans Rullgard
Date: Mon, 12 Dec 2011 23:22:04 +0000
Subject: rv34: NEON optimised 4x4 dequant

Signed-off-by: Mans Rullgard
---
 libavcodec/arm/rv34dsp_init_neon.c |  3 +++
 libavcodec/arm/rv34dsp_neon.S      | 24 ++++++++++++++++++++++++
 2 files changed, 27 insertions(+)

(limited to 'libavcodec')

diff --git a/libavcodec/arm/rv34dsp_init_neon.c b/libavcodec/arm/rv34dsp_init_neon.c
index 9a09fde7a9..acf2a7dcd3 100644
--- a/libavcodec/arm/rv34dsp_init_neon.c
+++ b/libavcodec/arm/rv34dsp_init_neon.c
@@ -25,9 +25,12 @@
 
 void ff_rv34_inv_transform_neon(DCTELEM *block);
 void ff_rv34_inv_transform_noround_neon(DCTELEM *block);
+void ff_rv34_dequant4x4_neon(DCTELEM *block, int Qdc, int Q); /* in-place 4x4 dequant, defined in rv34dsp_neon.S */
 
 void ff_rv34dsp_init_neon(RV34DSPContext *c, DSPContext* dsp)
 {
     c->rv34_inv_transform_tab[0] = ff_rv34_inv_transform_neon;
     c->rv34_inv_transform_tab[1] = ff_rv34_inv_transform_noround_neon;
+
+    c->rv34_dequant4x4 = ff_rv34_dequant4x4_neon; /* install the NEON routine in the DSP context */
 }
diff --git a/libavcodec/arm/rv34dsp_neon.S b/libavcodec/arm/rv34dsp_neon.S
index f700f5c321..423b537fb9 100644
--- a/libavcodec/arm/rv34dsp_neon.S
+++ b/libavcodec/arm/rv34dsp_neon.S
@@ -107,3 +107,27 @@ function ff_rv34_inv_transform_noround_neon, export=1
         vst4.16         {d0[3], d1[3], d2[3], d3[3]}, [r2,:64], r1
         bx              lr
 endfunc
+
+function ff_rv34_dequant4x4_neon, export=1
+        mov             r3,  r0                 @ keep the block base for the stores (assumes r0=block, r1=Qdc, r2=Q per the C prototype and the standard ARM calling convention)
+        mov             r12, #16                @ post-increment step: 16 bytes between consecutive rows
+        vdup.16         q0,  r2                 @ all eight 16-bit lanes of d0/d1 = low half of r2 (Q)
+        vmov.16         d0[0], r1               @ lane 0 of d0 = r1 (Qdc), so d0 = {Qdc, Q, Q, Q}
+        vld1.16         {d2},  [r0,:64], r12    @ row 0: four 16-bit values, 64-bit aligned, then r0 += 16
+        vld1.16         {d4},  [r0,:64], r12    @ row 1
+        vld1.16         {d6},  [r0,:64], r12    @ row 2
+        vld1.16         {d16}, [r0,:64], r12    @ row 3
+        vmull.s16       q1,  d2,  d0            @ row 0 widened to 32 bits: element 0 times Qdc, elements 1-3 times Q
+        vmull.s16       q2,  d4,  d1            @ rows 1-3: every element times Q
+        vmull.s16       q3,  d6,  d1
+        vmull.s16       q8,  d16, d1
+        vqrshrn.s32     d2,  q1,  #4            @ rounding shift right by 4, i.e. (x + 8) >> 4, saturated to signed 16 bits
+        vqrshrn.s32     d4,  q2,  #4
+        vqrshrn.s32     d6,  q3,  #4
+        vqrshrn.s32     d16, q8,  #4
+        vst1.16         {d2},  [r3,:64], r12    @ write row 0 back in place, then r3 += 16
+        vst1.16         {d4},  [r3,:64], r12    @ row 1
+        vst1.16         {d6},  [r3,:64], r12    @ row 2
+        vst1.16         {d16}, [r3,:64], r12    @ row 3
+        bx              lr                      @ return; only r0, r3, r12 and q0-q3, q8 are written
+endfunc
-- 
cgit v1.2.3