summaryrefslogtreecommitdiff
path: root/libavcodec/arm
diff options
context:
space:
mode:
author: Michael Niedermayer <michaelni@gmx.at>, 2011-12-13 23:21:37 +0100
committer: Michael Niedermayer <michaelni@gmx.at>, 2011-12-13 23:21:37 +0100
commit: 3ba0bfe71fb18e955ca0110e5a65105d84932fbc (patch)
tree: b308d41d170483c23c31c87d8dcb19fc169e2eb6 /libavcodec/arm
parent: 36be045ed7942e07742c3cf3d3012b1d2a9ec344 (diff)
parent: a99273ebf328658c183c2d267f1c2b8bfac58bb3 (diff)
Merge remote-tracking branch 'qatar/master'
* qatar/master:
  ulti: Fix invalid reads
  lavf: dealloc private options in av_write_trailer
  yadif: support 10bit YUV
  vc1: mark with ER_MB_ERROR bits overconsumption
  lavc: introduce ER_MB_END and ER_MB_ERROR
  error_resilience: use the ER_ namespace
  build: move inclusion of subdir.mak to main subdir loop
  rv34: NEON optimised 4x4 dequant
  rv34: move 4x4 dequant to RV34DSPContext
  aacdec: Use intfloat.h rather than local punning union.

Conflicts:
  libavcodec/h264.c
  libavcodec/vc1dec.c
  libavfilter/vf_yadif.c
  libavformat/Makefile

Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/arm')
-rw-r--r-- libavcodec/arm/rv34dsp_init_neon.c | 3
-rw-r--r-- libavcodec/arm/rv34dsp_neon.S | 24
2 files changed, 27 insertions, 0 deletions
diff --git a/libavcodec/arm/rv34dsp_init_neon.c b/libavcodec/arm/rv34dsp_init_neon.c
index 9a09fde7a9..acf2a7dcd3 100644
--- a/libavcodec/arm/rv34dsp_init_neon.c
+++ b/libavcodec/arm/rv34dsp_init_neon.c
@@ -25,9 +25,12 @@
void ff_rv34_inv_transform_neon(DCTELEM *block); /* installed in rv34_inv_transform_tab[0] by the init function in this file */
void ff_rv34_inv_transform_noround_neon(DCTELEM *block); /* installed in rv34_inv_transform_tab[1]; defined in rv34dsp_neon.S */
+void ff_rv34_dequant4x4_neon(DCTELEM *block, int Qdc, int Q); /* new in this merge: defined in rv34dsp_neon.S; scales block in place using Qdc for the first coefficient and Q for the rest */
void ff_rv34dsp_init_neon(RV34DSPContext *c, DSPContext* dsp) /* Install the NEON routines into the function-pointer fields of c; dsp is not referenced in this body. */
{
c->rv34_inv_transform_tab[0] = ff_rv34_inv_transform_neon; /* slot 0: the plain inverse transform */
c->rv34_inv_transform_tab[1] = ff_rv34_inv_transform_noround_neon; /* slot 1: the "noround" inverse transform */
+
+ c->rv34_dequant4x4 = ff_rv34_dequant4x4_neon; /* added by this merge: route 4x4 dequantisation to the NEON routine */
}
diff --git a/libavcodec/arm/rv34dsp_neon.S b/libavcodec/arm/rv34dsp_neon.S
index f700f5c321..423b537fb9 100644
--- a/libavcodec/arm/rv34dsp_neon.S
+++ b/libavcodec/arm/rv34dsp_neon.S
@@ -107,3 +107,27 @@ function ff_rv34_inv_transform_noround_neon, export=1
vst4.16 {d0[3], d1[3], d2[3], d3[3]}, [r2,:64], r1
bx lr
endfunc
+
+function ff_rv34_dequant4x4_neon, export=1 @ In-place dequantisation of four rows of four 16-bit coefficients; per the C prototype the arguments are (block, Qdc, Q), presumably arriving in r0, r1, r2 under the standard ARM calling convention.
+ mov r3, r0 @ keep a second copy of the block pointer: r0 walks the loads, r3 walks the stores
+ mov r12, #16 @ post-increment step in bytes between rows (16 bytes = eight 16-bit elements)
+ vdup.16 q0, r2 @ broadcast the low 16 bits of r2 (Q) to all eight lanes of q0 (d0 and d1)
+ vmov.16 d0[0], r1 @ overwrite lane 0 of d0 with r1 (Qdc), so d0 = {Qdc, Q, Q, Q} while d1 stays all Q
+ vld1.16 {d2}, [r0,:64], r12 @ load row 0 (four 16-bit values, 64-bit alignment hint), then advance r0 by 16 bytes
+ vld1.16 {d4}, [r0,:64], r12 @ load row 1
+ vld1.16 {d6}, [r0,:64], r12 @ load row 2
+ vld1.16 {d16}, [r0,:64], r12 @ load row 3
+ vmull.s16 q1, d2, d0 @ row 0: signed widening multiply to 32 bits; lane 0 uses Qdc, lanes 1-3 use Q
+ vmull.s16 q2, d4, d1 @ row 1: every lane multiplied by Q
+ vmull.s16 q3, d6, d1 @ row 2: every lane multiplied by Q
+ vmull.s16 q8, d16, d1 @ row 3: every lane multiplied by Q
+ vqrshrn.s32 d2, q1, #4 @ rounding shift right by 4 (adds 8 before shifting) and narrow to 16 bits with signed saturation
+ vqrshrn.s32 d4, q2, #4 @ same rounding narrow for row 1
+ vqrshrn.s32 d6, q3, #4 @ same rounding narrow for row 2
+ vqrshrn.s32 d16, q8, #4 @ same rounding narrow for row 3
+ vst1.16 {d2}, [r3,:64], r12 @ store row 0 back at the original address via r3, then advance r3 by 16 bytes
+ vst1.16 {d4}, [r3,:64], r12 @ store row 1
+ vst1.16 {d6}, [r3,:64], r12 @ store row 2
+ vst1.16 {d16}, [r3,:64], r12 @ store row 3
+ bx lr @ return to caller; nothing is placed in r0 as a result (the C prototype returns void)
+endfunc