rv34: joint coefficient decoding and dequantization

Perform dequantization while decoding coefficients instead of performing it on the entire coefficient buffer. Since quantized coefficients are very sparse, this usually yields a small speedup: around 1% on a Panda board compared to the NEON code removed here. The global speedup is probably around 3%. Signed-off-by: Kostya Shishkov <kostya.shishkov@gmail.com>
author: Christophe GISQUET <christophe.gisquet@gmail.com> 2012-01-01 15:28:47 +0100
committer: Kostya Shishkov <kostya.shishkov@gmail.com> 2012-01-04 10:30:01 +0100
commit: 98f24ecd6cfc9c57a555aae6bfcd3d9a4ce9503d (patch)
tree: a4e562412fcd7dacf25afdbd3c2f8cfa5737c911 /libavcodec/arm
parent: 0749720b6cdce68e4908dc59f1b4e1399852372b (diff)
2 files changed, 0 insertions, 27 deletions
diff --git a/libavcodec/arm/rv34dsp_init_neon.c b/libavcodec/arm/rv34dsp_init_neon.c
index acf2a7dcd3..9a09fde7a9 100644
--- a/libavcodec/arm/rv34dsp_init_neon.c
+++ b/libavcodec/arm/rv34dsp_init_neon.c
@@ -25,12 +25,9 @@
 
 void ff_rv34_inv_transform_neon(DCTELEM *block);
 void ff_rv34_inv_transform_noround_neon(DCTELEM *block);
-void ff_rv34_dequant4x4_neon(DCTELEM *block, int Qdc, int Q);
 
 void ff_rv34dsp_init_neon(RV34DSPContext *c, DSPContext* dsp)
 {
     c->rv34_inv_transform_tab[0] = ff_rv34_inv_transform_neon;
     c->rv34_inv_transform_tab[1] = ff_rv34_inv_transform_noround_neon;
-
-    c->rv34_dequant4x4 = ff_rv34_dequant4x4_neon;
 }
diff --git a/libavcodec/arm/rv34dsp_neon.S b/libavcodec/arm/rv34dsp_neon.S
index 423b537fb9..f700f5c321 100644
--- a/libavcodec/arm/rv34dsp_neon.S
+++ b/libavcodec/arm/rv34dsp_neon.S
@@ -107,27 +107,3 @@ function ff_rv34_inv_transform_noround_neon, export=1
         vst4.16         {d0[3], d1[3], d2[3], d3[3]}, [r2,:64], r1
         bx              lr
 endfunc
-
-function ff_rv34_dequant4x4_neon, export=1
-        mov             r3,  r0
-        mov             r12, #16
-        vdup.16         q0,  r2
-        vmov.16         d0[0], r1
-        vld1.16         {d2},     [r0,:64], r12
-        vld1.16         {d4},     [r0,:64], r12
-        vld1.16         {d6},     [r0,:64], r12
-        vld1.16         {d16},    [r0,:64], r12
-        vmull.s16       q1,  d2,  d0
-        vmull.s16       q2,  d4,  d1
-        vmull.s16       q3,  d6,  d1
-        vmull.s16       q8,  d16, d1
-        vqrshrn.s32     d2,  q1,  #4
-        vqrshrn.s32     d4,  q2,  #4
-        vqrshrn.s32     d6,  q3,  #4
-        vqrshrn.s32     d16, q8,  #4
-        vst1.16         {d2},     [r3,:64], r12
-        vst1.16         {d4},     [r3,:64], r12
-        vst1.16         {d6},     [r3,:64], r12
-        vst1.16         {d16},    [r3,:64], r12
-        bx              lr
-endfunc
author	Christophe GISQUET <christophe.gisquet@gmail.com>	2012-01-01 15:28:47 +0100
committer	Kostya Shishkov <kostya.shishkov@gmail.com>	2012-01-04 10:30:01 +0100
commit	98f24ecd6cfc9c57a555aae6bfcd3d9a4ce9503d (patch)
tree	a4e562412fcd7dacf25afdbd3c2f8cfa5737c911 /libavcodec/arm
parent	0749720b6cdce68e4908dc59f1b4e1399852372b (diff)