From 4cb6964244fd6c099383d8b7e99731e72cc844b9 Mon Sep 17 00:00:00 2001
From: Christophe Gisquet <christophe.gisquet@gmail.com>
Date: Fri, 14 Feb 2014 15:03:13 +0000
Subject: dcadec: simplify decoding of VQ high frequencies

The vector dequantization has a test in a loop preventing effective SIMD
implementation. By moving it out of the loop, this loop can be DSPized.

Therefore, modify the current DSP implementation. In particular, the
DSP implementation no longer has to handle null loop sizes.

The decode_hf implementations have following timings:

For x86 Arrandale:
        C  SSE SSE2 SSE4
win32: 260 162  119  104
win64: 242 N/A   89   72

The arm NEON optimizations follow in a later patch as external asm. The
now unused check for the y modifier in arm inline asm is removed from
configure.
---
 libavcodec/dcadsp.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'libavcodec/dcadsp.h')

diff --git a/libavcodec/dcadsp.h b/libavcodec/dcadsp.h
index e4c1bc7f05..0fa75a5ed6 100644
--- a/libavcodec/dcadsp.h
+++ b/libavcodec/dcadsp.h
@@ -22,6 +22,8 @@
 #include "avfft.h"
 #include "synth_filter.h"
 
+#define DCA_SUBBANDS 32
+
 typedef struct DCADSPContext {
     void (*lfe_fir[2])(float *out, const float *in, const float *coefs);
     void (*qmf_32_subbands)(float samples_in[32][8], int sb_act,
@@ -30,7 +32,11 @@ typedef struct DCADSPContext {
                             int *synth_buf_offset, float synth_buf2[32],
                             const float window[512], float *samples_out,
                             float raXin[32], float scale);
-    void (*int8x8_fmul_int32)(float *dst, const int8_t *src, int scale);
+    void (*decode_hf)(float dst[DCA_SUBBANDS][8],
+                      const int32_t vq_num[DCA_SUBBANDS],
+                      const int8_t hf_vq[1024][32], intptr_t vq_offset,
+                      int32_t scale[DCA_SUBBANDS][2],
+                      intptr_t start, intptr_t end);
 } DCADSPContext;
 
 void ff_dcadsp_init(DCADSPContext *s);
-- 
cgit v1.2.3