diff options
author | Christophe Gisquet <christophe.gisquet@gmail.com> | 2014-02-14 15:03:13 +0000 |
---|---|---|
committer | Janne Grunau <janne-libav@jannau.net> | 2014-02-28 13:03:22 +0100 |
commit | 4cb6964244fd6c099383d8b7e99731e72cc844b9 (patch) | |
tree | 8b2fd29f4af8ba5d052e04d4dd1f4e97efaf73f2 /libavcodec/dcadsp.c | |
parent | 7686afd049be98d18663682b92d983340fa2c305 (diff) |
dcadec: simplify decoding of VQ high frequencies
The vector dequantization has a test in a loop preventing effective SIMD
implementation. By moving it out of the loop, this loop can be DSPized.
Therefore, modify the current DSP implementation. In particular, the
DSP implementation no longer has to handle null loop sizes.
The decode_hf implementations have the following timings:
For x86 Arrandale:
        C    SSE  SSE2  SSE4
win32:  260  162  119   104
win64:  242  N/A   89    72
The arm NEON optimizations follow in a later patch as external asm. The
now unused check for the y modifier in arm inline asm is removed from
configure.
Diffstat (limited to 'libavcodec/dcadsp.c')
-rw-r--r-- | libavcodec/dcadsp.c | 22 |
1 files changed, 16 insertions, 6 deletions
diff --git a/libavcodec/dcadsp.c b/libavcodec/dcadsp.c
index 294a1b2736..18732dc80a 100644
--- a/libavcodec/dcadsp.c
+++ b/libavcodec/dcadsp.c
@@ -24,12 +24,22 @@
 #include "libavutil/intreadwrite.h"
 #include "dcadsp.h"
 
-static void int8x8_fmul_int32_c(float *dst, const int8_t *src, int scale)
+static void decode_hf_c(float dst[DCA_SUBBANDS][8],
+                        const int32_t vq_num[DCA_SUBBANDS],
+                        const int8_t hf_vq[1024][32], intptr_t vq_offset,
+                        int32_t scale[DCA_SUBBANDS][2],
+                        intptr_t start, intptr_t end)
 {
-    float fscale = scale / 16.0;
-    int i;
-    for (i = 0; i < 8; i++)
-        dst[i] = src[i] * fscale;
+    int i, l;
+
+    for (l = start; l < end; l++) {
+        /* 1 vector -> 32 samples but we only need the 8 samples
+         * for this subsubframe. */
+        const int8_t *ptr = &hf_vq[vq_num[l]][vq_offset];
+        float fscale = scale[l][0] * (1 / 16.0);
+        for (i = 0; i < 8; i++)
+            dst[l][i] = ptr[i] * fscale;
+    }
 }
 
 static inline void
@@ -96,7 +106,7 @@ av_cold void ff_dcadsp_init(DCADSPContext *s)
     s->lfe_fir[0] = dca_lfe_fir0_c;
     s->lfe_fir[1] = dca_lfe_fir1_c;
     s->qmf_32_subbands = dca_qmf_32_subbands;
-    s->int8x8_fmul_int32 = int8x8_fmul_int32_c;
+    s->decode_hf = decode_hf_c;
     if (ARCH_ARM) ff_dcadsp_init_arm(s);
     if (ARCH_X86) ff_dcadsp_init_x86(s);
 }