summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--libavcodec/aarch64/dcadsp_init.c7
-rw-r--r--libavcodec/aarch64/dcadsp_neon.S60
-rw-r--r--libavcodec/arm/dcadsp_init_arm.c7
-rw-r--r--libavcodec/arm/dcadsp_neon.S29
-rw-r--r--libavcodec/dcadata.c15
-rw-r--r--libavcodec/dcadata.h2
-rw-r--r--libavcodec/dcadec.c12
-rw-r--r--libavcodec/dcadsp.c21
-rw-r--r--libavcodec/dcadsp.h9
-rw-r--r--libavcodec/x86/dcadsp.asm86
-rw-r--r--libavcodec/x86/dcadsp_init.c20
-rw-r--r--tests/checkasm/dcadsp.c45
12 files changed, 9 insertions, 304 deletions
diff --git a/libavcodec/aarch64/dcadsp_init.c b/libavcodec/aarch64/dcadsp_init.c
index c66ec3f538..d3430d045c 100644
--- a/libavcodec/aarch64/dcadsp_init.c
+++ b/libavcodec/aarch64/dcadsp_init.c
@@ -41,12 +41,6 @@ void ff_synth_filter_float_neon(FFTContext *imdct,
float out[32], const float in[32],
float scale);
-void ff_decode_hf_neon(float dst[DCA_SUBBANDS][8],
- const int32_t vq_num[DCA_SUBBANDS],
- const int8_t hf_vq[1024][32], intptr_t vq_offset,
- int32_t scale[DCA_SUBBANDS][2],
- intptr_t start, intptr_t end);
-
av_cold void ff_dcadsp_init_aarch64(DCADSPContext *s)
{
int cpu_flags = av_get_cpu_flags();
@@ -54,7 +48,6 @@ av_cold void ff_dcadsp_init_aarch64(DCADSPContext *s)
if (have_neon(cpu_flags)) {
s->lfe_fir[0] = ff_dca_lfe_fir0_neon;
s->lfe_fir[1] = ff_dca_lfe_fir1_neon;
- s->decode_hf = ff_decode_hf_neon;
}
}
diff --git a/libavcodec/aarch64/dcadsp_neon.S b/libavcodec/aarch64/dcadsp_neon.S
index 73196d914b..4cd3328042 100644
--- a/libavcodec/aarch64/dcadsp_neon.S
+++ b/libavcodec/aarch64/dcadsp_neon.S
@@ -21,66 +21,6 @@
#include "libavutil/aarch64/asm.S"
-function ff_decode_hf_neon, export=1
- add x2, x2, x3
- add x0, x0, x5, lsl #5
- add x1, x1, x5, lsl #2
- add x4, x4, x5, lsl #3
- sub x6, x6, x5
- ldr w7, [x1], #4
- add x7, x2, x7, lsl #5
- subs x6, x6, #1
- b.eq 1f
- b.gt 2f
- ret
-2:
- ldr w8, [x1], #4
- subs x6, x6, #2
- add x8, x2, x8, lsl #5
- ld1 {v2.4s}, [x4], #16
- ld1 {v0.8b}, [x7]
- ld1 {v4.8b}, [x8]
- sxtl v3.8h, v0.8b
- sxtl v7.8h, v4.8b
- scvtf v2.4s, v2.4s, #4
- sxtl v0.4s, v3.4h
- sxtl2 v1.4s, v3.8h
- sxtl v4.4s, v7.4h
- sxtl2 v5.4s, v7.8h
- scvtf v0.4s, v0.4s
- scvtf v1.4s, v1.4s
- scvtf v4.4s, v4.4s
- scvtf v5.4s, v5.4s
- fmul v0.4s, v0.4s, v2.s[0]
- fmul v1.4s, v1.4s, v2.s[0]
- fmul v4.4s, v4.4s, v2.s[2]
- fmul v5.4s, v5.4s, v2.s[2]
- b.lt 10f
-
- ldr w7, [x1], #4
- add x7, x2, x7, lsl #5
- st1 {v0.4s,v1.4s}, [x0], #32
- st1 {v4.4s,v5.4s}, [x0], #32
- b.gt 2b
-1:
- ldr w9, [x4]
- ld1 {v0.8b}, [x7]
- scvtf s2, w9, #4
- sxtl v3.8h, v0.8b
- sxtl v0.4s, v3.4h
- sxtl2 v1.4s, v3.8h
- scvtf v0.4s, v0.4s
- scvtf v1.4s, v1.4s
- fmul v0.4s, v0.4s, v2.s[0]
- fmul v1.4s, v1.4s, v2.s[0]
- st1 {v0.4s,v1.4s}, [x0]
- ret
-10:
- st1 {v0.4s,v1.4s}, [x0], #32
- st1 {v4.4s,v5.4s}, [x0]
- ret
-endfunc
-
function ff_dca_lfe_fir0_neon, export=1
mov x3, #32 // decifactor
sub x1, x1, #7*4
diff --git a/libavcodec/arm/dcadsp_init_arm.c b/libavcodec/arm/dcadsp_init_arm.c
index 252f4aeadd..bf0d9b4b17 100644
--- a/libavcodec/arm/dcadsp_init_arm.c
+++ b/libavcodec/arm/dcadsp_init_arm.c
@@ -49,12 +49,6 @@ void ff_synth_filter_float_neon(FFTContext *imdct,
float out[32], const float in[32],
float scale);
-void ff_decode_hf_neon(float dst[DCA_SUBBANDS][8],
- const int32_t vq_num[DCA_SUBBANDS],
- const int8_t hf_vq[1024][32], intptr_t vq_offset,
- int32_t scale[DCA_SUBBANDS][2],
- intptr_t start, intptr_t end);
-
av_cold void ff_dcadsp_init_arm(DCADSPContext *s)
{
int cpu_flags = av_get_cpu_flags();
@@ -67,7 +61,6 @@ av_cold void ff_dcadsp_init_arm(DCADSPContext *s)
if (have_neon(cpu_flags)) {
s->lfe_fir[0] = ff_dca_lfe_fir0_neon;
s->lfe_fir[1] = ff_dca_lfe_fir1_neon;
- s->decode_hf = ff_decode_hf_neon;
}
}
diff --git a/libavcodec/arm/dcadsp_neon.S b/libavcodec/arm/dcadsp_neon.S
index 70580cdeec..735c4c28e5 100644
--- a/libavcodec/arm/dcadsp_neon.S
+++ b/libavcodec/arm/dcadsp_neon.S
@@ -20,35 +20,6 @@
#include "libavutil/arm/asm.S"
-function ff_decode_hf_neon, export=1
- push {r4-r5,lr}
- add r2, r2, r3
- ldr r3, [sp, #12]
- ldrd r4, r5, [sp, #16]
- add r3, r3, r4, lsl #3
- add r1, r1, r4, lsl #2
- add r0, r0, r4, lsl #5
-
-1: ldr_post lr, r1, #4
- add r4, r4, #1
- add lr, r2, lr, lsl #5
- cmp r4, r5
- vld1.32 {d7}, [r3]!
- vld1.8 {d0}, [lr,:64]
- vcvt.f32.s32 d7, d7, #4
- vmovl.s8 q1, d0
- vmovl.s16 q0, d2
- vmovl.s16 q1, d3
- vcvt.f32.s32 q0, q0
- vcvt.f32.s32 q1, q1
- vmul.f32 q0, q0, d7[0]
- vmul.f32 q1, q1, d7[0]
- vst1.32 {q0-q1}, [r0,:128]!
- bne 1b
-
- pop {r4-r5,pc}
-endfunc
-
function ff_dca_lfe_fir0_neon, export=1
push {r4-r6,lr}
mov r3, #32 @ decifactor
diff --git a/libavcodec/dcadata.c b/libavcodec/dcadata.c
index 1db1938b29..6c41aef3a8 100644
--- a/libavcodec/dcadata.c
+++ b/libavcodec/dcadata.c
@@ -4187,13 +4187,6 @@ const uint32_t ff_dca_lossy_quant[32] = {
84, 42, 21, 0, 0, 0, 0, 0
};
-const float ff_dca_lossy_quant_d[32] = {
- 0, 1.6, 1.0, 0.8, 0.59, 0.50, 0.42, 0.34,
- 0.19, 0.11, 0.06, 0.035, 0.019, 0.011, 0.0065, 0.0040,
- 0.0025, 0.0014, 0.0008, 0.00045, 0.00030, 0.00017, 0.00008, 0.00004,
- 0.00002, 0.00001, 0.000005, 0, 0, 0, 0, 0
-};
-
/* 20bits unsigned fractional binary codes */
const uint32_t ff_dca_lossless_quant[32] = {
0, 4194304, 2097152, 1384120, 1048576, 696254, 524288, 348127,
@@ -4202,14 +4195,6 @@ const uint32_t ff_dca_lossless_quant[32] = {
4, 2, 1, 0, 0, 0, 0, 0
};
-const float ff_dca_lossless_quant_d[32] = {
- 0, 1.0, 0.5, 0.33, 0.25, 0.166, 0.125,
- 0.083, 0.0625, 0.03125, 0.0156, 7.874E-3, 3.922E-3, 1.957E-3,
- 9.775E-4, 4.885E-4, 2.442E-4, 1.221E-4, 6.104E-5, 3.052E-5, 1.526E-5,
- 7.629E-6, 3.815E-6, 1.907E-6, 9.537E-7, 4.768E-7, 2.384E-7, 0,
- 0, 0, 0, 0
-};
-
/* Vector quantization tables */
DECLARE_ALIGNED(8, const int8_t, ff_dca_high_freq_vq)[1024][32] = {
diff --git a/libavcodec/dcadata.h b/libavcodec/dcadata.h
index 7a9d994ed2..0a3139e327 100644
--- a/libavcodec/dcadata.h
+++ b/libavcodec/dcadata.h
@@ -35,10 +35,8 @@ extern const uint32_t ff_dca_scale_factor_quant6[64];
extern const uint32_t ff_dca_scale_factor_quant7[128];
extern const uint32_t ff_dca_lossy_quant[32];
-extern const float ff_dca_lossy_quant_d[32];
extern const uint32_t ff_dca_lossless_quant[32];
-extern const float ff_dca_lossless_quant_d[32];
extern const int8_t ff_dca_high_freq_vq[1024][32];
diff --git a/libavcodec/dcadec.c b/libavcodec/dcadec.c
index 399b1e5d32..a25436cdec 100644
--- a/libavcodec/dcadec.c
+++ b/libavcodec/dcadec.c
@@ -913,12 +913,12 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
s->debug_flag |= 0x01;
}
- s->dcadsp.decode_hf_int(subband_samples, s->dca_chan[k].high_freq_vq,
- ff_dca_high_freq_vq, subsubframe * SAMPLES_PER_SUBBAND,
- s->dca_chan[k].scale_factor,
- s->audio_header.vq_start_subband[k],
- s->audio_header.subband_activity[k]);
-
+ s->dcadsp.decode_hf(subband_samples, s->dca_chan[k].high_freq_vq,
+ ff_dca_high_freq_vq,
+ subsubframe * SAMPLES_PER_SUBBAND,
+ s->dca_chan[k].scale_factor,
+ s->audio_header.vq_start_subband[k],
+ s->audio_header.subband_activity[k]);
}
}
diff --git a/libavcodec/dcadsp.c b/libavcodec/dcadsp.c
index c333f4a44c..ec3561dd54 100644
--- a/libavcodec/dcadsp.c
+++ b/libavcodec/dcadsp.c
@@ -27,30 +27,12 @@
#include "dcadsp.h"
#include "dcamath.h"
-static void decode_hf_c(float dst[DCA_SUBBANDS][8],
+static void decode_hf_c(int32_t dst[DCA_SUBBANDS][8],
const int32_t vq_num[DCA_SUBBANDS],
const int8_t hf_vq[1024][32], intptr_t vq_offset,
int32_t scale[DCA_SUBBANDS][2],
intptr_t start, intptr_t end)
{
- int i, l;
-
- for (l = start; l < end; l++) {
- /* 1 vector -> 32 samples but we only need the 8 samples
- * for this subsubframe. */
- const int8_t *ptr = &hf_vq[vq_num[l]][vq_offset];
- float fscale = scale[l][0] * (1 / 16.0);
- for (i = 0; i < 8; i++)
- dst[l][i] = ptr[i] * fscale;
- }
-}
-
-static void decode_hf_int_c(int32_t dst[DCA_SUBBANDS][8],
- const int32_t vq_num[DCA_SUBBANDS],
- const int8_t hf_vq[1024][32], intptr_t vq_offset,
- int32_t scale[DCA_SUBBANDS][2],
- intptr_t start, intptr_t end)
-{
int i, j;
for (j = start; j < end; j++) {
@@ -141,7 +123,6 @@ av_cold void ff_dcadsp_init(DCADSPContext *s)
s->lfe_fir[1] = dca_lfe_fir1_c;
s->qmf_32_subbands = dca_qmf_32_subbands;
s->decode_hf = decode_hf_c;
- s->decode_hf_int = decode_hf_int_c;
s->dequantize = dequantize_c;
if (ARCH_AARCH64)
diff --git a/libavcodec/dcadsp.h b/libavcodec/dcadsp.h
index eac9de977c..ad361bfd01 100644
--- a/libavcodec/dcadsp.h
+++ b/libavcodec/dcadsp.h
@@ -32,17 +32,12 @@ typedef struct DCADSPContext {
int *synth_buf_offset, float synth_buf2[32],
const float window[512], float *samples_out,
float raXin[32], float scale);
- void (*decode_hf)(float dst[DCA_SUBBANDS][8],
+ void (*decode_hf)(int32_t dst[DCA_SUBBANDS][8],
const int32_t vq_num[DCA_SUBBANDS],
const int8_t hf_vq[1024][32], intptr_t vq_offset,
int32_t scale[DCA_SUBBANDS][2],
intptr_t start, intptr_t end);
- void (*decode_hf_int)(int32_t dst[DCA_SUBBANDS][8],
- const int32_t vq_num[DCA_SUBBANDS],
- const int8_t hf_vq[1024][32], intptr_t vq_offset,
- int32_t scale[DCA_SUBBANDS][2],
- intptr_t start, intptr_t end);
- void (*dequantize)(int32_t *samples, uint32_t step_size, uint64_t scale);
+ void (*dequantize)(int32_t *samples, uint32_t step_size, uint32_t scale);
} DCADSPContext;
void ff_dcadsp_init(DCADSPContext *s);
diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm
index 18c7a0c354..89d4ac489a 100644
--- a/libavcodec/x86/dcadsp.asm
+++ b/libavcodec/x86/dcadsp.asm
@@ -26,92 +26,6 @@ pf_inv16: times 4 dd 0x3D800000 ; 1/16
SECTION .text
-; void decode_hf(float dst[DCA_SUBBANDS][8], const int32_t vq_num[DCA_SUBBANDS],
-; const int8_t hf_vq[1024][32], intptr_t vq_offset,
-; int32_t scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end)
-
-%macro DECODE_HF 0
-cglobal decode_hf, 6,6,5, dst, num, src, offset, scale, start, end
- lea srcq, [srcq + offsetq]
- shl startq, 2
- mov offsetd, endm
-%define DICT offsetq
- shl offsetq, 2
- mov endm, offsetq
-.loop:
-%if ARCH_X86_64
- mov offsetd, [scaleq + 2 * startq]
- cvtsi2ss m0, offsetd
-%else
- cvtsi2ss m0, [scaleq + 2 * startq]
-%endif
- mov offsetd, [numq + startq]
- mulss m0, [pf_inv16]
- shl DICT, 5
- shufps m0, m0, 0
-%if cpuflag(sse2)
-%if cpuflag(sse4)
- pmovsxbd m1, [srcq + DICT + 0]
- pmovsxbd m2, [srcq + DICT + 4]
-%else
- movq m1, [srcq + DICT]
- punpcklbw m1, m1
- mova m2, m1
- punpcklwd m1, m1
- punpckhwd m2, m2
- psrad m1, 24
- psrad m2, 24
-%endif
- cvtdq2ps m1, m1
- cvtdq2ps m2, m2
-%else
- movd mm0, [srcq + DICT + 0]
- movd mm1, [srcq + DICT + 4]
- punpcklbw mm0, mm0
- punpcklbw mm1, mm1
- movq mm2, mm0
- movq mm3, mm1
- punpcklwd mm0, mm0
- punpcklwd mm1, mm1
- punpckhwd mm2, mm2
- punpckhwd mm3, mm3
- psrad mm0, 24
- psrad mm1, 24
- psrad mm2, 24
- psrad mm3, 24
- cvtpi2ps m1, mm0
- cvtpi2ps m2, mm1
- cvtpi2ps m3, mm2
- cvtpi2ps m4, mm3
- shufps m0, m0, 0
- shufps m1, m3, q1010
- shufps m2, m4, q1010
-%endif
- mulps m1, m0
- mulps m2, m0
- mova [dstq + 8 * startq + 0], m1
- mova [dstq + 8 * startq + 16], m2
- add startq, 4
- cmp startq, endm
- jl .loop
-.end:
-%if notcpuflag(sse2)
- emms
-%endif
- REP_RET
-%endmacro
-
-%if ARCH_X86_32
-INIT_XMM sse
-DECODE_HF
-%endif
-
-INIT_XMM sse2
-DECODE_HF
-
-INIT_XMM sse4
-DECODE_HF
-
; %1=v0/v1 %2=in1 %3=in2
%macro FIR_LOOP 2-3
.loop%1:
diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/dcadsp_init.c
index 7c2bec1f9b..8632c4a98f 100644
--- a/libavcodec/x86/dcadsp_init.c
+++ b/libavcodec/x86/dcadsp_init.c
@@ -23,15 +23,6 @@
#include "libavutil/x86/cpu.h"
#include "libavcodec/dcadsp.h"
-void ff_decode_hf_sse(float dst[DCA_SUBBANDS][8], const int vq_num[DCA_SUBBANDS],
- const int8_t hf_vq[1024][32], intptr_t vq_offset,
- int scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end);
-void ff_decode_hf_sse2(float dst[DCA_SUBBANDS][8], const int vq_num[DCA_SUBBANDS],
- const int8_t hf_vq[1024][32], intptr_t vq_offset,
- int scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end);
-void ff_decode_hf_sse4(float dst[DCA_SUBBANDS][8], const int vq_num[DCA_SUBBANDS],
- const int8_t hf_vq[1024][32], intptr_t vq_offset,
- int scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end);
void ff_dca_lfe_fir0_sse(float *out, const float *in, const float *coefs);
void ff_dca_lfe_fir1_sse(float *out, const float *in, const float *coefs);
@@ -40,20 +31,9 @@ av_cold void ff_dcadsp_init_x86(DCADSPContext *s)
int cpu_flags = av_get_cpu_flags();
if (EXTERNAL_SSE(cpu_flags)) {
-#if ARCH_X86_32
- s->decode_hf = ff_decode_hf_sse;
-#endif
s->lfe_fir[0] = ff_dca_lfe_fir0_sse;
s->lfe_fir[1] = ff_dca_lfe_fir1_sse;
}
-
- if (EXTERNAL_SSE2(cpu_flags)) {
- s->decode_hf = ff_decode_hf_sse2;
- }
-
- if (EXTERNAL_SSE4(cpu_flags)) {
- s->decode_hf = ff_decode_hf_sse4;
- }
}
diff --git a/tests/checkasm/dcadsp.c b/tests/checkasm/dcadsp.c
index 1665cbbc3d..7251362a1e 100644
--- a/tests/checkasm/dcadsp.c
+++ b/tests/checkasm/dcadsp.c
@@ -75,16 +75,6 @@
} \
} while (0)
-#define randomize_decode_hf() \
- do { \
- int i; \
- for (i = 0; i < DCA_SUBBANDS; i++) { \
- vq_num[i] = rnd() >> 22; \
- scale[i][0] = rnd() >> 26; \
- scale[i][1] = INT32_MIN; \
- } \
- } while (0)
-
void checkasm_check_dcadsp(void)
{
DCADSPContext c;
@@ -98,40 +88,5 @@ void checkasm_check_dcadsp(void)
if (check_func(c.lfe_fir[1], "dca_lfe_fir1"))
check_lfe_fir(64, 1.0e-6f);
- if (check_func(c.decode_hf, "dca_decode_hf")) {
- LOCAL_ALIGNED_16(float, dst0, [DCA_SUBBANDS], [8]);
- LOCAL_ALIGNED_16(float, dst1, [DCA_SUBBANDS], [8]);
- LOCAL_ALIGNED_16(int32_t, scale, [DCA_SUBBANDS], [2]);
- LOCAL_ALIGNED_16(int32_t, vq_num, [DCA_SUBBANDS]);
- intptr_t start, end = 32, offset;
-
- declare_func(void, float[DCA_SUBBANDS][8], const int32_t[DCA_SUBBANDS],
- const int8_t[1024][DCA_SUBBANDS], intptr_t, int32_t[DCA_SUBBANDS][2],
- intptr_t, intptr_t);
-
- for (start = 0; start < 32; start++) {
- for (offset = 0; offset < 32; offset += 8) {
- int j;
- for (j = 0; j < DCA_SUBBANDS; j++) {
- memset(dst0[j], 0, sizeof(*(dst0[j])) * 8);
- memset(dst1[j], 0, sizeof(*(dst1[j])) * 8);
- }
- randomize_decode_hf();
-
- call_ref(dst0, vq_num, ff_dca_high_freq_vq, offset, scale, start, end);
- call_new(dst1, vq_num, ff_dca_high_freq_vq, offset, scale, start, end);
-
- for (j = 0; j < 8 * DCA_SUBBANDS; j++) {
- if (!float_near_ulp(dst0[j>>3][j&7], dst1[j>>3][j&7], 1)) {
- fail();
- break;
- }
- }
-
- bench_new(dst1, vq_num, ff_dca_high_freq_vq, offset, scale, start, end);
- }
- }
- }
-
report("dcadsp");
}