diff options
author | Janne Grunau <janne-libav@jannau.net> | 2015-11-28 15:23:52 +0100 |
---|---|---|
committer | Janne Grunau <janne-libav@jannau.net> | 2015-12-14 16:45:01 +0100 |
commit | c33c1fa8af2b2e82418a06901b6ad17b3d61b73e (patch) | |
tree | 527ed648444d04866df81da753379bfe692718b5 /libavcodec/aarch64/dcadsp_init.c | |
parent | e2710e790c09e49e86baa58c6063af0097cc8cb0 (diff) |
arm64: convert dcadsp neon asm from arm
~2% faster dts decoding overall.
                     cortex-a57  cortex-a53
dca_decode_hf_c:          474.8      1659.9
dca_decode_hf_neon:       225.2       301.1
dca_lfe_fir0_c:           913.2      1537.7
dca_lfe_fir0_neon:        286.8       451.9
dca_lfe_fir1_c:           848.7      1711.5
dca_lfe_fir1_neon:        387.1       506.4
Diffstat (limited to 'libavcodec/aarch64/dcadsp_init.c')
-rw-r--r-- | libavcodec/aarch64/dcadsp_init.c | 51 |
1 files changed, 51 insertions, 0 deletions
diff --git a/libavcodec/aarch64/dcadsp_init.c b/libavcodec/aarch64/dcadsp_init.c new file mode 100644 index 0000000000..ad910700f0 --- /dev/null +++ b/libavcodec/aarch64/dcadsp_init.c @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2010 Mans Rullgard <mans@mansr.com> + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" + +#include "libavutil/aarch64/cpu.h" +#include "libavutil/attributes.h" +#include "libavcodec/dcadsp.h" + +void ff_dca_lfe_fir0_neon(float *out, const float *in, const float *coefs); +void ff_dca_lfe_fir1_neon(float *out, const float *in, const float *coefs); + +void ff_synth_filter_float_neon(FFTContext *imdct, + float *synth_buf_ptr, int *synth_buf_offset, + float synth_buf2[32], const float window[512], + float out[32], const float in[32], + float scale); + +void ff_decode_hf_neon(float dst[DCA_SUBBANDS][8], + const int32_t vq_num[DCA_SUBBANDS], + const int8_t hf_vq[1024][32], intptr_t vq_offset, + int32_t scale[DCA_SUBBANDS][2], + intptr_t start, intptr_t end); + +av_cold void ff_dcadsp_init_aarch64(DCADSPContext *s) +{ + int cpu_flags = av_get_cpu_flags(); + + if (have_neon(cpu_flags)) { + s->lfe_fir[0] = ff_dca_lfe_fir0_neon; + s->lfe_fir[1] = ff_dca_lfe_fir1_neon; + s->decode_hf = ff_decode_hf_neon; + } +} |