From 8f9fe6ae3461ce270bce6b7083fda5ec314cdad4 Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Sat, 19 Apr 2014 18:17:23 +0200 Subject: aarch64: NEON fixed/floating point MPADSP apply_window 30%/25% (fixed/float) faster mp3 decoding on Apple's A7. The floating point decoder is approximately 7% faster. --- libavcodec/aarch64/Makefile | 2 + libavcodec/aarch64/mpegaudiodsp_init.c | 39 ++++++ libavcodec/aarch64/mpegaudiodsp_neon.S | 226 +++++++++++++++++++++++++++++++++ 3 files changed, 267 insertions(+) create mode 100644 libavcodec/aarch64/mpegaudiodsp_init.c create mode 100644 libavcodec/aarch64/mpegaudiodsp_neon.S (limited to 'libavcodec/aarch64') diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile index ec1fd1eccc..21d169559c 100644 --- a/libavcodec/aarch64/Makefile +++ b/libavcodec/aarch64/Makefile @@ -3,6 +3,7 @@ OBJS-$(CONFIG_H264CHROMA) += aarch64/h264chroma_init_aarch64.o OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_init_aarch64.o OBJS-$(CONFIG_H264QPEL) += aarch64/h264qpel_init_aarch64.o OBJS-$(CONFIG_HPELDSP) += aarch64/hpeldsp_init_aarch64.o +OBJS-$(CONFIG_MPEGAUDIODSP) += aarch64/mpegaudiodsp_init.o OBJS-$(CONFIG_NEON_CLOBBER_TEST) += aarch64/neontest.o OBJS-$(CONFIG_VIDEODSP) += aarch64/videodsp_init.o @@ -18,4 +19,5 @@ NEON-OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_neon.o \ NEON-OBJS-$(CONFIG_H264QPEL) += aarch64/h264qpel_neon.o \ aarch64/hpeldsp_neon.o NEON-OBJS-$(CONFIG_HPELDSP) += aarch64/hpeldsp_neon.o +NEON-OBJS-$(CONFIG_MPEGAUDIODSP) += aarch64/mpegaudiodsp_neon.o NEON-OBJS-$(CONFIG_MDCT) += aarch64/mdct_neon.o diff --git a/libavcodec/aarch64/mpegaudiodsp_init.c b/libavcodec/aarch64/mpegaudiodsp_init.c new file mode 100644 index 0000000000..a8b2baf955 --- /dev/null +++ b/libavcodec/aarch64/mpegaudiodsp_init.c @@ -0,0 +1,39 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include + +#include "libavutil/attributes.h" +#include "libavutil/aarch64/cpu.h" +#include "libavcodec/mpegaudiodsp.h" +#include "config.h" + +void ff_mpadsp_apply_window_fixed_neon(int32_t *synth_buf, int32_t *window, + int *dither, int16_t *samples, int incr); +void ff_mpadsp_apply_window_float_neon(float *synth_buf, float *window, + int *dither, float *samples, int incr); + +av_cold void ff_mpadsp_init_aarch64(MPADSPContext *s) +{ + int cpu_flags = av_get_cpu_flags(); + + if (have_neon(cpu_flags)) { + s->apply_window_fixed = ff_mpadsp_apply_window_fixed_neon; + s->apply_window_float = ff_mpadsp_apply_window_float_neon; + } +} diff --git a/libavcodec/aarch64/mpegaudiodsp_neon.S b/libavcodec/aarch64/mpegaudiodsp_neon.S new file mode 100644 index 0000000000..39875fed4a --- /dev/null +++ b/libavcodec/aarch64/mpegaudiodsp_neon.S @@ -0,0 +1,226 @@ +/* + * Copyright (c) 2014 Janne Grunau + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/aarch64/asm.S" + +#define FRAC_BITS 23 // fractional bits for sb_samples and dct +#define WFRAC_BITS 16 // fractional bits for window +#define OUT_SHIFT (WFRAC_BITS + FRAC_BITS - 15) + +const tbl_rev128.s align=4 + .byte 12, 13, 14, 15 + .byte 8, 9, 10, 11 + .byte 4, 5, 6, 7 + .byte 0, 1, 2, 3 +endconst + +.macro apply_window type, st +function ff_mpadsp_apply_window_\type\()_neon, export=1 + mov x7, x0 + sxtw x4, w4 // incr + add x8, x0, #512<<2 + ld1 {v0.4s,v1.4s,v2.4s,v3.4s}, [x7], #64 + ld1 {v4.4s,v5.4s,v6.4s,v7.4s}, [x7], #64 + st1 {v0.4s,v1.4s,v2.4s,v3.4s}, [x8], #64 + st1 {v4.4s,v5.4s,v6.4s,v7.4s}, [x8], #64 + movrel x15, tbl_rev128.s + ld1 {v27.4s}, [x15] +.ifc \type, fixed + lsl x4, x4, #1 +.else + lsl x4, x4, #2 +.endif + add x10, x0, #45<<2 + add x0, x0, #16<<2 + add x1, x1, #16<<2 + add x5, x3, x4, lsl #5 + sub x5, x5, x4 // samples2 + neg x13, x4 // -incr + mov x9, #64<<2 +.ifc \type, fixed + ld1r {v16.2s}, [x2] // dither_state + sxtl v16.2d, v16.2s + movi v29.2d, #0 + movi v30.2d, #(1<