summaryrefslogtreecommitdiff
path: root/libavcodec/x86
diff options
context:
space:
mode:
authorRostislav Pehlivanov <atomnuker@gmail.com>2016-10-08 15:59:14 +0100
committerRostislav Pehlivanov <atomnuker@gmail.com>2016-10-18 21:41:18 +0100
commitd2ae5f77c61a29c3c63cc4c41c74ccfca4167649 (patch)
treebd615f22e6807e025da052c411e12e9a7be0852e /libavcodec/x86
parent3b02f6dd7be880fd6c1bcaf2fd0c1314dcee7aa6 (diff)
aacenc: add SIMD optimizations for abs_pow34 and quantization
Performance improvements. quant_bands: with: 681 decicycles in quant_bands, 8388453 runs, 155 skips; without: 1190 decicycles in quant_bands, 8388386 runs, 222 skips; around 42% for the function. Twoloop coder: abs_pow34 only: with/without: 7.82s/8.17s, around 4% for the entire encoder; both: with/without: 7.15s/8.17s, around 12% for the entire encoder. Fast coder: abs_pow34 only: with/without: 3.40s/3.77s, around 10% for the entire encoder; both: with/without: 3.02s/3.77s, around 20% faster for the entire encoder. Signed-off-by: Rostislav Pehlivanov <atomnuker@gmail.com>; Tested-by: Michael Niedermayer <michael@niedermayer.cc>; Reviewed-by: James Almer <jamrial@gmail.com>
Diffstat (limited to 'libavcodec/x86')
-rw-r--r--libavcodec/x86/Makefile2
-rw-r--r--libavcodec/x86/aacencdsp.asm86
-rw-r--r--libavcodec/x86/aacencdsp_init.c43
3 files changed, 131 insertions, 0 deletions
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 522b6c2a6e..1db1137392 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -42,6 +42,7 @@ OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o
# decoders/encoders
OBJS-$(CONFIG_AAC_DECODER) += x86/aacpsdsp_init.o \
x86/sbrdsp_init.o
+OBJS-$(CONFIG_AAC_ENCODER) += x86/aacencdsp_init.o
OBJS-$(CONFIG_ADPCM_G722_DECODER) += x86/g722dsp_init.o
OBJS-$(CONFIG_ADPCM_G722_ENCODER) += x86/g722dsp_init.o
OBJS-$(CONFIG_ALAC_DECODER) += x86/alacdsp_init.o
@@ -132,6 +133,7 @@ YASM-OBJS-$(CONFIG_VP8DSP) += x86/vp8dsp.o \
# decoders/encoders
YASM-OBJS-$(CONFIG_AAC_DECODER) += x86/aacpsdsp.o \
x86/sbrdsp.o
+YASM-OBJS-$(CONFIG_AAC_ENCODER) += x86/aacencdsp.o
YASM-OBJS-$(CONFIG_ADPCM_G722_DECODER) += x86/g722dsp.o
YASM-OBJS-$(CONFIG_ADPCM_G722_ENCODER) += x86/g722dsp.o
YASM-OBJS-$(CONFIG_ALAC_DECODER) += x86/alacdsp.o
diff --git a/libavcodec/x86/aacencdsp.asm b/libavcodec/x86/aacencdsp.asm
new file mode 100644
index 0000000000..97af571ec8
--- /dev/null
+++ b/libavcodec/x86/aacencdsp.asm
@@ -0,0 +1,86 @@
+;******************************************************************************
+;* SIMD optimized AAC encoder DSP functions
+;*
+;* Copyright (C) 2016 Rostislav Pehlivanov <atomnuker@gmail.com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+float_abs_mask: times 4 dd 0x7fffffff
+
+SECTION .text
+
+;*******************************************************************
+;void ff_abs_pow34(float *out, const float *in, const int size);
+;*******************************************************************
+INIT_XMM sse
+cglobal abs_pow34, 3, 3, 3, out, in, size ; out[i] = |in[i]|^(3/4), four floats per iteration
+ mova m2, [float_abs_mask] ; 0x7fffffff in every lane: clears the sign bit
+ shl sized, 2 ; floats -> bytes; size is a 32-bit int, so shift the dword register (zero-extends into sizeq on x86-64, discarding any stale upper bits)
+ add inq, sizeq ; point in at the end of the buffer
+ add outq, sizeq ; point out at the end of the buffer
+ neg sizeq ; index now runs from -bytes up towards zero
+.loop:
+ andps m0, m2, [inq+sizeq] ; m0 = |x|
+ sqrtps m1, m0 ; m1 = |x|^(1/2)
+ mulps m0, m1 ; m0 = |x|^(3/2)
+ sqrtps m0, m0 ; m0 = |x|^(3/4)
+ mova [outq+sizeq], m0 ; aligned store; NOTE(review): presumably callers pass 16-byte-aligned buffers - confirm
+ add sizeq, mmsize ; advance by one vector; sets flags for the branch below
+ jl .loop ; keep going while the index is still negative
+ RET
+
+;*******************************************************************
+;void ff_aac_quantize_bands(int *out, const float *in, const float *scaled,
+; int size, int is_signed, int maxval, const float Q34,
+; const float rounding)
+;*******************************************************************
+INIT_XMM sse2
+cglobal aac_quantize_bands, 5, 5, 6, out, in, scaled, size, is_signed, maxval, Q34, rounding ; out[i] = (int)min(scaled[i]*Q34 + rounding, maxval), optionally carrying the sign of in[i]
+%if UNIX64 == 0
+ movss m0, Q34m ; fetch Q34 from its argument slot
+ movss m1, roundingm ; fetch rounding from its argument slot
+ cvtsi2ss m3, dword maxvalm ; maxval as float, read from its argument slot
+%else
+ cvtsi2ss m3, maxvald ; maxval as float; NOTE(review): Q34/rounding are not loaded here, presumably because the UNIX64 ABI already delivers them in m0/m1 - confirm
+%endif
+ shufps m0, m0, 0 ; broadcast Q34 to all four lanes
+ shufps m1, m1, 0 ; broadcast rounding to all four lanes
+ shufps m3, m3, 0 ; broadcast maxval to all four lanes
+ shl is_signedd, 31 ; move bit 0 of is_signed into the float sign-bit position
+ movd m4, is_signedd ; sign mask (0x80000000 or 0) into the low lane
+ shufps m4, m4, 0 ; broadcast the sign mask
+ shl sized, 2 ; elements -> bytes (4 bytes per element)
+ add inq, sizeq ; point in at the end of the buffer
+ add outq, sizeq ; point out at the end of the buffer
+ add scaledq, sizeq ; point scaled at the end of the buffer
+ neg sizeq ; index now runs from -bytes up towards zero
+.loop:
+ mulps m2, m0, [scaledq+sizeq] ; m2 = scaled * Q34
+ addps m2, m1 ; + rounding
+ minps m2, m3 ; clamp to maxval
+ andps m5, m4, [inq+sizeq] ; sign bit of in when the mask is set, else zero
+ orps m2, m5 ; apply that sign bit to the clamped value
+ cvttps2dq m2, m2 ; truncating float -> int32 conversion
+ mova [outq+sizeq], m2 ; aligned store; NOTE(review): presumably buffers are 16-byte aligned and size is a multiple of 4 - confirm
+ add sizeq, mmsize ; advance by one vector; sets flags for the branch below
+ jl .loop ; keep going while the index is still negative
+ RET
diff --git a/libavcodec/x86/aacencdsp_init.c b/libavcodec/x86/aacencdsp_init.c
new file mode 100644
index 0000000000..d761c3c5e6
--- /dev/null
+++ b/libavcodec/x86/aacencdsp_init.c
@@ -0,0 +1,43 @@
+/*
+ * AAC encoder assembly optimizations
+ * Copyright (C) 2016 Rostislav Pehlivanov <atomnuker@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "libavutil/float_dsp.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/aacenc.h"
+
+void ff_abs_pow34_sse(float *out, const float *in, const int size); /* SSE routine; matches "cglobal abs_pow34" in aacencdsp.asm */
+
+void ff_aac_quantize_bands_sse2(int *out, const float *in, const float *scaled, /* SSE2 routine; matches "cglobal aac_quantize_bands" in aacencdsp.asm */
+ int size, int is_signed, int maxval, const float Q34,
+ const float rounding);
+
+av_cold void ff_aac_dsp_init_x86(AACEncContext *s) /* install x86 SIMD routines into the encoder context */
+{
+    const int flags = av_get_cpu_flags(); /* capabilities of the running CPU */
+    if (EXTERNAL_SSE(flags)) { /* macro kept directly in the condition, as in the original */
+        s->abs_pow34 = ff_abs_pow34_sse;
+    }
+    if (EXTERNAL_SSE2(flags)) { /* quantizer needs SSE2 */
+        s->quant_bands = ff_aac_quantize_bands_sse2;
+    }
+}