summaryrefslogtreecommitdiff
path: root/libavcodec/x86
diff options
context:
space:
mode:
authorPaul B Mahol <onemda@gmail.com>2015-10-05 12:28:41 +0200
committerPaul B Mahol <onemda@gmail.com>2015-10-09 21:38:15 +0200
commit35af7add6fac7f926d0cbb5724f36f0fe5022488 (patch)
tree038c8d9a54b192093ed2c05b679a80ee6e0fa673 /libavcodec/x86
parent2f4374fae135afeee84f987c8fab8cbad1c7fcc7 (diff)
avcodec/takdec: add x86 SIMD for rest of decorrelation modes
Signed-off-by: Paul B Mahol <onemda@gmail.com>
Diffstat (limited to 'libavcodec/x86')
-rw-r--r--libavcodec/x86/Makefile2
-rw-r--r--libavcodec/x86/takdsp.asm116
-rw-r--r--libavcodec/x86/takdsp_init.c45
3 files changed, 163 insertions, 0 deletions
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index d8b091a266..a9d8032657 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -56,6 +56,7 @@ OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp_init.o
OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp_init.o
OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp_init.o
OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc_init.o
+OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp_init.o
OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp_init.o
OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp_init.o
OBJS-$(CONFIG_V210_DECODER) += x86/v210-init.o
@@ -152,6 +153,7 @@ YASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o
YASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o
YASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp.o
YASM-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc.o
+YASM-OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp.o
YASM-OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o
YASM-OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp.o
YASM-OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc.o
diff --git a/libavcodec/x86/takdsp.asm b/libavcodec/x86/takdsp.asm
new file mode 100644
index 0000000000..d3c6a13f11
--- /dev/null
+++ b/libavcodec/x86/takdsp.asm
@@ -0,0 +1,116 @@
+;******************************************************************************
+;* TAK DSP SIMD optimizations
+;*
+;* Copyright (C) 2015 Paul B Mahol
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+pd_128: times 4 dd 128
+
+SECTION .text
+
+INIT_XMM sse2
+cglobal tak_decorrelate_ls, 3, 3, 2, p1, p2, length ; in place: p2[i] += p1[i] on 32-bit lanes, two vectors per loop pass
+ shl lengthd, 2 ; element count -> byte count (4 bytes per dword)
+ add p1q, lengthq ; p1 -> one past its last element
+ add p2q, lengthq ; p2 -> one past its last element
+ neg lengthq ; negative byte offset that counts up towards 0
+.loop: ; body runs before the exit test, so at least one pass is always made
+ mova m0, [p1q+lengthq+mmsize*0] ; first vector of p1
+ mova m1, [p1q+lengthq+mmsize*1] ; second vector of p1
+ paddd m0, [p2q+lengthq+mmsize*0] ; add the matching p2 dwords
+ paddd m1, [p2q+lengthq+mmsize*1]
+ mova [p2q+lengthq+mmsize*0], m0 ; sums overwrite p2; p1 is only read
+ mova [p2q+lengthq+mmsize*1], m1
+ add lengthq, mmsize*2 ; step past the two vectors just handled
+ jl .loop ; NOTE(review): a length that is not a multiple of two vectors runs past the end - confirm callers pad
+ REP_RET
+
+cglobal tak_decorrelate_sr, 3, 3, 2, p1, p2, length ; in place: p1[i] = p2[i] - p1[i] on 32-bit lanes, two vectors per loop pass
+ shl lengthd, 2 ; element count -> byte count (4 bytes per dword)
+ add p1q, lengthq ; p1 -> one past its last element
+ add p2q, lengthq ; p2 -> one past its last element
+ neg lengthq ; negative byte offset that counts up towards 0
+
+.loop: ; body runs before the exit test, so at least one pass is always made
+ mova m0, [p2q+lengthq+mmsize*0] ; first vector of p2
+ mova m1, [p2q+lengthq+mmsize*1] ; second vector of p2
+ psubd m0, [p1q+lengthq+mmsize*0] ; subtract the matching p1 dwords
+ psubd m1, [p1q+lengthq+mmsize*1]
+ mova [p1q+lengthq+mmsize*0], m0 ; differences overwrite p1; p2 is only read
+ mova [p1q+lengthq+mmsize*1], m1
+ add lengthq, mmsize*2 ; step past the two vectors just handled
+ jl .loop ; NOTE(review): a length that is not a multiple of two vectors runs past the end - confirm callers pad
+ REP_RET
+
+cglobal tak_decorrelate_sm, 3, 3, 6, p1, p2, length ; in place: p1[i] -= p2[i] >> 1, then p2[i] += new p1[i]; two vectors per loop pass
+ shl lengthd, 2 ; element count -> byte count (4 bytes per dword)
+ add p1q, lengthq ; p1 -> one past its last element
+ add p2q, lengthq ; p2 -> one past its last element
+ neg lengthq ; negative byte offset that counts up towards 0
+
+.loop: ; body runs before the exit test, so at least one pass is always made
+ mova m0, [p1q+lengthq] ; a, first vector
+ mova m1, [p2q+lengthq] ; b, first vector
+ mova m3, [p1q+lengthq+mmsize] ; a, second vector
+ mova m4, [p2q+lengthq+mmsize] ; b, second vector
+ mova m2, m1 ; copy b so the unshifted value survives
+ mova m5, m4
+ psrad m2, 1 ; b >> 1 keeping the sign: samples are signed int32_t, a logical shift would zero-fill the sign bit
+ psrad m5, 1
+ psubd m0, m2 ; a -= b >> 1
+ psubd m3, m5
+ paddd m1, m0 ; b += updated a
+ paddd m4, m3
+ mova [p1q+lengthq], m0 ; p1 <- updated a
+ mova [p2q+lengthq], m1 ; p2 <- updated a + original b
+ mova [p1q+lengthq+mmsize], m3
+ mova [p2q+lengthq+mmsize], m4
+ add lengthq, mmsize*2 ; step past the two vectors just handled
+ jl .loop ; NOTE(review): a length that is not a multiple of two vectors runs past the end - confirm callers pad
+ REP_RET
+
+INIT_XMM sse4
+cglobal tak_decorrelate_sf, 3, 3, 5, p1, p2, length, dshift, dfactor ; p1[i] = ((dfactor * (p2[i] >> dshift) + 128) >> 8 << dshift) - p1[i]
+ shl lengthd, 2 ; element count -> byte count (4 bytes per dword)
+ add p1q, lengthq ; p1 -> one past its last element
+ add p2q, lengthq ; p2 -> one past its last element
+ neg lengthq ; negative byte offset that counts up towards 0
+
+ movd m2, dshiftm ; variable shift count, kept in a vector register for the shifts below
+ movd m3, dfactorm ; dfactor into the low lane ...
+ pshufd m3, m3, 0 ; ... then replicated into all four lanes
+ mova m4, [pd_128] ; rounding bias applied before the >> 8
+
+.loop: ; one vector per pass; body runs before the exit test
+ mova m0, [p1q+lengthq] ; a
+ mova m1, [p2q+lengthq] ; b
+ psrad m1, m2 ; b >> dshift keeping the sign: samples are signed int32_t, a logical shift would corrupt negatives
+ pmulld m1, m3 ; * dfactor (low 32 bits of each product; SSE4.1 instruction)
+ paddd m1, m4 ; + 128
+ psrad m1, 8 ; >> 8, again sign-preserving because the product may be negative
+ pslld m1, m2 ; << dshift restores the scale dropped by the first shift
+ psubd m1, m0 ; scaled b - a
+ mova [p1q+lengthq], m1 ; result overwrites p1; p2 is only read
+ add lengthq, mmsize ; step past the vector just handled
+ jl .loop ; NOTE(review): a length that is not a multiple of one vector runs past the end - confirm callers pad
+ REP_RET
diff --git a/libavcodec/x86/takdsp_init.c b/libavcodec/x86/takdsp_init.c
new file mode 100644
index 0000000000..555d0649c9
--- /dev/null
+++ b/libavcodec/x86/takdsp_init.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/takdsp.h"
+#include "libavutil/x86/cpu.h"
+#include "config.h"
+
+void ff_tak_decorrelate_ls_sse2(int32_t *p1, int32_t *p2, int length); /* p2[i] += p1[i] */
+void ff_tak_decorrelate_sr_sse2(int32_t *p1, int32_t *p2, int length); /* p1[i] = p2[i] - p1[i] */
+void ff_tak_decorrelate_sm_sse2(int32_t *p1, int32_t *p2, int length); /* p1[i] -= p2[i] >> 1; p2[i] += p1[i] */
+void ff_tak_decorrelate_sf_sse4(int32_t *p1, int32_t *p2, int length, int dshift, int dfactor); /* p1[i] = ((dfactor * (p2[i] >> dshift) + 128) >> 8 << dshift) - p1[i] */
+
+av_cold void ff_takdsp_init_x86(TAKDSPContext *c)
+{
+#if HAVE_YASM
+    const int flags = av_get_cpu_flags();
+    /* Point the context at each assembly decorrelator this CPU can run. */
+    if (EXTERNAL_SSE4(flags)) { /* sf is the only mode with an SSE4 routine */
+        c->decorrelate_sf = ff_tak_decorrelate_sf_sse4;
+    }
+
+    if (EXTERNAL_SSE2(flags)) { /* ls, sr and sm all have SSE2 routines */
+        c->decorrelate_sm = ff_tak_decorrelate_sm_sse2;
+        c->decorrelate_sr = ff_tak_decorrelate_sr_sse2;
+        c->decorrelate_ls = ff_tak_decorrelate_ls_sse2;
+    }
+#endif /* HAVE_YASM */
+}