diff options
author | Paul B Mahol <onemda@gmail.com> | 2015-10-05 12:28:41 +0200 |
---|---|---|
committer | Paul B Mahol <onemda@gmail.com> | 2015-10-09 21:38:15 +0200 |
commit | 35af7add6fac7f926d0cbb5724f36f0fe5022488 (patch) | |
tree | 038c8d9a54b192093ed2c05b679a80ee6e0fa673 /libavcodec/x86 | |
parent | 2f4374fae135afeee84f987c8fab8cbad1c7fcc7 (diff) |
avcodec/takdec: add x86 SIMD for rest of decorrelation modes
Signed-off-by: Paul B Mahol <onemda@gmail.com>
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- | libavcodec/x86/Makefile | 2 | ||||
-rw-r--r-- | libavcodec/x86/takdsp.asm | 116 | ||||
-rw-r--r-- | libavcodec/x86/takdsp_init.c | 45 |
3 files changed, 163 insertions, 0 deletions
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index d8b091a266..a9d8032657 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -56,6 +56,7 @@ OBJS-$(CONFIG_PRORES_DECODER)          += x86/proresdsp_init.o
 OBJS-$(CONFIG_PRORES_LGPL_DECODER)     += x86/proresdsp_init.o
 OBJS-$(CONFIG_RV40_DECODER)            += x86/rv40dsp_init.o
 OBJS-$(CONFIG_SVQ1_ENCODER)            += x86/svq1enc_init.o
+OBJS-$(CONFIG_TAK_DECODER)             += x86/takdsp_init.o
 OBJS-$(CONFIG_TRUEHD_DECODER)          += x86/mlpdsp_init.o
 OBJS-$(CONFIG_TTA_DECODER)             += x86/ttadsp_init.o
 OBJS-$(CONFIG_V210_DECODER)            += x86/v210-init.o
@@ -152,6 +153,7 @@ YASM-OBJS-$(CONFIG_PRORES_DECODER)     += x86/proresdsp.o
 YASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o
 YASM-OBJS-$(CONFIG_RV40_DECODER)       += x86/rv40dsp.o
 YASM-OBJS-$(CONFIG_SVQ1_ENCODER)       += x86/svq1enc.o
+YASM-OBJS-$(CONFIG_TAK_DECODER)        += x86/takdsp.o
 YASM-OBJS-$(CONFIG_TRUEHD_DECODER)     += x86/mlpdsp.o
 YASM-OBJS-$(CONFIG_TTA_DECODER)        += x86/ttadsp.o
 YASM-OBJS-$(CONFIG_V210_ENCODER)       += x86/v210enc.o
diff --git a/libavcodec/x86/takdsp.asm b/libavcodec/x86/takdsp.asm
new file mode 100644
index 0000000000..d3c6a13f11
--- /dev/null
+++ b/libavcodec/x86/takdsp.asm
@@ -0,0 +1,116 @@
+;******************************************************************************
+;* TAK DSP SIMD optimizations
+;*
+;* Copyright (C) 2015 Paul B Mahol
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+pd_128: times 4 dd 128                      ; bias added before the >> 8 in decorrelate_sf
+
+SECTION .text
+
+INIT_XMM sse2
+cglobal tak_decorrelate_ls, 3, 3, 2, p1, p2, length ; p2[i] += p1[i]
+    shl                     lengthd, 2      ; sample count -> byte count (4-byte samples)
+    add                     p1q, lengthq    ; point both arrays at their end ...
+    add                     p2q, lengthq
+    neg                     lengthq         ; ... and index with a negative offset up to 0
+.loop:
+    mova                    m0, [p1q+lengthq+mmsize*0]
+    mova                    m1, [p1q+lengthq+mmsize*1]
+    paddd                   m0, [p2q+lengthq+mmsize*0]
+    paddd                   m1, [p2q+lengthq+mmsize*1]
+    mova                    [p2q+lengthq+mmsize*0], m0
+    mova                    [p2q+lengthq+mmsize*1], m1
+    add                     lengthq, mmsize*2 ; two vectors per iteration
+    jl .loop
+    REP_RET
+
+cglobal tak_decorrelate_sr, 3, 3, 2, p1, p2, length ; p1[i] = p2[i] - p1[i]
+    shl                     lengthd, 2
+    add                     p1q, lengthq
+    add                     p2q, lengthq
+    neg                     lengthq
+
+.loop:
+    mova                    m0, [p2q+lengthq+mmsize*0]
+    mova                    m1, [p2q+lengthq+mmsize*1]
+    psubd                   m0, [p1q+lengthq+mmsize*0]
+    psubd                   m1, [p1q+lengthq+mmsize*1]
+    mova                    [p1q+lengthq+mmsize*0], m0
+    mova                    [p1q+lengthq+mmsize*1], m1
+    add                     lengthq, mmsize*2
+    jl .loop
+    REP_RET
+
+cglobal tak_decorrelate_sm, 3, 3, 6, p1, p2, length ; a = p1[i] - (p2[i] >> 1); p1[i] = a; p2[i] += a
+    shl                     lengthd, 2
+    add                     p1q, lengthq
+    add                     p2q, lengthq
+    neg                     lengthq
+
+.loop:
+    mova                    m0, [p1q+lengthq]
+    mova                    m1, [p2q+lengthq]
+    mova                    m3, [p1q+lengthq+mmsize]
+    mova                    m4, [p2q+lengthq+mmsize]
+    mova                    m2, m1
+    mova                    m5, m4
+    psrad                   m2, 1           ; samples are signed int32: the shift must keep the sign
+    psrad                   m5, 1           ; (psrld would turn negative samples into large positives)
+    psubd                   m0, m2
+    psubd                   m3, m5
+    paddd                   m1, m0
+    paddd                   m4, m3
+    mova                    [p1q+lengthq], m0
+    mova                    [p2q+lengthq], m1
+    mova                    [p1q+lengthq+mmsize], m3
+    mova                    [p2q+lengthq+mmsize], m4
+    add                     lengthq, mmsize*2
+    jl .loop
+    REP_RET
+
+INIT_XMM sse4
+cglobal tak_decorrelate_sf, 3, 3, 5, p1, p2, length, dshift, dfactor ; sse4 needed for pmulld
+    shl                     lengthd, 2
+    add                     p1q, lengthq
+    add                     p2q, lengthq
+    neg                     lengthq
+
+    movd                    m2, dshiftm     ; shift count, used by the xmm-count shift forms below
+    movd                    m3, dfactorm
+    pshufd                  m3, m3, 0       ; broadcast dfactor to all four lanes
+    mova                    m4, [pd_128]
+
+.loop:
+    mova                    m0, [p1q+lengthq]
+    mova                    m1, [p2q+lengthq]
+    psrad                   m1, m2          ; signed samples: arithmetic, not logical, shift
+    pmulld                  m1, m3
+    paddd                   m1, m4
+    psrad                   m1, 8           ; product may be negative: keep the sign here too
+    pslld                   m1, m2
+    psubd                   m1, m0          ; p1[i] = scaled p2[i] - p1[i]
+    mova                    [p1q+lengthq], m1
+    add                     lengthq, mmsize ; one vector per iteration
+    jl .loop
+    REP_RET
diff --git a/libavcodec/x86/takdsp_init.c b/libavcodec/x86/takdsp_init.c
new file mode 100644
index 0000000000..555d0649c9
--- /dev/null
+++ b/libavcodec/x86/takdsp_init.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/takdsp.h"
+#include "libavutil/x86/cpu.h"
+#include "config.h"
+
+void ff_tak_decorrelate_ls_sse2(int32_t *p1, int32_t *p2, int length); /* p2[i] += p1[i] */
+void ff_tak_decorrelate_sr_sse2(int32_t *p1, int32_t *p2, int length); /* p1[i] = p2[i] - p1[i] */
+void ff_tak_decorrelate_sm_sse2(int32_t *p1, int32_t *p2, int length); /* updates both p1 and p2 */
+void ff_tak_decorrelate_sf_sse4(int32_t *p1, int32_t *p2, int length, int dshift, int dfactor);
+
+av_cold void ff_takdsp_init_x86(TAKDSPContext *c) /* install the x86 kernels the running CPU supports */
+{
+#if HAVE_YASM
+    int cpu_flags = av_get_cpu_flags();
+
+    if (EXTERNAL_SSE2(cpu_flags)) {
+        c->decorrelate_ls = ff_tak_decorrelate_ls_sse2;
+        c->decorrelate_sr = ff_tak_decorrelate_sr_sse2;
+        c->decorrelate_sm = ff_tak_decorrelate_sm_sse2;
+    }
+
+    if (EXTERNAL_SSE4(cpu_flags)) { /* sf is only provided as an SSE4 kernel */
+        c->decorrelate_sf = ff_tak_decorrelate_sf_sse4;
+    }
+#endif
+}