From 1a094af638281295bf087945923d258b5acd1ab1 Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Sat, 30 Jan 2016 14:45:28 +0100 Subject: fft: Split MDCT bits off from FFT --- libavcodec/x86/Makefile | 1 + libavcodec/x86/fft.asm | 128 +++++++++++++++++++++++---------------------- libavcodec/x86/fft.h | 8 --- libavcodec/x86/fft_init.c | 7 --- libavcodec/x86/mdct.h | 32 ++++++++++++ libavcodec/x86/mdct_init.c | 51 ++++++++++++++++++ 6 files changed, 150 insertions(+), 77 deletions(-) create mode 100644 libavcodec/x86/mdct.h create mode 100644 libavcodec/x86/mdct_init.c (limited to 'libavcodec/x86') diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index 8ae1283141..1a2cfb1d86 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -19,6 +19,7 @@ OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp_init.o OBJS-$(CONFIG_HUFFYUVENCDSP) += x86/huffyuvencdsp_mmx.o OBJS-$(CONFIG_IDCTDSP) += x86/idctdsp_init.o OBJS-$(CONFIG_LPC) += x86/lpc.o +OBJS-$(CONFIG_MDCT) += x86/mdct_init.o OBJS-$(CONFIG_ME_CMP) += x86/me_cmp_init.o OBJS-$(CONFIG_MPEGAUDIODSP) += x86/mpegaudiodsp.o OBJS-$(CONFIG_MPEGVIDEO) += x86/mpegvideo.o \ diff --git a/libavcodec/x86/fft.asm b/libavcodec/x86/fft.asm index d3be72e576..ef007f4eec 100644 --- a/libavcodec/x86/fft.asm +++ b/libavcodec/x86/fft.asm @@ -655,68 +655,6 @@ cglobal fft_permute, 2,7,1 jl .loopcopy REP_RET -%macro IMDCT_CALC_FUNC 0 -cglobal imdct_calc, 3,5,3 - mov r3d, [r0 + FFTContext.mdctsize] - mov r4, [r0 + FFTContext.imdcthalf] - add r1, r3 - PUSH r3 - PUSH r1 -%if ARCH_X86_32 - push r2 - push r1 - push r0 -%else - sub rsp, 8+32*WIN64 ; allocate win64 shadow space -%endif - call r4 -%if ARCH_X86_32 - add esp, 12 -%else - add rsp, 8+32*WIN64 -%endif - POP r1 - POP r3 - lea r0, [r1 + 2*r3] - mov r2, r3 - sub r3, mmsize - neg r2 - mova m2, [ps_m1m1m1m1] -.loop: -%if mmsize == 8 - PSWAPD m0, [r1 + r3] - PSWAPD m1, [r0 + r2] - pxor m0, m2 -%else - mova m0, [r1 + r3] - mova m1, [r0 + r2] - shufps m0, m0, 0x1b - shufps m1, m1, 0x1b - xorps m0, m2 -%endif - mova [r0 + r3], m1 - mova [r1 + r2], m0 - sub r3, mmsize - add r2, mmsize - jl .loop -%if cpuflag(3dnow) - femms - RET -%else - REP_RET -%endif -%endmacro - -%if ARCH_X86_32 -INIT_MMX 3dnow -IMDCT_CALC_FUNC -INIT_MMX 3dnowext -IMDCT_CALC_FUNC -%endif - -INIT_XMM sse -IMDCT_CALC_FUNC - %if ARCH_X86_32 INIT_MMX 3dnow %define mulps pfmul @@ -791,6 +729,70 @@ DECL_FFT 4 DECL_FFT 4, _interleave %endif +%if CONFIG_MDCT + +%macro IMDCT_CALC_FUNC 0 +cglobal imdct_calc, 3,5,3 + mov r3d, [r0 + FFTContext.mdctsize] + mov r4, [r0 + FFTContext.imdcthalf] + add r1, r3 + PUSH r3 + PUSH r1 +%if ARCH_X86_32 + push r2 + push r1 + push r0 +%else + sub rsp, 8+32*WIN64 ; allocate win64 shadow space +%endif + call r4 +%if ARCH_X86_32 + add esp, 12 +%else + add rsp, 8+32*WIN64 +%endif + POP r1 + POP r3 + lea r0, [r1 + 2*r3] + mov r2, r3 + sub r3, mmsize + neg r2 + mova m2, [ps_m1m1m1m1] +.loop: +%if mmsize == 8 + PSWAPD m0, [r1 + r3] + PSWAPD m1, [r0 + r2] + pxor m0, m2 +%else + mova m0, [r1 + r3] + mova m1, [r0 + r2] + shufps m0, m0, 0x1b + shufps m1, m1, 0x1b + xorps m0, m2 +%endif + mova [r0 + r3], m1 + mova [r1 + r2], m0 + sub r3, mmsize + add r2, mmsize + jl .loop +%if cpuflag(3dnow) + femms + RET +%else + REP_RET +%endif +%endmacro + +%if ARCH_X86_32 +INIT_MMX 3dnow +IMDCT_CALC_FUNC +INIT_MMX 3dnowext +IMDCT_CALC_FUNC +%endif + +INIT_XMM sse +IMDCT_CALC_FUNC + INIT_XMM sse %undef mulps %undef addps @@ -1081,3 +1083,5 @@ DECL_IMDCT POSROTATESHUF_3DNOW INIT_YMM avx DECL_IMDCT POSROTATESHUF_AVX + +%endif ; CONFIG_MDCT diff --git a/libavcodec/x86/fft.h b/libavcodec/x86/fft.h index a604956836..94405d0cb4 100644 --- a/libavcodec/x86/fft.h +++ b/libavcodec/x86/fft.h @@ -27,12 +27,4 @@ void ff_fft_calc_sse(FFTContext *s, FFTComplex *z); void ff_fft_calc_3dnow(FFTContext *s, FFTComplex *z); void ff_fft_calc_3dnowext(FFTContext *s, FFTComplex *z); -void ff_imdct_calc_3dnow(FFTContext *s, FFTSample *output, const FFTSample *input); -void ff_imdct_half_3dnow(FFTContext *s, FFTSample *output, const FFTSample *input); -void ff_imdct_calc_3dnowext(FFTContext *s, FFTSample *output, const FFTSample *input); -void ff_imdct_half_3dnowext(FFTContext *s, FFTSample *output, const FFTSample *input); -void ff_imdct_calc_sse(FFTContext *s, FFTSample *output, const FFTSample *input); -void ff_imdct_half_sse(FFTContext *s, FFTSample *output, const FFTSample *input); -void ff_imdct_half_avx(FFTContext *s, FFTSample *output, const FFTSample *input); - #endif /* AVCODEC_X86_FFT_H */ diff --git a/libavcodec/x86/fft_init.c b/libavcodec/x86/fft_init.c index 2234d76f9d..ed1290997b 100644 --- a/libavcodec/x86/fft_init.c +++ b/libavcodec/x86/fft_init.c @@ -30,28 +30,21 @@ av_cold void ff_fft_init_x86(FFTContext *s) #if ARCH_X86_32 if (EXTERNAL_AMD3DNOW(cpu_flags)) { - s->imdct_calc = ff_imdct_calc_3dnow; - s->imdct_half = ff_imdct_half_3dnow; s->fft_calc = ff_fft_calc_3dnow; } if (EXTERNAL_AMD3DNOWEXT(cpu_flags)) { - s->imdct_calc = ff_imdct_calc_3dnowext; - s->imdct_half = ff_imdct_half_3dnowext; s->fft_calc = ff_fft_calc_3dnowext; } #endif /* ARCH_X86_32 */ if (EXTERNAL_SSE(cpu_flags)) { - s->imdct_calc = ff_imdct_calc_sse; - s->imdct_half = ff_imdct_half_sse; s->fft_permute = ff_fft_permute_sse; s->fft_calc = ff_fft_calc_sse; s->fft_permutation = FF_FFT_PERM_SWAP_LSBS; } if (EXTERNAL_AVX_FAST(cpu_flags) && s->nbits >= 5) { - s->imdct_half = ff_imdct_half_avx; s->fft_calc = ff_fft_calc_avx; s->fft_permutation = FF_FFT_PERM_AVX; } diff --git a/libavcodec/x86/mdct.h b/libavcodec/x86/mdct.h new file mode 100644 index 0000000000..cc107cb86a --- /dev/null +++ b/libavcodec/x86/mdct.h @@ -0,0 +1,32 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_X86_MDCT_H +#define AVCODEC_X86_MDCT_H + +#include "libavcodec/fft.h" + +void ff_imdct_calc_3dnow(FFTContext *s, FFTSample *output, const FFTSample *input); +void ff_imdct_half_3dnow(FFTContext *s, FFTSample *output, const FFTSample *input); +void ff_imdct_calc_3dnowext(FFTContext *s, FFTSample *output, const FFTSample *input); +void ff_imdct_half_3dnowext(FFTContext *s, FFTSample *output, const FFTSample *input); +void ff_imdct_calc_sse(FFTContext *s, FFTSample *output, const FFTSample *input); +void ff_imdct_half_sse(FFTContext *s, FFTSample *output, const FFTSample *input); +void ff_imdct_half_avx(FFTContext *s, FFTSample *output, const FFTSample *input); + +#endif /* AVCODEC_X86_MDCT_H */ diff --git a/libavcodec/x86/mdct_init.c b/libavcodec/x86/mdct_init.c new file mode 100644 index 0000000000..db642d863d --- /dev/null +++ b/libavcodec/x86/mdct_init.c @@ -0,0 +1,51 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" + +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavutil/x86/cpu.h" + +#include "mdct.h" + +av_cold void ff_mdct_init_x86(FFTContext *s) +{ + int cpu_flags = av_get_cpu_flags(); + +#if ARCH_X86_32 + if (EXTERNAL_AMD3DNOW(cpu_flags)) { + s->imdct_calc = ff_imdct_calc_3dnow; + s->imdct_half = ff_imdct_half_3dnow; + } + + if (EXTERNAL_AMD3DNOWEXT(cpu_flags)) { + s->imdct_calc = ff_imdct_calc_3dnowext; + s->imdct_half = ff_imdct_half_3dnowext; + } +#endif /* ARCH_X86_32 */ + + if (EXTERNAL_SSE(cpu_flags)) { + s->imdct_calc = ff_imdct_calc_sse; + s->imdct_half = ff_imdct_half_sse; + } + + if (EXTERNAL_AVX_FAST(cpu_flags) && s->nbits >= 5) { + s->imdct_half = ff_imdct_half_avx; + } +} -- cgit v1.2.3