diff options
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- | libavcodec/x86/Makefile | 2 | ||||
-rw-r--r-- | libavcodec/x86/audiodsp.asm | 133 | ||||
-rw-r--r-- | libavcodec/x86/audiodsp.h | 25 | ||||
-rw-r--r-- | libavcodec/x86/audiodsp_init.c | 66 | ||||
-rw-r--r-- | libavcodec/x86/dsputil.asm | 109 | ||||
-rw-r--r-- | libavcodec/x86/dsputil_init.c | 39 | ||||
-rw-r--r-- | libavcodec/x86/dsputil_mmx.c | 1 | ||||
-rw-r--r-- | libavcodec/x86/dsputil_x86.h | 4 |
8 files changed, 226 insertions, 153 deletions
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index fa03f7ce26..a08c525e30 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -2,6 +2,7 @@ OBJS += x86/constants.o \ x86/fmtconvert_init.o \ OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp_init.o +OBJS-$(CONFIG_AUDIODSP) += x86/audiodsp_init.o OBJS-$(CONFIG_BLOCKDSP) += x86/blockdsp_mmx.o OBJS-$(CONFIG_DCT) += x86/dct_init.o OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_init.o @@ -69,6 +70,7 @@ YASM-OBJS += x86/deinterlace.o \ x86/fmtconvert.o \ YASM-OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp.o +YASM-OBJS-$(CONFIG_AUDIODSP) += x86/audiodsp.o YASM-OBJS-$(CONFIG_BLOCKDSP) += x86/blockdsp.o YASM-OBJS-$(CONFIG_DCT) += x86/dct32.o YASM-OBJS-$(CONFIG_DIRAC_DECODER) += x86/diracdsp_mmx.o x86/diracdsp_yasm.o\ diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm new file mode 100644 index 0000000000..83f9bb6f45 --- /dev/null +++ b/libavcodec/x86/audiodsp.asm @@ -0,0 +1,133 @@ +;****************************************************************************** +;* optimized audio functions +;* Copyright (c) 2008 Loren Merritt +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "libavutil/x86/x86util.asm" + +SECTION_TEXT + +%macro SCALARPRODUCT 0 +; int ff_scalarproduct_int16(int16_t *v1, int16_t *v2, int order) +cglobal scalarproduct_int16, 3,3,3, v1, v2, order + shl orderq, 1 + add v1q, orderq + add v2q, orderq + neg orderq + pxor m2, m2 +.loop: + movu m0, [v1q + orderq] + movu m1, [v1q + orderq + mmsize] + pmaddwd m0, [v2q + orderq] + pmaddwd m1, [v2q + orderq + mmsize] + paddd m2, m0 + paddd m2, m1 + add orderq, mmsize*2 + jl .loop + HADDD m2, m0 + movd eax, m2 +%if mmsize == 8 + emms +%endif + RET +%endmacro + +INIT_MMX mmxext +SCALARPRODUCT +INIT_XMM sse2 +SCALARPRODUCT + + +;----------------------------------------------------------------------------- +; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min, +; int32_t max, unsigned int len) +;----------------------------------------------------------------------------- + +; %1 = number of xmm registers used +; %2 = number of inline load/process/store loops per asm loop +; %3 = process 4*mmsize (%3=0) or 8*mmsize (%3=1) bytes per loop +; %4 = CLIPD function takes min/max as float instead of int (CLIPD_SSE2) +; %5 = suffix +%macro VECTOR_CLIP_INT32 4-5 +cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len +%if %4 + cvtsi2ss m4, minm + cvtsi2ss m5, maxm +%else + movd m4, minm + movd m5, maxm +%endif + SPLATD m4 + SPLATD m5 +.loop: +%assign %%i 0 +%rep %2 + mova m0, [srcq+mmsize*(0+%%i)] + mova m1, [srcq+mmsize*(1+%%i)] + mova m2, [srcq+mmsize*(2+%%i)] + mova m3, [srcq+mmsize*(3+%%i)] +%if %3 + mova m7, [srcq+mmsize*(4+%%i)] + mova m8, [srcq+mmsize*(5+%%i)] + mova m9, [srcq+mmsize*(6+%%i)] + mova m10, [srcq+mmsize*(7+%%i)] +%endif + CLIPD m0, m4, m5, m6 + CLIPD m1, m4, m5, m6 + CLIPD m2, m4, m5, m6 + CLIPD m3, m4, m5, m6 +%if %3 + CLIPD m7, m4, m5, m6 + CLIPD m8, m4, m5, m6 + CLIPD m9, m4, m5, m6 + CLIPD m10, m4, m5, m6 +%endif + mova [dstq+mmsize*(0+%%i)], m0 + mova [dstq+mmsize*(1+%%i)], m1 + mova [dstq+mmsize*(2+%%i)], m2 + mova [dstq+mmsize*(3+%%i)], m3 +%if %3 + mova [dstq+mmsize*(4+%%i)], m7 + mova [dstq+mmsize*(5+%%i)], m8 + mova [dstq+mmsize*(6+%%i)], m9 + mova [dstq+mmsize*(7+%%i)], m10 +%endif +%assign %%i %%i+4*(%3+1) +%endrep + add srcq, mmsize*4*(%2+%3) + add dstq, mmsize*4*(%2+%3) + sub lend, mmsize*(%2+%3) + jg .loop + REP_RET +%endmacro + +INIT_MMX mmx +%define CLIPD CLIPD_MMX +VECTOR_CLIP_INT32 0, 1, 0, 0 +INIT_XMM sse2 +VECTOR_CLIP_INT32 6, 1, 0, 0, _int +%define CLIPD CLIPD_SSE2 +VECTOR_CLIP_INT32 6, 2, 0, 1 +INIT_XMM sse4 +%define CLIPD CLIPD_SSE41 +%ifdef m8 +VECTOR_CLIP_INT32 11, 1, 1, 0 +%else +VECTOR_CLIP_INT32 6, 1, 0, 0 +%endif diff --git a/libavcodec/x86/audiodsp.h b/libavcodec/x86/audiodsp.h new file mode 100644 index 0000000000..35f9f1485b --- /dev/null +++ b/libavcodec/x86/audiodsp.h @@ -0,0 +1,25 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_X86_AUDIODSP_H +#define AVCODEC_X86_AUDIODSP_H + +void ff_vector_clipf_sse(float *dst, const float *src, + float min, float max, int len); + +#endif /* AVCODEC_X86_AUDIODSP_H */ diff --git a/libavcodec/x86/audiodsp_init.c b/libavcodec/x86/audiodsp_init.c new file mode 100644 index 0000000000..d586bf6c04 --- /dev/null +++ b/libavcodec/x86/audiodsp_init.c @@ -0,0 +1,66 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stdint.h> + +#include "config.h" +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavutil/x86/asm.h" +#include "libavutil/x86/cpu.h" +#include "libavcodec/audiodsp.h" +#include "audiodsp.h" + +int32_t ff_scalarproduct_int16_mmxext(const int16_t *v1, const int16_t *v2, + int order); +int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2, + int order); + +void ff_vector_clip_int32_mmx(int32_t *dst, const int32_t *src, + int32_t min, int32_t max, unsigned int len); +void ff_vector_clip_int32_sse2(int32_t *dst, const int32_t *src, + int32_t min, int32_t max, unsigned int len); +void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src, + int32_t min, int32_t max, unsigned int len); +void ff_vector_clip_int32_sse4(int32_t *dst, const int32_t *src, + int32_t min, int32_t max, unsigned int len); + +av_cold void ff_audiodsp_init_x86(AudioDSPContext *c) +{ + int cpu_flags = av_get_cpu_flags(); + + if (EXTERNAL_MMX(cpu_flags)) + c->vector_clip_int32 = ff_vector_clip_int32_mmx; + + if (EXTERNAL_MMXEXT(cpu_flags)) + c->scalarproduct_int16 = ff_scalarproduct_int16_mmxext; + + if (EXTERNAL_SSE(cpu_flags)) + c->vector_clipf = ff_vector_clipf_sse; + + if (EXTERNAL_SSE2(cpu_flags)) { + c->scalarproduct_int16 = ff_scalarproduct_int16_sse2; + if (cpu_flags & AV_CPU_FLAG_ATOM) + c->vector_clip_int32 = ff_vector_clip_int32_int_sse2; + else + c->vector_clip_int32 = ff_vector_clip_int32_sse2; + } + + if (EXTERNAL_SSE4(cpu_flags)) + c->vector_clip_int32 = ff_vector_clip_int32_sse4; +} diff --git a/libavcodec/x86/dsputil.asm b/libavcodec/x86/dsputil.asm index 3bb5d9cbfe..e261c0fcc7 100644 --- a/libavcodec/x86/dsputil.asm +++ b/libavcodec/x86/dsputil.asm @@ -30,115 +30,6 @@ cextern pb_80 SECTION_TEXT -%macro SCALARPRODUCT 0 -; int ff_scalarproduct_int16(int16_t *v1, int16_t *v2, int order) -cglobal scalarproduct_int16, 3,3,3, v1, v2, order - shl orderq, 1 - add v1q, orderq - add v2q, orderq - neg orderq - pxor m2, m2 -.loop: - movu m0, [v1q + orderq] - movu m1, [v1q + orderq + mmsize] - pmaddwd m0, [v2q + orderq] - pmaddwd m1, [v2q + orderq + mmsize] - paddd m2, m0 - paddd m2, m1 - add orderq, mmsize*2 - jl .loop - HADDD m2, m0 - movd eax, m2 -%if mmsize == 8 - emms -%endif - RET -%endmacro - -INIT_MMX mmxext -SCALARPRODUCT -INIT_XMM sse2 -SCALARPRODUCT - - -;----------------------------------------------------------------------------- -; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min, -; int32_t max, unsigned int len) -;----------------------------------------------------------------------------- - -; %1 = number of xmm registers used -; %2 = number of inline load/process/store loops per asm loop -; %3 = process 4*mmsize (%3=0) or 8*mmsize (%3=1) bytes per loop -; %4 = CLIPD function takes min/max as float instead of int (CLIPD_SSE2) -; %5 = suffix -%macro VECTOR_CLIP_INT32 4-5 -cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len -%if %4 - cvtsi2ss m4, minm - cvtsi2ss m5, maxm -%else - movd m4, minm - movd m5, maxm -%endif - SPLATD m4 - SPLATD m5 -.loop: -%assign %%i 0 -%rep %2 - mova m0, [srcq+mmsize*(0+%%i)] - mova m1, [srcq+mmsize*(1+%%i)] - mova m2, [srcq+mmsize*(2+%%i)] - mova m3, [srcq+mmsize*(3+%%i)] -%if %3 - mova m7, [srcq+mmsize*(4+%%i)] - mova m8, [srcq+mmsize*(5+%%i)] - mova m9, [srcq+mmsize*(6+%%i)] - mova m10, [srcq+mmsize*(7+%%i)] -%endif - CLIPD m0, m4, m5, m6 - CLIPD m1, m4, m5, m6 - CLIPD m2, m4, m5, m6 - CLIPD m3, m4, m5, m6 -%if %3 - CLIPD m7, m4, m5, m6 - CLIPD m8, m4, m5, m6 - CLIPD m9, m4, m5, m6 - CLIPD m10, m4, m5, m6 -%endif - mova [dstq+mmsize*(0+%%i)], m0 - mova [dstq+mmsize*(1+%%i)], m1 - mova [dstq+mmsize*(2+%%i)], m2 - mova [dstq+mmsize*(3+%%i)], m3 -%if %3 - mova [dstq+mmsize*(4+%%i)], m7 - mova [dstq+mmsize*(5+%%i)], m8 - mova [dstq+mmsize*(6+%%i)], m9 - mova [dstq+mmsize*(7+%%i)], m10 -%endif -%assign %%i %%i+4*(%3+1) -%endrep - add srcq, mmsize*4*(%2+%3) - add dstq, mmsize*4*(%2+%3) - sub lend, mmsize*(%2+%3) - jg .loop - REP_RET -%endmacro - -INIT_MMX mmx -%define CLIPD CLIPD_MMX -VECTOR_CLIP_INT32 0, 1, 0, 0 -INIT_XMM sse2 -VECTOR_CLIP_INT32 6, 1, 0, 0, _int -%define CLIPD CLIPD_SSE2 -VECTOR_CLIP_INT32 6, 2, 0, 1 -INIT_XMM sse4 -%define CLIPD CLIPD_SSE41 -%ifdef m8 -VECTOR_CLIP_INT32 11, 1, 1, 0 -%else -VECTOR_CLIP_INT32 6, 1, 0, 0 -%endif - ; %1 = aligned/unaligned %macro BSWAP_LOOPS 1 mov r3, r2 diff --git a/libavcodec/x86/dsputil_init.c b/libavcodec/x86/dsputil_init.c index 5c12364c23..ed58598810 100644 --- a/libavcodec/x86/dsputil_init.c +++ b/libavcodec/x86/dsputil_init.c @@ -29,23 +29,9 @@ #include "dsputil_x86.h" #include "idct_xvid.h" -int32_t ff_scalarproduct_int16_mmxext(const int16_t *v1, const int16_t *v2, - int order); -int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2, - int order); - void ff_bswap32_buf_ssse3(uint32_t *dst, const uint32_t *src, int w); void ff_bswap32_buf_sse2(uint32_t *dst, const uint32_t *src, int w); -void ff_vector_clip_int32_mmx(int32_t *dst, const int32_t *src, - int32_t min, int32_t max, unsigned int len); -void ff_vector_clip_int32_sse2(int32_t *dst, const int32_t *src, - int32_t min, int32_t max, unsigned int len); -void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src, - int32_t min, int32_t max, unsigned int len); -void ff_vector_clip_int32_sse4(int32_t *dst, const int32_t *src, - int32_t min, int32_t max, unsigned int len); - static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, int cpu_flags, unsigned high_bit_depth) { @@ -81,7 +67,6 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, #endif /* HAVE_MMX_INLINE */ #if HAVE_MMX_EXTERNAL - c->vector_clip_int32 = ff_vector_clip_int32_mmx; c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx; #endif /* HAVE_MMX_EXTERNAL */ } @@ -96,19 +81,12 @@ static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx, c->idct = ff_idct_xvid_mmxext; } #endif /* HAVE_MMXEXT_INLINE */ - -#if HAVE_MMXEXT_EXTERNAL - c->scalarproduct_int16 = ff_scalarproduct_int16_mmxext; -#endif /* HAVE_MMXEXT_EXTERNAL */ } static av_cold void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, int cpu_flags, unsigned high_bit_depth) { #if HAVE_YASM -#if HAVE_SSE_EXTERNAL - c->vector_clipf = ff_vector_clipf_sse; -#endif #if HAVE_INLINE_ASM && CONFIG_VIDEODSP c->gmc = ff_gmc_sse; #endif @@ -128,12 +106,6 @@ static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx, #endif /* HAVE_SSE2_INLINE */ #if HAVE_SSE2_EXTERNAL - c->scalarproduct_int16 = ff_scalarproduct_int16_sse2; - if (cpu_flags & AV_CPU_FLAG_ATOM) { - c->vector_clip_int32 = ff_vector_clip_int32_int_sse2; - } else { - c->vector_clip_int32 = ff_vector_clip_int32_sse2; - } c->bswap_buf = ff_bswap32_buf_sse2; c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_sse2; #endif /* HAVE_SSE2_EXTERNAL */ @@ -147,14 +119,6 @@ static av_cold void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx, #endif /* HAVE_SSSE3_EXTERNAL */ } -static av_cold void dsputil_init_sse4(DSPContext *c, AVCodecContext *avctx, - int cpu_flags, unsigned high_bit_depth) -{ -#if HAVE_SSE4_EXTERNAL - c->vector_clip_int32 = ff_vector_clip_int32_sse4; -#endif /* HAVE_SSE4_EXTERNAL */ -} - av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx, unsigned high_bit_depth) { @@ -175,9 +139,6 @@ av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx, if (EXTERNAL_SSSE3(cpu_flags)) dsputil_init_ssse3(c, avctx, cpu_flags, high_bit_depth); - if (EXTERNAL_SSE4(cpu_flags)) - dsputil_init_sse4(c, avctx, cpu_flags, high_bit_depth); - if (CONFIG_ENCODERS) ff_dsputilenc_init_mmx(c, avctx, high_bit_depth); } diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index 3f187b70b5..54aba38b53 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -28,7 +28,6 @@ #include "libavutil/x86/asm.h" #include "libavcodec/pixels.h" #include "libavcodec/videodsp.h" -#include "constants.h" #include "dsputil_x86.h" #include "inline_asm.h" diff --git a/libavcodec/x86/dsputil_x86.h b/libavcodec/x86/dsputil_x86.h index e723df1937..b5d7291f28 100644 --- a/libavcodec/x86/dsputil_x86.h +++ b/libavcodec/x86/dsputil_x86.h @@ -53,10 +53,6 @@ void ff_gmc_sse(uint8_t *dst, uint8_t *src, int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height); -void ff_vector_clipf_sse(float *dst, const float *src, - float min, float max, int len); - - void ff_mmx_idct(int16_t *block); void ff_mmxext_idct(int16_t *block); |