From 6aff2042d6483d6b9f07ef5165c8e31a871efee8 Mon Sep 17 00:00:00 2001
From: Ting Fu
Date: Wed, 18 Sep 2019 15:05:34 +0800
Subject: avfilter/x86/vf_eq: Change inline assembly into nasm code

Signed-off-by: Ting Fu
---
 libavfilter/x86/Makefile | 3 +-
 libavfilter/x86/vf_eq.asm | 82 +++++++++++++++++++++++++++++++++++++
 libavfilter/x86/vf_eq.c | 96 --------------------------------------------
 libavfilter/x86/vf_eq_init.c | 54 +++++++++++++++++++++++++
 4 files changed, 138 insertions(+), 97 deletions(-)
 create mode 100644 libavfilter/x86/vf_eq.asm
 delete mode 100644 libavfilter/x86/vf_eq.c
 create mode 100644 libavfilter/x86/vf_eq_init.c

(limited to 'libavfilter')

diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile
index f12993e606..f2922c4597 100644
--- a/libavfilter/x86/Makefile
+++ b/libavfilter/x86/Makefile
@@ -6,7 +6,7 @@ OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend_init.o
 OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif_init.o
 OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp_init.o
 OBJS-$(CONFIG_CONVOLUTION_FILTER) += x86/vf_convolution_init.o
-OBJS-$(CONFIG_EQ_FILTER) += x86/vf_eq.o
+OBJS-$(CONFIG_EQ_FILTER) += x86/vf_eq_init.o
 OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp_init.o
 OBJS-$(CONFIG_GBLUR_FILTER) += x86/vf_gblur_init.o
 OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun_init.o
@@ -43,6 +43,7 @@ X86ASM-OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend.o
 X86ASM-OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif.o
 X86ASM-OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp.o
 X86ASM-OBJS-$(CONFIG_CONVOLUTION_FILTER) += x86/vf_convolution.o
+X86ASM-OBJS-$(CONFIG_EQ_FILTER) += x86/vf_eq.o
 X86ASM-OBJS-$(CONFIG_FRAMERATE_FILTER) += x86/vf_framerate.o
 X86ASM-OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp.o
 X86ASM-OBJS-$(CONFIG_GBLUR_FILTER) += x86/vf_gblur.o
diff --git a/libavfilter/x86/vf_eq.asm b/libavfilter/x86/vf_eq.asm
new file mode 100644
index 0000000000..8460342896
--- /dev/null
+++ b/libavfilter/x86/vf_eq.asm
@@ -0,0 +1,82 @@
+;*****************************************************************************
+;* x86-optimized functions for eq filter
+;*
+;* Original MPlayer filters by Richard Felker.
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or modify
+;* it under the terms of the GNU General Public License as published by
+;* the Free Software Foundation; either version 2 of the License, or
+;* (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;* GNU General Public License for more details.
+;*
+;* You should have received a copy of the GNU General Public License along
+;* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+;* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+;*****************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION .text
+
+INIT_MMX mmxext
+; void ff_process_one_line_mmxext(const uint8_t *src, uint8_t *dst,
+;                                 short contrast, short brightness, int w)
+; Per byte: dst = saturate_u8(((src * contrast) >> 12) + brightness), for w bytes.
+cglobal process_one_line, 5, 7, 5, src, dst, contrast, brightness, w
+    movd m3, contrastd
+    movd m4, brightnessd
+    movsx r5d, contrastw              ; sign-extended scalar copies for the tail loop,
+    movsx r6d, brightnessw            ; taken before the argument registers are renamed
+    SPLATW m3, m3, 0                  ; m3 = contrast in every word lane
+    SPLATW m4, m4, 0                  ; m4 = brightness in every word lane
+
+    DEFINE_ARGS src, dst, tmp, scalar, w
+    pxor m0, m0                       ; zero: high half for unpack, filler for pack
+    mov scalard, wd
+    and scalard, mmsize-1             ; scalar = w % mmsize, bytes left for the tail loop
+    sar wd, 3                         ; w = number of full 8-byte vectors
+    cmp wd, 1
+    jl .loop1                         ; no full vector: tail only
+
+.loop0:
+    movu m1, [srcq]
+    mova m2, m1
+    punpcklbw m1, m0                  ; low 4 bytes -> words
+    punpckhbw m2, m0                  ; high 4 bytes -> words
+    psllw m1, 4                       ; pre-scale by 16 so the high half kept by
+    psllw m2, 4                       ; pmulhw equals (src * contrast) >> 12
+    pmulhw m1, m3
+    pmulhw m2, m3
+    paddw m1, m4
+    paddw m2, m4
+    packuswb m1, m2                   ; saturate words back to unsigned bytes
+    movu [dstq], m1
+    add srcq, mmsize
+    add dstq, mmsize
+    sub wd, 1                         ; sub already sets ZF for the branch
+    jne .loop0
+
+.loop1:
+    cmp scalard, 0
+    je .end
+    movzx tmpd, byte [srcq]
+    imul tmpd, r5d
+    sar tmpd, 12
+    add tmpd, r6d
+    movd m1, tmpd
+    packuswb m1, m0                   ; reuse the pack saturation to clamp to 0..255
+    movd tmpd, m1
+    mov [dstq], tmpb
+    inc srcq
+    inc dstq
+    dec scalard
+    jmp .loop1
+
+.end:
+    RET
diff --git 
a/libavfilter/x86/vf_eq.c b/libavfilter/x86/vf_eq.c deleted file mode 100644 index 16f399505f..0000000000 --- a/libavfilter/x86/vf_eq.c +++ /dev/null @@ -1,96 +0,0 @@ -/* - * - * Original MPlayer filters by Richard Felker. - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with FFmpeg; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -#include "libavutil/attributes.h" -#include "libavutil/cpu.h" -#include "libavutil/mem.h" -#include "libavutil/x86/asm.h" -#include "libavfilter/vf_eq.h" - -#if HAVE_MMX_INLINE && HAVE_6REGS -static void process_MMX(EQParameters *param, uint8_t *dst, int dst_stride, - const uint8_t *src, int src_stride, int w, int h) -{ - int i; - int pel; - int dstep = dst_stride - w; - int sstep = src_stride - w; - short brvec[4]; - short contvec[4]; - int contrast = (int) (param->contrast * 256 * 16); - int brightness = ((int) (100.0 * param->brightness + 100.0) * 511) / 200 - 128 - contrast / 32; - - brvec[0] = brvec[1] = brvec[2] = brvec[3] = brightness; - contvec[0] = contvec[1] = contvec[2] = contvec[3] = contrast; - - while (h--) { - __asm__ volatile ( - "movq (%5), %%mm3 \n\t" - "movq (%6), %%mm4 \n\t" - "pxor %%mm0, %%mm0 \n\t" - "movl %4, %%eax \n\t" - ".p2align 4 \n\t" - "1: \n\t" - "movq (%0), %%mm1 \n\t" - "movq (%0), %%mm2 \n\t" - "punpcklbw %%mm0, %%mm1\n\t" - "punpckhbw %%mm0, %%mm2\n\t" - "psllw $4, 
%%mm1 \n\t" - "psllw $4, %%mm2 \n\t" - "pmulhw %%mm4, %%mm1 \n\t" - "pmulhw %%mm4, %%mm2 \n\t" - "paddw %%mm3, %%mm1 \n\t" - "paddw %%mm3, %%mm2 \n\t" - "packuswb %%mm2, %%mm1 \n\t" - "add $8, %0 \n\t" - "movq %%mm1, (%1) \n\t" - "add $8, %1 \n\t" - "decl %%eax \n\t" - "jnz 1b \n\t" - : "=r" (src), "=r" (dst) - : "0" (src), "1" (dst), "r" (w>>3), "r" (brvec), "r" (contvec) - : "%eax" - ); - - for (i = w&7; i; i--) { - pel = ((*src++ * contrast) >> 12) + brightness; - if (pel & ~255) - pel = (-pel) >> 31; - *dst++ = pel; - } - - src += sstep; - dst += dstep; - } - __asm__ volatile ( "emms \n\t" ::: "memory" ); -} -#endif - -av_cold void ff_eq_init_x86(EQContext *eq) -{ -#if HAVE_MMX_INLINE && HAVE_6REGS - int cpu_flags = av_get_cpu_flags(); - - if (cpu_flags & AV_CPU_FLAG_MMX) { - eq->process = process_MMX; - } -#endif -} diff --git a/libavfilter/x86/vf_eq_init.c b/libavfilter/x86/vf_eq_init.c new file mode 100644 index 0000000000..089961ffb2 --- /dev/null +++ b/libavfilter/x86/vf_eq_init.c @@ -0,0 +1,54 @@ +/* + * + * Original MPlayer filters by Richard Felker. + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with FFmpeg; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/mem.h"
+#include "libavutil/x86/cpu.h"
+#include "libavutil/x86/asm.h"
+#include "libavfilter/vf_eq.h"
+
+/* One-row kernel implemented in vf_eq.asm (cglobal process_one_line, INIT_MMX mmxext). */
+extern void ff_process_one_line_mmxext(const uint8_t *src, uint8_t *dst, short contrast,
+                                       short brightness, int w);
+
+/* Apply the row kernel to h rows of w bytes each; src/dst advance by their strides. */
+static void process_mmxext(EQParameters *param, uint8_t *dst, int dst_stride,
+                           const uint8_t *src, int src_stride, int w, int h)
+{
+    int contrast = (int) (param->contrast * 256 * 16); /* via int: double -> short is undefined when out of range */
+    int brightness = ((int) (100.0 * param->brightness + 100.0) * 511)
+                     / 200 - 128 - contrast / 32; /* full-width contrast, as in the removed process_MMX */
+    while (h--) {
+        ff_process_one_line_mmxext(src, dst, contrast, brightness, w); /* values narrow to the kernel's 16-bit args */
+        src += src_stride;
+        dst += dst_stride;
+    }
+    emms_c(); /* the removed inline version also issued emms after the last row */
+}
+
+av_cold void ff_eq_init_x86(EQContext *eq)
+{
+    int cpu_flags = av_get_cpu_flags();
+    if (EXTERNAL_MMXEXT(cpu_flags)) { /* CPU has MMXEXT and the external asm is compiled in */
+        eq->process = process_mmxext;
+    }
+}
-- 
cgit v1.2.3