From dfc58584b4e5c35d6aebc5d21223bf714024f0ea Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Sun, 12 Jul 2015 19:33:06 -0400 Subject: vf_ssim: x86 simd for ssim_4x4xN and ssim_endN. Both are 2-2.5x faster than their C counterpart. Reviewed-by: Paul B Mahol Reviewed-by: James Almer Signed-off-by: Michael Niedermayer --- libavfilter/x86/vf_ssim_init.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 libavfilter/x86/vf_ssim_init.c (limited to 'libavfilter/x86/vf_ssim_init.c') diff --git a/libavfilter/x86/vf_ssim_init.c b/libavfilter/x86/vf_ssim_init.c new file mode 100644 index 0000000000..9514b25ee3 --- /dev/null +++ b/libavfilter/x86/vf_ssim_init.c @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2015 Ronald S. Bultje + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/x86/cpu.h"
+
+#include "libavfilter/ssim.h"
+/* SIMD kernels; NOTE(review): presumably implemented in a companion .asm. */
+void ff_ssim_4x4_line_ssse3(const uint8_t *buf, ptrdiff_t buf_stride,
+                            const uint8_t *ref, ptrdiff_t ref_stride,
+                            int (*sums)[4], int w);
+float ff_ssim_end_line_sse4(const int (*sum0)[4], const int (*sum1)[4], int w);
+/* Select the x86 SIMD SSIM kernels permitted by the detected CPU flags. */
+void ff_ssim_init_x86(SSIMDSPContext *dsp)
+{
+    const int flags = av_get_cpu_flags();
+
+    if (EXTERNAL_SSE4(flags))
+        dsp->ssim_end_line = ff_ssim_end_line_sse4;
+    if (ARCH_X86_64 && EXTERNAL_SSSE3(flags))
+        dsp->ssim_4x4_line = ff_ssim_4x4_line_ssse3;
+}
-- 
cgit v1.2.3