summaryrefslogtreecommitdiff
path: root/libavfilter/x86
diff options
context:
space:
mode:
authorMarton Balint <cus@passwd.hu>2018-04-05 01:37:25 +0200
committerMarton Balint <cus@passwd.hu>2018-11-11 20:30:50 +0100
commit6c2a7a8e9a3698f37913d3f24723fbb8fa895798 (patch)
treee078a0d996756411dd2087fbdce35474cce539cf /libavfilter/x86
parent6df9020f45eaff66ba2c2bac98cda9ddaacb03f3 (diff)
avfilter/vf_framerate: factorize SAD functions which compute SAD for a whole frame
Also add SIMD which works on lines because it is faster than calculating it on 8x8 blocks using pixelutils. Signed-off-by: Marton Balint <cus@passwd.hu>
Diffstat (limited to 'libavfilter/x86')
-rw-r--r--libavfilter/x86/Makefile4
-rw-r--r--libavfilter/x86/scene_sad.asm74
-rw-r--r--libavfilter/x86/scene_sad_init.c52
3 files changed, 130 insertions, 0 deletions
diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile
index b484c8bd1c..6eecb94359 100644
--- a/libavfilter/x86/Makefile
+++ b/libavfilter/x86/Makefile
@@ -1,3 +1,5 @@
+OBJS-$(CONFIG_SCENE_SAD) += x86/scene_sad_init.o
+
OBJS-$(CONFIG_AFIR_FILTER) += x86/af_afir_init.o
OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend_init.o
OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif_init.o
@@ -29,6 +31,8 @@ OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume_init.o
OBJS-$(CONFIG_W3FDIF_FILTER) += x86/vf_w3fdif_init.o
OBJS-$(CONFIG_YADIF_FILTER) += x86/vf_yadif_init.o
+X86ASM-OBJS-$(CONFIG_SCENE_SAD) += x86/scene_sad.o
+
X86ASM-OBJS-$(CONFIG_AFIR_FILTER) += x86/af_afir.o
X86ASM-OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend.o
X86ASM-OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif.o
diff --git a/libavfilter/x86/scene_sad.asm b/libavfilter/x86/scene_sad.asm
new file mode 100644
index 0000000000..d38d71ccca
--- /dev/null
+++ b/libavfilter/x86/scene_sad.asm
@@ -0,0 +1,74 @@
+;*****************************************************************************
+;* x86-optimized functions for scene SAD
+;*
+;* Copyright (C) 2018 Marton Balint
+;*
+;* Based on vf_blend.asm, Copyright (C) 2015 Paul B Mahol
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION .text
+
+
+%macro SAD_INIT 0 ; prologue of scene_sad: declares the function, prepares pointers and the accumulator
+cglobal scene_sad, 6, 7, 2, src1, stride1, src2, stride2, width, end, x ; x86inc convention: 6 args loaded, 7 GPRs, 2 vector regs; end is the row counter, x the column offset
+ add src1q, widthq ; point src1 just past the end of its first row
+ add src2q, widthq ; same for src2
+ neg widthq ; width = -width, so [srcNq + x] walks a row while x climbs from -width toward 0
+ pxor m1, m1 ; m1 is the running accumulator, start from zero
+%endmacro
+
+
+%macro SAD_LOOP 0 ; row and column loops of scene_sad, then the store of the accumulator
+.nextrow:
+ mov xq, widthq ; x = -width: restart at the first column of the current row
+ ; Both loops test at the bottom: one vector is read even if width is 0, one row is processed even if end <= 0.
+ .loop:
+ movu m0, [src1q + xq] ; unaligned load of mmsize bytes from src1
+ psadbw m0, [src2q + xq] ; sum of absolute byte differences per 64-bit lane; NOTE(review): the legacy SSE encoding needs a 16-byte aligned memory operand - confirm callers guarantee that for src2
+ paddq m1, m0 ; accumulate each lane as a 64-bit integer
+ add xq, mmsize ; advance by one vector; the flags feed the jl below
+ jl .loop ; keep going while x is still negative, i.e. before the end of the row
+ add src1q, stride1q ; move both sources down one row
+ add src2q, stride2q
+ sub endd, 1 ; one row finished
+ jg .nextrow ; repeat while rows remain
+ ; r6mp is x86inc's name for the seventh argument; the C wrapper passes its sad[] buffer there.
+ mov r0q, r6mp
+ movu [r0q], m1 ; store the whole mmsize-byte accumulator; the C wrapper adds up the 64-bit lanes
+REP_RET
+%endmacro
+
+
+%macro SAD_FRAMES 0 ; emits one complete scene_sad function for the instruction set selected by the preceding INIT_* line
+ SAD_INIT
+ SAD_LOOP
+%endmacro
+
+
+INIT_XMM sse2 ; SSE2 build; the C init file declares it as ff_scene_sad_sse2 with 16-byte vectors
+SAD_FRAMES
+
+%if HAVE_AVX2_EXTERNAL
+; AVX2 build, assembled only when HAVE_AVX2_EXTERNAL is set (presumably by configure - confirm)
+INIT_YMM avx2 ; the C init file declares it as ff_scene_sad_avx2 with 32-byte vectors
+SAD_FRAMES
+
+%endif
diff --git a/libavfilter/x86/scene_sad_init.c b/libavfilter/x86/scene_sad_init.c
new file mode 100644
index 0000000000..461fa406d9
--- /dev/null
+++ b/libavfilter/x86/scene_sad_init.c
@@ -0,0 +1,52 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavfilter/scene_sad.h"
+
+/*
+ * Define FUNC_NAME, a C wrapper around the assembly kernel ASM_FUNC_NAME,
+ * whose vectors are MMSIZE bytes wide.
+ *
+ * The kernel processes the leading columns that fill whole vectors and
+ * stores its accumulator as MMSIZE / 8 64-bit partial sums in sad[];
+ * ff_scene_sad_c processes the remaining width % MMSIZE columns.
+ * *sum receives the total of both parts.
+ *
+ * The kernel's loops are bottom-tested: it always reads one vector per row
+ * and always processes one row. It is therefore skipped when there is no
+ * full vector or no row to process; otherwise it would add pixels that the
+ * C fallback counts again, or that lie past the end of the row.
+ */
+#define SCENE_SAD_FUNC(FUNC_NAME, ASM_FUNC_NAME, MMSIZE) \
+void ASM_FUNC_NAME(SCENE_SAD_PARAMS); \
+ \
+static void FUNC_NAME(SCENE_SAD_PARAMS) { \
+    uint64_t sad[(MMSIZE) / 8] = {0}; \
+    ptrdiff_t awidth = width & ~((MMSIZE) - 1); \
+    *sum = 0; \
+    if (awidth > 0 && height > 0) { \
+        ASM_FUNC_NAME(src1, stride1, src2, stride2, awidth, height, sad); \
+        for (int i = 0; i < (MMSIZE) / 8; i++) \
+            *sum += sad[i]; \
+    } \
+    ff_scene_sad_c(src1 + awidth, stride1, \
+                   src2 + awidth, stride2, \
+                   width - awidth, height, sad); \
+    *sum += sad[0]; \
+}
+
+SCENE_SAD_FUNC(scene_sad_sse2, ff_scene_sad_sse2, 16);
+SCENE_SAD_FUNC(scene_sad_avx2, ff_scene_sad_avx2, 32);
+
+/*
+ * Pick the x86 SIMD scene SAD wrapper for the given bit depth.
+ *
+ * Only depth 8 has SIMD versions here. The AVX2 wrapper is returned when
+ * EXTERNAL_AVX2_FAST accepts the CPU flags, otherwise the SSE2 wrapper when
+ * EXTERNAL_SSE2 does. NULL is returned for any other depth or when neither
+ * check passes.
+ */
+ff_scene_sad_fn ff_scene_sad_get_fn_x86(int depth)
+{
+    const int flags = av_get_cpu_flags();
+
+    if (depth != 8)
+        return NULL;
+
+    /* Prefer the wider vectors when both are usable. */
+    if (EXTERNAL_AVX2_FAST(flags))
+        return scene_sad_avx2;
+    if (EXTERNAL_SSE2(flags))
+        return scene_sad_sse2;
+
+    return NULL;
+}