summaryrefslogtreecommitdiff
path: root/libavfilter/lut3d.h
diff options
context:
space:
mode:
author Mark Reid <mindmark@gmail.com> 2021-10-05 20:58:30 -0700
committer Paul B Mahol <onemda@gmail.com> 2021-10-10 22:23:48 +0200
commit 716b39674059d5b416faef92afd41654a6d9469b (patch)
tree 25652c77af1ac70c439ba3e1a7f879d4b08cfb5b /libavfilter/lut3d.h
parent 5133f4c2c1149feef3248ba2cb29537e8d8fbe38 (diff)
avfilter/vf_lut3d: add x86-optimized tetrahedral interpolation
I spotted an interesting pattern that I didn't see before that leads to the implementation being faster. The bit shifting table I was using before is no longer needed, and I was able to remove quite a few lines. I also added use of FMA in the AVX2 version. f32 1920x1080 1 thread with prelut c impl 1434012700 UNITS in lut3d->interp,       1 runs,      0 skips 1434035335 UNITS in lut3d->interp,       2 runs,      0 skips 1423615347 UNITS in lut3d->interp,       4 runs,      0 skips 1426268863 UNITS in lut3d->interp,       8 runs,      0 skips sse2 905484420 UNITS in lut3d->interp,       1 runs,      0 skips 905659010 UNITS in lut3d->interp,       2 runs,      0 skips 915167140 UNITS in lut3d->interp,       4 runs,      0 skips 915834222 UNITS in lut3d->interp,       8 runs,      0 skips avx 574794860 UNITS in lut3d->interp,       1 runs,      0 skips 581035090 UNITS in lut3d->interp,       2 runs,      0 skips 584116720 UNITS in lut3d->interp,       4 runs,      0 skips 581460290 UNITS in lut3d->interp,       8 runs,      0 skips avx2 301698880 UNITS in lut3d->interp,       1 runs,      0 skips 301982880 UNITS in lut3d->interp,       2 runs,      0 skips 306962430 UNITS in lut3d->interp,       4 runs,      0 skips 305472025 UNITS in lut3d->interp,       8 runs,      0 skips gbrap16 1920x1080 1 thread with prelut c impl 1480894840 UNITS in lut3d->interp,       1 runs,      0 skips 1502922990 UNITS in lut3d->interp,       2 runs,      0 skips 1496114307 UNITS in lut3d->interp,       4 runs,      0 skips 1492554551 UNITS in lut3d->interp,       8 runs,      0 skips sse2 980777180 UNITS in lut3d->interp,       1 runs,      0 skips 986121520 UNITS in lut3d->interp,       2 runs,      0 skips 986489840 UNITS in lut3d->interp,       4 runs,      0 skips 998832248 UNITS in lut3d->interp,       8 runs,      0 skips avx 622212360 UNITS in lut3d->interp,       1 runs,      0 skips 622981160 UNITS in lut3d->interp,       2 runs,      0 skips 645396315 UNITS in lut3d->interp,       
4 runs,      0 skips 641057075 UNITS in lut3d->interp,       8 runs,      0 skips avx2 321336400 UNITS in lut3d->interp,       1 runs,      0 skips 321268920 UNITS in lut3d->interp,       2 runs,      0 skips 323459895 UNITS in lut3d->interp,       4 runs,      0 skips 324949967 UNITS in lut3d->interp,       8 runs,      0 skips
Diffstat (limited to 'libavfilter/lut3d.h')
-rw-r--r--libavfilter/lut3d.h83
1 files changed, 83 insertions, 0 deletions
diff --git a/libavfilter/lut3d.h b/libavfilter/lut3d.h
new file mode 100644
index 0000000000..bc32eac91c
--- /dev/null
+++ b/libavfilter/lut3d.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2013 Clément Bœsch
+ * Copyright (c) 2018 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef AVFILTER_LUT3D_H
+#define AVFILTER_LUT3D_H
+
+#include "libavutil/pixdesc.h"
+#include "framesync.h"
+#include "avfilter.h"
+
+enum interp_mode { /* interpolation mode selectors; no explicit values, so they number sequentially from 0 */
+ INTERPOLATE_NEAREST,
+ INTERPOLATE_TRILINEAR,
+ INTERPOLATE_TETRAHEDRAL,
+ INTERPOLATE_PYRAMID,
+ INTERPOLATE_PRISM,
+ NB_INTERP_MODE /* not a mode: equals the number of modes listed above, so it must stay last */
+};
+
+struct rgbvec { /* three float components named r, g and b */
+ float r, g, b;
+};
+
+/* 3D LUTs don't often go up to level 32, but it is common to have a Hald CLUT
+ * of 512x512 (64x64x64) */
+#define MAX_LEVEL 256
+#define PRELUT_SIZE 65536
+
+typedef struct Lut3DPreLut { /* pre-LUT data; every array has 3 entries. NOTE(review): presumably one per R/G/B channel, confirm */
+ int size;
+ float min[3];
+ float max[3];
+ float scale[3];
+ float* lut[3]; /* three pointers to float data; allocation and ownership are not visible in this header */
+} Lut3DPreLut;
+
+typedef struct LUT3DContext { /* context for the 3D LUT code; also the type passed to ff_lut3d_init_x86() */
+ const AVClass *class;
+ struct rgbvec *lut; /* pointer to rgbvec entries; NOTE(review): table dimensions presumably given by lutsize, confirm */
+ int lutsize;
+ int lutsize2; /* NOTE(review): presumably lutsize squared, confirm against the code that sets it */
+ struct rgbvec scale;
+ int interpolation; ///<interp_mode
+ char *file;
+ uint8_t rgba_map[4];
+ int step;
+ avfilter_action_func *interp; /* function pointer; NOTE(review): presumably the slot ff_lut3d_init_x86() fills in, confirm */
+ Lut3DPreLut prelut;
+#if CONFIG_HALDCLUT_FILTER /* members up to #endif exist only when this is true, so sizeof(LUT3DContext) depends on it; NOTE(review): not defined in this header, confirm all includers see the same value */
+ uint8_t clut_rgba_map[4];
+ int clut_step;
+ int clut_bits;
+ int clut_planar;
+ int clut_float;
+ int clut_width;
+ FFFrameSync fs;
+#endif
+} LUT3DContext;
+
+typedef struct ThreadData { /* bundles two frame pointers; NOTE(review): presumably the argument handed to the interp callback, confirm */
+ AVFrame *in, *out;
+} ThreadData;
+
+void ff_lut3d_init_x86(LUT3DContext *s, const AVPixFmtDescriptor *desc); /* declared here, defined elsewhere; NOTE(review): presumably installs the x86-optimized interpolation into s based on desc, confirm */
+
+#endif /* AVFILTER_LUT3D_H */