summary | refs | log | tree | commit | diff
path: root/libavfilter/vf_nlmeans.c
diff options
context:
space:
mode:
author: Clément Bœsch <u@pkh.me> 2018-05-06 10:54:49 +0200
committer: Clément Bœsch <u@pkh.me> 2018-05-08 10:28:06 +0200
commit: 5a71bce3713ce0b074b1ad33f2c5e9c6bcddde2c (patch)
tree: d7df836f475e63fc4ee8dcfa15c9fee2220637d1 /libavfilter/vf_nlmeans.c
parent: 5ba14f4f1a5ef508698d6a5c8d579880c2901b22 (diff)
lavfi/nlmeans: add AArch64 SIMD for compute_safe_ssd_integral_image
ssd_integral_image_c: 49204.6
ssd_integral_image_neon: 28346.8
Diffstat (limited to 'libavfilter/vf_nlmeans.c')
-rw-r--r-- libavfilter/vf_nlmeans.c | 26
1 files changed, 19 insertions, 7 deletions
diff --git a/libavfilter/vf_nlmeans.c b/libavfilter/vf_nlmeans.c
index b081a4e5af..c30e44498f 100644
--- a/libavfilter/vf_nlmeans.c
+++ b/libavfilter/vf_nlmeans.c
@@ -20,7 +20,6 @@
/**
* @todo
- * - SIMD for compute_safe_ssd_integral_image
* - SIMD for final weighted averaging
* - better automatic defaults? see "Parameters" @ http://www.ipol.im/pub/art/2011/bcm_nlm/
* - temporal support (probably doesn't need any displacement according to
@@ -37,6 +36,7 @@
#include "avfilter.h"
#include "formats.h"
#include "internal.h"
+#include "vf_nlmeans.h"
#include "video.h"
struct weighted_avg {
@@ -66,6 +66,7 @@ typedef struct NLMeansContext {
double weight_lut[WEIGHT_LUT_SIZE]; // lookup table mapping (scaled) patch differences to their associated weights
double pdiff_lut_scale; // scale factor for patch differences before looking into the LUT
int max_meaningful_diff; // maximum difference considered (if the patch difference is too high we ignore the pixel)
+ NLMeansDSPContext dsp;
} NLMeansContext;
#define OFFSET(x) offsetof(NLMeansContext, x)
@@ -240,7 +241,8 @@ static inline void compute_unsafe_ssd_integral_image(uint32_t *dst, ptrdiff_t ds
* @param h source height
* @param e research padding edge
*/
-static void compute_ssd_integral_image(uint32_t *ii, ptrdiff_t ii_linesize_32,
+static void compute_ssd_integral_image(const NLMeansDSPContext *dsp,
+ uint32_t *ii, ptrdiff_t ii_linesize_32,
const uint8_t *src, ptrdiff_t linesize, int offx, int offy,
int e, int w, int h)
{
@@ -291,10 +293,10 @@ static void compute_ssd_integral_image(uint32_t *ii, ptrdiff_t ii_linesize_32,
av_assert1(startx_safe - s2x >= 0); av_assert1(startx_safe - s2x < w);
av_assert1(starty_safe - s2y >= 0); av_assert1(starty_safe - s2y < h);
if (safe_pw && safe_ph)
- compute_safe_ssd_integral_image_c(ii + starty_safe*ii_linesize_32 + startx_safe, ii_linesize_32,
- src + (starty_safe - s1y) * linesize + (startx_safe - s1x), linesize,
- src + (starty_safe - s2y) * linesize + (startx_safe - s2x), linesize,
- safe_pw, safe_ph);
+ dsp->compute_safe_ssd_integral_image(ii + starty_safe*ii_linesize_32 + startx_safe, ii_linesize_32,
+ src + (starty_safe - s1y) * linesize + (startx_safe - s1x), linesize,
+ src + (starty_safe - s2y) * linesize + (startx_safe - s2x), linesize,
+ safe_pw, safe_ph);
// right part of the integral
compute_unsafe_ssd_integral_image(ii, ii_linesize_32,
@@ -431,7 +433,7 @@ static int nlmeans_plane(AVFilterContext *ctx, int w, int h, int p, int r,
.p = p,
};
- compute_ssd_integral_image(s->ii, s->ii_lz_32,
+ compute_ssd_integral_image(&s->dsp, s->ii, s->ii_lz_32,
src, src_linesize,
offx, offy, e, w, h);
ctx->internal->execute(ctx, nlmeans_slice, &td, NULL,
@@ -489,6 +491,14 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
} \
} while (0)
+void ff_nlmeans_init(NLMeansDSPContext *dsp)
+{
+ dsp->compute_safe_ssd_integral_image = compute_safe_ssd_integral_image_c;
+
+ if (ARCH_AARCH64)
+ ff_nlmeans_init_aarch64(dsp);
+}
+
static av_cold int init(AVFilterContext *ctx)
{
int i;
@@ -520,6 +530,8 @@ static av_cold int init(AVFilterContext *ctx)
s->research_size, s->research_size, s->research_size_uv, s->research_size_uv,
s->patch_size, s->patch_size, s->patch_size_uv, s->patch_size_uv);
+ ff_nlmeans_init(&s->dsp);
+
return 0;
}