From 844890b1bc86316a38bc9e1dbf8ba0dd254307e3 Mon Sep 17 00:00:00 2001 From: Paul B Mahol Date: Thu, 14 Oct 2021 00:57:03 +0200 Subject: avfilter/vf_fftfilt: add slice threading support --- libavfilter/vf_fftfilt.c | 331 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 229 insertions(+), 102 deletions(-) diff --git a/libavfilter/vf_fftfilt.c b/libavfilter/vf_fftfilt.c index 237306fecf..d5e49c345d 100644 --- a/libavfilter/vf_fftfilt.c +++ b/libavfilter/vf_fftfilt.c @@ -32,6 +32,7 @@ #include "libavcodec/avfft.h" #include "libavutil/eval.h" +#define MAX_THREADS 32 #define MAX_PLANES 4 enum EvalMode { @@ -46,13 +47,14 @@ typedef struct FFTFILTContext { int eval_mode; int depth; int nb_planes; + int nb_threads; int planewidth[MAX_PLANES]; int planeheight[MAX_PLANES]; - RDFTContext *hrdft[MAX_PLANES]; - RDFTContext *vrdft[MAX_PLANES]; - RDFTContext *ihrdft[MAX_PLANES]; - RDFTContext *ivrdft[MAX_PLANES]; + RDFTContext *hrdft[MAX_THREADS][MAX_PLANES]; + RDFTContext *vrdft[MAX_THREADS][MAX_PLANES]; + RDFTContext *ihrdft[MAX_THREADS][MAX_PLANES]; + RDFTContext *ivrdft[MAX_THREADS][MAX_PLANES]; int rdft_hbits[MAX_PLANES]; int rdft_vbits[MAX_PLANES]; size_t rdft_hlen[MAX_PLANES]; @@ -65,8 +67,8 @@ typedef struct FFTFILTContext { AVExpr *weight_expr[MAX_PLANES]; double *weight[MAX_PLANES]; - void (*rdft_horizontal)(struct FFTFILTContext *s, AVFrame *in, int w, int h, int plane); - void (*irdft_horizontal)(struct FFTFILTContext *s, AVFrame *out, int w, int h, int plane); + int (*rdft_horizontal)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs); + int (*irdft_horizontal)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs); } FFTFILTContext; static const char *const var_names[] = { "X", "Y", "W", "H", "N", NULL }; @@ -102,7 +104,7 @@ static double weight_Y(void *priv, double x, double y) { return lum(priv, x, y, static double weight_U(void *priv, double x, double y) { return lum(priv, x, y, U); } static double weight_V(void *priv, double x, double y) { return lum(priv, x, y, V); } -static void copy_rev (FFTSample *dest, int w, int w2) +static void copy_rev(FFTSample *dest, int w, int w2) { int i; @@ -113,100 +115,115 @@ static void copy_rev (FFTSample *dest, int w, int w2) dest[i] = dest[w2 - i]; } -/*Horizontal pass - RDFT*/ -static void rdft_horizontal8(FFTFILTContext *s, AVFrame *in, int w, int h, int plane) +static int rdft_horizontal8(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) { - int i, j; + FFTFILTContext *s = ctx->priv; + AVFrame *in = arg; + + for (int plane = 0; plane < s->nb_planes; plane++) { + const int w = s->planewidth[plane]; + const int h = s->planeheight[plane]; + const int slice_start = (h * jobnr) / nb_jobs; + const int slice_end = (h * (jobnr+1)) / nb_jobs; - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) - s->rdft_hdata[plane][i * s->rdft_hlen[plane] + j] = *(in->data[plane] + in->linesize[plane] * i + j); + for (int i = slice_start; i < slice_end; i++) { + const uint8_t *src = in->data[plane] + i * in->linesize[plane]; + float *hdata = s->rdft_hdata[plane] + i * s->rdft_hlen[plane]; + + for (int j = 0; j < w; j++) + hdata[j] = src[j]; + + copy_rev(s->rdft_hdata[plane] + i * s->rdft_hlen[plane], w, s->rdft_hlen[plane]); + } - copy_rev(s->rdft_hdata[plane] + i * s->rdft_hlen[plane], w, s->rdft_hlen[plane]); + for (int i = slice_start; i < slice_end; i++) + av_rdft_calc(s->hrdft[jobnr][plane], s->rdft_hdata[plane] + i * s->rdft_hlen[plane]); } - for (i = 0; i < h; i++) - av_rdft_calc(s->hrdft[plane], s->rdft_hdata[plane] + i * s->rdft_hlen[plane]); + return 0; } -static void rdft_horizontal16(FFTFILTContext *s, AVFrame *in, int w, int h, int plane) +static int rdft_horizontal16(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) { - const uint16_t *src = (const uint16_t *)in->data[plane]; - int linesize = in->linesize[plane] / 2; - int i, j; + FFTFILTContext *s = ctx->priv; + AVFrame *in = arg; - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) - s->rdft_hdata[plane][i * s->rdft_hlen[plane] + j] = *(src + linesize * i + j); + for (int plane = 0; plane < s->nb_planes; plane++) { + const int w = s->planewidth[plane]; + const int h = s->planeheight[plane]; + const int slice_start = (h * jobnr) / nb_jobs; + const int slice_end = (h * (jobnr+1)) / nb_jobs; - copy_rev(s->rdft_hdata[plane] + i * s->rdft_hlen[plane], w, s->rdft_hlen[plane]); - } + for (int i = slice_start; i < slice_end; i++) { + const uint16_t *src = (const uint16_t *)(in->data[plane] + i * in->linesize[plane]); + float *hdata = s->rdft_hdata[plane] + i * s->rdft_hlen[plane]; - for (i = 0; i < h; i++) - av_rdft_calc(s->hrdft[plane], s->rdft_hdata[plane] + i * s->rdft_hlen[plane]); -} + for (int j = 0; j < w; j++) + hdata[j] = src[j]; -/*Vertical pass - RDFT*/ -static void rdft_vertical(FFTFILTContext *s, int h, int plane) -{ - int i, j; + copy_rev(s->rdft_hdata[plane] + i * s->rdft_hlen[plane], w, s->rdft_hlen[plane]); + } - for (i = 0; i < s->rdft_hlen[plane]; i++) { - for (j = 0; j < h; j++) - s->rdft_vdata[plane][i * s->rdft_vlen[plane] + j] = - s->rdft_hdata[plane][j * s->rdft_hlen[plane] + i]; - copy_rev(s->rdft_vdata[plane] + i * s->rdft_vlen[plane], h, s->rdft_vlen[plane]); + for (int i = slice_start; i < slice_end; i++) + av_rdft_calc(s->hrdft[jobnr][plane], s->rdft_hdata[plane] + i * s->rdft_hlen[plane]); } - for (i = 0; i < s->rdft_hlen[plane]; i++) - av_rdft_calc(s->vrdft[plane], s->rdft_vdata[plane] + i * s->rdft_vlen[plane]); + return 0; } -/*Vertical pass - IRDFT*/ -static void irdft_vertical(FFTFILTContext *s, int h, int plane) + +static int irdft_horizontal8(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) { - int i, j; + FFTFILTContext *s = ctx->priv; + AVFrame *out = arg; - for (i = 0; i < s->rdft_hlen[plane]; i++) - av_rdft_calc(s->ivrdft[plane], s->rdft_vdata[plane] + i * s->rdft_vlen[plane]); + for (int plane = 0; plane < s->nb_planes; plane++) { + const int w = s->planewidth[plane]; + const int h = s->planeheight[plane]; + const int slice_start = (h * jobnr) / nb_jobs; + const int slice_end = (h * (jobnr+1)) / nb_jobs; - for (i = 0; i < s->rdft_hlen[plane]; i++) - for (j = 0; j < h; j++) - s->rdft_hdata[plane][j * s->rdft_hlen[plane] + i] = - s->rdft_vdata[plane][i * s->rdft_vlen[plane] + j]; -} + for (int i = slice_start; i < slice_end; i++) + av_rdft_calc(s->ihrdft[jobnr][plane], s->rdft_hdata[plane] + i * s->rdft_hlen[plane]); -/*Horizontal pass - IRDFT*/ -static void irdft_horizontal8(FFTFILTContext *s, AVFrame *out, int w, int h, int plane) -{ - int i, j; + for (int i = slice_start; i < slice_end; i++) { + const float scale = 4.f / (s->rdft_hlen[plane] * s->rdft_vlen[plane]); + const float *src = s->rdft_hdata[plane] + i * s->rdft_hlen[plane]; + uint8_t *dst = out->data[plane] + i * out->linesize[plane]; - for (i = 0; i < h; i++) - av_rdft_calc(s->ihrdft[plane], s->rdft_hdata[plane] + i * s->rdft_hlen[plane]); + for (int j = 0; j < w; j++) + dst[j] = av_clip(lrintf(src[j] * scale), 0, 255); + } + } - for (i = 0; i < h; i++) - for (j = 0; j < w; j++) - *(out->data[plane] + out->linesize[plane] * i + j) = av_clip(s->rdft_hdata[plane][i - *s->rdft_hlen[plane] + j] * 4 / - (s->rdft_hlen[plane] * - s->rdft_vlen[plane]), 0, 255); + return 0; } -static void irdft_horizontal16(FFTFILTContext *s, AVFrame *out, int w, int h, int plane) +static int irdft_horizontal16(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) { - uint16_t *dst = (uint16_t *)out->data[plane]; - int linesize = out->linesize[plane] / 2; - int max = (1 << s->depth) - 1; - int i, j; + FFTFILTContext *s = ctx->priv; + AVFrame *out = arg; + + for (int plane = 0; plane < s->nb_planes; plane++) { + int max = (1 << s->depth) - 1; + const int w = s->planewidth[plane]; + const int h = s->planeheight[plane]; + const int slice_start = (h * jobnr) / nb_jobs; + const int slice_end = (h * (jobnr+1)) / nb_jobs; + + for (int i = slice_start; i < slice_end; i++) + av_rdft_calc(s->ihrdft[jobnr][plane], s->rdft_hdata[plane] + i * s->rdft_hlen[plane]); + + for (int i = slice_start; i < slice_end; i++) { + const float scale = 4.f / (s->rdft_hlen[plane] * s->rdft_vlen[plane]); + const float *src = s->rdft_hdata[plane] + i * s->rdft_hlen[plane]; + uint16_t *dst = (uint16_t *)(out->data[plane] + i * out->linesize[plane]); - for (i = 0; i < h; i++) - av_rdft_calc(s->ihrdft[plane], s->rdft_hdata[plane] + i * s->rdft_hlen[plane]); + for (int j = 0; j < w; j++) + dst[j] = av_clip(lrintf(src[j] * scale), 0, max); + } + } - for (i = 0; i < h; i++) - for (j = 0; j < w; j++) - *(dst + linesize * i + j) = av_clip(s->rdft_hdata[plane][i - *s->rdft_hlen[plane] + j] * 4 / - (s->rdft_hlen[plane] * - s->rdft_vlen[plane]), 0, max); + return 0; } static av_cold int initialize(AVFilterContext *ctx) @@ -276,6 +293,7 @@ static int config_props(AVFilterLink *inlink) s->planeheight[0] = s->planeheight[3] = inlink->h; s->nb_planes = av_pix_fmt_count_planes(inlink->format); + s->nb_threads = FFMIN(32, ff_filter_get_nb_threads(inlink->dst)); for (i = 0; i < desc->nb_components; i++) { int w = s->planewidth[i]; @@ -287,10 +305,12 @@ static int config_props(AVFilterLink *inlink) if (!(s->rdft_hdata[i] = av_malloc_array(h, s->rdft_hlen[i] * sizeof(FFTSample)))) return AVERROR(ENOMEM); - if (!(s->hrdft[i] = av_rdft_init(s->rdft_hbits[i], DFT_R2C))) - return AVERROR(ENOMEM); - if (!(s->ihrdft[i] = av_rdft_init(s->rdft_hbits[i], IDFT_C2R))) - return AVERROR(ENOMEM); + for (int j = 0; j < s->nb_threads; j++) { + if (!(s->hrdft[j][i] = av_rdft_init(s->rdft_hbits[i], DFT_R2C))) + return AVERROR(ENOMEM); + if (!(s->ihrdft[j][i] = av_rdft_init(s->rdft_hbits[i], IDFT_C2R))) + return AVERROR(ENOMEM); + } /* RDFT - Array initialization for Vertical pass*/ s->rdft_vlen[i] = 1 << (32 - ff_clz(h)); @@ -298,10 +318,12 @@ static int config_props(AVFilterLink *inlink) if (!(s->rdft_vdata[i] = av_malloc_array(s->rdft_hlen[i], s->rdft_vlen[i] * sizeof(FFTSample)))) return AVERROR(ENOMEM); - if (!(s->vrdft[i] = av_rdft_init(s->rdft_vbits[i], DFT_R2C))) - return AVERROR(ENOMEM); - if (!(s->ivrdft[i] = av_rdft_init(s->rdft_vbits[i], IDFT_C2R))) - return AVERROR(ENOMEM); + for (int j = 0; j < s->nb_threads; j++) { + if (!(s->vrdft[j][i] = av_rdft_init(s->rdft_vbits[i], DFT_R2C))) + return AVERROR(ENOMEM); + if (!(s->ivrdft[j][i] = av_rdft_init(s->rdft_vbits[i], IDFT_C2R))) + return AVERROR(ENOMEM); + } } /*Luminance value - Array initialization*/ @@ -325,13 +347,109 @@ static int config_props(AVFilterLink *inlink) return 0; } +static int multiply_data(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) +{ + FFTFILTContext *s = ctx->priv; + + for (int plane = 0; plane < s->nb_planes; plane++) { + const int height = s->rdft_hlen[plane]; + const int slice_start = (height * jobnr) / nb_jobs; + const int slice_end = (height * (jobnr+1)) / nb_jobs; + /*Change user defined parameters*/ + for (int i = slice_start; i < slice_end; i++) { + const double *weight = s->weight[plane] + i * s->rdft_vlen[plane]; + float *vdata = s->rdft_vdata[plane] + i * s->rdft_vlen[plane]; + + for (int j = 0; j < s->rdft_vlen[plane]; j++) + vdata[j] *= weight[j]; + } + } + + return 0; +} + +static int copy_vertical(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) +{ + FFTFILTContext *s = ctx->priv; + + for (int plane = 0; plane < s->nb_planes; plane++) { + const int hlen = s->rdft_hlen[plane]; + const int vlen = s->rdft_vlen[plane]; + const int slice_start = (hlen * jobnr) / nb_jobs; + const int slice_end = (hlen * (jobnr+1)) / nb_jobs; + const int h = s->planeheight[plane]; + FFTSample *hdata = s->rdft_hdata[plane]; + FFTSample *vdata = s->rdft_vdata[plane]; + + for (int i = slice_start; i < slice_end; i++) { + for (int j = 0; j < h; j++) + vdata[i * vlen + j] = hdata[j * hlen + i]; + copy_rev(vdata + i * vlen, h, vlen); + } + } + + return 0; +} + +static int rdft_vertical(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) +{ + FFTFILTContext *s = ctx->priv; + + for (int plane = 0; plane < s->nb_planes; plane++) { + const int height = s->rdft_hlen[plane]; + const int slice_start = (height * jobnr) / nb_jobs; + const int slice_end = (height * (jobnr+1)) / nb_jobs; + + for (int i = slice_start; i < slice_end; i++) + av_rdft_calc(s->vrdft[jobnr][plane], s->rdft_vdata[plane] + i * s->rdft_vlen[plane]); + } + + return 0; +} + +static int irdft_vertical(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) +{ + FFTFILTContext *s = ctx->priv; + + for (int plane = 0; plane < s->nb_planes; plane++) { + const int height = s->rdft_hlen[plane]; + const int slice_start = (height * jobnr) / nb_jobs; + const int slice_end = (height * (jobnr+1)) / nb_jobs; + + for (int i = slice_start; i < slice_end; i++) + av_rdft_calc(s->ivrdft[jobnr][plane], s->rdft_vdata[plane] + i * s->rdft_vlen[plane]); + } + + return 0; +} + +static int copy_horizontal(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) +{ + FFTFILTContext *s = ctx->priv; + + for (int plane = 0; plane < s->nb_planes; plane++) { + const int hlen = s->rdft_hlen[plane]; + const int vlen = s->rdft_vlen[plane]; + const int slice_start = (hlen * jobnr) / nb_jobs; + const int slice_end = (hlen * (jobnr+1)) / nb_jobs; + const int h = s->planeheight[plane]; + FFTSample *hdata = s->rdft_hdata[plane]; + FFTSample *vdata = s->rdft_vdata[plane]; + + for (int i = slice_start; i < slice_end; i++) + for (int j = 0; j < h; j++) + hdata[j * hlen + i] = vdata[i * vlen + j]; + } + + return 0; +} + static int filter_frame(AVFilterLink *inlink, AVFrame *in) { AVFilterContext *ctx = inlink->dst; AVFilterLink *outlink = inlink->dst->outputs[0]; FFTFILTContext *s = ctx->priv; AVFrame *out; - int i, j, plane; out = ff_get_video_buffer(outlink, inlink->w, inlink->h); if (!out) { @@ -341,27 +459,34 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) av_frame_copy_props(out, in); - for (plane = 0; plane < s->nb_planes; plane++) { - int w = s->planewidth[plane]; - int h = s->planeheight[plane]; - + for (int plane = 0; plane < s->nb_planes; plane++) { if (s->eval_mode == EVAL_MODE_FRAME) do_eval(s, inlink, plane); + } - s->rdft_horizontal(s, in, w, h, plane); - rdft_vertical(s, h, plane); + ff_filter_execute(ctx, s->rdft_horizontal, in, NULL, + FFMIN(s->planeheight[1], s->nb_threads)); - /*Change user defined parameters*/ - for (i = 0; i < s->rdft_hlen[plane]; i++) - for (j = 0; j < s->rdft_vlen[plane]; j++) - s->rdft_vdata[plane][i * s->rdft_vlen[plane] + j] *= - s->weight[plane][i * s->rdft_vlen[plane] + j]; + ff_filter_execute(ctx, copy_vertical, NULL, NULL, + FFMIN(s->planeheight[1], s->nb_threads)); + + ff_filter_execute(ctx, rdft_vertical, NULL, NULL, + FFMIN(s->planeheight[1], s->nb_threads)); + ff_filter_execute(ctx, multiply_data, NULL, NULL, + FFMIN(s->planeheight[1], s->nb_threads)); + + for (int plane = 0; plane < s->nb_planes; plane++) s->rdft_vdata[plane][0] += s->rdft_hlen[plane] * s->rdft_vlen[plane] * s->dc[plane]; - irdft_vertical(s, h, plane); - s->irdft_horizontal(s, out, w, h, plane); - } + ff_filter_execute(ctx, irdft_vertical, NULL, NULL, + FFMIN(s->planeheight[1], s->nb_threads)); + + ff_filter_execute(ctx, copy_horizontal, NULL, NULL, + FFMIN(s->planeheight[1], s->nb_threads)); + + ff_filter_execute(ctx, s->irdft_horizontal, out, NULL, + FFMIN(s->planeheight[1], s->nb_threads)); av_frame_free(&in); return ff_filter_frame(outlink, out); @@ -376,10 +501,12 @@ static av_cold void uninit(AVFilterContext *ctx) av_free(s->rdft_vdata[i]); av_expr_free(s->weight_expr[i]); av_free(s->weight[i]); - av_rdft_end(s->hrdft[i]); - av_rdft_end(s->ihrdft[i]); - av_rdft_end(s->vrdft[i]); - av_rdft_end(s->ivrdft[i]); + for (int j = 0; j < s->nb_threads; j++) { + av_rdft_end(s->hrdft[j][i]); + av_rdft_end(s->ihrdft[j][i]); + av_rdft_end(s->vrdft[j][i]); + av_rdft_end(s->ivrdft[j][i]); + } } } @@ -426,5 +553,5 @@ const AVFilter ff_vf_fftfilt = { FILTER_PIXFMTS_ARRAY(pixel_fmts_fftfilt), .init = initialize, .uninit = uninit, - .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC, + .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS, }; -- cgit v1.2.3