From c737f6edcef74a64f4d0ebcefa970bd31266d512 Mon Sep 17 00:00:00 2001 From: Paul B Mahol Date: Tue, 19 Jan 2021 17:06:05 +0100 Subject: avfilter/vf_nnedi: simplify code even more --- libavfilter/vf_nnedi.c | 151 +++++++++++++++++++++++++++---------------------- 1 file changed, 82 insertions(+), 69 deletions(-) (limited to 'libavfilter/vf_nnedi.c') diff --git a/libavfilter/vf_nnedi.c b/libavfilter/vf_nnedi.c index 5863ec478c..786059052b 100644 --- a/libavfilter/vf_nnedi.c +++ b/libavfilter/vf_nnedi.c @@ -37,8 +37,8 @@ static const uint8_t NNEDI_XDIM[] = { 8, 16, 32, 48, 8, 16, 32 }; static const uint8_t NNEDI_YDIM[] = { 6, 6, 6, 6, 4, 4, 4 }; static const uint16_t NNEDI_NNS[] = { 16, 32, 64, 128, 256 }; -typedef struct PrescreenerOldCoefficients { - DECLARE_ALIGNED(32, float, kernel_l0)[4][14 * 4]; +typedef struct PrescreenerCoefficients { + DECLARE_ALIGNED(32, float, kernel_l0)[4][16 * 4]; DECLARE_ALIGNED(32, float, bias_l0)[4]; DECLARE_ALIGNED(32, float, kernel_l1)[4][4]; @@ -46,15 +46,7 @@ typedef struct PrescreenerOldCoefficients { DECLARE_ALIGNED(32, float, kernel_l2)[4][8]; DECLARE_ALIGNED(32, float, bias_l2)[4]; -} PrescreenerOldCoefficients; - -typedef struct PrescreenerNewCoefficients { - DECLARE_ALIGNED(32, float, kernel_l0)[4][16 * 4]; - DECLARE_ALIGNED(32, float, bias_l0)[4]; - - DECLARE_ALIGNED(32, float, kernel_l1)[4][4]; - DECLARE_ALIGNED(32, float, bias_l1)[4]; -} PrescreenerNewCoefficients; +} PrescreenerCoefficients; typedef struct PredictorCoefficients { int xdim, ydim, nns, nsize; @@ -89,8 +81,7 @@ typedef struct NNEDIContext { int planeheight[4]; int field_n; - PrescreenerOldCoefficients prescreener_old; - PrescreenerNewCoefficients prescreener_new[3]; + PrescreenerCoefficients prescreener[4]; PredictorCoefficients coeffs[2][5][7]; float half; @@ -108,9 +99,9 @@ typedef struct NNEDIContext { int pscrn; int input_size; - uint8_t *prescreen_buf; - float *input_buf; - float *output_buf; + uint8_t **prescreen_buf; + float **input_buf; + float **output_buf; void (*read)(const uint8_t *src, float *dst, int src_stride, int dst_stride, @@ -120,7 +111,8 @@ typedef struct NNEDIContext { int width, int height, int depth, float scale); void (*prescreen[2])(AVFilterContext *ctx, const void *src, ptrdiff_t src_stride, - uint8_t *prescreen, int N, void *data); + uint8_t *prescreen, int N, + const PrescreenerCoefficients *const coeffs); } NNEDIContext; #define OFFSET(x) offsetof(NNEDIContext, x) @@ -222,7 +214,7 @@ static int query_formats(AVFilterContext *ctx) return ff_set_common_formats(ctx, fmts_list); } -static float dot_dsp(NNEDIContext *s, const float *kernel, const float *input, +static float dot_dsp(const NNEDIContext *const s, const float *kernel, const float *input, int n, float scale, float bias) { float sum; @@ -246,10 +238,9 @@ static void transform_elliott(float *input, int size) static void process_old(AVFilterContext *ctx, const void *src, ptrdiff_t src_stride, uint8_t *prescreen, int N, - void *data) + const PrescreenerCoefficients *const m_data) { NNEDIContext *s = ctx->priv; - const PrescreenerOldCoefficients *const m_data = data; const float *src_p = src; // Adjust source pointer to point to top-left of filter window. @@ -283,10 +274,9 @@ static void process_old(AVFilterContext *ctx, static void process_new(AVFilterContext *ctx, const void *src, ptrdiff_t src_stride, uint8_t *prescreen, int N, - void *data) + const PrescreenerCoefficients *const m_data) { NNEDIContext *s = ctx->priv; - const PrescreenerNewCoefficients *const m_data = data; const float *src_p = src; // Adjust source pointer to point to top-left of filter window. @@ -344,8 +334,8 @@ static void gather_input(const float *src, ptrdiff_t src_stride, float *buf, float mstd[4], const PredictorCoefficients *const model) { - float sum = 0; - float sum_sq = 0; + float sum = 0.f; + float sum_sq = 0.f; float tmp; for (int i = 0; i < model->ydim; i++) { @@ -405,17 +395,16 @@ static void wae5(const float *softmax, const float *el, static void predictor(AVFilterContext *ctx, const void *src, ptrdiff_t src_stride, void *dst, const uint8_t *prescreen, int N, - void *data, int use_q2) + const PredictorCoefficients *const model, int use_q2) { - NNEDIContext *s = ctx->priv; - const PredictorCoefficients *const model = data; + const NNEDIContext *const s = ctx->priv; const float *src_p = src; float *dst_p = dst; // Adjust source pointer to point to top-left of filter window. const float *window = src_p - (model->ydim / 2) * src_stride - (model->xdim / 2 - 1); - int filter_size = model->nsize; - int nns = model->nns; + const int filter_size = model->nsize; + const int nns = model->nns; for (int i = 0; i < N; i++) { LOCAL_ALIGNED_32(float, input, [48 * 6]); @@ -433,7 +422,7 @@ static void predictor(AVFilterContext *ctx, activation[nn] = dot_dsp(s, softmax_q1_filter(nn, model), input, filter_size, scale, model->softmax_bias_q1[nn]); for (int nn = 0; nn < nns; nn++) - activation[model->nns + nn] = dot_dsp(s, elliott_q1_filter(nn, model), input, filter_size, scale, model->elliott_bias_q1[nn]); + activation[nns + nn] = dot_dsp(s, elliott_q1_filter(nn, model), input, filter_size, scale, model->elliott_bias_q1[nn]); transform_softmax_exp(activation, nns); wae5(activation, activation + nns, nns, mstd); @@ -449,7 +438,7 @@ static void predictor(AVFilterContext *ctx, wae5(activation, activation + nns, nns, mstd); } - dst_p[i] = mstd[3] / (use_q2 ? 2 : 1); + dst_p[i] = mstd[3] * (use_q2 ? 0.5f : 1.f); } } @@ -551,7 +540,7 @@ static void interpolation(const void *src, ptrdiff_t src_stride, static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) { - NNEDIContext *s = ctx->priv; + const NNEDIContext *const s = ctx->priv; AVFrame *out = s->dst; AVFrame *in = s->src; const float in_scale = s->in_scale; @@ -572,10 +561,10 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) uint8_t *dst = out->data[p] + slice_start * out->linesize[p]; const int src_linesize = in->linesize[p]; const int dst_linesize = out->linesize[p]; - uint8_t *prescreen_buf = s->prescreen_buf + s->planewidth[0] * jobnr; - float *srcbuf = s->input_buf + s->input_size * jobnr; + uint8_t *prescreen_buf = s->prescreen_buf[jobnr]; + float *srcbuf = s->input_buf[jobnr]; const int srcbuf_stride = width + 64; - float *dstbuf = s->output_buf + s->input_size * jobnr; + float *dstbuf = s->output_buf[jobnr]; const int dstbuf_stride = width; const int slice_height = (slice_end - slice_start) / 2; const int last_slice = slice_end == height; @@ -646,15 +635,10 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) width, 1, in_scale); for (int y = 0; y < slice_end - slice_start; y += 2) { - if (s->pscrn > 1) { - s->prescreen[1](ctx, srcbuf + (y / 2) * srcbuf_stride + 32, - srcbuf_stride, prescreen_buf, width, - &s->prescreener_new[s->pscrn - 2]); - } else if (s->pscrn == 1) { - s->prescreen[0](ctx, srcbuf + (y / 2) * srcbuf_stride + 32, - srcbuf_stride, prescreen_buf, width, - &s->prescreener_old); - } + if (s->prescreen > 0) + s->prescreen[s->pscrn > 1](ctx, srcbuf + (y / 2) * srcbuf_stride + 32, + srcbuf_stride, prescreen_buf, width, + &s->prescreener[s->pscrn - 1]); predictor(ctx, srcbuf + (y / 2) * srcbuf_stride + 32, @@ -831,7 +815,7 @@ static int allocate_model(PredictorCoefficients *coeffs, int xdim, int ydim, int int bias_size = nns; float *data; - data = av_malloc_array(filter_size + bias_size, 4 * sizeof(float)); + data = av_calloc(filter_size + bias_size, 4 * sizeof(float)); if (!data) return AVERROR(ENOMEM); @@ -859,17 +843,17 @@ static int read_weights(AVFilterContext *ctx, const float *bdata) NNEDIContext *s = ctx->priv; int ret; - copy_weights(&s->prescreener_old.kernel_l0[0][0], 4 * 48, &bdata); - copy_weights(s->prescreener_old.bias_l0, 4, &bdata); + copy_weights(&s->prescreener[0].kernel_l0[0][0], 4 * 48, &bdata); + copy_weights(s->prescreener[0].bias_l0, 4, &bdata); - copy_weights(&s->prescreener_old.kernel_l1[0][0], 4 * 4, &bdata); - copy_weights(s->prescreener_old.bias_l1, 4, &bdata); + copy_weights(&s->prescreener[0].kernel_l1[0][0], 4 * 4, &bdata); + copy_weights(s->prescreener[0].bias_l1, 4, &bdata); - copy_weights(&s->prescreener_old.kernel_l2[0][0], 4 * 8, &bdata); - copy_weights(s->prescreener_old.bias_l2, 4, &bdata); + copy_weights(&s->prescreener[0].kernel_l2[0][0], 4 * 8, &bdata); + copy_weights(s->prescreener[0].bias_l2, 4, &bdata); for (int i = 0; i < 3; i++) { - PrescreenerNewCoefficients *data = &s->prescreener_new[i]; + PrescreenerCoefficients *data = &s->prescreener[i + 1]; float kernel_l0_shuffled[4 * 64]; float kernel_l1_shuffled[4 * 4]; @@ -890,14 +874,14 @@ static int read_weights(AVFilterContext *ctx, const float *bdata) for (int m = 0; m < 2; m++) { // Grouping by neuron count. for (int i = 0; i < 5; i++) { - int nns = NNEDI_NNS[i]; + const int nns = NNEDI_NNS[i]; // Grouping by window size. for (int j = 0; j < 7; j++) { PredictorCoefficients *model = &s->coeffs[m][i][j]; - int xdim = NNEDI_XDIM[j]; - int ydim = NNEDI_YDIM[j]; - int filter_size = xdim * ydim; + const int xdim = NNEDI_XDIM[j]; + const int ydim = NNEDI_YDIM[j]; + const int filter_size = xdim * ydim; ret = allocate_model(model, xdim, ydim, nns); if (ret < 0) @@ -927,7 +911,7 @@ static int read_weights(AVFilterContext *ctx, const float *bdata) static float mean(const float *input, int size) { - float sum = 0.; + float sum = 0.f; for (int i = 0; i < size; i++) sum += input[i]; @@ -941,7 +925,7 @@ static void transform(float *input, int size, float mean, float half) input[i] = (input[i] - mean) / half; } -static void subtract_mean_old(PrescreenerOldCoefficients *coeffs, float half) +static void subtract_mean_old(PrescreenerCoefficients *coeffs, float half) { for (int n = 0; n < 4; n++) { float m = mean(coeffs->kernel_l0[n], 48); @@ -950,7 +934,7 @@ static void subtract_mean_old(PrescreenerOldCoefficients *coeffs, float half) } } -static void subtract_mean_new(PrescreenerNewCoefficients *coeffs, float half) +static void subtract_mean_new(PrescreenerCoefficients *coeffs, float half) { for (int n = 0; n < 4; n++) { float m = mean(coeffs->kernel_l0[n], 64); @@ -1121,10 +1105,10 @@ static int config_input(AVFilterLink *inlink) break; } - subtract_mean_old(&s->prescreener_old, s->half); - subtract_mean_new(&s->prescreener_new[0], s->half); - subtract_mean_new(&s->prescreener_new[1], s->half); - subtract_mean_new(&s->prescreener_new[2], s->half); + subtract_mean_old(&s->prescreener[0], s->half); + subtract_mean_new(&s->prescreener[1], s->half); + subtract_mean_new(&s->prescreener[2], s->half); + subtract_mean_new(&s->prescreener[3], s->half); s->prescreen[0] = process_old; s->prescreen[1] = process_new; @@ -1136,19 +1120,37 @@ static int config_input(AVFilterLink *inlink) } } - s->prescreen_buf = av_calloc(s->nb_threads * s->planewidth[0], sizeof(*s->prescreen_buf)); - if (!s->prescreen_buf) - return AVERROR(ENOMEM); - s->input_size = (s->planewidth[0] + 64) * (s->planeheight[0] + 6); - s->input_buf = av_calloc(s->nb_threads * s->input_size, sizeof(*s->input_buf)); + s->input_buf = av_calloc(s->nb_threads, sizeof(*s->input_buf)); if (!s->input_buf) return AVERROR(ENOMEM); - s->output_buf = av_calloc(s->nb_threads * s->input_size, sizeof(*s->output_buf)); + for (int i = 0; i < s->nb_threads; i++) { + s->input_buf[i] = av_calloc(s->input_size, sizeof(**s->input_buf)); + if (!s->input_buf[i]) + return AVERROR(ENOMEM); + } + + s->output_buf = av_calloc(s->nb_threads, sizeof(*s->output_buf)); if (!s->output_buf) return AVERROR(ENOMEM); + for (int i = 0; i < s->nb_threads; i++) { + s->output_buf[i] = av_calloc(s->input_size, sizeof(**s->output_buf)); + if (!s->output_buf[i]) + return AVERROR(ENOMEM); + } + + s->prescreen_buf = av_calloc(s->nb_threads, sizeof(*s->prescreen_buf)); + if (!s->prescreen_buf) + return AVERROR(ENOMEM); + + for (int i = 0; i < s->nb_threads; i++) { + s->prescreen_buf[i] = av_calloc(s->planewidth[0], sizeof(**s->prescreen_buf)); + if (!s->prescreen_buf[i]) + return AVERROR(ENOMEM); + } + return 0; } @@ -1156,8 +1158,19 @@ static av_cold void uninit(AVFilterContext *ctx) { NNEDIContext *s = ctx->priv; + for (int i = 0; i < s->nb_threads && s->prescreen_buf; i++) + av_freep(&s->prescreen_buf[i]); + av_freep(&s->prescreen_buf); + + for (int i = 0; i < s->nb_threads && s->input_buf; i++) + av_freep(&s->input_buf[i]); + av_freep(&s->input_buf); + + for (int i = 0; i < s->nb_threads && s->output_buf; i++) + av_freep(&s->output_buf[i]); + av_freep(&s->output_buf); av_freep(&s->fdsp); -- cgit v1.2.3