From 148c8e88c43cfbabd6aee9f01ef30942cee9d359 Mon Sep 17 00:00:00 2001
From: Ashish Singh
Date: Sat, 16 Sep 2017 02:17:58 +0530
Subject: avfilter: add vmafmotion filter

Signed-off-by: Ashish Singh
Signed-off-by: Ronald S. Bultje
---
 libavfilter/vf_vmafmotion.c | 365 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 365 insertions(+)
 create mode 100644 libavfilter/vf_vmafmotion.c

diff --git a/libavfilter/vf_vmafmotion.c b/libavfilter/vf_vmafmotion.c
new file mode 100644
index 0000000000..6b6150a931
--- /dev/null
+++ b/libavfilter/vf_vmafmotion.c
@@ -0,0 +1,365 @@
+/*
+ * Copyright (c) 2017 Ronald S. Bultje
+ * Copyright (c) 2017 Ashish Pratap Singh
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Calculate VMAF Motion score.
+ */
+
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "avfilter.h"
+#include "drawutils.h"
+#include "formats.h"
+#include "internal.h"
+#include "vmaf_motion.h"
+
+#define BIT_SHIFT 15
+
+static const float FILTER_5[5] = {
+    0.054488685,
+    0.244201342,
+    0.402619947,
+    0.244201342,
+    0.054488685
+};
+
+typedef struct VMAFMotionContext {
+    const AVClass *class;
+    VMAFMotionData data;
+    FILE *stats_file;
+    char *stats_file_str;
+} VMAFMotionContext;
+
+#define OFFSET(x) offsetof(VMAFMotionContext, x)
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
+
+static const AVOption vmafmotion_options[] = {
+    {"stats_file", "Set file where to store per-frame difference information", OFFSET(stats_file_str), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(vmafmotion);
+
+static uint64_t image_sad(const uint16_t *img1, const uint16_t *img2, int w,
+                          int h, ptrdiff_t _img1_stride, ptrdiff_t _img2_stride)
+{
+    ptrdiff_t img1_stride = _img1_stride / sizeof(*img1);
+    ptrdiff_t img2_stride = _img2_stride / sizeof(*img2);
+    uint64_t sum = 0;
+    int i, j;
+
+    for (i = 0; i < h; i++) {
+        for (j = 0; j < w; j++) {
+            sum += abs(img1[j] - img2[j]);
+        }
+        img1 += img1_stride;
+        img2 += img2_stride;
+    }
+
+    return sum;
+}
+
+static void convolution_x(const uint16_t *filter, int filt_w, const uint16_t *src,
+                          uint16_t *dst, int w, int h, ptrdiff_t _src_stride,
+                          ptrdiff_t _dst_stride)
+{
+    ptrdiff_t src_stride = _src_stride / sizeof(*src);
+    ptrdiff_t dst_stride = _dst_stride / sizeof(*dst);
+    int radius = filt_w / 2;
+    int borders_left = radius;
+    int borders_right = w - (filt_w - radius);
+    int i, j, k;
+    int sum = 0;
+
+    for (i = 0; i < h; i++) {
+        for (j = 0; j < borders_left; j++) {
+            sum = 0;
+            for (k = 0; k < filt_w; k++) {
+                int j_tap = FFABS(j - radius + k);
+                if (j_tap >= w) {
+                    j_tap = w - (j_tap - w + 1);
+                }
+                sum += filter[k] * src[i * src_stride + j_tap];
+            }
+            dst[i * dst_stride + j] = sum >> BIT_SHIFT;
+        }
+
+        for (j = borders_left; j < borders_right; j++) {
+            int sum = 0;
+            for (k = 0; k < filt_w; k++) {
+                sum += filter[k] * src[i * src_stride + j - radius + k];
+            }
+            dst[i * dst_stride + j] = sum >> BIT_SHIFT;
+        }
+
+        for (j = borders_right; j < w; j++) {
+            sum = 0;
+            for (k = 0; k < filt_w; k++) {
+                int j_tap = FFABS(j - radius + k);
+                if (j_tap >= w) {
+                    j_tap = w - (j_tap - w + 1);
+                }
+                sum += filter[k] * src[i * src_stride + j_tap];
+            }
+            dst[i * dst_stride + j] = sum >> BIT_SHIFT;
+        }
+    }
+}
+
+#define conv_y_fn(type, bits) \
+static void convolution_y_##bits##bit(const uint16_t *filter, int filt_w, \
+                                      const uint8_t *_src, uint16_t *dst, \
+                                      int w, int h, ptrdiff_t _src_stride, \
+                                      ptrdiff_t _dst_stride) \
+{ \
+    const type *src = (const type *) _src; \
+    ptrdiff_t src_stride = _src_stride / sizeof(*src); \
+    ptrdiff_t dst_stride = _dst_stride / sizeof(*dst); \
+    int radius = filt_w / 2; \
+    int borders_top = radius; \
+    int borders_bottom = h - (filt_w - radius); \
+    int i, j, k; \
+    int sum = 0; \
+ \
+    for (i = 0; i < borders_top; i++) { \
+        for (j = 0; j < w; j++) { \
+            sum = 0; \
+            for (k = 0; k < filt_w; k++) { \
+                int i_tap = FFABS(i - radius + k); \
+                if (i_tap >= h) { \
+                    i_tap = h - (i_tap - h + 1); \
+                } \
+                sum += filter[k] * src[i_tap * src_stride + j]; \
+            } \
+            dst[i * dst_stride + j] = sum >> bits; \
+        } \
+    } \
+    for (i = borders_top; i < borders_bottom; i++) { \
+        for (j = 0; j < w; j++) { \
+            sum = 0; \
+            for (k = 0; k < filt_w; k++) { \
+                sum += filter[k] * src[(i - radius + k) * src_stride + j]; \
+            } \
+            dst[i * dst_stride + j] = sum >> bits; \
+        } \
+    } \
+    for (i = borders_bottom; i < h; i++) { \
+        for (j = 0; j < w; j++) { \
+            sum = 0; \
+            for (k = 0; k < filt_w; k++) { \
+                int i_tap = FFABS(i - radius + k); \
+                if (i_tap >= h) { \
+                    i_tap = h - (i_tap - h + 1); \
+                } \
+                sum += filter[k] * src[i_tap * src_stride + j]; \
+            } \
+            dst[i * dst_stride + j] = sum >> bits; \
+        } \
+    } \
+}
+
+conv_y_fn(uint8_t, 8);
+conv_y_fn(uint16_t, 10);
+
+static void vmafmotiondsp_init(VMAFMotionDSPContext *dsp, int bpp) {
+    dsp->convolution_x = convolution_x;
+    dsp->convolution_y = bpp == 10 ? convolution_y_10bit : convolution_y_8bit;
+    dsp->sad = image_sad;
+}
+
+double ff_vmafmotion_process(VMAFMotionData *s, AVFrame *ref)
+{
+    double score;
+
+    s->vmafdsp.convolution_y(s->filter, 5, ref->data[0], s->temp_data,
+                             s->width, s->height, ref->linesize[0], s->stride);
+    s->vmafdsp.convolution_x(s->filter, 5, s->temp_data, s->blur_data[0],
+                             s->width, s->height, s->stride, s->stride);
+
+    if (!s->nb_frames) {
+        score = 0.0;
+    } else {
+        uint64_t sad = s->vmafdsp.sad(s->blur_data[1], s->blur_data[0],
+                                      s->width, s->height, s->stride, s->stride);
+        // the output score is always normalized to 8 bits
+        score = (double) (sad * 1.0 / (s->width * s->height << (BIT_SHIFT - 8)));
+    }
+
+    FFSWAP(uint16_t *, s->blur_data[0], s->blur_data[1]);
+    s->nb_frames++;
+    s->motion_sum += score;
+
+    return score;
+}
+
+static void set_meta(AVDictionary **metadata, const char *key, float d)
+{
+    char value[128];
+    snprintf(value, sizeof(value), "%0.2f", d);
+    av_dict_set(metadata, key, value, 0);
+}
+
+static void do_vmafmotion(AVFilterContext *ctx, AVFrame *ref)
+{
+    VMAFMotionContext *s = ctx->priv;
+    double score;
+
+    score = ff_vmafmotion_process(&s->data, ref);
+    set_meta(&ref->metadata, "lavfi.vmafmotion.score", score);
+    if (s->stats_file) {
+        fprintf(s->stats_file,
+                "n:%"PRId64" motion:%0.2lf\n", s->data.nb_frames, score);
+    }
+}
+
+
+int ff_vmafmotion_init(VMAFMotionData *s,
+                       int w, int h, enum AVPixelFormat fmt)
+{
+    size_t data_sz;
+    int i;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
+
+    s->width = w;
+    s->height = h;
+    s->stride = FFALIGN(w * sizeof(uint16_t), 32);
+
+    data_sz = (size_t) s->stride * h;
+    if (!(s->blur_data[0] = av_malloc(data_sz)) ||
+        !(s->blur_data[1] = av_malloc(data_sz)) ||
+        !(s->temp_data    = av_malloc(data_sz))) {
+        return AVERROR(ENOMEM);
+    }
+
+    for (i = 0; i < 5; i++) {
+        s->filter[i] = lrint(FILTER_5[i] * (1 << BIT_SHIFT));
+    }
+
+    vmafmotiondsp_init(&s->vmafdsp, desc->comp[0].depth);
+
+    return 0;
+}
+
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat pix_fmts[] = {
+        AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV420P,
+        AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV420P10,
+        AV_PIX_FMT_NONE
+    };
+
+    AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
+    if (!fmts_list)
+        return AVERROR(ENOMEM);
+    return ff_set_common_formats(ctx, fmts_list);
+}
+
+static int config_input_ref(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    VMAFMotionContext *s = ctx->priv;
+
+    return ff_vmafmotion_init(&s->data, ctx->inputs[0]->w,
+                              ctx->inputs[0]->h, ctx->inputs[0]->format);
+}
+
+double ff_vmafmotion_uninit(VMAFMotionData *s)
+{
+    av_free(s->blur_data[0]);
+    av_free(s->blur_data[1]);
+    av_free(s->temp_data);
+
+    return s->nb_frames > 0 ? s->motion_sum / s->nb_frames : 0.0;
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *ref)
+{
+    AVFilterContext *ctx = inlink->dst;
+    do_vmafmotion(ctx, ref);
+    return ff_filter_frame(ctx->outputs[0], ref);
+}
+
+static av_cold int init(AVFilterContext *ctx)
+{
+    VMAFMotionContext *s = ctx->priv;
+
+    if (s->stats_file_str) {
+        if (!strcmp(s->stats_file_str, "-")) {
+            s->stats_file = stdout;
+        } else {
+            s->stats_file = fopen(s->stats_file_str, "w");
+            if (!s->stats_file) {
+                int err = AVERROR(errno);
+                char buf[128];
+                av_strerror(err, buf, sizeof(buf));
+                av_log(ctx, AV_LOG_ERROR, "Could not open stats file %s: %s\n",
+                       s->stats_file_str, buf);
+                return err;
+            }
+        }
+    }
+
+    return 0;
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    VMAFMotionContext *s = ctx->priv;
+    double avg_motion = ff_vmafmotion_uninit(&s->data);
+
+    if (s->data.nb_frames > 0) {
+        av_log(ctx, AV_LOG_INFO, "VMAF Motion avg: %.3f\n", avg_motion);
+    }
+
+    if (s->stats_file && s->stats_file != stdout)
+        fclose(s->stats_file);
+}
+
+static const AVFilterPad vmafmotion_inputs[] = {
+    {
+        .name         = "reference",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame,
+        .config_props = config_input_ref,
+    },
+    { NULL }
+};
+
+static const AVFilterPad vmafmotion_outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_vmafmotion = {
+    .name          = "vmafmotion",
+    .description   = NULL_IF_CONFIG_SMALL("Calculate the VMAF Motion score."),
+    .init          = init,
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    .priv_size     = sizeof(VMAFMotionContext),
+    .priv_class    = &vmafmotion_class,
+    .inputs        = vmafmotion_inputs,
+    .outputs       = vmafmotion_outputs,
+};
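
Note on usage: the motion routines are exported as ff_vmafmotion_init(), ff_vmafmotion_process() and ff_vmafmotion_uninit(), declared in the accompanying vmaf_motion.h (not shown in this diff view), so they can be reused outside this filter. From the command line the filter itself can be run along the lines of: ffmpeg -i reference.mpg -vf vmafmotion -f null -. The sketch below is a rough illustration of how the exported API is meant to be driven; score_frames() and the caller-supplied frame array are hypothetical and not part of the patch, only the ff_vmafmotion_* calls come from it.

#include "libavutil/frame.h"
#include "libavutil/log.h"
#include "vmaf_motion.h"

/* Hypothetical caller; only the ff_vmafmotion_* calls come from the patch. */
static double score_frames(AVFrame **frames, int nb_frames)
{
    VMAFMotionData d = { 0 };  /* zero-initialized, like a filter's priv context */
    int i, ret;

    ret = ff_vmafmotion_init(&d, frames[0]->width, frames[0]->height,
                             frames[0]->format);
    if (ret < 0)
        return ret;

    for (i = 0; i < nb_frames; i++) {
        /* Per-frame motion score; 0.0 for the first frame, since there is
         * no previous blurred plane to take the SAD against yet. */
        double score = ff_vmafmotion_process(&d, frames[i]);
        av_log(NULL, AV_LOG_DEBUG, "n:%d motion:%0.2f\n", i, score);
    }

    /* Frees the blur/temp buffers and returns the average per-frame score. */
    return ff_vmafmotion_uninit(&d);
}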