From 581bafa83b9cb4f438940e0b10a9f9832ebd57b6 Mon Sep 17 00:00:00 2001 From: Danil Iashchenko Date: Sat, 30 Jun 2018 03:46:24 +0300 Subject: lavfi: add sobel, prewitt, roberts filters Add opencl version of sobel, prewitt, roberts filters. --- libavfilter/Makefile | 8 +- libavfilter/allfilters.c | 3 + libavfilter/opencl/convolution.cl | 82 +++++++++++++ libavfilter/vf_convolution_opencl.c | 233 +++++++++++++++++++++++++++++------- 4 files changed, 280 insertions(+), 46 deletions(-) (limited to 'libavfilter') diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 15dce437da..7735c26529 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -171,7 +171,7 @@ OBJS-$(CONFIG_COLORMATRIX_FILTER) += vf_colormatrix.o OBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o colorspace.o colorspacedsp.o OBJS-$(CONFIG_CONVOLUTION_FILTER) += vf_convolution.o OBJS-$(CONFIG_CONVOLUTION_OPENCL_FILTER) += vf_convolution_opencl.o opencl.o \ - opencl/convolution.o + opencl/convolution.o OBJS-$(CONFIG_CONVOLVE_FILTER) += vf_convolve.o framesync.o OBJS-$(CONFIG_COPY_FILTER) += vf_copy.o OBJS-$(CONFIG_COREIMAGE_FILTER) += vf_coreimage.o @@ -294,6 +294,8 @@ OBJS-$(CONFIG_PP_FILTER) += vf_pp.o OBJS-$(CONFIG_PP7_FILTER) += vf_pp7.o OBJS-$(CONFIG_PREMULTIPLY_FILTER) += vf_premultiply.o framesync.o OBJS-$(CONFIG_PREWITT_FILTER) += vf_convolution.o +OBJS-$(CONFIG_PREWITT_OPENCL_FILTER) += vf_convolution_opencl.o opencl.o \ + opencl/convolution.o OBJS-$(CONFIG_PROCAMP_VAAPI_FILTER) += vf_procamp_vaapi.o vaapi_vpp.o OBJS-$(CONFIG_PROGRAM_OPENCL_FILTER) += vf_program_opencl.o opencl.o framesync.o OBJS-$(CONFIG_PSEUDOCOLOR_FILTER) += vf_pseudocolor.o @@ -310,6 +312,8 @@ OBJS-$(CONFIG_REMOVELOGO_FILTER) += bbox.o lswsutils.o lavfutils.o v OBJS-$(CONFIG_REPEATFIELDS_FILTER) += vf_repeatfields.o OBJS-$(CONFIG_REVERSE_FILTER) += f_reverse.o OBJS-$(CONFIG_ROBERTS_FILTER) += vf_convolution.o +OBJS-$(CONFIG_ROBERTS_OPENCL_FILTER) += vf_convolution_opencl.o opencl.o \ + opencl/convolution.o OBJS-$(CONFIG_ROTATE_FILTER) += vf_rotate.o OBJS-$(CONFIG_SAB_FILTER) += vf_sab.o OBJS-$(CONFIG_SCALE_FILTER) += vf_scale.o scale.o @@ -338,6 +342,8 @@ OBJS-$(CONFIG_SIGNALSTATS_FILTER) += vf_signalstats.o OBJS-$(CONFIG_SIGNATURE_FILTER) += vf_signature.o OBJS-$(CONFIG_SMARTBLUR_FILTER) += vf_smartblur.o OBJS-$(CONFIG_SOBEL_FILTER) += vf_convolution.o +OBJS-$(CONFIG_SOBEL_OPENCL_FILTER) += vf_convolution_opencl.o opencl.o \ + opencl/convolution.o OBJS-$(CONFIG_SPLIT_FILTER) += split.o OBJS-$(CONFIG_SPP_FILTER) += vf_spp.o OBJS-$(CONFIG_SR_FILTER) += vf_sr.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index e0a32f65c6..0ded83ede2 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -282,6 +282,7 @@ extern AVFilter ff_vf_pp; extern AVFilter ff_vf_pp7; extern AVFilter ff_vf_premultiply; extern AVFilter ff_vf_prewitt; +extern AVFilter ff_vf_prewitt_opencl; extern AVFilter ff_vf_procamp_vaapi; extern AVFilter ff_vf_program_opencl; extern AVFilter ff_vf_pseudocolor; @@ -298,6 +299,7 @@ extern AVFilter ff_vf_removelogo; extern AVFilter ff_vf_repeatfields; extern AVFilter ff_vf_reverse; extern AVFilter ff_vf_roberts; +extern AVFilter ff_vf_roberts_opencl; extern AVFilter ff_vf_rotate; extern AVFilter ff_vf_sab; extern AVFilter ff_vf_scale; @@ -326,6 +328,7 @@ extern AVFilter ff_vf_signalstats; extern AVFilter ff_vf_signature; extern AVFilter ff_vf_smartblur; extern AVFilter ff_vf_sobel; +extern AVFilter ff_vf_sobel_opencl; extern AVFilter ff_vf_split; extern AVFilter ff_vf_spp; extern AVFilter ff_vf_sr; diff --git a/libavfilter/opencl/convolution.cl b/libavfilter/opencl/convolution.cl index 03ef4eff1b..815c779272 100644 --- a/libavfilter/opencl/convolution.cl +++ b/libavfilter/opencl/convolution.cl @@ -43,3 +43,85 @@ __kernel void convolution_global(__write_only image2d_t dst, float4 dstPix = convPix * div + bias; write_imagef(dst, loc, dstPix); } + + +__kernel void sobel_global(__write_only image2d_t dst, + __read_only image2d_t src, + float div, + float bias) +{ + const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | + CLK_ADDRESS_CLAMP_TO_EDGE | + CLK_FILTER_NEAREST); + + int2 loc = (int2)(get_global_id(0), get_global_id(1)); + + float4 sum1 = read_imagef(src, sampler, loc + (int2)(-1,-1)) * -1 + + read_imagef(src, sampler, loc + (int2)( 0,-1)) * -2 + + read_imagef(src, sampler, loc + (int2)( 1,-1)) * -1 + + read_imagef(src, sampler, loc + (int2)(-1, 1)) * 1 + + read_imagef(src, sampler, loc + (int2)( 0, 1)) * 2 + + read_imagef(src, sampler, loc + (int2)( 1, 1)) * 1; + + float4 sum2 = read_imagef(src, sampler, loc + (int2)(-1,-1)) * -1 + + read_imagef(src, sampler, loc + (int2)(-1, 0)) * -2 + + read_imagef(src, sampler, loc + (int2)(-1, 1)) * -1 + + read_imagef(src, sampler, loc + (int2)( 1,-1)) * 1 + + read_imagef(src, sampler, loc + (int2)( 1, 0)) * 2 + + read_imagef(src, sampler, loc + (int2)( 1, 1)) * 1; + + float4 dstPix = hypot(sum1, sum2) * div + bias; + write_imagef(dst, loc, dstPix); +} + +__kernel void prewitt_global(__write_only image2d_t dst, + __read_only image2d_t src, + float div, + float bias) +{ + const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | + CLK_ADDRESS_CLAMP_TO_EDGE | + CLK_FILTER_NEAREST); + + int2 loc = (int2)(get_global_id(0), get_global_id(1)); + + float4 sum1 = read_imagef(src, sampler, loc + (int2)(-1,-1)) * 1 + + read_imagef(src, sampler, loc + (int2)( 0,-1)) * 1 + + read_imagef(src, sampler, loc + (int2)( 1,-1)) * 1 + + read_imagef(src, sampler, loc + (int2)(-1, 1)) * -1 + + read_imagef(src, sampler, loc + (int2)( 0, 1)) * -1 + + read_imagef(src, sampler, loc + (int2)( 1, 1)) * -1; + + float4 sum2 = read_imagef(src, sampler, loc + (int2)(-1,-1)) * 1 + + read_imagef(src, sampler, loc + (int2)(-1, 0)) * 1 + + read_imagef(src, sampler, loc + (int2)(-1, 1)) * 1 + + read_imagef(src, sampler, loc + (int2)( 1,-1)) * -1 + + read_imagef(src, sampler, loc + (int2)( 1, 0)) * -1 + + read_imagef(src, sampler, loc + (int2)( 1, 1)) * -1; + + float4 dstPix = hypot(sum1, sum2) * div + bias; + write_imagef(dst, loc, dstPix); +} + +__kernel void roberts_global(__write_only image2d_t dst, + __read_only image2d_t src, + float div, + float bias) +{ + const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | + CLK_ADDRESS_CLAMP_TO_EDGE | + CLK_FILTER_NEAREST); + + int2 loc = (int2)(get_global_id(0), get_global_id(1)); + + float4 sum1 = read_imagef(src, sampler, loc + (int2)(-1,-1)) * 1 + + read_imagef(src, sampler, loc + (int2)( 0,-1)) * -1; + + + float4 sum2 = read_imagef(src, sampler, loc + (int2)(-1, 0)) * -1 + + read_imagef(src, sampler, loc + (int2)( 0, 0)) * 1; + + + float4 dstPix = hypot(sum1, sum2) * div + bias; + write_imagef(dst, loc, dstPix); +} diff --git a/libavfilter/vf_convolution_opencl.c b/libavfilter/vf_convolution_opencl.c index 4d0ecf8b9a..e2ba81b45e 100644 --- a/libavfilter/vf_convolution_opencl.c +++ b/libavfilter/vf_convolution_opencl.c @@ -47,12 +47,16 @@ typedef struct ConvolutionOpenCLContext { cl_float rdivs[4]; cl_float biases[4]; -} ConvolutionOpenCLContext; + cl_int planes; + cl_float scale; + cl_float delta; +} ConvolutionOpenCLContext; static int convolution_opencl_init(AVFilterContext *avctx) { ConvolutionOpenCLContext *ctx = avctx->priv; + const char *kernel_name; cl_int cle; int err; @@ -70,7 +74,16 @@ static int convolution_opencl_init(AVFilterContext *avctx) goto fail; } - ctx->kernel = clCreateKernel(ctx->ocf.program, "convolution_global", &cle); + if (!strcmp(avctx->filter->name, "convolution_opencl")) { + kernel_name = "convolution_global"; + } else if (!strcmp(avctx->filter->name, "sobel_opencl")) { + kernel_name = "sobel_global"; + } else if (!strcmp(avctx->filter->name, "prewitt_opencl")){ + kernel_name = "prewitt_global"; + } else if (!strcmp(avctx->filter->name, "roberts_opencl")){ + kernel_name = "roberts_global"; + } + ctx->kernel = clCreateKernel(ctx->ocf.program, kernel_name, &cle); if (!ctx->kernel) { av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n", cle); err = AVERROR(EIO); @@ -173,6 +186,8 @@ static int convolution_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input) size_t global_work[2]; cl_mem src, dst; int err, p; + size_t origin[3] = {0, 0, 0}; + size_t region[3] = {0, 0, 1}; av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n", av_get_pix_fmt_name(input->format), @@ -186,9 +201,14 @@ static int convolution_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input) if (err < 0) goto fail; - err = convolution_opencl_make_filter_params(avctx); - if (err < 0) - goto fail; + if (!strcmp(avctx->filter->name, "convolution_opencl")) { + err = convolution_opencl_make_filter_params(avctx); + if (err < 0) + goto fail; + } else { + ctx->delta /= 255.0; + } + } output = ff_get_video_buffer(outlink, outlink->w, outlink->h); @@ -204,29 +224,69 @@ static int convolution_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input) if (!dst) break; - CL_SET_KERNEL_ARG(ctx->kernel, 0, cl_mem, &dst); - CL_SET_KERNEL_ARG(ctx->kernel, 1, cl_mem, &src); - CL_SET_KERNEL_ARG(ctx->kernel, 2, cl_int, &ctx->dims[p]); - CL_SET_KERNEL_ARG(ctx->kernel, 3, cl_mem, &ctx->matrix[p]); - CL_SET_KERNEL_ARG(ctx->kernel, 4, cl_float, &ctx->rdivs[p]); - CL_SET_KERNEL_ARG(ctx->kernel, 5, cl_float, &ctx->biases[p]); - - err = ff_opencl_filter_work_size_from_image(avctx, global_work, output, p, 0); - if (err < 0) - goto fail; - - av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d " - "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n", - p, global_work[0], global_work[1]); - - cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL, - global_work, NULL, - 0, NULL, NULL); - if (cle != CL_SUCCESS) { - av_log(avctx, AV_LOG_ERROR, "Failed to enqueue kernel: %d.\n", - cle); - err = AVERROR(EIO); - goto fail; + if (!strcmp(avctx->filter->name, "convolution_opencl")) { + CL_SET_KERNEL_ARG(ctx->kernel, 0, cl_mem, &dst); + CL_SET_KERNEL_ARG(ctx->kernel, 1, cl_mem, &src); + CL_SET_KERNEL_ARG(ctx->kernel, 2, cl_int, &ctx->dims[p]); + CL_SET_KERNEL_ARG(ctx->kernel, 3, cl_mem, &ctx->matrix[p]); + CL_SET_KERNEL_ARG(ctx->kernel, 4, cl_float, &ctx->rdivs[p]); + CL_SET_KERNEL_ARG(ctx->kernel, 5, cl_float, &ctx->biases[p]); + + err = ff_opencl_filter_work_size_from_image(avctx, global_work, output, p, 0); + if (err < 0) + goto fail; + + av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d " + "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n", + p, global_work[0], global_work[1]); + + cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL, + global_work, NULL, + 0, NULL, NULL); + if (cle != CL_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Failed to enqueue kernel: %d.\n", + cle); + err = AVERROR(EIO); + goto fail; + } + } else { + if (!(ctx->planes & (1 << p))) { + err = ff_opencl_filter_work_size_from_image(avctx, region, output, p, 0); + if (err < 0) + goto fail; + + cle = clEnqueueCopyImage(ctx->command_queue, src, dst, + origin, origin, region, 0, NULL, NULL); + if (cle != CL_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Failed to copy plane %d: %d.\n", + p, cle); + err = AVERROR(EIO); + goto fail; + } + } else { + CL_SET_KERNEL_ARG(ctx->kernel, 0, cl_mem, &dst); + CL_SET_KERNEL_ARG(ctx->kernel, 1, cl_mem, &src); + CL_SET_KERNEL_ARG(ctx->kernel, 2, cl_float, &ctx->scale); + CL_SET_KERNEL_ARG(ctx->kernel, 3, cl_float, &ctx->delta); + + err = ff_opencl_filter_work_size_from_image(avctx, global_work, output, p, 0); + if (err < 0) + goto fail; + + av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d " + "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n", + p, global_work[0], global_work[1]); + + cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL, + global_work, NULL, + 0, NULL, NULL); + if (cle != CL_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Failed to enqueue kernel: %d.\n", + cle); + err = AVERROR(EIO); + goto fail; + } + } } } @@ -284,8 +344,30 @@ static av_cold void convolution_opencl_uninit(AVFilterContext *avctx) ff_opencl_filter_uninit(avctx); } +static const AVFilterPad convolution_opencl_inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = &convolution_opencl_filter_frame, + .config_props = &ff_opencl_filter_config_input, + }, + { NULL } +}; + +static const AVFilterPad convolution_opencl_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &ff_opencl_filter_config_output, + }, + { NULL } +}; + #define OFFSET(x) offsetof(ConvolutionOpenCLContext, x) #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) + +#if CONFIG_CONVOLUTION_OPENCL_FILTER + static const AVOption convolution_opencl_options[] = { { "0m", "set matrix for 2nd plane", OFFSET(matrix_str[0]), AV_OPT_TYPE_STRING, {.str="0 0 0 0 1 0 0 0 0"}, 0, 0, FLAGS }, { "1m", "set matrix for 2nd plane", OFFSET(matrix_str[1]), AV_OPT_TYPE_STRING, {.str="0 0 0 0 1 0 0 0 0"}, 0, 0, FLAGS }, @@ -304,30 +386,89 @@ static const AVOption convolution_opencl_options[] = { AVFILTER_DEFINE_CLASS(convolution_opencl); -static const AVFilterPad convolution_opencl_inputs[] = { - { - .name = "default", - .type = AVMEDIA_TYPE_VIDEO, - .filter_frame = &convolution_opencl_filter_frame, - .config_props = &ff_opencl_filter_config_input, - }, +AVFilter ff_vf_convolution_opencl = { + .name = "convolution_opencl", + .description = NULL_IF_CONFIG_SMALL("Apply convolution mask to input video"), + .priv_size = sizeof(ConvolutionOpenCLContext), + .priv_class = &convolution_opencl_class, + .init = &ff_opencl_filter_init, + .uninit = &convolution_opencl_uninit, + .query_formats = &ff_opencl_filter_query_formats, + .inputs = convolution_opencl_inputs, + .outputs = convolution_opencl_outputs, + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; + +#endif /* CONFIG_CONVOLUTION_OPENCL_FILTER */ + +#if CONFIG_SOBEL_OPENCL_FILTER + +static const AVOption sobel_opencl_options[] = { + { "planes", "set planes to filter", OFFSET(planes), AV_OPT_TYPE_INT, {.i64=15}, 0, 15, FLAGS}, + { "scale", "set scale", OFFSET(scale), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, 65535, FLAGS}, + { "delta", "set delta", OFFSET(delta), AV_OPT_TYPE_FLOAT, {.dbl=0}, -65535, 65535, FLAGS}, { NULL } }; -static const AVFilterPad convolution_opencl_outputs[] = { - { - .name = "default", - .type = AVMEDIA_TYPE_VIDEO, - .config_props = &ff_opencl_filter_config_output, - }, +AVFILTER_DEFINE_CLASS(sobel_opencl); + +AVFilter ff_vf_sobel_opencl = { + .name = "sobel_opencl", + .description = NULL_IF_CONFIG_SMALL("Apply sobel operator"), + .priv_size = sizeof(ConvolutionOpenCLContext), + .priv_class = &sobel_opencl_class, + .init = &ff_opencl_filter_init, + .uninit = &convolution_opencl_uninit, + .query_formats = &ff_opencl_filter_query_formats, + .inputs = convolution_opencl_inputs, + .outputs = convolution_opencl_outputs, + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; + +#endif /* CONFIG_SOBEL_OPENCL_FILTER */ + +#if CONFIG_PREWITT_OPENCL_FILTER + +static const AVOption prewitt_opencl_options[] = { + { "planes", "set planes to filter", OFFSET(planes), AV_OPT_TYPE_INT, {.i64=15}, 0, 15, FLAGS}, + { "scale", "set scale", OFFSET(scale), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, 65535, FLAGS}, + { "delta", "set delta", OFFSET(delta), AV_OPT_TYPE_FLOAT, {.dbl=0}, -65535, 65535, FLAGS}, { NULL } }; -AVFilter ff_vf_convolution_opencl = { - .name = "convolution_opencl", - .description = NULL_IF_CONFIG_SMALL("Apply convolution mask to input video"), +AVFILTER_DEFINE_CLASS(prewitt_opencl); + +AVFilter ff_vf_prewitt_opencl = { + .name = "prewitt_opencl", + .description = NULL_IF_CONFIG_SMALL("Apply prewitt operator"), .priv_size = sizeof(ConvolutionOpenCLContext), - .priv_class = &convolution_opencl_class, + .priv_class = &prewitt_opencl_class, + .init = &ff_opencl_filter_init, + .uninit = &convolution_opencl_uninit, + .query_formats = &ff_opencl_filter_query_formats, + .inputs = convolution_opencl_inputs, + .outputs = convolution_opencl_outputs, + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; + +#endif /* CONFIG_PREWITT_OPENCL_FILTER */ + +#if CONFIG_ROBERTS_OPENCL_FILTER + +static const AVOption roberts_opencl_options[] = { + { "planes", "set planes to filter", OFFSET(planes), AV_OPT_TYPE_INT, {.i64=15}, 0, 15, FLAGS}, + { "scale", "set scale", OFFSET(scale), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, 65535, FLAGS}, + { "delta", "set delta", OFFSET(delta), AV_OPT_TYPE_FLOAT, {.dbl=0}, -65535, 65535, FLAGS}, + { NULL } +}; + +AVFILTER_DEFINE_CLASS(roberts_opencl); + +AVFilter ff_vf_roberts_opencl = { + .name = "roberts_opencl", + .description = NULL_IF_CONFIG_SMALL("Apply roberts operator"), + .priv_size = sizeof(ConvolutionOpenCLContext), + .priv_class = &roberts_opencl_class, .init = &ff_opencl_filter_init, .uninit = &convolution_opencl_uninit, .query_formats = &ff_opencl_filter_query_formats, @@ -335,3 +476,5 @@ AVFilter ff_vf_convolution_opencl = { .outputs = convolution_opencl_outputs, .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, }; + +#endif /* CONFIG_ROBERTS_OPENCL_FILTER */ -- cgit v1.2.3