From 68a2722aee2868084ad3ba1a7a5431735eab049e Mon Sep 17 00:00:00 2001 From: Wu Jianhua Date: Wed, 4 Aug 2021 10:06:13 +0800 Subject: libavfilter/x86/vf_gblur: add ff_verti_slice_avx2/512() The new vertical slice with AVX2/512 acceleration can significantly improve the performance of Gaussian Filter 2D. Performance data: ff_verti_slice_c: 32.57 ff_verti_slice_avx2: 476.19 ff_verti_slice_avx512: 833.33 Co-authored-by: Cheng Yanfei Co-authored-by: Jin Jun Signed-off-by: Wu Jianhua --- libavfilter/vf_gblur.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) (limited to 'libavfilter/vf_gblur.c') diff --git a/libavfilter/vf_gblur.c b/libavfilter/vf_gblur.c index 4780bb6204..a2c410c07b 100644 --- a/libavfilter/vf_gblur.c +++ b/libavfilter/vf_gblur.c @@ -138,6 +138,19 @@ static void do_vertical_columns(float *buffer, int width, int height, } } +static void verti_slice_c(float *buffer, int width, int height, + int slice_start, int slice_end, int steps, + float nu, float boundaryscale) +{ + int aligned_end = slice_start + (((slice_end - slice_start) >> 3) << 3); + /* Filter vertically along columns (process 8 columns in each step) */ + do_vertical_columns(buffer, width, height, slice_start, aligned_end, + steps, nu, boundaryscale, 8); + /* Filter un-aligned columns one by one */ + do_vertical_columns(buffer, width, height, aligned_end, slice_end, + steps, nu, boundaryscale, 1); +} + static int filter_vertically(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) { GBlurContext *s = ctx->priv; @@ -150,16 +163,10 @@ static int filter_vertically(AVFilterContext *ctx, void *arg, int jobnr, int nb_ const int steps = s->steps; const float nu = s->nuV; float *buffer = s->buffer; - int aligned_end; - aligned_end = slice_start + (((slice_end - slice_start) >> 3) << 3); - /* Filter vertically along columns (process 8 columns in each step) */ - do_vertical_columns(buffer, width, height, slice_start, aligned_end, - steps, nu, boundaryscale, 8); + s->verti_slice(buffer, width, height, slice_start, slice_end, + steps, nu, boundaryscale); - /* Filter un-aligned columns one by one */ - do_vertical_columns(buffer, width, height, aligned_end, slice_end, - steps, nu, boundaryscale, 1); return 0; } @@ -236,6 +243,7 @@ static int query_formats(AVFilterContext *ctx) void ff_gblur_init(GBlurContext *s) { s->horiz_slice = horiz_slice_c; + s->verti_slice = verti_slice_c; s->postscale_slice = postscale_c; if (ARCH_X86) ff_gblur_init_x86(s); -- cgit v1.2.3