From 8bc1553cdb59f462bd98676885d4fcb37286c546 Mon Sep 17 00:00:00 2001 From: Clément Bœsch Date: Sat, 15 Nov 2014 22:15:07 +0100 Subject: avfilter/xbr: add slice threading --- libavfilter/vf_xbr.c | 51 ++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 11 deletions(-) (limited to 'libavfilter/vf_xbr.c') diff --git a/libavfilter/vf_xbr.c b/libavfilter/vf_xbr.c index e87f3e7585..6db7570603 100644 --- a/libavfilter/vf_xbr.c +++ b/libavfilter/vf_xbr.c @@ -26,8 +26,6 @@ * * @see http://www.libretro.com/forums/viewtopic.php?f=6&t=134 * @see https://github.com/yoyofr/iFBA/blob/master/fba_src/src/intf/video/scalers/xbr.cpp - * - * @todo add threading */ #include "libavutil/opt.h" @@ -40,7 +38,7 @@ #define RED_BLUE_MASK 0x00FF00FF #define GREEN_MASK 0x0000FF00 -typedef void (*xbrfunc_t)(AVFrame *input, AVFrame *output, const uint32_t *r2y); +typedef int (*xbrfunc_t)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs); typedef struct { const AVClass *class; @@ -49,6 +47,11 @@ typedef struct { uint32_t rgbtoyuv[1<<24]; } XBRContext; +typedef struct ThreadData { + AVFrame *in, *out; + const uint32_t *rgbtoyuv; +} ThreadData; + #define OFFSET(x) offsetof(XBRContext, x) #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM static const AVOption xbr_options[] = { @@ -182,12 +185,18 @@ static uint32_t pixel_diff(uint32_t x, uint32_t y, const uint32_t *r2y) } \ } while (0) -static void xbr2x(AVFrame * input, AVFrame * output, const uint32_t * r2y) +static int xbr2x(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) { int x, y; + const ThreadData *td = arg; + const AVFrame *input = td->in; + AVFrame *output = td->out; + const uint32_t *r2y = td->rgbtoyuv; + const int slice_start = (input->height * jobnr ) / nb_jobs; + const int slice_end = (input->height * (jobnr+1)) / nb_jobs; const int nl = output->linesize[0] >> 2; - for (y = 0; y < input->height; y++) { + for (y = slice_start; y < slice_end; y++) { INIT_SRC_DST_POINTERS(2) for (x = 0; x < input->width; x++) { @@ -209,6 +218,7 @@ static void xbr2x(AVFrame * input, AVFrame * output, const uint32_t * r2y) E += 2; } } + return 0; } #define FILT3(PE, PI, PH, PF, PG, PC, PD, PB, PA, G5, C4, G0, D0, C1, B1, F4, I4, H5, I5, A0, A1, \ @@ -251,13 +261,19 @@ static void xbr2x(AVFrame * input, AVFrame * output, const uint32_t * r2y) } \ } while (0) -static void xbr3x(AVFrame *input, AVFrame *output, const uint32_t *r2y) +static int xbr3x(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) { int x, y; + const ThreadData *td = arg; + const AVFrame *input = td->in; + AVFrame *output = td->out; + const uint32_t *r2y = td->rgbtoyuv; + const int slice_start = (input->height * jobnr ) / nb_jobs; + const int slice_end = (input->height * (jobnr+1)) / nb_jobs; const int nl = output->linesize[0] >> 2; const int nl1 = nl + nl; - for (y = 0; y < input->height; y++) { + for (y = slice_start; y < slice_end; y++) { INIT_SRC_DST_POINTERS(3) for (x = 0; x < input->width; x++) { @@ -281,6 +297,7 @@ static void xbr3x(AVFrame *input, AVFrame *output, const uint32_t *r2y) E += 3; } } + return 0; } #define FILT4(PE, PI, PH, PF, PG, PC, PD, PB, PA, G5, C4, G0, D0, C1, B1, F4, I4, H5, I5, A0, A1, \ @@ -327,14 +344,20 @@ static void xbr3x(AVFrame *input, AVFrame *output, const uint32_t *r2y) } \ } while (0) -static void xbr4x(AVFrame *input, AVFrame *output, const uint32_t *r2y) +static int xbr4x(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) { int x, y; + const ThreadData *td = arg; + const AVFrame *input = td->in; + AVFrame *output = td->out; + const uint32_t *r2y = td->rgbtoyuv; + const int slice_start = (input->height * jobnr ) / nb_jobs; + const int slice_end = (input->height * (jobnr+1)) / nb_jobs; const int nl = output->linesize[0] >> 2; const int nl1 = nl + nl; const int nl2 = nl1 + nl; - for (y = 0; y < input->height; y++) { + for (y = slice_start; y < slice_end; y++) { INIT_SRC_DST_POINTERS(4) for (x = 0; x < input->width; x++) { @@ -359,6 +382,7 @@ static void xbr4x(AVFrame *input, AVFrame *output, const uint32_t *r2y) E += 4; } } + return 0; } static int config_output(AVFilterLink *outlink) @@ -387,7 +411,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) AVFilterContext *ctx = inlink->dst; AVFilterLink *outlink = ctx->outputs[0]; XBRContext *xbr = ctx->priv; - const uint32_t *r2y = xbr->rgbtoyuv; + ThreadData td; AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h); if (!out) { @@ -396,7 +420,11 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) } av_frame_copy_props(out, in); - xbr->func(in, out, r2y); + + td.in = in; + td.out = out; + td.rgbtoyuv = xbr->rgbtoyuv; + ctx->internal->execute(ctx, xbr->func, &td, NULL, FFMIN(inlink->h, ctx->graph->nb_threads)); out->width = outlink->w; out->height = outlink->h; @@ -459,4 +487,5 @@ AVFilter ff_vf_xbr = { .priv_size = sizeof(XBRContext), .priv_class = &xbr_class, .init = init, + .flags = AVFILTER_FLAG_SLICE_THREADS, }; -- cgit v1.2.3