From b1e72ea506ddac2d353cfedf2536372b302fabdd Mon Sep 17 00:00:00 2001
From: Anton Khirnov
Date: Tue, 23 Apr 2019 15:25:13 +0200
Subject: egs: parallelize diff_coeffs premult operation

---
Review notes (not part of the commit message; text in this area is
ignored when the patch is applied):
 * The line structure and indentation below were restored from a
   whitespace-collapsed copy; 4-space indentation is assumed. Verify
   against the upstream commit before applying.
 * The results of all six tp_execute() calls are discarded. Presumably
   tp_execute() can fail -- confirm, and propagate the error if so.
 * The argument struct is declared anonymously in both
   init_diff_coeffs_task() and mg2di_egs_init(); a single shared named
   type would keep the two layouts from drifting apart.
 * The job count is ctx->domain_size[0], while the removed loop was
   bounded by dst->shape[0]. Confirm these are equal for non-square
   domains.
 * thread_idx is unused in init_diff_coeffs_task().

 ell_grid_solve.c | 42 ++++++++++++++++++++++++++++++++----------
 1 file changed, 32 insertions(+), 10 deletions(-)

diff --git a/ell_grid_solve.c b/ell_grid_solve.c
index 6646992..4cb87ed 100644
--- a/ell_grid_solve.c
+++ b/ell_grid_solve.c
@@ -657,11 +657,19 @@ int mg2di_egs_solve(EGSContext *ctx)
     return ret;
 }
 
-static void init_diff_coeffs(NDArray *dst, const NDArray *src, double factor)
+static int init_diff_coeffs_task(void *arg, unsigned int job_idx, unsigned int thread_idx)
 {
-    for (ptrdiff_t idx0 = 0; idx0 < dst->shape[0]; idx0++)
-        for (ptrdiff_t idx1 = 0; idx1 < dst->shape[1]; idx1++)
-            *NDPTR2D(dst, idx1, idx0) = *NDPTR2D(src, idx1, idx0) * factor;
+    struct { EGSContext *ctx; enum MG2DDiffCoeff dc; double fact; } *a = arg;
+    EGSContext *ctx = a->ctx;
+    EGSInternal *priv = ctx->priv;
+    NDArray *dst = priv->diff_coeffs[a->dc];
+    const NDArray *src = ctx->diff_coeffs[a->dc];
+    const double factor = a->fact;
+
+    for (ptrdiff_t idx1 = 0; idx1 < dst->shape[1]; idx1++)
+        *NDPTR2D(dst, idx1, job_idx) = *NDPTR2D(src, idx1, job_idx) * factor;
+
+    return 0;
 }
 
 int mg2di_egs_init(EGSContext *ctx, int flags)
@@ -704,12 +712,26 @@ int mg2di_egs_init(EGSContext *ctx, int flags)
     }
 
     if (!(flags & EGS_INIT_FLAG_SAME_DIFF_COEFFS)) {
-        init_diff_coeffs(priv->diff_coeffs[MG2D_DIFF_COEFF_00], ctx->diff_coeffs[MG2D_DIFF_COEFF_00], 1.0 / fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_00]);
-        init_diff_coeffs(priv->diff_coeffs[MG2D_DIFF_COEFF_10], ctx->diff_coeffs[MG2D_DIFF_COEFF_10], 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_10] * ctx->step[0]));
-        init_diff_coeffs(priv->diff_coeffs[MG2D_DIFF_COEFF_01], ctx->diff_coeffs[MG2D_DIFF_COEFF_01], 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_01] * ctx->step[1]));
-        init_diff_coeffs(priv->diff_coeffs[MG2D_DIFF_COEFF_20], ctx->diff_coeffs[MG2D_DIFF_COEFF_20], 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_20] * SQR(ctx->step[0])));
-        init_diff_coeffs(priv->diff_coeffs[MG2D_DIFF_COEFF_02], ctx->diff_coeffs[MG2D_DIFF_COEFF_02], 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_02] * SQR(ctx->step[1])));
-        init_diff_coeffs(priv->diff_coeffs[MG2D_DIFF_COEFF_11], ctx->diff_coeffs[MG2D_DIFF_COEFF_11], 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_11] * ctx->step[0] * ctx->step[1]));
+        struct { EGSContext *ctx; enum MG2DDiffCoeff dc; double fact; } arg = { .ctx = ctx };
+
+        arg.dc = MG2D_DIFF_COEFF_00;
+        arg.fact = 1.0 / fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_00];
+        tp_execute(ctx->tp, ctx->domain_size[0], init_diff_coeffs_task, &arg);
+        arg.dc = MG2D_DIFF_COEFF_10;
+        arg.fact = 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_10] * ctx->step[0]);
+        tp_execute(ctx->tp, ctx->domain_size[0], init_diff_coeffs_task, &arg);
+        arg.dc = MG2D_DIFF_COEFF_01;
+        arg.fact = 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_01] * ctx->step[1]);
+        tp_execute(ctx->tp, ctx->domain_size[0], init_diff_coeffs_task, &arg);
+        arg.dc = MG2D_DIFF_COEFF_20;
+        arg.fact = 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_20] * SQR(ctx->step[0]));
+        tp_execute(ctx->tp, ctx->domain_size[0], init_diff_coeffs_task, &arg);
+        arg.dc = MG2D_DIFF_COEFF_02;
+        arg.fact = 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_02] * SQR(ctx->step[1]));
+        tp_execute(ctx->tp, ctx->domain_size[0], init_diff_coeffs_task, &arg);
+        arg.dc = MG2D_DIFF_COEFF_11;
+        arg.fact = 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_11] * ctx->step[0] * ctx->step[1]);
+        tp_execute(ctx->tp, ctx->domain_size[0], init_diff_coeffs_task, &arg);
     }
 
     if (ctx->solver_type == EGS_SOLVER_EXACT) {
-- 
cgit v1.2.3