summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Anton Khirnov <anton@khirnov.net>, 2019-04-23 15:25:13 +0200
committer: Anton Khirnov <anton@khirnov.net>, 2019-04-23 15:41:02 +0200
commit: b1e72ea506ddac2d353cfedf2536372b302fabdd (patch)
tree: e128c18a0c6a76219c751dcb211fe9672184af55
parent: b2d93a84e3a9c84e8c591c9f55355b5834c03d4b (diff)
egs: parallelize diff_coeffs premult operation
-rw-r--r-- ell_grid_solve.c 42
1 file changed, 32 insertions, 10 deletions
diff --git a/ell_grid_solve.c b/ell_grid_solve.c
index 6646992..4cb87ed 100644
--- a/ell_grid_solve.c
+++ b/ell_grid_solve.c
@@ -657,11 +657,19 @@ int mg2di_egs_solve(EGSContext *ctx)
return ret;
}
-static void init_diff_coeffs(NDArray *dst, const NDArray *src, double factor)
+static int init_diff_coeffs_task(void *arg, unsigned int job_idx, unsigned int thread_idx)
{
- for (ptrdiff_t idx0 = 0; idx0 < dst->shape[0]; idx0++)
- for (ptrdiff_t idx1 = 0; idx1 < dst->shape[1]; idx1++)
- *NDPTR2D(dst, idx1, idx0) = *NDPTR2D(src, idx1, idx0) * factor;
+ struct { EGSContext *ctx; enum MG2DDiffCoeff dc; double fact; } *a = arg;
+ EGSContext *ctx = a->ctx;
+ EGSInternal *priv = ctx->priv;
+ NDArray *dst = priv->diff_coeffs[a->dc];
+ const NDArray *src = ctx->diff_coeffs[a->dc];
+ const double factor = a->fact;
+
+ for (ptrdiff_t idx1 = 0; idx1 < dst->shape[1]; idx1++)
+ *NDPTR2D(dst, idx1, job_idx) = *NDPTR2D(src, idx1, job_idx) * factor;
+
+ return 0;
}
int mg2di_egs_init(EGSContext *ctx, int flags)
@@ -704,12 +712,26 @@ int mg2di_egs_init(EGSContext *ctx, int flags)
}
if (!(flags & EGS_INIT_FLAG_SAME_DIFF_COEFFS)) {
- init_diff_coeffs(priv->diff_coeffs[MG2D_DIFF_COEFF_00], ctx->diff_coeffs[MG2D_DIFF_COEFF_00], 1.0 / fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_00]);
- init_diff_coeffs(priv->diff_coeffs[MG2D_DIFF_COEFF_10], ctx->diff_coeffs[MG2D_DIFF_COEFF_10], 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_10] * ctx->step[0]));
- init_diff_coeffs(priv->diff_coeffs[MG2D_DIFF_COEFF_01], ctx->diff_coeffs[MG2D_DIFF_COEFF_01], 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_01] * ctx->step[1]));
- init_diff_coeffs(priv->diff_coeffs[MG2D_DIFF_COEFF_20], ctx->diff_coeffs[MG2D_DIFF_COEFF_20], 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_20] * SQR(ctx->step[0])));
- init_diff_coeffs(priv->diff_coeffs[MG2D_DIFF_COEFF_02], ctx->diff_coeffs[MG2D_DIFF_COEFF_02], 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_02] * SQR(ctx->step[1])));
- init_diff_coeffs(priv->diff_coeffs[MG2D_DIFF_COEFF_11], ctx->diff_coeffs[MG2D_DIFF_COEFF_11], 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_11] * ctx->step[0] * ctx->step[1]));
+ struct { EGSContext *ctx; enum MG2DDiffCoeff dc; double fact; } arg = { .ctx = ctx };
+
+ arg.dc = MG2D_DIFF_COEFF_00;
+ arg.fact = 1.0 / fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_00];
+ tp_execute(ctx->tp, ctx->domain_size[0], init_diff_coeffs_task, &arg);
+ arg.dc = MG2D_DIFF_COEFF_10;
+ arg.fact = 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_10] * ctx->step[0]);
+ tp_execute(ctx->tp, ctx->domain_size[0], init_diff_coeffs_task, &arg);
+ arg.dc = MG2D_DIFF_COEFF_01;
+ arg.fact = 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_01] * ctx->step[1]);
+ tp_execute(ctx->tp, ctx->domain_size[0], init_diff_coeffs_task, &arg);
+ arg.dc = MG2D_DIFF_COEFF_20;
+ arg.fact = 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_20] * SQR(ctx->step[0]));
+ tp_execute(ctx->tp, ctx->domain_size[0], init_diff_coeffs_task, &arg);
+ arg.dc = MG2D_DIFF_COEFF_02;
+ arg.fact = 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_02] * SQR(ctx->step[1]));
+ tp_execute(ctx->tp, ctx->domain_size[0], init_diff_coeffs_task, &arg);
+ arg.dc = MG2D_DIFF_COEFF_11;
+ arg.fact = 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_11] * ctx->step[0] * ctx->step[1]);
+ tp_execute(ctx->tp, ctx->domain_size[0], init_diff_coeffs_task, &arg);
}
if (ctx->solver_type == EGS_SOLVER_EXACT) {