From a4a5fc2666bfd3ca009ce6c7d05bc83ff4d57313 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Tue, 26 Mar 2019 17:31:58 +0100 Subject: egs_exact: do not construct the matrix more often than necessary It does not change unless the diff coeffs change. --- ell_grid_solve.c | 42 ++++++++++++++++++++++++++++++++---------- ell_grid_solve.h | 4 +++- mg2d.c | 10 +++++++--- relax_test.c | 2 +- 4 files changed, 43 insertions(+), 15 deletions(-) diff --git a/ell_grid_solve.c b/ell_grid_solve.c index dc0210e..d597e58 100644 --- a/ell_grid_solve.c +++ b/ell_grid_solve.c @@ -56,12 +56,16 @@ typedef struct EGSExactInternal { double *mat; double *mat_f; double *rhs; + double *rhs_mat; + double *rhs_factor; double *x; int *ipiv; double *scratch_lines; ptrdiff_t scratch_lines_allocated; + int mat_filled; + BiCGStabContext *bicgstab; } EGSExactInternal; @@ -416,7 +420,7 @@ static void mat_fill_row(EGSContext *ctx, double *scratch_line, e->fill_mat(mat_row + row_offset * mat_stride, mat_stride, row_stride, ctx->diff_coeffs, ctx->priv->fd_factors, idx_src); - e->rhs[mat_row_idx] = ctx->rhs->data[idx_src]; + e->rhs_factor[mat_row_idx] = 1.0; if (boundary) { double *mat_dst = e->mat + mat_row_idx; @@ -435,7 +439,8 @@ static void mat_fill_row(EGSContext *ctx, double *scratch_line, bnd_fixval = ctx->boundaries[bnd_loc]; mat_dst[mat_row_idx * mat_stride_dst] = 1.0; - e->rhs[mat_row_idx] = bnd_fixval->val[idx[!ci]]; + e->rhs_mat[mat_row_idx] = bnd_fixval->val[idx[!ci]]; + e->rhs_factor[mat_row_idx] = 0.0; memset(scratch_line, 0, e->N_ghosts * sizeof(*scratch_line)); break; } @@ -483,7 +488,7 @@ static void mat_fill_row(EGSContext *ctx, double *scratch_line, for (ptrdiff_t bnd_layer = 1; bnd_layer < ctx->fd_stencil; bnd_layer++) for (ptrdiff_t bnd_idx = -((ptrdiff_t)ctx->fd_stencil - 1); bnd_idx < (ptrdiff_t)(bnd_len[0] + ctx->fd_stencil - 1); bnd_idx++) { - e->rhs[mat_row_idx] -= bnd->val[bnd_layer * bnd->val_stride + bnd_idx] * dst[dir * bnd_layer * bnd_stride[1] + bnd_idx * bnd_stride[0]]; + e->rhs_mat[mat_row_idx] -= bnd->val[bnd_layer * bnd->val_stride + bnd_idx] * dst[dir * bnd_layer * bnd_stride[1] + bnd_idx * bnd_stride[0]]; dst[dir * bnd_layer * bnd_stride[1] + bnd_idx * bnd_stride[0]] = 0.0; } } @@ -560,14 +565,23 @@ static int solve_exact(EGSContext *ctx) start = gettime(); - memset(e->mat, 0, SQR(e->N) * sizeof(*e->mat)); - tp_execute(ctx->tp, ctx->domain_size[1] * ctx->domain_size[0], mat_fill_row_task, ctx); + if (!e->mat_filled) { + memset(e->mat, 0, SQR(e->N) * sizeof(*e->mat)); + memset(e->rhs_mat, 0, e->N * sizeof(*e->rhs_mat)); + tp_execute(ctx->tp, ctx->domain_size[1] * ctx->domain_size[0], mat_fill_row_task, ctx); - ec->time_mat_construct += gettime() - start; - ec->count_mat_construct++; + e->mat_filled = 1; + } - start = gettime(); + for (ptrdiff_t idx1 = 0; idx1 < ctx->domain_size[1]; idx1++) + for (ptrdiff_t idx0 = 0; idx0 < ctx->domain_size[0]; idx0++) { + const ptrdiff_t idx_src = idx1 * priv->stride + idx0; + const ptrdiff_t mat_row_idx = idx1 * ctx->domain_size[0] + idx0; + e->rhs[mat_row_idx] = e->rhs_factor[mat_row_idx] * ctx->rhs->data[idx_src] + e->rhs_mat[mat_row_idx]; + } + ec->time_mat_construct += gettime() - start; + ec->count_mat_construct++; start = gettime(); @@ -605,6 +619,7 @@ static int solve_exact(EGSContext *ctx) return ret; } + start = gettime(); for (size_t idx1 = 0; idx1 < ctx->domain_size[1]; idx1++) memcpy(ctx->u->data + idx1 * ctx->u->stride[0], e->x + idx1 * ctx->domain_size[0], ctx->domain_size[0] * sizeof(*e->x)); @@ -636,7 +651,7 @@ int mg2di_egs_solve(EGSContext *ctx) return ret; } -int mg2di_egs_init(EGSContext *ctx) +int mg2di_egs_init(EGSContext *ctx, int flags) { EGSInternal *priv = ctx->priv; int64_t start, start_init; @@ -681,6 +696,9 @@ int mg2di_egs_init(EGSContext *ctx) EGSExactInternal *e = &priv->e; unsigned int nb_threads = tp_get_nb_threads(ctx->tp); + if (!(flags & EGS_INIT_FLAG_SAME_DIFF_COEFFS)) + e->mat_filled = 0; + switch (ctx->fd_stencil) { case 1: e->fill_mat = fill_mat_s1; break; case 2: e->fill_mat = fill_mat_s2; break; @@ -818,9 +836,11 @@ static int arrays_alloc(EGSContext *ctx, const size_t domain_size[2]) e->mat = calloc(SQR(e->N), sizeof(*e->mat)); e->mat_f = calloc(SQR(e->N), sizeof(*e->mat_f)); e->rhs = calloc(e->N, sizeof(*e->rhs)); + e->rhs_mat = calloc(e->N, sizeof(*e->rhs_mat)); + e->rhs_factor = calloc(e->N, sizeof(*e->rhs_factor)); e->x = calloc(e->N, sizeof(*e->x)); e->ipiv = calloc(e->N, sizeof(*e->ipiv)); - if (!e->mat || !e->mat_f || !e->rhs || !e->x || !e->ipiv) + if (!e->mat || !e->mat_f || !e->rhs || !e->rhs_mat || !e->rhs_factor || !e->x || !e->ipiv) return -ENOMEM; ret = mg2di_bicgstab_context_alloc(&e->bicgstab, e->N, 64); @@ -905,6 +925,8 @@ void mg2di_egs_free(EGSContext **pctx) free(e->mat); free(e->mat_f); free(e->rhs); + free(e->rhs_mat); + free(e->rhs_factor); free(e->x); free(e->ipiv); diff --git a/ell_grid_solve.h b/ell_grid_solve.h index 182b087..0718b67 100644 --- a/ell_grid_solve.h +++ b/ell_grid_solve.h @@ -211,6 +211,8 @@ typedef struct EGSContext { int64_t time_total; } EGSContext; +#define EGS_INIT_FLAG_SAME_DIFF_COEFFS (1 << 0) + /** * Allocate the solver for the given domain size. * @@ -229,7 +231,7 @@ EGSContext *mg2di_egs_alloc(enum EGSType solver_type, size_t domain_size[2]); * * @return 0 on success, a negative error code on failure. */ -int mg2di_egs_init(EGSContext *ctx); +int mg2di_egs_init(EGSContext *ctx, int flags); /** * Free the solver and write NULL to the provided pointer. */ diff --git a/mg2d.c b/mg2d.c index 01c7624..1326a38 100644 --- a/mg2d.c +++ b/mg2d.c @@ -40,6 +40,7 @@ typedef struct MG2DLevel { unsigned int depth; EGSContext *solver; + int egs_init_flags; GridTransferContext *transfer_restrict; GridTransferContext *transfer_prolong; @@ -139,9 +140,10 @@ static int mg_solve_subgrid(MG2DContext *ctx, MG2DLevel *level) level->solver->domain_size[1]); /* re-init the current-level solver (re-calc the residual) */ - ret = mg2di_egs_init(level->solver); + ret = mg2di_egs_init(level->solver, level->egs_init_flags); if (ret < 0) return ret; + level->egs_init_flags |= EGS_INIT_FLAG_SAME_DIFF_COEFFS; } res_old = level->solver->residual_max; @@ -198,7 +200,7 @@ static int mg_solve_subgrid(MG2DContext *ctx, MG2DLevel *level) /* re-init the current-level solver (re-calc the residual) */ res_prev = level->solver->residual_max; start = gettime(); - ret = mg2di_egs_init(level->solver); + ret = mg2di_egs_init(level->solver, 0); if (ret < 0) return ret; level->time_reinit += gettime() - start; @@ -434,6 +436,8 @@ static int mg_levels_init(MG2DContext *ctx) return ret; } + cur->egs_init_flags &= ~EGS_INIT_FLAG_SAME_DIFF_COEFFS; + cur = cur->child; } @@ -490,7 +494,7 @@ int mg2d_solve(MG2DContext *ctx) if (ret < 0) return ret; - ret = mg2di_egs_init(s_root); + ret = mg2di_egs_init(s_root, 0); if (ret < 0) return ret; diff --git a/relax_test.c b/relax_test.c index 54155ca..5c07107 100644 --- a/relax_test.c +++ b/relax_test.c @@ -134,7 +134,7 @@ int main(int argc, char **argv) sizeof(*ctx->diff_coeffs[0]->data) * ctx->domain_size[0]); } - ret = mg2di_egs_init(ctx); + ret = mg2di_egs_init(ctx, 0); if (ret < 0) { fprintf(stderr, "Error initializing the solver context: %d\n", ret); ret = 1; -- cgit v1.2.3