summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnton Khirnov <anton@khirnov.net>2019-03-26 17:31:58 +0100
committerAnton Khirnov <anton@khirnov.net>2019-03-26 17:31:58 +0100
commita4a5fc2666bfd3ca009ce6c7d05bc83ff4d57313 (patch)
tree5bf0fa534ad8794cecd4d75580f372affe4ff7ff
parent0e3f76cc16fa93a2b8d58922bae090828bfbf05a (diff)
egs_exact: do not construct the matrix more often than necessary
It does not change unless the diff coeffs change.
-rw-r--r--ell_grid_solve.c42
-rw-r--r--ell_grid_solve.h4
-rw-r--r--mg2d.c10
-rw-r--r--relax_test.c2
4 files changed, 43 insertions, 15 deletions
diff --git a/ell_grid_solve.c b/ell_grid_solve.c
index dc0210e..d597e58 100644
--- a/ell_grid_solve.c
+++ b/ell_grid_solve.c
@@ -56,12 +56,16 @@ typedef struct EGSExactInternal {
double *mat;
double *mat_f;
double *rhs;
+ double *rhs_mat;
+ double *rhs_factor;
double *x;
int *ipiv;
double *scratch_lines;
ptrdiff_t scratch_lines_allocated;
+ int mat_filled;
+
BiCGStabContext *bicgstab;
} EGSExactInternal;
@@ -416,7 +420,7 @@ static void mat_fill_row(EGSContext *ctx, double *scratch_line,
e->fill_mat(mat_row + row_offset * mat_stride, mat_stride, row_stride, ctx->diff_coeffs, ctx->priv->fd_factors, idx_src);
- e->rhs[mat_row_idx] = ctx->rhs->data[idx_src];
+ e->rhs_factor[mat_row_idx] = 1.0;
if (boundary) {
double *mat_dst = e->mat + mat_row_idx;
@@ -435,7 +439,8 @@ static void mat_fill_row(EGSContext *ctx, double *scratch_line,
bnd_fixval = ctx->boundaries[bnd_loc];
mat_dst[mat_row_idx * mat_stride_dst] = 1.0;
- e->rhs[mat_row_idx] = bnd_fixval->val[idx[!ci]];
+ e->rhs_mat[mat_row_idx] = bnd_fixval->val[idx[!ci]];
+ e->rhs_factor[mat_row_idx] = 0.0;
memset(scratch_line, 0, e->N_ghosts * sizeof(*scratch_line));
break;
}
@@ -483,7 +488,7 @@ static void mat_fill_row(EGSContext *ctx, double *scratch_line,
for (ptrdiff_t bnd_layer = 1; bnd_layer < ctx->fd_stencil; bnd_layer++)
for (ptrdiff_t bnd_idx = -((ptrdiff_t)ctx->fd_stencil - 1); bnd_idx < (ptrdiff_t)(bnd_len[0] + ctx->fd_stencil - 1); bnd_idx++) {
- e->rhs[mat_row_idx] -= bnd->val[bnd_layer * bnd->val_stride + bnd_idx] * dst[dir * bnd_layer * bnd_stride[1] + bnd_idx * bnd_stride[0]];
+ e->rhs_mat[mat_row_idx] -= bnd->val[bnd_layer * bnd->val_stride + bnd_idx] * dst[dir * bnd_layer * bnd_stride[1] + bnd_idx * bnd_stride[0]];
dst[dir * bnd_layer * bnd_stride[1] + bnd_idx * bnd_stride[0]] = 0.0;
}
}
@@ -560,14 +565,23 @@ static int solve_exact(EGSContext *ctx)
start = gettime();
- memset(e->mat, 0, SQR(e->N) * sizeof(*e->mat));
- tp_execute(ctx->tp, ctx->domain_size[1] * ctx->domain_size[0], mat_fill_row_task, ctx);
+ if (!e->mat_filled) {
+ memset(e->mat, 0, SQR(e->N) * sizeof(*e->mat));
+ memset(e->rhs_mat, 0, e->N * sizeof(*e->rhs_mat));
+ tp_execute(ctx->tp, ctx->domain_size[1] * ctx->domain_size[0], mat_fill_row_task, ctx);
- ec->time_mat_construct += gettime() - start;
- ec->count_mat_construct++;
+ e->mat_filled = 1;
+ }
- start = gettime();
+ for (ptrdiff_t idx1 = 0; idx1 < ctx->domain_size[1]; idx1++)
+ for (ptrdiff_t idx0 = 0; idx0 < ctx->domain_size[0]; idx0++) {
+ const ptrdiff_t idx_src = idx1 * priv->stride + idx0;
+ const ptrdiff_t mat_row_idx = idx1 * ctx->domain_size[0] + idx0;
+ e->rhs[mat_row_idx] = e->rhs_factor[mat_row_idx] * ctx->rhs->data[idx_src] + e->rhs_mat[mat_row_idx];
+ }
+ ec->time_mat_construct += gettime() - start;
+ ec->count_mat_construct++;
start = gettime();
@@ -605,6 +619,7 @@ static int solve_exact(EGSContext *ctx)
return ret;
}
+ start = gettime();
for (size_t idx1 = 0; idx1 < ctx->domain_size[1]; idx1++)
memcpy(ctx->u->data + idx1 * ctx->u->stride[0], e->x + idx1 * ctx->domain_size[0], ctx->domain_size[0] * sizeof(*e->x));
@@ -636,7 +651,7 @@ int mg2di_egs_solve(EGSContext *ctx)
return ret;
}
-int mg2di_egs_init(EGSContext *ctx)
+int mg2di_egs_init(EGSContext *ctx, int flags)
{
EGSInternal *priv = ctx->priv;
int64_t start, start_init;
@@ -681,6 +696,9 @@ int mg2di_egs_init(EGSContext *ctx)
EGSExactInternal *e = &priv->e;
unsigned int nb_threads = tp_get_nb_threads(ctx->tp);
+ if (!(flags & EGS_INIT_FLAG_SAME_DIFF_COEFFS))
+ e->mat_filled = 0;
+
switch (ctx->fd_stencil) {
case 1: e->fill_mat = fill_mat_s1; break;
case 2: e->fill_mat = fill_mat_s2; break;
@@ -818,9 +836,11 @@ static int arrays_alloc(EGSContext *ctx, const size_t domain_size[2])
e->mat = calloc(SQR(e->N), sizeof(*e->mat));
e->mat_f = calloc(SQR(e->N), sizeof(*e->mat_f));
e->rhs = calloc(e->N, sizeof(*e->rhs));
+ e->rhs_mat = calloc(e->N, sizeof(*e->rhs_mat));
+ e->rhs_factor = calloc(e->N, sizeof(*e->rhs_factor));
e->x = calloc(e->N, sizeof(*e->x));
e->ipiv = calloc(e->N, sizeof(*e->ipiv));
- if (!e->mat || !e->mat_f || !e->rhs || !e->x || !e->ipiv)
+ if (!e->mat || !e->mat_f || !e->rhs || !e->rhs_mat || !e->rhs_factor || !e->x || !e->ipiv)
return -ENOMEM;
ret = mg2di_bicgstab_context_alloc(&e->bicgstab, e->N, 64);
@@ -905,6 +925,8 @@ void mg2di_egs_free(EGSContext **pctx)
free(e->mat);
free(e->mat_f);
free(e->rhs);
+ free(e->rhs_mat);
+ free(e->rhs_factor);
free(e->x);
free(e->ipiv);
diff --git a/ell_grid_solve.h b/ell_grid_solve.h
index 182b087..0718b67 100644
--- a/ell_grid_solve.h
+++ b/ell_grid_solve.h
@@ -211,6 +211,8 @@ typedef struct EGSContext {
int64_t time_total;
} EGSContext;
+#define EGS_INIT_FLAG_SAME_DIFF_COEFFS (1 << 0)
+
/**
* Allocate the solver for the given domain size.
*
@@ -229,7 +231,7 @@ EGSContext *mg2di_egs_alloc(enum EGSType solver_type, size_t domain_size[2]);
*
* @return 0 on success, a negative error code on failure.
*/
-int mg2di_egs_init(EGSContext *ctx);
+int mg2di_egs_init(EGSContext *ctx, int flags);
/**
* Free the solver and write NULL to the provided pointer.
*/
diff --git a/mg2d.c b/mg2d.c
index 01c7624..1326a38 100644
--- a/mg2d.c
+++ b/mg2d.c
@@ -40,6 +40,7 @@ typedef struct MG2DLevel {
unsigned int depth;
EGSContext *solver;
+ int egs_init_flags;
GridTransferContext *transfer_restrict;
GridTransferContext *transfer_prolong;
@@ -139,9 +140,10 @@ static int mg_solve_subgrid(MG2DContext *ctx, MG2DLevel *level)
level->solver->domain_size[1]);
/* re-init the current-level solver (re-calc the residual) */
- ret = mg2di_egs_init(level->solver);
+ ret = mg2di_egs_init(level->solver, level->egs_init_flags);
if (ret < 0)
return ret;
+ level->egs_init_flags |= EGS_INIT_FLAG_SAME_DIFF_COEFFS;
}
res_old = level->solver->residual_max;
@@ -198,7 +200,7 @@ static int mg_solve_subgrid(MG2DContext *ctx, MG2DLevel *level)
/* re-init the current-level solver (re-calc the residual) */
res_prev = level->solver->residual_max;
start = gettime();
- ret = mg2di_egs_init(level->solver);
+ ret = mg2di_egs_init(level->solver, 0);
if (ret < 0)
return ret;
level->time_reinit += gettime() - start;
@@ -434,6 +436,8 @@ static int mg_levels_init(MG2DContext *ctx)
return ret;
}
+ cur->egs_init_flags &= ~EGS_INIT_FLAG_SAME_DIFF_COEFFS;
+
cur = cur->child;
}
@@ -490,7 +494,7 @@ int mg2d_solve(MG2DContext *ctx)
if (ret < 0)
return ret;
- ret = mg2di_egs_init(s_root);
+ ret = mg2di_egs_init(s_root, 0);
if (ret < 0)
return ret;
diff --git a/relax_test.c b/relax_test.c
index 54155ca..5c07107 100644
--- a/relax_test.c
+++ b/relax_test.c
@@ -134,7 +134,7 @@ int main(int argc, char **argv)
sizeof(*ctx->diff_coeffs[0]->data) * ctx->domain_size[0]);
}
- ret = mg2di_egs_init(ctx);
+ ret = mg2di_egs_init(ctx, 0);
if (ret < 0) {
fprintf(stderr, "Error initializing the solver context: %d\n", ret);
ret = 1;