aboutsummaryrefslogtreecommitdiff
path: root/ell_grid_solve.c
diff options
context:
space:
mode:
authorAnton Khirnov <anton@khirnov.net>2019-03-26 17:31:58 +0100
committerAnton Khirnov <anton@khirnov.net>2019-03-26 17:31:58 +0100
commita4a5fc2666bfd3ca009ce6c7d05bc83ff4d57313 (patch)
tree5bf0fa534ad8794cecd4d75580f372affe4ff7ff /ell_grid_solve.c
parent0e3f76cc16fa93a2b8d58922bae090828bfbf05a (diff)
egs_exact: do not construct the matrix more often than necessary
It does not change unless the diff coeffs change.
Diffstat (limited to 'ell_grid_solve.c')
-rw-r--r--ell_grid_solve.c42
1 files changed, 32 insertions, 10 deletions
diff --git a/ell_grid_solve.c b/ell_grid_solve.c
index dc0210e..d597e58 100644
--- a/ell_grid_solve.c
+++ b/ell_grid_solve.c
@@ -56,12 +56,16 @@ typedef struct EGSExactInternal {
double *mat;
double *mat_f;
double *rhs;
+ double *rhs_mat;
+ double *rhs_factor;
double *x;
int *ipiv;
double *scratch_lines;
ptrdiff_t scratch_lines_allocated;
+ int mat_filled;
+
BiCGStabContext *bicgstab;
} EGSExactInternal;
@@ -416,7 +420,7 @@ static void mat_fill_row(EGSContext *ctx, double *scratch_line,
e->fill_mat(mat_row + row_offset * mat_stride, mat_stride, row_stride, ctx->diff_coeffs, ctx->priv->fd_factors, idx_src);
- e->rhs[mat_row_idx] = ctx->rhs->data[idx_src];
+ e->rhs_factor[mat_row_idx] = 1.0;
if (boundary) {
double *mat_dst = e->mat + mat_row_idx;
@@ -435,7 +439,8 @@ static void mat_fill_row(EGSContext *ctx, double *scratch_line,
bnd_fixval = ctx->boundaries[bnd_loc];
mat_dst[mat_row_idx * mat_stride_dst] = 1.0;
- e->rhs[mat_row_idx] = bnd_fixval->val[idx[!ci]];
+ e->rhs_mat[mat_row_idx] = bnd_fixval->val[idx[!ci]];
+ e->rhs_factor[mat_row_idx] = 0.0;
memset(scratch_line, 0, e->N_ghosts * sizeof(*scratch_line));
break;
}
@@ -483,7 +488,7 @@ static void mat_fill_row(EGSContext *ctx, double *scratch_line,
for (ptrdiff_t bnd_layer = 1; bnd_layer < ctx->fd_stencil; bnd_layer++)
for (ptrdiff_t bnd_idx = -((ptrdiff_t)ctx->fd_stencil - 1); bnd_idx < (ptrdiff_t)(bnd_len[0] + ctx->fd_stencil - 1); bnd_idx++) {
- e->rhs[mat_row_idx] -= bnd->val[bnd_layer * bnd->val_stride + bnd_idx] * dst[dir * bnd_layer * bnd_stride[1] + bnd_idx * bnd_stride[0]];
+ e->rhs_mat[mat_row_idx] -= bnd->val[bnd_layer * bnd->val_stride + bnd_idx] * dst[dir * bnd_layer * bnd_stride[1] + bnd_idx * bnd_stride[0]];
dst[dir * bnd_layer * bnd_stride[1] + bnd_idx * bnd_stride[0]] = 0.0;
}
}
@@ -560,14 +565,23 @@ static int solve_exact(EGSContext *ctx)
start = gettime();
- memset(e->mat, 0, SQR(e->N) * sizeof(*e->mat));
- tp_execute(ctx->tp, ctx->domain_size[1] * ctx->domain_size[0], mat_fill_row_task, ctx);
+ if (!e->mat_filled) {
+ memset(e->mat, 0, SQR(e->N) * sizeof(*e->mat));
+ memset(e->rhs_mat, 0, e->N * sizeof(*e->rhs_mat));
+ tp_execute(ctx->tp, ctx->domain_size[1] * ctx->domain_size[0], mat_fill_row_task, ctx);
- ec->time_mat_construct += gettime() - start;
- ec->count_mat_construct++;
+ e->mat_filled = 1;
+ }
- start = gettime();
+ for (ptrdiff_t idx1 = 0; idx1 < ctx->domain_size[1]; idx1++)
+ for (ptrdiff_t idx0 = 0; idx0 < ctx->domain_size[0]; idx0++) {
+ const ptrdiff_t idx_src = idx1 * priv->stride + idx0;
+ const ptrdiff_t mat_row_idx = idx1 * ctx->domain_size[0] + idx0;
+ e->rhs[mat_row_idx] = e->rhs_factor[mat_row_idx] * ctx->rhs->data[idx_src] + e->rhs_mat[mat_row_idx];
+ }
+ ec->time_mat_construct += gettime() - start;
+ ec->count_mat_construct++;
start = gettime();
@@ -605,6 +619,7 @@ static int solve_exact(EGSContext *ctx)
return ret;
}
+ start = gettime();
for (size_t idx1 = 0; idx1 < ctx->domain_size[1]; idx1++)
memcpy(ctx->u->data + idx1 * ctx->u->stride[0], e->x + idx1 * ctx->domain_size[0], ctx->domain_size[0] * sizeof(*e->x));
@@ -636,7 +651,7 @@ int mg2di_egs_solve(EGSContext *ctx)
return ret;
}
-int mg2di_egs_init(EGSContext *ctx)
+int mg2di_egs_init(EGSContext *ctx, int flags)
{
EGSInternal *priv = ctx->priv;
int64_t start, start_init;
@@ -681,6 +696,9 @@ int mg2di_egs_init(EGSContext *ctx)
EGSExactInternal *e = &priv->e;
unsigned int nb_threads = tp_get_nb_threads(ctx->tp);
+ if (!(flags & EGS_INIT_FLAG_SAME_DIFF_COEFFS))
+ e->mat_filled = 0;
+
switch (ctx->fd_stencil) {
case 1: e->fill_mat = fill_mat_s1; break;
case 2: e->fill_mat = fill_mat_s2; break;
@@ -818,9 +836,11 @@ static int arrays_alloc(EGSContext *ctx, const size_t domain_size[2])
e->mat = calloc(SQR(e->N), sizeof(*e->mat));
e->mat_f = calloc(SQR(e->N), sizeof(*e->mat_f));
e->rhs = calloc(e->N, sizeof(*e->rhs));
+ e->rhs_mat = calloc(e->N, sizeof(*e->rhs_mat));
+ e->rhs_factor = calloc(e->N, sizeof(*e->rhs_factor));
e->x = calloc(e->N, sizeof(*e->x));
e->ipiv = calloc(e->N, sizeof(*e->ipiv));
- if (!e->mat || !e->mat_f || !e->rhs || !e->x || !e->ipiv)
+ if (!e->mat || !e->mat_f || !e->rhs || !e->rhs_mat || !e->rhs_factor || !e->x || !e->ipiv)
return -ENOMEM;
ret = mg2di_bicgstab_context_alloc(&e->bicgstab, e->N, 64);
@@ -905,6 +925,8 @@ void mg2di_egs_free(EGSContext **pctx)
free(e->mat);
free(e->mat_f);
free(e->rhs);
+ free(e->rhs_mat);
+ free(e->rhs_factor);
free(e->x);
free(e->ipiv);