aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnton Khirnov <anton@khirnov.net>2019-06-18 12:00:45 +0200
committerAnton Khirnov <anton@khirnov.net>2019-06-29 11:19:32 +0200
commit2bd55ca0e3fb500bff5cf5ceb36a80a196f0f29d (patch)
tree8762f87a49b034e3dddda0ae6b96130465027964
parent9f865c12f3870a58799c1ff8b3f60d2b53b7c6fc (diff)
implement several relax steps per sync
-rw-r--r--ell_grid_solve.c135
1 files changed, 105 insertions, 30 deletions
diff --git a/ell_grid_solve.c b/ell_grid_solve.c
index 996d450..97b8688 100644
--- a/ell_grid_solve.c
+++ b/ell_grid_solve.c
@@ -91,8 +91,10 @@ struct EGSInternal {
NDArray *diff_coeffs_base;
NDArray *diff_coeffs[MG2D_DIFF_COEFF_NB];
- ptrdiff_t residual_calc_offset[2];
- size_t residual_calc_size[2];
+ ptrdiff_t (*residual_calc_offset)[2];
+ size_t (*residual_calc_size)[2];
+ ptrdiff_t bnd_calc_offset[2];
+ size_t bnd_calc_size[2];
int bnd_zero[4];
int reflect_skip;
@@ -110,6 +112,9 @@ struct EGSInternal {
size_t fd_stencil;
+ unsigned int steps_per_sync;
+ unsigned int steps_since_sync;
+
int *sync_sendcounts;
int *sync_senddispl;
MPI_Datatype *sync_sendtypes;
@@ -141,38 +146,40 @@ static void boundaries_sync(EGSContext *ctx, NDArray *a_dst)
{
EGSInternal *priv = ctx->priv;
- mg2di_timer_start(&ctx->timer_mpi_sync);
+ if (priv->dg->nb_components > 1) {
+ mg2di_timer_start(&ctx->timer_mpi_sync);
- MPI_Alltoallw(a_dst->data, priv->sync_sendcounts, priv->sync_senddispl, priv->sync_sendtypes,
- a_dst->data, priv->sync_recvcounts, priv->sync_recvdispl, priv->sync_recvtypes,
- priv->comm);
+ MPI_Alltoallw(a_dst->data, priv->sync_sendcounts, priv->sync_senddispl, priv->sync_sendtypes,
+ a_dst->data, priv->sync_recvcounts, priv->sync_recvdispl, priv->sync_recvtypes,
+ priv->comm);
- mg2di_timer_stop(&ctx->timer_mpi_sync);
+ mg2di_timer_stop(&ctx->timer_mpi_sync);
+ }
}
static void residual_calc(EGSContext *ctx, int export_res)
{
EGSInternal *priv = ctx->priv;
const double *diff_coeffs[MG2D_DIFF_COEFF_NB];
+ ptrdiff_t *offset = priv->residual_calc_offset[priv->steps_since_sync];
+ size_t *size = priv->residual_calc_size[priv->steps_since_sync];
+ NDArray *dst = export_res ? ctx->residual : priv->u_next;
int reflect_flags = 0;
mg2di_timer_start(&ctx->timer_res_calc);
for (int i = 0; i < ARRAY_ELEMS(diff_coeffs); i++)
- diff_coeffs[i] = NDPTR2D(priv->diff_coeffs[i], priv->residual_calc_offset[1], priv->residual_calc_offset[0]);
+ diff_coeffs[i] = NDPTR2D(priv->diff_coeffs[i], offset[0], offset[1]);
for (int bnd_idx = 0; bnd_idx < 4; bnd_idx++)
if (ctx->boundaries[bnd_idx]->type == MG2D_BC_TYPE_REFLECT &&
priv->dg->components[priv->local_component].bnd_is_outer[bnd_idx])
reflect_flags |= 1 << bnd_idx;
- mg2di_residual_calc(priv->rescalc, priv->residual_calc_size, &ctx->residual_max,
- NDPTR2D(export_res ? ctx->residual : priv->u_next, priv->residual_calc_offset[1], priv->residual_calc_offset[0]),
- export_res ? ctx->residual->stride[0] : priv->u_next->stride[0],
- NDPTR2D(ctx->u, priv->residual_calc_offset[1], priv->residual_calc_offset[0]),
- ctx->u->stride[0],
- NDPTR2D(ctx->rhs, priv->residual_calc_offset[1], priv->residual_calc_offset[0]),
- ctx->rhs->stride[0],
+ mg2di_residual_calc(priv->rescalc, size, &ctx->residual_max,
+ NDPTR2D(dst, offset[0], offset[1]), dst->stride[0],
+ NDPTR2D(ctx->u, offset[0], offset[1]), ctx->u->stride[0],
+ NDPTR2D(ctx->rhs, offset[0], offset[1]), ctx->rhs->stride[0],
diff_coeffs, priv->diff_coeffs[0]->stride[0],
export_res ? 0.0 : 1.0, export_res ? 1.0 : priv->r.relax_factor,
reflect_flags, FD_STENCIL_MAX);
@@ -257,9 +264,10 @@ static void boundaries_apply(EGSContext *ctx, NDArray *a_dst, int init)
const ptrdiff_t stride_boundary = strides[!ci];
const ptrdiff_t stride_offset = strides[ci];
const size_t size_boundary = ctx->domain_size[!ci];
+ //const size_t size_boundary = priv->bnd_calc_size[!ci];
const size_t size_offset = ctx->domain_size[ci];
- double *dst = a_dst->data + mg2d_bnd_is_upper(i) * ((size_offset - 1) * stride_offset);
+ double *dst = a_dst->data + mg2d_bnd_is_upper(i) * ((size_offset - 1) * stride_offset);// + priv->bnd_calc_offset[!ci] * stride_boundary;
const ptrdiff_t dst_strides[] = { stride_boundary, mg2d_bnd_out_dir(i) * stride_offset };
if (bnd->type != bnd_type_order[order_idx] ||
@@ -335,9 +343,6 @@ static void boundaries_apply(EGSContext *ctx, NDArray *a_dst, int init)
}
mg2di_timer_stop(&ctx->timer_bnd_corners);
- if (priv->dg->nb_components > 1)
- boundaries_sync(ctx, a_dst);
-
mg2di_timer_stop(&ctx->timer_bnd);
}
@@ -399,15 +404,26 @@ static int solve_relax_step(EGSContext *ctx, int export_res)
priv->reflect_skip = 0;
boundaries_apply(ctx, ctx->u, 0);
+
+ priv->steps_since_sync++;
+ if (priv->steps_since_sync >= priv->steps_per_sync) {
+ boundaries_sync(ctx, ctx->u);
+ priv->steps_since_sync = 0;
+ }
}
residual_calc(ctx, 1);
- if (priv->dg->nb_components > 1)
- boundaries_sync(ctx, ctx->residual);
+ boundaries_sync(ctx, ctx->residual);
} else {
mg2di_assert(u_next_valid);
residual_calc(ctx, 0);
boundaries_apply(ctx, priv->u_next, 0);
+
+ priv->steps_since_sync++;
+ if (priv->steps_since_sync >= priv->steps_per_sync) {
+ boundaries_sync(ctx, priv->u_next);
+ priv->steps_since_sync = 0;
+ }
priv->u_next_valid = 1;
}
@@ -863,7 +879,7 @@ static int init_mpi_sync(EGSContext *ctx)
}
ret = mg2di_dg_edge_overlaps(overlaps_recv, overlaps_send,
- dg, priv->local_component, FD_STENCIL_MAX);
+ dg, priv->local_component, ctx->fd_stencil * priv->steps_per_sync);
if (ret < 0)
goto fail;
@@ -1005,12 +1021,12 @@ int mg2di_egs_init(EGSContext *ctx, int flags)
goto finish;
}
- priv->residual_calc_size[0] = ctx->domain_size[0];
- priv->residual_calc_size[1] = ctx->domain_size[1];
- priv->residual_calc_offset[0] = ctx->boundaries[MG2D_BOUNDARY_1L]->type == MG2D_BC_TYPE_FIXVAL &&
- dc->bnd_is_outer[MG2D_BOUNDARY_1L];
- priv->residual_calc_offset[1] = ctx->boundaries[MG2D_BOUNDARY_0L]->type == MG2D_BC_TYPE_FIXVAL &&
- dc->bnd_is_outer[MG2D_BOUNDARY_0L];
+ priv->residual_calc_size[priv->steps_per_sync - 1][0] = ctx->domain_size[0];
+ priv->residual_calc_size[priv->steps_per_sync - 1][1] = ctx->domain_size[1];
+ priv->residual_calc_offset[priv->steps_per_sync - 1][0] = ctx->boundaries[MG2D_BOUNDARY_0L]->type == MG2D_BC_TYPE_FIXVAL &&
+ dc->bnd_is_outer[MG2D_BOUNDARY_0L];
+ priv->residual_calc_offset[priv->steps_per_sync - 1][1] = ctx->boundaries[MG2D_BOUNDARY_1L]->type == MG2D_BC_TYPE_FIXVAL &&
+ dc->bnd_is_outer[MG2D_BOUNDARY_1L];
for (int bnd_idx = 0; bnd_idx < ARRAY_ELEMS(ctx->boundaries); bnd_idx++) {
MG2DBoundary *bnd = ctx->boundaries[bnd_idx];
@@ -1031,7 +1047,27 @@ int mg2di_egs_init(EGSContext *ctx, int flags)
priv->bnd_zero[bnd_idx] = maxval == 0.0;
- priv->residual_calc_size[ci]--;
+ priv->residual_calc_size[priv->steps_per_sync - 1][ci]--;
+ }
+ }
+
+ for (int step = priv->steps_per_sync - 2; step >= 0; step--) {
+ const int buffer_size = ctx->fd_stencil;
+ int ic_0l = !dc->bnd_is_outer[MG2D_BOUNDARY_0L];
+ int ic_0u = !dc->bnd_is_outer[MG2D_BOUNDARY_0U];
+ int ic_1l = !dc->bnd_is_outer[MG2D_BOUNDARY_1L];
+ int ic_1u = !dc->bnd_is_outer[MG2D_BOUNDARY_1U];
+
+ priv->residual_calc_offset[step][0] = priv->residual_calc_offset[step + 1][0] - buffer_size * ic_0l;
+ priv->residual_calc_offset[step][1] = priv->residual_calc_offset[step + 1][1] - buffer_size * ic_1l;
+ priv->residual_calc_size[step][0] = priv->residual_calc_size[step + 1][0] + buffer_size * (ic_0l + ic_0u);
+ priv->residual_calc_size[step][1] = priv->residual_calc_size[step + 1][1] + buffer_size * (ic_1l + ic_1u);
+
+ if (step == 0) {
+ priv->bnd_calc_offset[0] = -buffer_size * ic_0l;
+ priv->bnd_calc_offset[1] = -buffer_size * ic_1l;
+ priv->bnd_calc_size[0] = ctx->domain_size[0] + buffer_size * (ic_0l + ic_0u);
+ priv->bnd_calc_size[1] = ctx->domain_size[1] + buffer_size * (ic_1l + ic_1u);
}
}
@@ -1049,11 +1085,27 @@ finish:
if (ret >= 0) {
priv->reflect_skip = 0;
+ if (!(flags & EGS_INIT_FLAG_SAME_DIFF_COEFFS)) {
+ for (int dc_idx = 0; dc_idx < ARRAY_ELEMS(priv->diff_coeffs); dc_idx++)
+ boundaries_sync(ctx, priv->diff_coeffs[dc_idx]);
+ }
+ boundaries_sync(ctx, ctx->rhs);
+
boundaries_apply(ctx, ctx->u, 1);
+ boundaries_sync(ctx, ctx->u);
+ priv->steps_since_sync = 0;
+
boundaries_apply(ctx, priv->u_next, 1);
+ boundaries_sync(ctx, priv->u_next);
residual_calc(ctx, 0);
boundaries_apply(ctx, priv->u_next, 0);
+
+ priv->steps_since_sync++;
+ if (priv->steps_since_sync >= priv->steps_per_sync) {
+ boundaries_sync(ctx, priv->u_next);
+ priv->steps_since_sync = 0;
+ }
priv->u_next_valid = 1;
}
@@ -1077,7 +1129,7 @@ static int arrays_alloc(EGSContext *ctx, const size_t domain_size[2])
int ret;
for (int bnd_idx = 0; bnd_idx < 4; bnd_idx++)
- extra_points[bnd_idx] = dc->bnd_is_outer[bnd_idx] ? 0 : FD_STENCIL_MAX;
+ extra_points[bnd_idx] = dc->bnd_is_outer[bnd_idx] ? 0 : FD_STENCIL_MAX * priv->steps_per_sync;
size_alloc[0] = dc->exterior.size[1] + extra_points[MG2D_BOUNDARY_1L] + extra_points[MG2D_BOUNDARY_1U];
size_alloc[1] = dc->exterior.size[0] + extra_points[MG2D_BOUNDARY_0L] + extra_points[MG2D_BOUNDARY_0U];
@@ -1206,6 +1258,26 @@ static EGSContext *egs_alloc(const DomainGeometry *dg, unsigned int local_compon
ctx->priv->local_component = local_component;
ctx->priv->comm = MPI_COMM_NULL;
+ if (dg->nb_components > 1) {
+ char *steps = getenv("MG2D_STEPS_PER_SYNC");
+ if (steps)
+ ctx->priv->steps_per_sync = strtol(steps, NULL, 0);
+
+ if (ctx->priv->steps_per_sync <= 0)
+ ctx->priv->steps_per_sync = 2;
+ for (int i = 0; i < dg->nb_components; i++) {
+ if (dg->components[i].interior.size[0] < ctx->priv->steps_per_sync * FD_STENCIL_MAX ||
+ dg->components[i].interior.size[1] < ctx->priv->steps_per_sync * FD_STENCIL_MAX) {
+ ctx->priv->steps_per_sync = 1;
+ break;
+ }
+ }
+ } else
+ ctx->priv->steps_per_sync = 1;
+
+ ctx->priv->residual_calc_offset = calloc(ctx->priv->steps_per_sync, sizeof(*ctx->priv->residual_calc_offset));
+ ctx->priv->residual_calc_size = calloc(ctx->priv->steps_per_sync, sizeof(*ctx->priv->residual_calc_size));
+
mg2di_timer_init(&ctx->exact->timer_mat_construct);
mg2di_timer_init(&ctx->exact->timer_bicgstab);
mg2di_timer_init(&ctx->exact->timer_lu_solve);
@@ -1351,6 +1423,9 @@ void mg2di_egs_free(EGSContext **pctx)
free(ctx->priv->sync_recvcounts);
free(ctx->priv->sync_recvdispl);
+ free(ctx->priv->residual_calc_offset);
+ free(ctx->priv->residual_calc_size);
+
mg2di_dg_free(&ctx->priv->dg);
tp_free(&ctx->priv->tp_internal);