aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnton Khirnov <anton@khirnov.net>2019-05-23 11:01:00 +0200
committerAnton Khirnov <anton@khirnov.net>2019-05-23 11:41:31 +0200
commit4c972cfc352ae5ba851cae142ca6fe594d88bc04 (patch)
treec1d2fe66022578b5c7695fd9695d3a69f0815429
parent5d7d6aae888a9e85b68b0663833b7200ea3f1e7c (diff)
egs: add support for MPI-based multi-component solves
-rw-r--r--components.c251
-rw-r--r--components.h73
-rw-r--r--ell_grid_solve.c253
-rw-r--r--ell_grid_solve.h26
-rw-r--r--meson.build7
-rw-r--r--mg2d.c2
-rw-r--r--relax_mpi_test.c236
7 files changed, 813 insertions, 35 deletions
diff --git a/components.c b/components.c
new file mode 100644
index 0000000..ef41653
--- /dev/null
+++ b/components.c
@@ -0,0 +1,251 @@
+/*
+ * Multi-component utility code
+ * Copyright 2019 Anton Khirnov <anton@khirnov.net>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "common.h"
+#include "components.h"
+#include "mg2d_boundary.h"
+
+int mg2di_dg_copy(DomainGeometry **pdst, const DomainGeometry *src)
+{
+ DomainGeometry *dst;
+
+ dst = mg2di_dg_alloc(src->nb_components);
+ if (!dst)
+ return -ENOMEM;
+
+ memcpy(dst->components, src->components,
+ src->nb_components * sizeof(*src->components));
+ dst->domain_size[0] = src->domain_size[0];
+ dst->domain_size[1] = src->domain_size[1];
+
+ *pdst = dst;
+
+ return 0;
+}
+
+DomainGeometry *mg2di_dg_alloc(unsigned int nb_components)
+{
+ DomainGeometry *dg;
+
+ dg = calloc(1, sizeof(*dg));
+ if (!dg)
+ return NULL;
+
+ dg->components = calloc(nb_components, sizeof(*dg->components));
+ if (!dg->components)
+ goto fail;
+ dg->nb_components = nb_components;
+
+ return dg;
+fail:
+ mg2di_dg_free(&dg);
+ return NULL;
+}
+
+void mg2di_dg_free(DomainGeometry **pdg)
+{
+ DomainGeometry *dg = *pdg;
+
+ if (!dg)
+ return;
+
+ free(dg->components);
+
+ free(dg);
+ *pdg = NULL;
+}
+
+int mg2di_rect_intersect(Rect *dst, const Rect *src1, const Rect *src2)
+{
+ ptrdiff_t intersect_start0 = MAX(src1->start[0], src2->start[0]);
+ ptrdiff_t intersect_start1 = MAX(src1->start[1], src2->start[1]);
+ ptrdiff_t intersect_end0 = MIN(src1->start[0] + src1->size[0], src2->start[0] + src2->size[0]);
+ ptrdiff_t intersect_end1 = MIN(src1->start[1] + src1->size[1], src2->start[1] + src2->size[1]);
+
+ if (intersect_start0 < intersect_end0 && intersect_start1 < intersect_end1) {
+ dst->start[0] = intersect_start0;
+ dst->start[1] = intersect_start1;
+ dst->size[0] = intersect_end0 - intersect_start0;
+ dst->size[1] = intersect_end1 - intersect_start1;
+
+ return 1;
+ }
+
+ dst->size[0] = 0;
+ dst->size[1] = 0;
+
+ return 0;
+}
+
+/* merge adjacent/partially overlapping src into dst,
+ * if the result is rectangular */
+static int rect_merge(Rect *dst, const Rect *src)
+{
+ /* if dst is empty, copy src */
+ if (!dst->size[0] && !dst->size[1]) {
+ *dst = *src;
+ return 0;
+ }
+
+ /* if dst is fully inside src, copy src */
+ if (dst->start[0] >= src->start[0] &&
+ dst->start[0] + dst->size[0] <= src->start[0] + src->size[0] &&
+ dst->start[1] >= src->start[1] &&
+ dst->start[1] + dst->size[1] <= src->start[1] + src->size[1]) {
+ *dst = *src;
+ return 0;
+ }
+
+ /* if src is fully inside dst, do nothing */
+ if (src->start[0] >= dst->start[0] &&
+ src->start[0] + src->size[0] <= dst->start[0] + dst->size[0] &&
+ src->start[1] >= dst->start[1] &&
+ src->start[1] + src->size[1] <= dst->start[1] + dst->size[1])
+ return 0;
+
+ /* if src is adjacent or partially overlaps dst, merge them */
+ for (int dir = 0; dir < 2; dir++) {
+ if (dst->start[dir] != src->start[dir] ||
+ dst->size[dir] != src->size[dir])
+ continue;
+
+ /* merge from above */
+ if (src->start[!dir] >= dst->start[!dir] &&
+ src->start[!dir] <= dst->start[!dir] + dst->size[!dir]) {
+ dst->size[!dir] = MAX(dst->size[!dir], src->start[!dir] + src->size[!dir] - dst->start[!dir]);
+ return 0;
+ }
+
+ /* merge from below */
+ if (dst->start[!dir] >= src->start[!dir] &&
+ dst->start[!dir] <= src->start[!dir] + src->size[!dir]) {
+
+ dst->size[!dir] = MAX(src->size[!dir], dst->start[!dir] + dst->size[!dir] - src->start[!dir]);
+ dst->start[!dir] = src->start[!dir];
+ return 0;
+ }
+ }
+
+ /* src and dst do not match */
+ return -EINVAL;
+}
+
+static int calc_overlaps_component(Rect *overlaps, const DomainGeometry *dg,
+ unsigned int comp, int edge_width)
+{
+ const DomainComponent *c = &dg->components[comp];
+ int ret = 0;
+
+ Rect (*overlaps_tmp)[4];
+ int *nb_overlaps_tmp;
+
+ overlaps_tmp = calloc(dg->nb_components, sizeof(*overlaps_tmp));
+ nb_overlaps_tmp = calloc(dg->nb_components, sizeof(*nb_overlaps_tmp));
+ if (!overlaps_tmp || !nb_overlaps_tmp)
+ return -ENOMEM;
+
+ /* calculate the overlaps for each boundary layer */
+ for (int bnd_idx = 0; bnd_idx < 4; bnd_idx++) {
+ const int ci = mg2d_bnd_coord_idx(bnd_idx);
+ const int upper = mg2d_bnd_is_upper(bnd_idx);
+ Rect bnd;
+
+ bnd.start[ci] = c->interior.start[ci] + (upper ? c->interior.size[ci] : -edge_width);
+ bnd.size[ci] = edge_width;
+
+ bnd.start[!ci] = c->interior.start[!ci] - edge_width;
+ bnd.size[!ci] = c->interior.size[!ci] + 2 * edge_width;
+
+ for (unsigned int comp_idx = 0; comp_idx < dg->nb_components; comp_idx++) {
+ const DomainComponent *c1 = &dg->components[comp_idx];
+ Rect *dst = &overlaps_tmp[comp_idx][nb_overlaps_tmp[comp_idx]];
+
+ if (comp_idx == comp)
+ continue;
+
+ mg2di_rect_intersect(dst, &bnd, &c1->exterior);
+ if (dst->size[0] && dst->size[1])
+ nb_overlaps_tmp[comp_idx]++;
+ }
+ }
+
+ /* merge all the overlaps, we should get a single rectangle for each component */
+ for (unsigned int comp_idx = 0; comp_idx < dg->nb_components; comp_idx++) {
+ while (nb_overlaps_tmp[comp_idx] > 1) {
+ Rect *dst = &overlaps_tmp[comp_idx][0];
+ int merged = 0;
+
+ for (int i = 1; i < nb_overlaps_tmp[comp_idx]; i++) {
+ Rect *src = &overlaps_tmp[comp_idx][i];
+ ret = rect_merge(dst, src);
+ if (ret < 0)
+ continue;
+
+ memmove(src, src + 1, nb_overlaps_tmp[comp_idx] - i - 1);
+ nb_overlaps_tmp[comp_idx]--;
+ merged = 1;
+ break;
+ }
+
+ if (!merged)
+ goto fail;
+ }
+ overlaps[comp_idx] = overlaps_tmp[comp_idx][0];
+ }
+
+fail:
+ free(overlaps_tmp);
+ free(nb_overlaps_tmp);
+
+ return ret;
+}
+
+int mg2di_dg_edge_overlaps(Rect *overlaps_recv, Rect *overlaps_send,
+ const DomainGeometry *dg, unsigned int comp,
+ unsigned int edge_width)
+{
+ Rect *overlaps = NULL;
+ int ret;
+
+ overlaps = calloc(SQR(dg->nb_components), sizeof(*overlaps));
+ if (!overlaps)
+ return -ENOMEM;
+
+ for (unsigned int i = 0; i < dg->nb_components; i++) {
+ ret = calc_overlaps_component(overlaps + i * dg->nb_components,
+ dg, i, edge_width);
+ if (ret < 0)
+ goto fail;
+ }
+
+ memcpy(overlaps_recv, overlaps + comp * dg->nb_components,
+ dg->nb_components * sizeof(*overlaps_recv));
+
+ for (unsigned int i = 0; i < dg->nb_components; i++)
+ overlaps_send[i] = overlaps[i * dg->nb_components + comp];
+
+fail:
+ free(overlaps);
+ return ret;
+}
diff --git a/components.h b/components.h
new file mode 100644
index 0000000..bc3c38f
--- /dev/null
+++ b/components.h
@@ -0,0 +1,73 @@
+/*
+ * Multi-component utility code
+ * Copyright 2019 Anton Khirnov <anton@khirnov.net>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef MG2D_COMPONENTS_H
+#define MG2D_COMPONENTS_H
+
+#include <stdint.h>
+#include <stdlib.h>
+
+typedef struct Rect {
+ ptrdiff_t start[2];
+ size_t size[2];
+} Rect;
+
+typedef struct DomainComponent {
+ /* points that belong to this component */
+ Rect interior;
+ /* interior + ghost points on the physical boundaries */
+ Rect exterior;
+ int bnd_is_outer[4];
+} DomainComponent;
+
+typedef struct DomainGeometry {
+ size_t domain_size[2];
+
+ unsigned int nb_components;
+ DomainComponent *components;
+} DomainGeometry;
+
+DomainGeometry *mg2di_dg_alloc(unsigned int nb_components);
+void mg2di_dg_free(DomainGeometry **dg);
+
+int mg2di_dg_copy(DomainGeometry **dst, const DomainGeometry *src);
+
+/**
+ * Compute the intersection of src1 and src2, store the result in dst.
+ */
+int mg2di_rect_intersect(Rect *dst, const Rect *src1, const Rect *src2);
+
+/**
+ * For the component <comp> in domain geometry <dg>, internal edges are
+ * rectangles along each non-outer boundary. Edge overlap with component <comp1>
+ * is the intersection of internal edges of <comp> and exterior of <comp1> (must
+ * necessarily be a rectangle).
+ *
+ * NB: edge overlap of a component with itself is empty.
+ *
+ * This function computes, for the given component <comp>
+ * - edge overlaps of <comp> with every component in dg (i.e. the ghost points
+ * to be received from each remote component) - written into overlaps_recv
+ * - edge overlaps of every component in dg with <comp> (i.e. the ghost points
+ * to be set to each remote component) - written into overlaps_send
+ */
+int mg2di_dg_edge_overlaps(Rect *overlaps_recv, Rect *overlaps_send,
+ const DomainGeometry *dg, unsigned int comp,
+ unsigned int edge_width);
+
+#endif // MG2D_COMPONENTS_H
diff --git a/ell_grid_solve.c b/ell_grid_solve.c
index 7dcd41e..8c37799 100644
--- a/ell_grid_solve.c
+++ b/ell_grid_solve.c
@@ -26,9 +26,11 @@
#include <threadpool.h>
#include <lapacke.h>
+#include <mpi.h>
#include "bicgstab.h"
#include "common.h"
+#include "components.h"
#include "cpu.h"
#include "ell_grid_solve.h"
#include "log.h"
@@ -76,7 +78,10 @@ struct EGSInternal {
NDArray *rhs_base;
NDArray *residual_base;
+ NDArray *u_base;
+
NDArray *u_next_base;
+ NDArray *u_next_exterior;
NDArray *u_next;
int u_next_valid;
@@ -98,6 +103,17 @@ struct EGSInternal {
EGSExactInternal e;
TPContext *tp_internal;
+
+ MPI_Comm comm;
+ DomainGeometry *dg;
+ unsigned int local_component;
+
+ int *sync_sendcounts;
+ int *sync_senddispl;
+ MPI_Datatype *sync_sendtypes;
+ int *sync_recvcounts;
+ int *sync_recvdispl;
+ MPI_Datatype *sync_recvtypes;
};
static const double fd_denoms[][MG2D_DIFF_COEFF_NB] = {
@@ -119,6 +135,19 @@ static const double fd_denoms[][MG2D_DIFF_COEFF_NB] = {
},
};
+static void boundaries_sync(EGSContext *ctx, NDArray *a_dst)
+{
+ EGSInternal *priv = ctx->priv;
+
+ mg2di_timer_start(&ctx->timer_mpi_sync);
+
+ MPI_Alltoallw(a_dst->data, priv->sync_sendcounts, priv->sync_senddispl, priv->sync_sendtypes,
+ a_dst->data, priv->sync_recvcounts, priv->sync_recvdispl, priv->sync_recvtypes,
+ priv->comm);
+
+ mg2di_timer_stop(&ctx->timer_mpi_sync);
+}
+
static void residual_calc(EGSContext *ctx, int export_res)
{
EGSInternal *priv = ctx->priv;
@@ -230,7 +259,8 @@ static void boundaries_apply(EGSContext *ctx, NDArray *a_dst, int init)
double *dst = a_dst->data + mg2d_bnd_is_upper(i) * ((size_offset - 1) * stride_offset);
const ptrdiff_t dst_strides[] = { stride_boundary, mg2d_bnd_out_dir(i) * stride_offset };
- if (bnd->type != bnd_type_order[order_idx])
+ if (bnd->type != bnd_type_order[order_idx] ||
+ !priv->dg->components[priv->local_component].bnd_is_outer[i])
continue;
switch (bnd->type) {
@@ -291,6 +321,10 @@ static void boundaries_apply(EGSContext *ctx, NDArray *a_dst, int init)
}
}
mg2di_timer_stop(&ctx->timer_bnd_corners);
+
+ if (priv->dg->nb_components > 1)
+ boundaries_sync(ctx, a_dst);
+
mg2di_timer_stop(&ctx->timer_bnd);
}
@@ -331,12 +365,15 @@ static int solve_relax_step(EGSContext *ctx, int export_res)
int u_next_valid = priv->u_next_valid;
if (u_next_valid) {
- NDArray *tmp = ctx->u;
- NDArray *tmp_base = ctx->u_base;
- ctx->u = priv->u_next;
- ctx->u_base = priv->u_next_base;
- priv->u_next = tmp;
- priv->u_next_base = tmp_base;
+ NDArray *tmp = ctx->u;
+ NDArray *tmp_ext = ctx->u_exterior;
+ NDArray *tmp_base = priv->u_base;
+ ctx->u = priv->u_next;
+ ctx->u_exterior = priv->u_next_exterior;
+ priv->u_base = priv->u_next_base;
+ priv->u_next = tmp;
+ priv->u_next_exterior = tmp_ext;
+ priv->u_next_base = tmp_base;
priv->u_next_valid = 0;
}
@@ -352,6 +389,8 @@ static int solve_relax_step(EGSContext *ctx, int export_res)
}
residual_calc(ctx, 1);
+ if (priv->dg->nb_components > 1)
+ boundaries_sync(ctx, ctx->residual);
} else {
mg2di_assert(u_next_valid);
residual_calc(ctx, 0);
@@ -799,6 +838,7 @@ int mg2di_egs_init(EGSContext *ctx, int flags)
{
EGSInternal *priv = ctx->priv;
EGSRelaxContext *r = ctx->relax;
+ DomainComponent *dc = &priv->dg->components[priv->local_component];
int ret = 0;
mg2di_timer_start(&ctx->timer_solve);
@@ -837,22 +877,22 @@ int mg2di_egs_init(EGSContext *ctx, int flags)
arg.dc = MG2D_DIFF_COEFF_00;
arg.fact = 1.0 / fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_00];
- tp_execute(ctx->tp, ctx->domain_size[0], init_diff_coeffs_task, &arg);
+ tp_execute(ctx->tp, ctx->domain_size[1], init_diff_coeffs_task, &arg);
arg.dc = MG2D_DIFF_COEFF_10;
arg.fact = 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_10] * ctx->step[0]);
- tp_execute(ctx->tp, ctx->domain_size[0], init_diff_coeffs_task, &arg);
+ tp_execute(ctx->tp, ctx->domain_size[1], init_diff_coeffs_task, &arg);
arg.dc = MG2D_DIFF_COEFF_01;
arg.fact = 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_01] * ctx->step[1]);
- tp_execute(ctx->tp, ctx->domain_size[0], init_diff_coeffs_task, &arg);
+ tp_execute(ctx->tp, ctx->domain_size[1], init_diff_coeffs_task, &arg);
arg.dc = MG2D_DIFF_COEFF_20;
arg.fact = 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_20] * SQR(ctx->step[0]));
- tp_execute(ctx->tp, ctx->domain_size[0], init_diff_coeffs_task, &arg);
+ tp_execute(ctx->tp, ctx->domain_size[1], init_diff_coeffs_task, &arg);
arg.dc = MG2D_DIFF_COEFF_02;
arg.fact = 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_02] * SQR(ctx->step[1]));
- tp_execute(ctx->tp, ctx->domain_size[0], init_diff_coeffs_task, &arg);
+ tp_execute(ctx->tp, ctx->domain_size[1], init_diff_coeffs_task, &arg);
arg.dc = MG2D_DIFF_COEFF_11;
arg.fact = 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_11] * ctx->step[0] * ctx->step[1]);
- tp_execute(ctx->tp, ctx->domain_size[0], init_diff_coeffs_task, &arg);
+ tp_execute(ctx->tp, ctx->domain_size[1], init_diff_coeffs_task, &arg);
}
if (!(flags & EGS_INIT_FLAG_SAME_DIFF_COEFFS))
@@ -870,13 +910,18 @@ int mg2di_egs_init(EGSContext *ctx, int flags)
priv->residual_calc_size[0] = ctx->domain_size[0];
priv->residual_calc_size[1] = ctx->domain_size[1];
- priv->residual_calc_offset[0] = ctx->boundaries[MG2D_BOUNDARY_1L]->type == MG2D_BC_TYPE_FIXVAL;
- priv->residual_calc_offset[1] = ctx->boundaries[MG2D_BOUNDARY_0L]->type == MG2D_BC_TYPE_FIXVAL;
+ priv->residual_calc_offset[0] = ctx->boundaries[MG2D_BOUNDARY_1L]->type == MG2D_BC_TYPE_FIXVAL &&
+ dc->bnd_is_outer[MG2D_BOUNDARY_1L];
+ priv->residual_calc_offset[1] = ctx->boundaries[MG2D_BOUNDARY_0L]->type == MG2D_BC_TYPE_FIXVAL &&
+ dc->bnd_is_outer[MG2D_BOUNDARY_0L];
for (int bnd_idx = 0; bnd_idx < ARRAY_ELEMS(ctx->boundaries); bnd_idx++) {
MG2DBoundary *bnd = ctx->boundaries[bnd_idx];
const int ci = mg2d_bnd_coord_idx(bnd_idx);
+ if (!dc->bnd_is_outer[bnd_idx])
+ continue;
+
if (bnd->type == MG2D_BC_TYPE_FIXVAL) {
double maxval = 0.0;
@@ -923,6 +968,7 @@ finish:
static int arrays_alloc(EGSContext *ctx, const size_t domain_size[2])
{
EGSInternal *priv = ctx->priv;
+ const DomainComponent *dc = &priv->dg->components[priv->local_component];
const size_t ghosts = FD_STENCIL_MAX;
const size_t size_padded[2] = {
@@ -935,15 +981,23 @@ static int arrays_alloc(EGSContext *ctx, const size_t domain_size[2])
};
const Slice slice[2] = { SLICE(ghosts, -ghosts, 1),
SLICE(ghosts, -ghosts, 1) };
+ Slice slice_exterior[2] = {
+ SLICE(dc->bnd_is_outer[MG2D_BOUNDARY_1L] ? 0 : ghosts,
+ dc->bnd_is_outer[MG2D_BOUNDARY_1U] ? 0 : -ghosts, 1),
+ SLICE(dc->bnd_is_outer[MG2D_BOUNDARY_0L] ? 0 : ghosts,
+ dc->bnd_is_outer[MG2D_BOUNDARY_0U] ? 0 : -ghosts, 1),
+ };
int ret;
- ret = mg2di_ndarray_alloc(&ctx->u_base, 2, size_padded, NDARRAY_ALLOC_ZERO);
+ ret = mg2di_ndarray_alloc(&priv->u_base, 2, size_padded, NDARRAY_ALLOC_ZERO);
if (ret < 0)
return ret;
- ret = mg2di_ndarray_slice(&ctx->u, ctx->u_base,
- (Slice [2]){ SLICE(ghosts, size_padded[0] - ghosts, 1),
- SLICE(ghosts, -ghosts, 1) });
+ ret = mg2di_ndarray_slice(&ctx->u, priv->u_base, slice);
+ if (ret < 0)
+ return ret;
+
+ ret = mg2di_ndarray_slice(&ctx->u_exterior, priv->u_base, slice_exterior);
if (ret < 0)
return ret;
@@ -951,9 +1005,11 @@ static int arrays_alloc(EGSContext *ctx, const size_t domain_size[2])
if (ret < 0)
return ret;
- ret = mg2di_ndarray_slice(&priv->u_next, priv->u_next_base,
- (Slice [2]){ SLICE(ghosts, size_padded[0] - ghosts, 1),
- SLICE(ghosts, -ghosts, 1) });
+ ret = mg2di_ndarray_slice(&priv->u_next, priv->u_next_base, slice);
+ if (ret < 0)
+ return ret;
+
+ ret = mg2di_ndarray_slice(&priv->u_next_exterior, priv->u_next_base, slice_exterior);
if (ret < 0)
return ret;
@@ -1010,7 +1066,7 @@ static int arrays_alloc(EGSContext *ctx, const size_t domain_size[2])
return 0;
}
-EGSContext *mg2di_egs_alloc(size_t domain_size[2])
+static EGSContext *egs_alloc(const DomainGeometry *dg, unsigned int local_component)
{
EGSContext *ctx;
int ret;
@@ -1032,21 +1088,28 @@ EGSContext *mg2di_egs_alloc(size_t domain_size[2])
if (!ctx->exact)
goto fail;
+ ret = mg2di_dg_copy(&ctx->priv->dg, dg);
+ if (ret < 0)
+ goto fail;
+
+ ctx->priv->local_component = local_component;
+ ctx->priv->comm = MPI_COMM_NULL;
+
mg2di_timer_init(&ctx->exact->timer_mat_construct);
mg2di_timer_init(&ctx->exact->timer_bicgstab);
mg2di_timer_init(&ctx->exact->timer_lu_solve);
mg2di_timer_init(&ctx->exact->timer_export);
- if (!domain_size[0] || !domain_size[1] ||
- domain_size[0] > SIZE_MAX / domain_size[1])
+ if (!dg->domain_size[0] || !dg->domain_size[1] ||
+ dg->domain_size[0] > SIZE_MAX / dg->domain_size[1])
goto fail;
- ret = arrays_alloc(ctx, domain_size);
+ ret = arrays_alloc(ctx, dg->components[local_component].interior.size);
if (ret < 0)
goto fail;
- ctx->domain_size[0] = domain_size[0];
- ctx->domain_size[1] = domain_size[1];
+ ctx->domain_size[0] = dg->components[local_component].interior.size[0];
+ ctx->domain_size[1] = dg->components[local_component].interior.size[1];
ctx->priv->rescalc = mg2di_residual_calc_alloc();
if (!ctx->priv->rescalc)
@@ -1060,6 +1123,7 @@ EGSContext *mg2di_egs_alloc(size_t domain_size[2])
mg2di_timer_init(&ctx->timer_res_calc);
mg2di_timer_init(&ctx->timer_init);
mg2di_timer_init(&ctx->timer_solve);
+ mg2di_timer_init(&ctx->timer_mpi_sync);
return ctx;
fail:
@@ -1067,6 +1131,113 @@ fail:
return NULL;
}
+EGSContext *mg2di_egs_alloc(size_t domain_size[2])
+{
+ EGSContext *ctx;
+ DomainGeometry *dg;
+
+ dg = mg2di_dg_alloc(1);
+ if (!dg)
+ return NULL;
+
+ dg->domain_size[0] = domain_size[0];
+ dg->domain_size[1] = domain_size[1];
+
+ dg->components[0].interior.start[0] = 0;
+ dg->components[0].interior.start[1] = 0;
+ dg->components[0].interior.size[0] = domain_size[0];
+ dg->components[0].interior.size[1] = domain_size[1];
+
+ dg->components[0].exterior.start[0] = -FD_STENCIL_MAX;
+ dg->components[0].exterior.start[1] = -FD_STENCIL_MAX;
+ dg->components[0].exterior.size[0] = domain_size[0] + 2 * FD_STENCIL_MAX;
+ dg->components[0].exterior.size[1] = domain_size[1] + 2 * FD_STENCIL_MAX;
+
+ for (int i = 0; i < ARRAY_ELEMS(dg->components[0].bnd_is_outer); i++)
+ dg->components[0].bnd_is_outer[i] = 1;
+
+ ctx = egs_alloc(dg, 0);
+ mg2di_dg_free(&dg);
+ if (!ctx)
+ return NULL;
+
+ return ctx;
+}
+
+EGSContext *mg2di_egs_alloc_mpi(MPI_Comm comm, const DomainGeometry *dg)
+{
+ EGSContext *ctx = NULL;
+ Rect *overlaps_recv = NULL, *overlaps_send = NULL;
+ ptrdiff_t *lo;
+ int local_component;
+ int ret;
+
+ MPI_Comm_rank(comm, &local_component);
+
+ overlaps_recv = calloc(dg->nb_components, sizeof(*overlaps_recv));
+ overlaps_send = calloc(dg->nb_components, sizeof(*overlaps_send));
+ if (!overlaps_recv || !overlaps_send)
+ goto fail;
+
+ ret = mg2di_dg_edge_overlaps(overlaps_recv, overlaps_send,
+ dg, local_component, FD_STENCIL_MAX);
+ if (ret < 0)
+ goto fail;
+
+ ctx = egs_alloc(dg, local_component);
+
+ ctx->priv->comm = comm;
+
+ ctx->priv->sync_sendtypes = calloc(dg->nb_components, sizeof(*ctx->priv->sync_sendtypes));
+ ctx->priv->sync_senddispl = calloc(dg->nb_components, sizeof(*ctx->priv->sync_senddispl));
+ ctx->priv->sync_sendcounts = calloc(dg->nb_components, sizeof(*ctx->priv->sync_sendcounts));
+ ctx->priv->sync_recvtypes = calloc(dg->nb_components, sizeof(*ctx->priv->sync_recvtypes));
+ ctx->priv->sync_recvdispl = calloc(dg->nb_components, sizeof(*ctx->priv->sync_recvdispl));
+ ctx->priv->sync_recvcounts = calloc(dg->nb_components, sizeof(*ctx->priv->sync_recvcounts));
+ if (!ctx->priv->sync_sendtypes || !ctx->priv->sync_senddispl || !ctx->priv->sync_sendcounts ||
+ !ctx->priv->sync_recvtypes || !ctx->priv->sync_recvdispl || !ctx->priv->sync_recvcounts)
+ goto fail;
+
+ lo = dg->components[local_component].interior.start;
+
+ /* construct the send/receive parameters */
+ for (unsigned int i = 0; i < dg->nb_components; i++) {
+ if (i == local_component) {
+ MPI_Type_dup(MPI_INT, &ctx->priv->sync_sendtypes[i]);
+ MPI_Type_dup(MPI_INT, &ctx->priv->sync_recvtypes[i]);
+ ctx->priv->sync_sendcounts[i] = 0;
+ ctx->priv->sync_recvcounts[i] = 0;
+ ctx->priv->sync_senddispl[i] = 0;
+ ctx->priv->sync_recvdispl[i] = 0;
+
+ continue;
+ }
+
+ /* receive */
+ MPI_Type_vector(overlaps_recv[i].size[1], overlaps_recv[i].size[0],
+ ctx->u->stride[0], MPI_DOUBLE, &ctx->priv->sync_recvtypes[i]);
+ MPI_Type_commit(&ctx->priv->sync_recvtypes[i]);
+ ctx->priv->sync_recvcounts[i] = 1;
+ ctx->priv->sync_recvdispl[i] = ((overlaps_recv[i].start[1] - lo[1]) * ctx->u->stride[0] +
+ (overlaps_recv[i].start[0] - lo[0])) * sizeof(*ctx->u->data);
+
+ /* send */
+ MPI_Type_vector(overlaps_send[i].size[1], overlaps_send[i].size[0],
+ ctx->u->stride[0], MPI_DOUBLE, &ctx->priv->sync_sendtypes[i]);
+ MPI_Type_commit(&ctx->priv->sync_sendtypes[i]);
+ ctx->priv->sync_sendcounts[i] = 1;
+ ctx->priv->sync_senddispl[i] = ((overlaps_send[i].start[1] - lo[1]) * ctx->u->stride[0] +
+ (overlaps_send[i].start[0] - lo[0])) * sizeof(*ctx->u->data);
+ }
+
+ return ctx;
+fail:
+ free(overlaps_recv);
+ free(overlaps_send);
+ mg2di_egs_free(&ctx);
+ return NULL;
+}
+
void mg2di_egs_free(EGSContext **pctx)
{
EGSContext *ctx = *pctx;
@@ -1082,8 +1253,10 @@ void mg2di_egs_free(EGSContext **pctx)
exact_arrays_free(ctx);
mg2di_ndarray_free(&ctx->u);
- mg2di_ndarray_free(&ctx->u_base);
+ mg2di_ndarray_free(&ctx->u_exterior);
+ mg2di_ndarray_free(&ctx->priv->u_base);
mg2di_ndarray_free(&ctx->priv->u_next);
+ mg2di_ndarray_free(&ctx->priv->u_next_exterior);
mg2di_ndarray_free(&ctx->priv->u_next_base);
mg2di_ndarray_free(&ctx->rhs);
@@ -1105,6 +1278,28 @@ void mg2di_egs_free(EGSContext **pctx)
for (int i = 0; i < ARRAY_ELEMS(ctx->boundaries); i++)
mg2di_bc_free(&ctx->boundaries[i]);
+ if (ctx->priv->sync_sendtypes) {
+ for (int i = 0; i < ctx->priv->dg->nb_components; i++) {
+ if (ctx->priv->sync_sendtypes[i])
+ MPI_Type_free(&ctx->priv->sync_sendtypes[i]);
+ }
+ }
+ if (ctx->priv->sync_recvtypes) {
+ for (int i = 0; i < ctx->priv->dg->nb_components; i++) {
+ if (ctx->priv->sync_recvtypes[i])
+ MPI_Type_free(&ctx->priv->sync_recvtypes[i]);
+ }
+ }
+ free(ctx->priv->sync_sendtypes);
+ free(ctx->priv->sync_recvtypes);
+
+ free(ctx->priv->sync_sendcounts);
+ free(ctx->priv->sync_senddispl);
+ free(ctx->priv->sync_recvcounts);
+ free(ctx->priv->sync_recvdispl);
+
+ mg2di_dg_free(&ctx->priv->dg);
+
tp_free(&ctx->priv->tp_internal);
free(ctx->priv);
diff --git a/ell_grid_solve.h b/ell_grid_solve.h
index 7ce61a0..2893ce5 100644
--- a/ell_grid_solve.h
+++ b/ell_grid_solve.h
@@ -40,6 +40,9 @@
#include <stdint.h>
#include <threadpool.h>
+#include <mpi.h>
+
+#include "components.h"
#include "log.h"
#include "mg2d_boundary.h"
#include "mg2d_constants.h"
@@ -122,7 +125,8 @@ typedef struct EGSContext {
int cpuflags;
/**
- * Size of the solver grid, set by mg2di_egs_alloc().
+ * Size of the solver grid, set by mg2di_egs_alloc[_mpi](). For
+ * multi-component runs, this contains the size of this component only.
* Read-only for the caller.
*/
size_t domain_size[2];
@@ -142,6 +146,9 @@ typedef struct EGSContext {
/**
* Boundary specification, indexed by MG2DBoundaryLoc.
* To be filled by the caller before mg2di_egs_init().
+ *
+ * For multi-component runs, only the outer (not inter-component) boundary
+ * specifications are accessed by the solver.
*/
MG2DBoundary *boundaries[4];
@@ -156,9 +163,9 @@ typedef struct EGSContext {
NDArray *u;
/**
- * u including the ghost zones.
+ * u including the outer boundary ghost zones.
*/
- NDArray *u_base;
+ NDArray *u_exterior;
/**
* Values of the right-hand side.
@@ -199,6 +206,7 @@ typedef struct EGSContext {
Timer timer_res_calc;
Timer timer_init;
Timer timer_solve;
+ Timer timer_mpi_sync;
} EGSContext;
#define EGS_INIT_FLAG_SAME_DIFF_COEFFS (1 << 0)
@@ -211,6 +219,18 @@ typedef struct EGSContext {
* @return The solver context on success, NULL on failure.
*/
EGSContext *mg2di_egs_alloc(size_t domain_size[2]);
+
+/**
+ * Allocate a solver component in a multi-component MPI-based solve.
+ *
+ * @param comm The MPI communicator used to communicate with the other
+ * components
+ * @dg The geometry of the full computational domain. This component is indexed
+ * by its MPI rank in this geometry.
+ *
+ * @return The solver context on success, NULL on failure.
+ */
+EGSContext *mg2di_egs_alloc_mpi(MPI_Comm comm, const DomainGeometry *dg);
/**
* Initialize the solver for use, after all the required fields are filled by
* the caller.
diff --git a/meson.build b/meson.build
index 0c55f2e..8f09386 100644
--- a/meson.build
+++ b/meson.build
@@ -6,6 +6,7 @@ add_project_arguments('-D_XOPEN_SOURCE=700', language : 'c')
lib_src = [
'bicgstab.c',
'boundary.c',
+ 'components.c',
'cpu.c',
'ell_grid_solve.c',
'log.c',
@@ -27,7 +28,9 @@ liblapacke = cc.find_library('lapacke')
dep_tp = declare_dependency(link_args : '-lthreadpool')
-deps = [dep_tp, libm, libcblas, liblapacke]
+dep_mpi = dependency('mpi', language: 'c')
+
+deps = [dep_tp, libm, libcblas, liblapacke, dep_mpi]
cdata = configuration_data()
cdata.set10('ARCH_X86', false)
@@ -94,7 +97,7 @@ endif
library('mg2d', lib_src, lib_obj, link_args : ver_flag, dependencies : deps, link_depends : verscript)
# test programs
-test_progs = ['relax', 'mg2d']
+test_progs = ['relax', 'relax_mpi', 'mg2d']
foreach t : test_progs
target = t + '_test'
executable(target, [target + '.c'] + lib_src, lib_obj, dependencies : deps + [libm])
diff --git a/mg2d.c b/mg2d.c
index 25a1759..d09796b 100644
--- a/mg2d.c
+++ b/mg2d.c
@@ -192,7 +192,7 @@ static int mg_solve_subgrid(MG2DContext *ctx, MG2DLevel *level, int allow_exact)
/* prolongate the coarser-level correction */
mg2di_timer_start(&level->timer_prolong);
ret = mg2di_gt_transfer(level->transfer_prolong, level->prolong_tmp,
- level->child->solver->u_base);
+ level->child->solver->u_exterior);
mg2di_timer_stop(&level->timer_prolong);
if (ret < 0)
return ret;
diff --git a/relax_mpi_test.c b/relax_mpi_test.c
new file mode 100644
index 0000000..65888f6
--- /dev/null
+++ b/relax_mpi_test.c
@@ -0,0 +1,236 @@
+#include <math.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <mpi.h>
+
+#include "components.h"
+#include "ell_grid_solve.h"
+#include "log.h"
+#include "mg2d_boundary.h"
+#include "mg2d_constants.h"
+#include "ndarray.h"
+
+#define DOMAIN_SIZE 1.0
+#define FD_STENCIL 2
+
+#if 1
+static double sol(double x, double y)
+{
+ return sin(M_PI * x) * sin(M_PI * y);
+}
+static double sol_dx(double x, double y)
+{
+ return M_PI * cos(M_PI * x) * sin(M_PI * y);
+}
+static double sol_dy(double x, double y)
+{
+ return M_PI * sin(M_PI * x) * cos(M_PI * y);
+}
+static double sol_dxx(double x, double y)
+{
+ return -M_PI * M_PI * sol(x, y);
+}
+static double sol_dyy(double x, double y)
+{
+ return -M_PI * M_PI * sol(x, y);
+}
+static double sol_dxy(double x, double y)
+{
+ return M_PI * M_PI * cos(M_PI * x) * cos(M_PI * y);
+}
+#endif
+
+int main(int argc, char **argv)
+{
+ EGSContext *ctx;
+ DomainGeometry *dg = NULL;
+ DomainComponent *dc = NULL;
+ long int log2N, log2maxiter;
+ long int N, maxiter;
+ double res_old, res_new;
+ int ret = 0;
+
+ char processor_name[MPI_MAX_PROCESSOR_NAME];
+ int nb_processes, rank, processor_name_len;
+
+ if (argc < 3) {
+ fprintf(stderr, "Usage: %s <log2 N> <log2 maxiter>\n", argv[0]);
+ return 1;
+ }
+ log2N = strtol(argv[1], NULL, 0);
+ log2maxiter = strtol(argv[2], NULL, 0);
+ if (log2N <= 0 || log2maxiter < 0 ||
+ log2N >= sizeof(N) * 8 || log2maxiter >= sizeof(maxiter) * 8) {
+ fprintf(stderr, "Invalid log2N/log2maxiter values: %ld/%ld\n",
+ log2N, log2maxiter);
+ return 1;
+ }
+
+ N = (1L << log2N) + 1;
+ maxiter = 1L << log2maxiter;
+
+ MPI_Init(NULL, NULL);
+
+ MPI_Comm_size(MPI_COMM_WORLD, &nb_processes);
+ MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+ MPI_Get_processor_name(processor_name, &processor_name_len);
+
+ fprintf(stderr, "This is process %d out of %d, running on %s\n",
+ rank, nb_processes, processor_name);
+
+ dg = mg2di_dg_alloc(nb_processes);
+ if (!dg) {
+ fprintf(stderr, "Error allocating domain geometry\n");
+ ret = 1;
+ goto fail;
+ }
+
+ dg->domain_size[0] = N;
+ dg->domain_size[1] = N;
+
+ for (unsigned int i = 0; i < dg->nb_components; i++) {
+ size_t patch_start, patch_end, patch_size_y;
+
+ patch_size_y = (N + nb_processes - 1) / nb_processes;
+ patch_start = i * patch_size_y;
+ patch_end = MIN((i + 1) * patch_size_y, N);
+ patch_size_y = patch_end - patch_start;
+ if (patch_size_y <= 0) {
+ fprintf(stderr, "Too many processes for grid size %ld: %d\n", N, nb_processes);
+ ret = 1;
+ goto fail;
+ }
+
+ dg->components[i].interior.start[0] = 0;
+ dg->components[i].interior.start[1] = patch_start;
+ dg->components[i].interior.size[0] = N;
+ dg->components[i].interior.size[1] = patch_size_y;
+
+ dg->components[i].exterior.start[0] = -FD_STENCIL;
+ dg->components[i].exterior.start[1] = i ? patch_start : -FD_STENCIL;
+ dg->components[i].exterior.size[0] = N + 2 * FD_STENCIL;
+ dg->components[i].exterior.size[1] = patch_size_y + ((i == 0) * FD_STENCIL) + ((i == nb_processes - 1) * FD_STENCIL);
+
+ dg->components[i].bnd_is_outer[MG2D_BOUNDARY_0L] = 1;
+ dg->components[i].bnd_is_outer[MG2D_BOUNDARY_0U] = 1;
+ dg->components[i].bnd_is_outer[MG2D_BOUNDARY_1L] = i == 0;
+ dg->components[i].bnd_is_outer[MG2D_BOUNDARY_1U] = i == nb_processes - 1;
+ }
+ dc = &dg->components[rank];
+
+ ctx = mg2di_egs_alloc_mpi(MPI_COMM_WORLD, dg);
+ if (!ctx) {
+ fprintf(stderr, "Error allocating the solver context\n");
+ return 1;
+ }
+
+ ctx->logger.log = mg2di_log_default_callback;
+
+ ctx->step[0] = DOMAIN_SIZE / (N - 1.0);
+ ctx->step[1] = DOMAIN_SIZE / (N - 1.0);
+
+ ctx->fd_stencil = FD_STENCIL;
+
+ for (int bnd_loc = 0; bnd_loc < ARRAY_ELEMS(ctx->boundaries); bnd_loc++) {
+ MG2DBoundary *bnd = ctx->boundaries[bnd_loc];
+ const int ci = mg2d_bnd_coord_idx(bnd_loc);
+ const int bnd_dir = mg2d_bnd_out_dir(bnd_loc);
+
+ double coord[2];
+
+ if (!dc->bnd_is_outer[bnd_loc])
+ continue;
+
+ bnd->type = MG2D_BC_TYPE_FIXVAL;
+
+ memset(bnd->val, 0, ctx->domain_size[!ci] * sizeof(*bnd->val));
+
+ for (int j = 1; j < ctx->fd_stencil; j++) {
+ double *dst = bnd->val + j * bnd->val_stride;
+
+ coord[ci] = mg2d_bnd_is_upper(bnd_loc) * DOMAIN_SIZE + bnd_dir * j * ctx->step[ci];
+
+ for (ptrdiff_t k = -j; k < (ptrdiff_t)ctx->domain_size[!ci] + j; k++) {
+ coord[!ci] = (k + dc->interior.start[!ci]) * ctx->step[!ci];
+ dst[k] = sol(coord[0], coord[1]);
+ }
+ }
+ }
+
+ for (size_t y = 0; y < ctx->domain_size[1]; y++) {
+ const double y_coord = (y + dc->interior.start[1]) * ctx->step[1];
+
+ memset(NDPTR2D(ctx->u, 0, y), 0, sizeof(*ctx->u->data) * ctx->domain_size[0]);
+
+ for (size_t x = 1; x < ctx->domain_size[0]; x++) {
+ const double x_coord = x * ctx->step[0];
+
+ *NDPTR2D(ctx->diff_coeffs[MG2D_DIFF_COEFF_02], x, y) = 1.0;
+ *NDPTR2D(ctx->diff_coeffs[MG2D_DIFF_COEFF_20], x, y) = 1.0;
+ *NDPTR2D(ctx->diff_coeffs[MG2D_DIFF_COEFF_11], x, y) = 1.0;
+
+ *NDPTR2D(ctx->rhs, x, y) = sol_dxx(x_coord, y_coord) + sol_dyy(x_coord, y_coord) + sol_dxy(x_coord, y_coord);
+ }
+
+ memset(NDPTR2D(ctx->diff_coeffs[MG2D_DIFF_COEFF_00], 0, y), 0,
+ sizeof(*ctx->diff_coeffs[0]->data) * ctx->domain_size[0]);
+ memset(NDPTR2D(ctx->diff_coeffs[MG2D_DIFF_COEFF_01], 0, y), 0,
+ sizeof(*ctx->diff_coeffs[0]->data) * ctx->domain_size[0]);
+ memset(NDPTR2D(ctx->diff_coeffs[MG2D_DIFF_COEFF_10], 0, y), 0,
+ sizeof(*ctx->diff_coeffs[0]->data) * ctx->domain_size[0]);
+ }
+
+ ret = mg2di_egs_init(ctx, 0);
+ if (ret < 0) {
+ fprintf(stderr, "Error initializing the solver context: %d\n", ret);
+ ret = 1;
+ goto fail;
+ }
+
+ MPI_Reduce(&ctx->residual_max, &res_old, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
+
+ for (int i = 0; i < maxiter; i++) {
+ ret = mg2di_egs_solve(ctx, EGS_SOLVE_RELAXATION, 0);
+ if (ret < 0) {
+ fprintf(stderr, "Error during relaxation\n");
+ ret = 1;
+ goto fail;
+ }
+ MPI_Reduce(&ctx->residual_max, &res_new, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
+ if (rank == 0) {
+ if (res_new > 1e3) {
+ fprintf(stderr, "Diverged at step %d: %g -> %g\n", i, res_old, res_new);
+ goto fail;
+ }
+ fprintf(stderr, "Step %d: %g -> %g (%g)\n", i, res_old, res_new, res_old / res_new);
+ res_old = res_new;
+ }
+ }
+
+ {
+ double max_err = 0.0;
+
+ for (size_t y = 0; y < ctx->domain_size[1]; y++) {
+ const double y_coord = (y + dc->interior.start[1]) * ctx->step[1];
+
+ for (size_t x = 0; x < ctx->domain_size[0]; x++) {
+ const double x_coord = x * ctx->step[0];
+ double err = fabs(ctx->u->data[y * ctx->u->stride[0] + x] - sol(x_coord, y_coord));
+ if (err > max_err)
+ max_err = err;
+ }
+ }
+ MPI_Reduce(rank ? &max_err : MPI_IN_PLACE, &max_err, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
+ if (rank == 0)
+ fprintf(stderr, "max(|solution - exact|): %g\n", max_err);
+ }
+
+fail:
+ mg2di_egs_free(&ctx);
+ mg2di_dg_free(&dg);
+ MPI_Finalize();
+ return ret;
+}