author    Anton Khirnov <anton@khirnov.net>  2019-05-23 11:39:59 +0200
committer Anton Khirnov <anton@khirnov.net>  2019-05-23 11:41:31 +0200
commit    5bd1bccffd411384b02ca772822d87fac126e67f
tree      3a9494a87f93c4fa35063d47afdf294aa3b720aa
parent    4c972cfc352ae5ba851cae142ca6fe594d88bc04
mg2d: add support for MPI-based multi-component solves
For the moment, only the finest level is distributed across components; all coarser levels are gathered to rank 0. That should change in the future.
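
Each rank allocates its own component with the new mg2d_solver_alloc_mpi() and fills the usual per-rank fields; mg2d_solve() is then a collective call. A minimal sketch of the call pattern (the 1D row decomposition and the names N/rows/start/size are only illustrative; see mg2d_mpi_test.c below for the full version):

    int rank, nb_ranks;
    MPI_Comm_size(MPI_COMM_WORLD, &nb_ranks);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* split an N x N grid into bands of rows; assumes every rank
     * ends up with at least one row (cf. the check in the test) */
    size_t rows     = (N + nb_ranks - 1) / nb_ranks;
    size_t start[2] = { 0, rank * rows };
    size_t size[2]  = { N, N - start[1] < rows ? N - start[1] : rows };

    MG2DContext *ctx = mg2d_solver_alloc_mpi(MPI_COMM_WORLD, start, size);

    /* fill ctx->step, ctx->boundaries, ctx->u, ctx->rhs and
     * ctx->diff_coeffs with this component's local data, then
     * call collectively on all ranks */
    int ret = mg2d_solve(ctx);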
 libmg2d.v       |    2
 meson.build     |    2
 mg2d.c          | 1057
 mg2d.h          |   16
 mg2d_mpi_test.c |  283
 5 files changed, 1228 insertions(+), 132 deletions(-)
diff --git a/libmg2d.v b/libmg2d.v
index b4901a6..5c9f863 100644
--- a/libmg2d.v
+++ b/libmg2d.v
@@ -1,4 +1,4 @@
-LIBMG2D_12 {
+LIBMG2D_13 {
global: mg2d_*;
local: *;
};
diff --git a/meson.build b/meson.build
index 8f09386..4599633 100644
--- a/meson.build
+++ b/meson.build
@@ -97,7 +97,7 @@ endif
library('mg2d', lib_src, lib_obj, link_args : ver_flag, dependencies : deps, link_depends : verscript)
# test programs
-test_progs = ['relax', 'relax_mpi', 'mg2d']
+test_progs = ['relax', 'relax_mpi', 'mg2d', 'mg2d_mpi']
foreach t : test_progs
target = t + '_test'
executable(target, [target + '.c'] + lib_src, lib_obj, dependencies : deps + [libm])
diff --git a/mg2d.c b/mg2d.c
index d09796b..a0865c2 100644
--- a/mg2d.c
+++ b/mg2d.c
@@ -25,7 +25,10 @@
#include <string.h>
#include <threadpool.h>
+#include <mpi.h>
+
#include "common.h"
+#include "components.h"
#include "cpu.h"
#include "ell_grid_solve.h"
#include "log.h"
@@ -40,6 +43,12 @@
typedef struct MG2DLevel {
unsigned int depth;
+ /* comm used for restriction to/prolongation from the child;
+ * its size is equal to dg->nb_components and it is guaranteed
+ * to be a superset of the child's comm */
+ MPI_Comm mpi_comm;
+ DomainGeometry *dg;
+
EGSContext *solver;
int egs_init_flags;
@@ -49,6 +58,25 @@ typedef struct MG2DLevel {
NDArray *prolong_tmp_base;
NDArray *prolong_tmp;
+ Rect restrict_dst_extents;
+ Rect prolong_src_extents;
+
+ NDArray *restrict_dst;
+ int *restrict_sendcounts;
+ int *restrict_senddispl;
+ MPI_Datatype *restrict_sendtypes;
+ int *restrict_recvcounts;
+ int *restrict_recvdispl;
+ MPI_Datatype *restrict_recvtypes;
+
+ NDArray *prolong_dst;
+ int *prolong_sendcounts;
+ int *prolong_senddispl;
+ MPI_Datatype *prolong_sendtypes;
+ int *prolong_recvcounts;
+ int *prolong_recvdispl;
+ MPI_Datatype *prolong_recvtypes;
+
struct MG2DLevel *child;
/* timings */
@@ -58,9 +86,21 @@ typedef struct MG2DLevel {
Timer timer_restrict;
Timer timer_correct;
Timer timer_reinit;
+ Timer timer_mpi_sync;
} MG2DLevel;
+typedef struct DomainHierarchy {
+ int nb_levels;
+ DomainGeometry **levels;
+ double (*stepsizes)[2];
+} DomainHierarchy;
+
struct MG2DInternal {
+ MPI_Comm mpi_comm;
+ DomainHierarchy dh;
+ DomainGeometry *dg;
+ unsigned int local_component;
+
MG2DLogger logger;
TPContext *tp;
@@ -76,7 +116,7 @@ struct MG2DInternal {
int cpuflags;
Timer timer_solve;
- Timer timer_levels_init;
+ Timer timer_levels_init;
};
static void log_callback(MG2DLogger *log, int level, const char *fmt, va_list vl)
@@ -129,9 +169,61 @@ static int mg_solve(MG2DContext *ctx, MG2DLevel *level, enum EGSType solve_type,
return 0;
}
+static int mg_restrict_rhs(MG2DContext *ctx, MG2DLevel *l)
+{
+ int multi_component = l->dg->nb_components > 1;
+ NDArray *a_dst = multi_component ? l->restrict_dst : l->child->solver->rhs;
+ int ret = 0;
+
+ mg2di_timer_start(&l->timer_restrict);
+ ret = mg2di_gt_transfer(l->transfer_restrict, a_dst, l->solver->residual);
+ mg2di_timer_stop(&l->timer_restrict);
+ if (ret < 0)
+ goto finish;
+
+ if (multi_component) {
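+ /* redistribute the locally restricted data into the rhs arrays
+ * of the components that own it on the coarser level */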
+ mg2di_timer_start(&l->timer_mpi_sync);
+ MPI_Alltoallw(a_dst->data,
+ l->restrict_sendcounts, l->restrict_senddispl, l->restrict_sendtypes,
+ l->child ? l->child->solver->rhs->data : NULL,
+ l->restrict_recvcounts, l->restrict_recvdispl, l->restrict_recvtypes,
+ l->mpi_comm);
+ mg2di_timer_stop(&l->timer_mpi_sync);
+ }
+
+finish:
+ return ret;
+}
+
+static int mg_prolong_u(MG2DContext *ctx, MG2DLevel *l)
+{
+ int multi_component = l->dg->nb_components > 1;
+ NDArray *a_src = multi_component ? l->prolong_dst : l->child->solver->u_exterior;
+ int ret = 0;
+
+ if (multi_component) {
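+ /* gather the coarse-level solution (including ghost zones) from
+ * the components that own it into the local prolongation source */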
+ mg2di_timer_start(&l->timer_mpi_sync);
+ MPI_Alltoallw(l->child ? l->child->solver->u_exterior->data : NULL,
+ l->prolong_sendcounts, l->prolong_senddispl, l->prolong_sendtypes,
+ l->prolong_dst->data,
+ l->prolong_recvcounts, l->prolong_recvdispl, l->prolong_recvtypes,
+ l->mpi_comm);
+ mg2di_timer_stop(&l->timer_mpi_sync);
+ }
+
+ mg2di_timer_start(&l->timer_prolong);
+ ret = mg2di_gt_transfer(l->transfer_prolong, l->prolong_tmp, a_src);
+ mg2di_timer_stop(&l->timer_prolong);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
static int mg_solve_subgrid(MG2DContext *ctx, MG2DLevel *level, int allow_exact)
{
- enum EGSType solve_type = allow_exact && level->solver->domain_size[0] <= ctx->max_exact_size ?
+ enum EGSType solve_type = (allow_exact && level->dg->nb_components == 1 &&
+ level->solver->domain_size[0] <= ctx->max_exact_size) ?
EGS_SOLVE_EXACT : EGS_SOLVE_RELAXATION;
double res_old, res_new;
int ret = 0;
@@ -169,31 +261,28 @@ static int mg_solve_subgrid(MG2DContext *ctx, MG2DLevel *level, int allow_exact)
/* pre-restrict relaxation */
for (int j = 0; j < ctx->nb_relax_pre; j++) {
- ret = mg_solve(ctx, level, solve_type,
- "pre-relax", j == ctx->nb_relax_pre - 1 && level->child);
+ ret = mg_solve(ctx, level, solve_type, "pre-relax",
+ j == ctx->nb_relax_pre - 1 &&
+ level->depth < ctx->priv->dh.nb_levels - 1);
if (ret < 0)
return ret;
}
- if (level->child) {
+ if (level->depth < ctx->priv->dh.nb_levels - 1) {
/* restrict the residual to form the coarser-level rhs */
- mg2di_timer_start(&level->timer_restrict);
- ret = mg2di_gt_transfer(level->transfer_restrict, level->child->solver->rhs,
- level->solver->residual);
- mg2di_timer_stop(&level->timer_restrict);
+ ret = mg_restrict_rhs(ctx, level);
if (ret < 0)
return ret;
/* solve on the coarser level */
- ret = mg_solve_subgrid(ctx, level->child, 1);
- if (ret < 0)
- return ret;
+ if (level->child) {
+ ret = mg_solve_subgrid(ctx, level->child, 1);
+ if (ret < 0)
+ return ret;
+ }
/* prolongate the coarser-level correction */
- mg2di_timer_start(&level->timer_prolong);
- ret = mg2di_gt_transfer(level->transfer_prolong, level->prolong_tmp,
- level->child->solver->u_exterior);
- mg2di_timer_stop(&level->timer_prolong);
+ ret = mg_prolong_u(ctx, level);
if (ret < 0)
return ret;
@@ -255,40 +344,155 @@ static void bnd_copy(MG2DBoundary *bdst, const double *src, ptrdiff_t src_stride
}
}
-static int restrict_diff_coeffs(MG2DContext *ctx, enum GridTransferOperator op,
- EGSContext *dst, EGSContext *src)
+static int restrict_dc_sync(MG2DContext *ctx, MG2DLevel *l)
{
MG2DInternal *priv = ctx->priv;
+ Rect *overlaps_recv = NULL, *overlaps_send = NULL;
+ MPI_Datatype *sendtypes = NULL, *recvtypes = NULL;
+ int *senddispl = NULL, *recvdispl = NULL, *sendcounts = NULL, *recvcounts = NULL;
+
+ const ptrdiff_t *lo = l->dg->components[priv->local_component].interior.start;
+ const ptrdiff_t stride = l->solver->diff_coeffs[0]->stride[0];
+
+ int ret = 0;
+
+ overlaps_recv = calloc(l->dg->nb_components, sizeof(*overlaps_recv));
+ overlaps_send = calloc(l->dg->nb_components, sizeof(*overlaps_send));
+ sendtypes = calloc(l->dg->nb_components, sizeof(*sendtypes));
+ senddispl = calloc(l->dg->nb_components, sizeof(*senddispl));
+ sendcounts = calloc(l->dg->nb_components, sizeof(*sendcounts));
+ recvtypes = calloc(l->dg->nb_components, sizeof(*recvtypes));
+ recvdispl = calloc(l->dg->nb_components, sizeof(*recvdispl));
+ recvcounts = calloc(l->dg->nb_components, sizeof(*recvcounts));
+ if (!overlaps_recv || !overlaps_send || !sendtypes || !recvtypes ||
+ !senddispl || !recvdispl || !sendcounts || !recvcounts) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ ret = mg2di_dg_edge_overlaps(overlaps_recv, overlaps_send,
+ l->dg, priv->local_component, FD_STENCIL_MAX);
+ if (ret < 0)
+ goto fail;
+
+ /* construct the send/receive parameters */
+ for (unsigned int i = 0; i < l->dg->nb_components; i++) {
+ if (i == priv->local_component) {
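+ /* no self-transfer; install a dummy committed datatype so this
+ * slot can be passed to MPI and freed uniformly */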
+ MPI_Type_dup(MPI_INT, &sendtypes[i]);
+ MPI_Type_dup(MPI_INT, &recvtypes[i]);
+ sendcounts[i] = 0;
+ recvcounts[i] = 0;
+ senddispl[i] = 0;
+ recvdispl[i] = 0;
+ continue;
+ }
+
+ /* receive */
+ MPI_Type_vector(overlaps_recv[i].size[1], overlaps_recv[i].size[0],
+ stride, MPI_DOUBLE, &recvtypes[i]);
+ MPI_Type_commit(&recvtypes[i]);
+ recvcounts[i] = 1;
+ recvdispl[i] = ((overlaps_recv[i].start[1] - lo[1]) * stride +
+ (overlaps_recv[i].start[0] - lo[0])) * sizeof(double);
+
+ /* send */
+ MPI_Type_vector(overlaps_send[i].size[1], overlaps_send[i].size[0],
+ stride, MPI_DOUBLE, &sendtypes[i]);
+ MPI_Type_commit(&sendtypes[i]);
+ sendcounts[i] = 1;
+ senddispl[i] = ((overlaps_send[i].start[1] - lo[1]) * stride +
+ (overlaps_send[i].start[0] - lo[0])) * sizeof(double);
+ }
+
+ for (int i = 0; i < ARRAY_ELEMS(l->solver->diff_coeffs); i++) {
+ MPI_Alltoallw(l->solver->diff_coeffs[i]->data,
+ sendcounts, senddispl, sendtypes,
+ l->solver->diff_coeffs[i]->data,
+ recvcounts, recvdispl, recvtypes,
+ l->mpi_comm);
+ }
+
+fail:
+ if (sendtypes) {
+ for (unsigned int i = 0; i < l->dg->nb_components; i++) {
+ if (sendtypes[i])
+ MPI_Type_free(&sendtypes[i]);
+ }
+ }
+ if (recvtypes) {
+ for (unsigned int i = 0; i < l->dg->nb_components; i++) {
+ if (recvtypes[i])
+ MPI_Type_free(&recvtypes[i]);
+ }
+ }
+ free(sendtypes);
+ free(senddispl);
+ free(sendcounts);
+ free(recvtypes);
+ free(recvdispl);
+ free(recvcounts);
+ free(overlaps_recv);
+ free(overlaps_send);
+ return ret;
+}
+
+static int restrict_diff_coeffs(MG2DContext *ctx, MG2DLevel *l,
+ enum GridTransferOperator op)
+{
+ MG2DInternal *priv = ctx->priv;
+ const DomainComponent *dc_src = &l->dg->components[priv->local_component];
+ int multi_component = l->dg->nb_components > 1;
+
GridTransferContext *tc;
int ret = 0;
+ /* get the ghost points for interpolation */
+ if (multi_component) {
+ ret = restrict_dc_sync(ctx, l);
+ if (ret < 0)
+ return ret;
+ }
+
tc = mg2di_gt_alloc(op);
if (!tc)
return -ENOMEM;
- tc->tp = priv->tp;
- tc->cpuflags = priv->cpuflags;
+ tc->tp = priv->tp;
+ tc->cpuflags = priv->cpuflags;
+ tc->extrapolate_distance = 1;
- tc->src.size[0] = src->domain_size[0];
- tc->src.size[1] = src->domain_size[1];
- tc->src.step[0] = src->step[0];
- tc->src.step[1] = src->step[1];
+ for (int dim = 0; dim < 2; dim++) {
+ tc->src.start[!dim] = dc_src->interior.start[dim];
+ tc->src.size[!dim] = dc_src->interior.size[dim];
+ tc->src.step[!dim] = priv->dh.stepsizes[l->depth][dim];
- tc->dst.size[0] = dst->domain_size[0];
- tc->dst.size[1] = dst->domain_size[1];
- tc->dst.step[0] = dst->step[0];
- tc->dst.step[1] = dst->step[1];
+ tc->dst.start[!dim] = l->restrict_dst_extents.start[dim];
+ tc->dst.size[!dim] = l->restrict_dst_extents.size[dim];
+ tc->dst.step[!dim] = priv->dh.stepsizes[l->depth + 1][dim];
+ }
ret = mg2di_gt_init(tc);
if (ret < 0)
goto finish;
for (int i = 0; i < MG2D_DIFF_COEFF_NB; i++) {
- ret = mg2di_gt_transfer(tc, dst->diff_coeffs[i], src->diff_coeffs[i]);
+ ret = mg2di_gt_transfer(tc, multi_component ? l->restrict_dst : l->child->solver->diff_coeffs[i],
+ l->solver->diff_coeffs[i]);
if (ret < 0)
goto finish;
+
+ if (multi_component) {
+ mg2di_timer_start(&l->timer_mpi_sync);
+ MPI_Alltoallw(l->restrict_dst->data,
+ l->restrict_sendcounts, l->restrict_senddispl, l->restrict_sendtypes,
+ l->child ? l->child->solver->diff_coeffs[i]->data : NULL,
+ l->restrict_recvcounts, l->restrict_recvdispl, l->restrict_recvtypes,
+ l->mpi_comm);
+ mg2di_timer_stop(&l->timer_mpi_sync);
+ }
}
+
finish:
mg2di_gt_free(&tc);
@@ -307,6 +511,73 @@ static double findmax(double *arr, size_t size[2], ptrdiff_t stride)
return ret;
}
+static int mg_setup_restrict(MG2DContext *ctx, MG2DLevel *l, enum GridTransferOperator op)
+{
+ MG2DInternal *priv = ctx->priv;
+ const DomainComponent *dc_src = &l->dg->components[priv->local_component];
+
+ GridTransferContext *gt;
+ int ret;
+
+ gt = mg2di_gt_alloc(op);
+ if (!gt)
+ return -ENOMEM;
+ l->transfer_restrict = gt;
+
+ gt->tp = priv->tp;
+ gt->cpuflags = priv->cpuflags;
+ gt->extrapolate_distance = 1;
+
+ for (int dim = 0; dim < 2; dim++) {
+ gt->src.start[!dim] = dc_src->interior.start[dim];
+ gt->src.size[!dim] = dc_src->interior.size[dim];
+ gt->src.step[!dim] = priv->dh.stepsizes[l->depth][dim];
+
+ gt->dst.start[!dim] = l->restrict_dst_extents.start[dim];
+ gt->dst.size[!dim] = l->restrict_dst_extents.size[dim];
+ gt->dst.step[!dim] = priv->dh.stepsizes[l->depth + 1][dim];
+ }
+
+ ret = mg2di_gt_init(gt);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+static int mg_setup_prolong(MG2DContext *ctx, MG2DLevel *l, enum GridTransferOperator op)
+{
+ MG2DInternal *priv = ctx->priv;
+ const DomainComponent *dc_dst = &l->dg->components[priv->local_component];
+
+ GridTransferContext *gt;
+ int ret;
+
+ gt = mg2di_gt_alloc(op);
+ if (!gt)
+ return -ENOMEM;
+ l->transfer_prolong = gt;
+
+ gt->tp = priv->tp;
+ gt->cpuflags = priv->cpuflags;
+
+ for (int dim = 0; dim < 2; dim++) {
+ gt->src.start[!dim] = l->prolong_src_extents.start[dim];
+ gt->src.size[!dim] = l->prolong_src_extents.size[dim];
+ gt->src.step[!dim] = priv->dh.stepsizes[l->depth + 1][dim];
+
+ gt->dst.start[!dim] = dc_dst->interior.start[dim];
+ gt->dst.size[!dim] = dc_dst->interior.size[dim];
+ gt->dst.step[!dim] = priv->dh.stepsizes[l->depth][dim];
+ }
+
+ ret = mg2di_gt_init(gt);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
static int mg_levels_init(MG2DContext *ctx)
{
MG2DInternal *priv = ctx->priv;
@@ -322,53 +593,63 @@ static int mg_levels_init(MG2DContext *ctx)
ctx->diff_coeffs_stride);
diff2_max = MAX(diff2_max, tmp);
+ if (priv->dg->nb_components > 1) {
+ MPI_Allreduce(MPI_IN_PLACE, &diff0_max, 1,
+ MPI_DOUBLE, MPI_MAX, priv->mpi_comm);
+ MPI_Allreduce(MPI_IN_PLACE, &diff2_max, 1,
+ MPI_DOUBLE, MPI_MAX, priv->mpi_comm);
+ }
+
cur = priv->root;
prev = NULL;
if (priv->u) {
- mg2di_ndarray_copy(cur->solver->u, priv->u);
+ ret = mg2di_ndarray_copy(cur->solver->u, priv->u);
+ if (ret < 0)
+ return ret;
mg2di_ndarray_free(&priv->u);
ctx->u = cur->solver->u->data;
ctx->u_stride = cur->solver->u->stride[0];
+ }
- mg2di_ndarray_copy(cur->solver->rhs, priv->rhs);
+ if (priv->rhs) {
+ ret = mg2di_ndarray_copy(cur->solver->rhs, priv->rhs);
+ if (ret < 0)
+ return ret;
mg2di_ndarray_free(&priv->rhs);
ctx->rhs = cur->solver->rhs->data;
ctx->rhs_stride = cur->solver->rhs->stride[0];
+ }
+ if (priv->diff_coeffs[0]) {
for (int i = 0; i < ARRAY_ELEMS(ctx->diff_coeffs); i++) {
mg2di_ndarray_copy(cur->solver->diff_coeffs[i], priv->diff_coeffs[i]);
mg2di_ndarray_free(&priv->diff_coeffs[i]);
ctx->diff_coeffs[i] = cur->solver->diff_coeffs[i]->data;
}
ctx->diff_coeffs_stride = cur->solver->diff_coeffs[0]->stride[0];
-
}
while (cur) {
- if (!prev) {
- cur->solver->step[0] = ctx->step[0];
- cur->solver->step[1] = ctx->step[1];
- } else {
- cur->solver->step[0] = prev->solver->step[0] * ((double)(prev->solver->domain_size[0] - 1) / (cur->solver->domain_size[0] - 1));
- cur->solver->step[1] = prev->solver->step[1] * ((double)(prev->solver->domain_size[1] - 1) / (cur->solver->domain_size[1] - 1));
- }
+ cur->solver->step[0] = priv->dh.stepsizes[cur->depth][0];
+ cur->solver->step[1] = priv->dh.stepsizes[cur->depth][1];
- for (int i = 0; i < ARRAY_ELEMS(cur->solver->boundaries); i++) {
- MG2DBoundary *bsrc = ctx->boundaries[i];
- MG2DBoundary *bdst = cur->solver->boundaries[i];
+ for (int bnd_idx = 0; bnd_idx < ARRAY_ELEMS(cur->solver->boundaries); bnd_idx++) {
+ const int ci = mg2d_bnd_coord_idx(bnd_idx);
+ MG2DBoundary *bsrc = ctx->boundaries[bnd_idx];
+ MG2DBoundary *bdst = cur->solver->boundaries[bnd_idx];
bdst->type = bsrc->type;
switch (bsrc->type) {
case MG2D_BC_TYPE_FIXVAL:
- if (!prev)
+ if (!prev) {
bnd_copy(bdst, bsrc->val, bsrc->val_stride, ctx->fd_stencil,
- cur->solver->domain_size[0]);
- else
- bnd_zero(bdst, ctx->fd_stencil, cur->solver->domain_size[0]);
- break;
+ cur->solver->domain_size[!ci]);
+ break;
+ }
+ /* fall-through */
case MG2D_BC_TYPE_REFLECT:
case MG2D_BC_TYPE_FALLOFF:
- bnd_zero(bdst, ctx->fd_stencil, cur->solver->domain_size[0]);
+ bnd_zero(bdst, ctx->fd_stencil, cur->solver->domain_size[!ci]);
break;
default:
return -ENOSYS;
@@ -393,7 +674,7 @@ static int mg_levels_init(MG2DContext *ctx)
while (cur) {
mg2di_gt_free(&cur->transfer_restrict);
mg2di_gt_free(&cur->transfer_prolong);
- if (cur->child) {
+ if (cur->depth < priv->dh.nb_levels - 1) {
enum GridTransferOperator op_interp;
enum GridTransferOperator op_restrict;
@@ -403,7 +684,8 @@ static int mg_levels_init(MG2DContext *ctx)
default: return -ENOSYS;
}
- if (cur->solver->domain_size[0] == 2 * (cur->child->solver->domain_size[0] - 1) + 1) {
+ if (priv->dh.levels[cur->depth]->domain_size[0] ==
+ 2 * (priv->dh.levels[cur->depth + 1]->domain_size[0] - 1) + 1) {
if (ctx->fd_stencil == 1)
op_restrict = GRID_TRANSFER_FW_2;
else
@@ -411,51 +693,15 @@ static int mg_levels_init(MG2DContext *ctx)
} else
op_restrict = op_interp;
- cur->transfer_restrict = mg2di_gt_alloc(op_restrict);
- if (!cur->transfer_restrict)
- return -ENOMEM;
-
- cur->transfer_restrict->tp = priv->tp;
- cur->transfer_restrict->cpuflags = priv->cpuflags;
-
- cur->transfer_restrict->src.size[0] = cur->solver->domain_size[0];
- cur->transfer_restrict->src.size[1] = cur->solver->domain_size[1];
- cur->transfer_restrict->src.step[0] = cur->solver->step[0];
- cur->transfer_restrict->src.step[1] = cur->solver->step[1];
-
- cur->transfer_restrict->dst.size[0] = cur->child->solver->domain_size[0];
- cur->transfer_restrict->dst.size[1] = cur->child->solver->domain_size[1];
- cur->transfer_restrict->dst.step[0] = cur->child->solver->step[0];
- cur->transfer_restrict->dst.step[1] = cur->child->solver->step[1];
-
- ret = mg2di_gt_init(cur->transfer_restrict);
+ ret = mg_setup_restrict(ctx, cur, op_restrict);
if (ret < 0)
return ret;
- cur->transfer_prolong = mg2di_gt_alloc(op_interp);
- if (!cur->transfer_prolong)
- return -ENOMEM;
-
- cur->transfer_prolong->tp = priv->tp;
- cur->transfer_prolong->cpuflags = priv->cpuflags;
-
- cur->transfer_prolong->dst.size[0] = cur->solver->domain_size[0];
- cur->transfer_prolong->dst.size[1] = cur->solver->domain_size[1];
- cur->transfer_prolong->dst.step[0] = cur->solver->step[0];
- cur->transfer_prolong->dst.step[1] = cur->solver->step[1];
-
- cur->transfer_prolong->src.start[0] = -FD_STENCIL_MAX;
- cur->transfer_prolong->src.start[1] = -FD_STENCIL_MAX;
- cur->transfer_prolong->src.size[0] = cur->child->solver->domain_size[0] + 2 * FD_STENCIL_MAX;
- cur->transfer_prolong->src.size[1] = cur->child->solver->domain_size[1] + 2 * FD_STENCIL_MAX;
- cur->transfer_prolong->src.step[0] = cur->child->solver->step[0];
- cur->transfer_prolong->src.step[1] = cur->child->solver->step[1];
-
- ret = mg2di_gt_init(cur->transfer_prolong);
+ ret = mg_setup_prolong(ctx, cur, op_interp);
if (ret < 0)
return ret;
- ret = restrict_diff_coeffs(ctx, op_interp, cur->child->solver, cur->solver);
+ ret = restrict_diff_coeffs(ctx, cur, op_interp);
if (ret < 0)
return ret;
}
@@ -468,6 +714,121 @@ static int mg_levels_init(MG2DContext *ctx)
return 0;
}
+static void mg_dh_uninit(DomainHierarchy *dh)
+{
+ for (int i = 0; i < dh->nb_levels; i++)
+ mg2di_dg_free(&dh->levels[i]);
+ free(dh->levels);
+ free(dh->stepsizes);
+ memset(dh, 0, sizeof(*dh));
+}
+
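+/* coarser levels are currently always a single component, gathered on rank 0 */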
+static int mg_level_partition(DomainGeometry **res, size_t domain_size,
+ const DomainGeometry *src)
+{
+ DomainGeometry *dg;
+
+ dg = mg2di_dg_alloc(1);
+ if (!dg)
+ return -ENOMEM;
+
+ dg->domain_size[0] = domain_size;
+ dg->domain_size[1] = domain_size;
+
+ dg->components[0].interior.start[0] = 0;
+ dg->components[0].interior.start[1] = 0;
+ dg->components[0].interior.size[0] = domain_size;
+ dg->components[0].interior.size[1] = domain_size;
+
+ dg->components[0].exterior.start[0] = -FD_STENCIL_MAX;
+ dg->components[0].exterior.start[1] = -FD_STENCIL_MAX;
+ dg->components[0].exterior.size[0] = domain_size + 2 * FD_STENCIL_MAX;
+ dg->components[0].exterior.size[1] = domain_size + 2 * FD_STENCIL_MAX;
+
+ for (int i = 0; i < 4; i++)
+ dg->components[0].bnd_is_outer[i] = 1;
+
+ *res = dg;
+
+ return 0;
+}
+
+static int log2i(int n)
+{
+ int ret = 0;
+ while (n) {
+ ret++;
+ n >>= 1;
+ }
+ return ret - 1;
+}
+
+static int mg_dh_init(DomainHierarchy *dh, const DomainGeometry *root,
+ const double *step_root, unsigned int max_levels)
+{
+ DomainGeometry *next_dg;
+ size_t next_size;
+ int ret = 0;
+
+ ret = mg2di_dg_copy(&next_dg, root);
+ if (ret < 0)
+ return ret;
+ next_size = root->domain_size[0];
+
+ for (int depth = 0; depth < max_levels; depth++) {
+ DomainGeometry **dg_tmp;
+ double (*stepsizes_tmp)[2];
+
+ size_t cur_size = next_size;
+ DomainGeometry *cur_dg = next_dg;
+ next_dg = NULL;
+
+ // choose the domain size for the next child;
+ // the children all have sizes 2**n + 1,
+ // but on the top level we skip the nearest smaller size if it is too close to the current one
+ if (depth == 0) {
+ next_size = (1 << log2i(cur_size - 2)) + 1;
+ if ((double)cur_size / next_size < 1.5)
+ next_size = (next_size >> 1) + 1;
+ } else
+ next_size = (cur_size >> 1) + 1;
+
+ ret = mg_level_partition(&next_dg, next_size, cur_dg);
+ if (ret < 0) {
+ mg2di_dg_free(&cur_dg);
+ goto fail;
+ }
+
+ dg_tmp = realloc(dh->levels, (depth + 1) * sizeof(*dh->levels));
+ if (!dg_tmp) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+ dh->levels = dg_tmp;
+
+ stepsizes_tmp = realloc(dh->stepsizes, (depth + 1) * sizeof(*dh->stepsizes));
+ if (!stepsizes_tmp) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+ dh->stepsizes = stepsizes_tmp;
+
+ dh->levels[depth] = cur_dg;
+ dh->stepsizes[depth][0] = step_root[0] * (root->domain_size[0] - 1) / (cur_dg->domain_size[0] - 1);
+ dh->stepsizes[depth][1] = step_root[1] * (root->domain_size[1] - 1) / (cur_dg->domain_size[1] - 1);
+
+ dh->nb_levels++;
+
+ if (next_size <= 4)
+ break;
+ }
+
+fail:
+ if (ret < 0)
+ mg_dh_uninit(dh);
+ return ret;
+}
+
static int threadpool_init(MG2DContext *ctx)
{
MG2DInternal *priv = ctx->priv;
@@ -480,7 +841,20 @@ static int threadpool_init(MG2DContext *ctx)
return 0;
}
-static int mg_levels_alloc(MG2DContext *ctx, size_t domain_size);
+static int mg_levels_alloc(MG2DContext *ctx);
+
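+/* maximum of the solver residual over all components */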
+static double mg_resmax_get(MG2DContext *ctx)
+{
+ MG2DInternal *priv = ctx->priv;
+ double ret = priv->root->solver->residual_max;
+
+ if (priv->root->dg->nb_components > 1) {
+ MPI_Allreduce(MPI_IN_PLACE, &ret, 1,
+ MPI_DOUBLE, MPI_MAX, priv->mpi_comm);
+ }
+
+ return ret;
+}
int mg2d_solve(MG2DContext *ctx)
{
@@ -491,7 +865,7 @@ int mg2d_solve(MG2DContext *ctx)
int ret;
if (!priv->root) {
- ret = mg_levels_alloc(ctx, ctx->domain_size);
+ ret = mg_levels_alloc(ctx);
if (ret < 0)
return ret;
}
@@ -521,7 +895,7 @@ int mg2d_solve(MG2DContext *ctx)
root->egs_init_flags |= EGS_INIT_FLAG_SAME_DIFF_COEFFS;
- res_orig = s_root->residual_max;
+ res_orig = mg_resmax_get(ctx);
res_prev = res_orig;
for (int i = 0; i < ctx->maxiter; i++) {
@@ -529,7 +903,7 @@ int mg2d_solve(MG2DContext *ctx)
if (ret < 0)
goto fail;
- res_cur = s_root->residual_max;
+ res_cur = mg_resmax_get(ctx);
if (res_cur < ctx->tol) {
mg2di_log(&priv->logger, MG2D_LOG_INFO, "converged on iteration %d, residual %g\n",
@@ -568,6 +942,50 @@ static void mg_level_free(MG2DLevel **plevel)
if (!level)
return;
+ if (level->restrict_sendtypes) {
+ for (int i = 0; i < level->dg->nb_components; i++) {
+ if (level->restrict_sendtypes[i])
+ MPI_Type_free(&level->restrict_sendtypes[i]);
+ }
+ }
+ if (level->restrict_recvtypes) {
+ for (int i = 0; i < level->dg->nb_components; i++) {
+ if (level->restrict_recvtypes[i])
+ MPI_Type_free(&level->restrict_recvtypes[i]);
+ }
+ }
+ free(level->restrict_sendtypes);
+ free(level->restrict_recvtypes);
+
+ free(level->restrict_sendcounts);
+ free(level->restrict_senddispl);
+ free(level->restrict_recvcounts);
+ free(level->restrict_recvdispl);
+
+ if (level->prolong_sendtypes) {
+ for (int i = 0; i < level->dg->nb_components; i++) {
+ if (level->prolong_sendtypes[i])
+ MPI_Type_free(&level->prolong_sendtypes[i]);
+ }
+ }
+ if (level->prolong_recvtypes) {
+ for (int i = 0; i < level->dg->nb_components; i++) {
+ if (level->prolong_recvtypes[i])
+ MPI_Type_free(&level->prolong_recvtypes[i]);
+ }
+ }
+
+ free(level->prolong_sendtypes);
+ free(level->prolong_recvtypes);
+
+ free(level->prolong_sendcounts);
+ free(level->prolong_senddispl);
+ free(level->prolong_recvcounts);
+ free(level->prolong_recvdispl);
+
+ mg2di_ndarray_free(&level->restrict_dst);
+ mg2di_ndarray_free(&level->prolong_dst);
+
mg2di_ndarray_free(&level->prolong_tmp);
mg2di_ndarray_free(&level->prolong_tmp_base);
mg2di_egs_free(&level->solver);
@@ -575,12 +993,16 @@ static void mg_level_free(MG2DLevel **plevel)
mg2di_gt_free(&level->transfer_restrict);
mg2di_gt_free(&level->transfer_prolong);
+ mg2di_dg_free(&level->dg);
+
free(level);
*plevel = NULL;
}
-static MG2DLevel *mg_level_alloc(const size_t domain_size)
+static MG2DLevel *mg_level_alloc(const DomainGeometry *dg,
+ unsigned int local_component, MPI_Comm comm)
{
+ const DomainComponent *dc = &dg->components[local_component];
MG2DLevel *level;
int ret;
@@ -588,7 +1010,14 @@ static MG2DLevel *mg_level_alloc(const size_t domain_size)
if (!level)
return NULL;
- ret = mg2di_ndarray_alloc(&level->prolong_tmp_base, 2, (size_t [2]){domain_size + 1, domain_size + 1}, 0);
+ ret = mg2di_dg_copy(&level->dg, dg);
+ if (ret < 0)
+ goto fail;
+
+ level->mpi_comm = comm;
+
+ ret = mg2di_ndarray_alloc(&level->prolong_tmp_base, 2,
+ (size_t [2]){dc->interior.size[1] + 1, dc->interior.size[0] + 1}, 0);
if (ret < 0)
goto fail;
@@ -596,15 +1025,43 @@ static MG2DLevel *mg_level_alloc(const size_t domain_size)
if (ret < 0)
goto fail;
- level->solver = mg2di_egs_alloc((size_t [2]){domain_size, domain_size});
+ if (dg->nb_components > 1)
+ level->solver = mg2di_egs_alloc_mpi(level->mpi_comm, dg);
+ else
+ level->solver = mg2di_egs_alloc(dg->domain_size);
if (!level->solver)
goto fail;
+ if (dg->nb_components > 1) {
+ level->restrict_sendcounts = calloc(dg->nb_components, sizeof(*level->restrict_sendcounts));
+ level->restrict_senddispl = calloc(dg->nb_components, sizeof(*level->restrict_senddispl));
+ level->restrict_sendtypes = calloc(dg->nb_components, sizeof(*level->restrict_sendtypes));
+ level->restrict_recvcounts = calloc(dg->nb_components, sizeof(*level->restrict_recvcounts));
+ level->restrict_recvdispl = calloc(dg->nb_components, sizeof(*level->restrict_recvdispl));
+ level->restrict_recvtypes = calloc(dg->nb_components, sizeof(*level->restrict_recvtypes));
+
+ if (!level->restrict_sendcounts || !level->restrict_senddispl || !level->restrict_sendtypes ||
+ !level->restrict_recvcounts || !level->restrict_recvdispl || !level->restrict_recvtypes)
+ goto fail;
+
+ level->prolong_sendcounts = calloc(dg->nb_components, sizeof(*level->prolong_sendcounts));
+ level->prolong_senddispl = calloc(dg->nb_components, sizeof(*level->prolong_senddispl));
+ level->prolong_sendtypes = calloc(dg->nb_components, sizeof(*level->prolong_sendtypes));
+ level->prolong_recvcounts = calloc(dg->nb_components, sizeof(*level->prolong_recvcounts));
+ level->prolong_recvdispl = calloc(dg->nb_components, sizeof(*level->prolong_recvdispl));
+ level->prolong_recvtypes = calloc(dg->nb_components, sizeof(*level->prolong_recvtypes));
+
+ if (!level->prolong_sendcounts || !level->prolong_senddispl || !level->prolong_sendtypes ||
+ !level->prolong_recvcounts || !level->prolong_recvdispl || !level->prolong_recvtypes)
+ goto fail;
+ }
+
mg2di_timer_init(&level->timer_solve);
mg2di_timer_init(&level->timer_prolong);
mg2di_timer_init(&level->timer_restrict);
mg2di_timer_init(&level->timer_correct);
mg2di_timer_init(&level->timer_reinit);
+ mg2di_timer_init(&level->timer_mpi_sync);
return level;
fail:
@@ -612,51 +1069,251 @@ fail:
return NULL;
}
-static int log2i(int n)
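+/* Compute the extents of the data this component sends/receives during
+ * restriction and prolongation, then set up the MPI_Alltoallw parameters
+ * for the transfers to/from the child level. */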
+static int mg_interdomain_setup(MG2DContext *ctx, MG2DLevel *level)
{
+ MG2DInternal *priv = ctx->priv;
+ const DomainGeometry *dg_fine = priv->dh.levels[level->depth];
+ const DomainGeometry *dg_coarse = priv->dh.levels[level->depth + 1];
+ const double step_scale[2] = {
+ priv->dh.stepsizes[level->depth][0] / priv->dh.stepsizes[level->depth + 1][0],
+ priv->dh.stepsizes[level->depth][1] / priv->dh.stepsizes[level->depth + 1][1]
+ };
+ const unsigned int lc = priv->local_component;
+
+ Rect *restrict_components = NULL, *prolong_components = NULL;
+
int ret = 0;
- while (n) {
- ret++;
- n >>= 1;
+
+ restrict_components = calloc(dg_fine->nb_components, sizeof(*restrict_components));
+ prolong_components = calloc(dg_fine->nb_components, sizeof(*prolong_components));
+ if (!restrict_components || !prolong_components) {
+ ret = -ENOMEM;
+ goto finish;
+ }
+
+ /* calculate the extents restricted onto by each component */
+ for (int comp = 0; comp < dg_fine->nb_components; comp++) {
+ const Rect *dc_src = &dg_fine->components[comp].interior;
+ Rect *dst = restrict_components + comp;
+
+ for (int dim = 0; dim < 2; dim++) {
+ ptrdiff_t end;
+
+ dst->start[dim] = ceil(dc_src->start[dim] * step_scale[dim]);
+ end = ceil((dc_src->start[dim] + dc_src->size[dim]) * step_scale[dim]);
+ end = MIN(end, dg_coarse->domain_size[dim]);
+ dst->size[dim] = end >= dst->start[dim] ? end - dst->start[dim] : 0;
+ }
}
- return ret - 1;
+ level->restrict_dst_extents = restrict_components[lc];
+
+ /* calculate the extents prolongated from by each component */
+ for (int comp = 0; comp < dg_fine->nb_components; comp++) {
+ const Rect *dc_src = &dg_fine->components[comp].interior;
+ Rect *dst = prolong_components + comp;
+
+ for (int dim = 0; dim < 2; dim++) {
+ ptrdiff_t start, end;
+
+ start = floor(dc_src->start[dim] * step_scale[dim]);
+ end = ceil((dc_src->start[dim] + dc_src->size[dim]) * step_scale[dim]);
+
+ start = MAX(start, 0) - FD_STENCIL_MAX;
+ end = MIN(end, dg_coarse->domain_size[dim]) + FD_STENCIL_MAX;
+
+ dst->start[dim] = start;
+ dst->size[dim] = end >= start ? end - start : 0;
+ }
+ }
+ level->prolong_src_extents = prolong_components[lc];
+
+ if (dg_fine->nb_components == 1)
+ goto finish;
+
+ ret = mg2di_ndarray_alloc(&level->restrict_dst, 2,
+ (size_t [2]){ restrict_components[lc].size[1], restrict_components[lc].size[0] }, 0);
+ if (ret < 0)
+ goto finish;
+
+ ret = mg2di_ndarray_alloc(&level->prolong_dst, 2,
+ (size_t [2]){ prolong_components[lc].size[1], prolong_components[lc].size[0] }, 0);
+ if (ret < 0)
+ goto finish;
+
+ /* setup the MPI transfer parameters for restrict */
+ for (int comp = 0; comp < dg_fine->nb_components; comp++) {
+ NDArray *data;
+ const Rect *loc;
+ Rect overlap;
+
+ /* send from local component to comp */
+ loc = &restrict_components[lc];
+ data = level->restrict_dst;
+ memset(&overlap, 0, sizeof(overlap));
+ if (comp < dg_coarse->nb_components) {
+ mg2di_rect_intersect(&overlap, loc, &dg_coarse->components[comp].interior);
+ }
+
+ if (overlap.size[0] && overlap.size[1]) {
+ MPI_Type_vector(overlap.size[1], overlap.size[0],
+ data->stride[0], MPI_DOUBLE,
+ &level->restrict_sendtypes[comp]);
+ MPI_Type_commit(&level->restrict_sendtypes[comp]);
+ level->restrict_sendcounts[comp] = 1;
+ level->restrict_senddispl[comp] = sizeof(*data->data) *
+ ((overlap.start[1] - loc->start[1]) * data->stride[0] +
+ (overlap.start[0] - loc->start[0]) * data->stride[1]);
+ } else {
+ level->restrict_sendcounts[comp] = 0;
+ level->restrict_senddispl[comp] = 0;
+ MPI_Type_dup(MPI_INT, &level->restrict_sendtypes[comp]);
+ }
+
+ /* receive on local component from comp */
+ memset(&overlap, 0, sizeof(overlap));
+ if (lc < dg_coarse->nb_components) {
+ loc = &dg_coarse->components[lc].interior;
+ data = level->child->solver->rhs;
+ mg2di_rect_intersect(&overlap, loc, &restrict_components[comp]);
+ }
+ if (overlap.size[0] && overlap.size[1]) {
+ MPI_Type_vector(overlap.size[1], overlap.size[0],
+ data->stride[0], MPI_DOUBLE,
+ &level->restrict_recvtypes[comp]);
+ MPI_Type_commit(&level->restrict_recvtypes[comp]);
+ level->restrict_recvcounts[comp] = 1;
+ level->restrict_recvdispl[comp] = sizeof(*data->data) *
+ ((overlap.start[1] - loc->start[1]) * data->stride[0] +
+ (overlap.start[0] - loc->start[0]) * data->stride[1]);
+ } else {
+ level->restrict_recvcounts[comp] = 0;
+ level->restrict_recvdispl[comp] = 0;
+ MPI_Type_dup(MPI_INT, &level->restrict_recvtypes[comp]);
+ }
+ }
+
+ /* setup the MPI transfer parameters for prolong */
+ for (int comp = 0; comp < dg_fine->nb_components; comp++) {
+ NDArray *data;
+ const Rect *loc;
+ Rect overlap;
+
+ /* send from local component to comp */
+ memset(&overlap, 0, sizeof(overlap));
+ if (lc < dg_coarse->nb_components) {
+ loc = &dg_coarse->components[lc].exterior;
+ data = level->child->solver->u_exterior;
+ mg2di_rect_intersect(&overlap, loc, &prolong_components[comp]);
+ }
+
+ if (overlap.size[0] && overlap.size[1]) {
+ MPI_Type_vector(overlap.size[1], overlap.size[0],
+ data->stride[0], MPI_DOUBLE,
+ &level->prolong_sendtypes[comp]);
+ MPI_Type_commit(&level->prolong_sendtypes[comp]);
+ level->prolong_sendcounts[comp] = 1;
+ level->prolong_senddispl[comp] = sizeof(*data->data) *
+ ((overlap.start[1] - loc->start[1]) * data->stride[0] +
+ (overlap.start[0] - loc->start[0]) * data->stride[1]);
+ } else {
+ level->prolong_sendcounts[comp] = 0;
+ level->prolong_senddispl[comp] = 0;
+ MPI_Type_dup(MPI_INT, &level->prolong_sendtypes[comp]);
+ }
+
+ /* receive on local component from comp */
+ memset(&overlap, 0, sizeof(overlap));
+ loc = &prolong_components[lc];
+ data = level->prolong_dst;
+ if (comp < dg_coarse->nb_components) {
+ mg2di_rect_intersect(&overlap, loc, &dg_coarse->components[comp].exterior);
+ }
+ if (overlap.size[0] && overlap.size[1]) {
+ MPI_Type_vector(overlap.size[1], overlap.size[0],
+ data->stride[0], MPI_DOUBLE,
+ &level->prolong_recvtypes[comp]);
+ MPI_Type_commit(&level->prolong_recvtypes[comp]);
+ level->prolong_recvcounts[comp] = 1;
+ level->prolong_recvdispl[comp] = sizeof(*data->data) *
+ ((overlap.start[1] - loc->start[1]) * data->stride[0] +
+ (overlap.start[0] - loc->start[0]) * data->stride[1]);
+ } else {
+ level->prolong_recvcounts[comp] = 0;
+ level->prolong_recvdispl[comp] = 0;
+ MPI_Type_dup(MPI_INT, &level->prolong_recvtypes[comp]);
+ }
+ }
+
+finish:
+ free(restrict_components);
+ free(prolong_components);
+
+ return ret;
}
-static int mg_levels_alloc(MG2DContext *ctx, size_t domain_size)
+static int mg_levels_alloc(MG2DContext *ctx)
{
MG2DInternal *priv = ctx->priv;
+ MPI_Comm comm_parent;
MG2DLevel **dst;
- size_t next_size;
- dst = &priv->root;
- next_size = domain_size;
+ int ret = 0;
- for (int depth = 0; depth < ctx->max_levels; depth++) {
- size_t cur_size = next_size;
+ /* compute the levels geometries/partitioning */
+ ret = mg_dh_init(&priv->dh, priv->dg, ctx->step, ctx->max_levels);
+ if (ret < 0)
+ return ret;
- // choose the domain size for the next child
- // the children all have sizes 2**n + 1
- // but on the top level we skip the closest lower one if it is too close
- if (depth == 0) {
- next_size = (1 << log2i(cur_size - 2)) + 1;
- if ((double)cur_size / next_size < 1.5)
- next_size = (next_size >> 1) + 1;
- } else
- next_size = (cur_size >> 1) + 1;
+ /* allocate the solver for each level */
+ comm_parent = priv->mpi_comm;
+ dst = &priv->root;
+ for (int depth = 0; depth < priv->dh.nb_levels; depth++) {
+ const DomainGeometry *dg = priv->dh.levels[depth];
+
+ MPI_Comm comm_cur = MPI_COMM_NULL;
+
+ if (dg->nb_components > 1 && comm_parent != MPI_COMM_NULL) {
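+ /* the level's comm spans the first dg->nb_components ranks of the parent comm */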
+ MPI_Group grp_parent, grp_cur;
+ int *ranks;
+
+ ranks = calloc(dg->nb_components, sizeof(*ranks));
+ if (!ranks)
+ return -ENOMEM;
+
+ for (int i = 0; i < dg->nb_components; i++)
+ ranks[i] = i;
- *dst = mg_level_alloc(cur_size);
+ MPI_Comm_group(comm_parent, &grp_parent);
+ MPI_Group_incl(grp_parent, dg->nb_components, ranks, &grp_cur);
+ MPI_Comm_create(comm_parent, grp_cur, &comm_cur);
+
+ MPI_Group_free(&grp_parent);
+ MPI_Group_free(&grp_cur);
+ free(ranks);
+ }
+
+ if (priv->local_component >= dg->nb_components)
+ break;
+
+ *dst = mg_level_alloc(dg, priv->local_component, comm_cur);
if (!*dst)
return -ENOMEM;
(*dst)->depth = depth;
- if (next_size <= 4)
+ dst = &((*dst)->child);
+ comm_parent = comm_cur;
+ }
+
+ /* setup inter-grid MPI sync */
+ for (MG2DLevel *cur = priv->root; cur; cur = cur->child) {
+ if (cur->depth == priv->dh.nb_levels - 1)
break;
- dst = &(*dst)->child;
+ ret = mg_interdomain_setup(ctx, cur);
+ if (ret < 0)
+ return ret;
}
- return 0;
+ return ret;
}
static void log_default_callback(const MG2DContext *ctx, int level, const char *fmt, va_list vl)
@@ -666,13 +1323,14 @@ static void log_default_callback(const MG2DContext *ctx, int level, const char *
vfprintf(stderr, fmt, vl);
}
-MG2DContext *mg2d_solver_alloc(size_t domain_size)
+static MG2DContext *solver_alloc(const size_t domain_size[2])
{
MG2DContext *ctx;
MG2DInternal *priv;
int ret;
- if (domain_size < 3 || SIZE_MAX / domain_size < domain_size)
+ if (domain_size[0] < 3 || domain_size[1] < 3 ||
+ SIZE_MAX / domain_size[0] < domain_size[1])
return NULL;
ctx = calloc(1, sizeof(*ctx));
@@ -690,20 +1348,21 @@ MG2DContext *mg2d_solver_alloc(size_t domain_size)
priv->cpuflags = mg2di_cpu_flags_get();
for (int i = 0; i < ARRAY_ELEMS(ctx->boundaries); i++) {
- ctx->boundaries[i] = mg2di_bc_alloc(domain_size);
+ const int ci = mg2d_bnd_coord_idx(i);
+ ctx->boundaries[i] = mg2di_bc_alloc(domain_size[!ci]);
if (!ctx->boundaries[i]) {
goto fail;
}
}
- ret = mg2di_ndarray_alloc(&priv->u, 2, (size_t [2]){ domain_size, domain_size },
+ ret = mg2di_ndarray_alloc(&priv->u, 2, (size_t [2]){ domain_size[1], domain_size[0] },
NDARRAY_ALLOC_ZERO);
if (ret < 0)
goto fail;
ctx->u = priv->u->data;
ctx->u_stride = priv->u->stride[0];
- ret = mg2di_ndarray_alloc(&priv->rhs, 2, (size_t [2]){ domain_size, domain_size },
+ ret = mg2di_ndarray_alloc(&priv->rhs, 2, (size_t [2]){ domain_size[1], domain_size[0] },
NDARRAY_ALLOC_ZERO);
if (ret < 0)
goto fail;
@@ -711,7 +1370,7 @@ MG2DContext *mg2d_solver_alloc(size_t domain_size)
ctx->rhs_stride = priv->rhs->stride[0];
for (int i = 0; i < ARRAY_ELEMS(ctx->diff_coeffs); i++) {
- ret = mg2di_ndarray_alloc(&priv->diff_coeffs[i], 2, (size_t [2]){ domain_size, domain_size },
+ ret = mg2di_ndarray_alloc(&priv->diff_coeffs[i], 2, (size_t [2]){ domain_size[1], domain_size[0] },
NDARRAY_ALLOC_ZERO);
if (ret < 0)
goto fail;
@@ -719,8 +1378,6 @@ MG2DContext *mg2d_solver_alloc(size_t domain_size)
}
ctx->diff_coeffs_stride = priv->diff_coeffs[0]->stride[0];
- ctx->domain_size = domain_size;
-
ctx->max_levels = 16;
ctx->max_exact_size = 5;
ctx->maxiter = 16;
@@ -741,6 +1398,126 @@ fail:
return NULL;
}
+MG2DContext *mg2d_solver_alloc(size_t domain_size)
+{
+ MG2DContext *ctx;
+
+ ctx = solver_alloc((size_t [2]){ domain_size, domain_size });
+ if (!ctx)
+ return NULL;
+
+ ctx->priv->dg = mg2di_dg_alloc(1);
+ if (!ctx->priv->dg)
+ goto fail;
+
+ ctx->priv->dg->domain_size[0] = domain_size;
+ ctx->priv->dg->domain_size[1] = domain_size;
+
+ ctx->priv->dg->components[0].interior.start[0] = 0;
+ ctx->priv->dg->components[0].interior.start[1] = 0;
+ ctx->priv->dg->components[0].interior.size[0] = domain_size;
+ ctx->priv->dg->components[0].interior.size[1] = domain_size;
+
+ ctx->priv->dg->components[0].exterior.start[0] = -FD_STENCIL_MAX;
+ ctx->priv->dg->components[0].exterior.start[1] = -FD_STENCIL_MAX;
+ ctx->priv->dg->components[0].exterior.size[0] = domain_size + 2 * FD_STENCIL_MAX;
+ ctx->priv->dg->components[0].exterior.size[1] = domain_size + 2 * FD_STENCIL_MAX;
+ for (int i = 0; i < 4; i++)
+ ctx->priv->dg->components[0].bnd_is_outer[i] = 1;
+
+ ctx->priv->local_component = 0;
+ ctx->priv->mpi_comm = MPI_COMM_NULL;
+
+ ctx->domain_size = domain_size;
+
+ return ctx;
+fail:
+ mg2d_solver_free(&ctx);
+ return NULL;
+}
+
+MG2DContext *mg2d_solver_alloc_mpi(MPI_Comm comm, const size_t local_start[2],
+ const size_t local_size[2])
+{
+ MG2DContext *ctx = NULL;
+ DomainGeometry *dg = NULL;
+ size_t *domainspec = NULL;
+
+ int nb_processes, rank;
+
+ MPI_Comm_size(comm, &nb_processes);
+ MPI_Comm_rank(comm, &rank);
+
+ dg = mg2di_dg_alloc(nb_processes);
+ if (!dg)
+ goto fail;
+
+ domainspec = calloc(4 * nb_processes, sizeof(*domainspec));
+ if (!domainspec)
+ goto fail;
+
+ domainspec[4 * rank + 0] = local_start[0];
+ domainspec[4 * rank + 1] = local_start[1];
+ domainspec[4 * rank + 2] = local_size[0];
+ domainspec[4 * rank + 3] = local_size[1];
+
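+ /* in-place all-gather of every component's extents */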
+ MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, domainspec, 4 * sizeof(*domainspec), MPI_BYTE, comm);
+ for (unsigned int proc = 0; proc < nb_processes; proc++) {
+ size_t *proc_start = domainspec + 4 * proc;
+ size_t *proc_size = domainspec + 4 * proc + 2;
+
+ dg->components[proc].interior.start[0] = proc_start[0];
+ dg->components[proc].interior.start[1] = proc_start[1];
+ dg->components[proc].interior.size[0] = proc_size[0];
+ dg->components[proc].interior.size[1] = proc_size[1];
+
+ dg->domain_size[0] = MAX(dg->domain_size[0], proc_start[0] + proc_size[0]);
+ dg->domain_size[1] = MAX(dg->domain_size[1], proc_start[1] + proc_size[1]);
+ }
+ mg2di_assert(dg->domain_size[0] == dg->domain_size[1]);
+
+ for (unsigned int proc = 0; proc < nb_processes; proc++) {
+ DomainComponent *dc = &dg->components[proc];
+
+ dc->exterior = dc->interior;
+
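+ /* extend the exterior past the outer domain boundaries by the stencil width */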
+ for (int bnd_idx = 0; bnd_idx < 4; bnd_idx++) {
+ const int ci = mg2d_bnd_coord_idx(bnd_idx);
+ const int upper = mg2d_bnd_is_upper(bnd_idx);
+
+ if (upper && dc->interior.start[ci] + dc->interior.size[ci] == dg->domain_size[ci])
+ dc->exterior.size[ci] += FD_STENCIL_MAX;
+
+ if (!upper && dc->interior.start[ci] == 0) {
+ dc->exterior.start[ci] -= FD_STENCIL_MAX;
+ dc->exterior.size[ci] += FD_STENCIL_MAX;
+ }
+ }
+ dc->bnd_is_outer[MG2D_BOUNDARY_0L] = dc->interior.start[0] == 0;
+ dc->bnd_is_outer[MG2D_BOUNDARY_1L] = dc->interior.start[1] == 0;
+ dc->bnd_is_outer[MG2D_BOUNDARY_0U] = (dc->interior.start[0] + dc->interior.size[0]) == dg->domain_size[0];
+ dc->bnd_is_outer[MG2D_BOUNDARY_1U] = (dc->interior.start[1] + dc->interior.size[1]) == dg->domain_size[1];
+ }
+
+ free(domainspec);
+ domainspec = NULL;
+
+ ctx = solver_alloc(local_size);
+ if (!ctx)
+ goto fail;
+
+ ctx->priv->mpi_comm = comm;
+ ctx->priv->dg = dg;
+ ctx->priv->local_component = rank;
+ ctx->domain_size = dg->domain_size[0];
+
+ return ctx;
+fail:
+ mg2d_solver_free(&ctx);
+ mg2di_dg_free(&dg);
+ free(domainspec);
+ return NULL;
+}
+
void mg2d_solver_free(MG2DContext **pctx)
{
MG2DContext *ctx = *pctx;
@@ -766,6 +1543,10 @@ void mg2d_solver_free(MG2DContext **pctx)
for (int i = 0; i < ARRAY_ELEMS(ctx->priv->diff_coeffs); i++)
mg2di_ndarray_free(&ctx->priv->diff_coeffs[i]);
+ mg2di_dg_free(&ctx->priv->dg);
+
+ mg_dh_uninit(&ctx->priv->dh);
+
free(ctx->priv);
free(ctx);
@@ -796,7 +1577,7 @@ void mg2d_print_stats(MG2DContext *ctx, const char *prefix)
EGSExactContext *e = level->solver->exact;
int64_t level_total = level->timer_solve.time_nsec + level->timer_prolong.time_nsec + level->timer_restrict.time_nsec +
- level->timer_correct.time_nsec + level->timer_reinit.time_nsec;
+ level->timer_correct.time_nsec + level->timer_reinit.time_nsec + level->timer_mpi_sync.time_nsec;
if (!level->count_cycles) {
level = level->child;
@@ -821,7 +1602,7 @@ void mg2d_print_stats(MG2DContext *ctx, const char *prefix)
if (ret > 0)
p += ret;
- if (level->child) {
+ if (level->timer_prolong.nb_runs) {
ret = snprintf(p, sizeof(buf) - (p - buf),
"%2.2f%% prolong %2.2f%% restrict %2.2f%% correct",
level->timer_prolong.time_nsec * 100.0 / level_total,
@@ -831,6 +1612,14 @@ void mg2d_print_stats(MG2DContext *ctx, const char *prefix)
p += ret;
}
+ if (level->timer_mpi_sync.nb_runs) {
+ ret = snprintf(p, sizeof(buf) - (p - buf),
+ " %2.2f%% sync",
+ level->timer_mpi_sync.time_nsec * 100.0 / level_total);
+ if (ret > 0)
+ p += ret;
+ }
+
ret = snprintf(p, sizeof(buf) - (p - buf),
"||%2.2f%% init %2.2f%% residual %2.2f%% boundaries (%2.2f%% fixval %2.2f%% reflect %2.2f%% falloff %2.2f%% corners)",
level->solver->timer_init.time_nsec * 100.0 / level->solver->timer_solve.time_nsec,
@@ -861,6 +1650,14 @@ void mg2d_print_stats(MG2DContext *ctx, const char *prefix)
p += ret;
}
+ if (level->solver->timer_mpi_sync.nb_runs) {
+ ret = snprintf(p, sizeof(buf) - (p - buf),
+ " %2.2f%% sync",
+ level->solver->timer_mpi_sync.time_nsec * 100.0 / level->solver->timer_solve.time_nsec);
+ if (ret > 0)
+ p += ret;
+ }
+
mg2di_log(&priv->logger, ctx->log_level, "%s%s\n", prefix, buf);
level = level->child;
}
diff --git a/mg2d.h b/mg2d.h
index fd11906..1db4ace 100644
--- a/mg2d.h
+++ b/mg2d.h
@@ -22,6 +22,8 @@
#include <stddef.h>
#include <stdint.h>
+#include <mpi.h>
+
#include "mg2d_boundary.h"
#include "mg2d_constants.h"
@@ -168,6 +170,20 @@ typedef struct MG2DContext {
* @return The solver context on success, NULL on failure.
*/
MG2DContext *mg2d_solver_alloc(size_t domain_size);
+
+/**
+ * Allocate a solver component in a multi-component MPI-based solve.
+ *
+ * @param comm The MPI communicator used to communicate with the other
+ * components.
+ * @param local_start Indices of this component's lower left corner in the full
+ * computational domain.
+ * @param local_size Size of this component in each direction.
+ *
+ * @return The solver context on success, NULL on failure.
+ */
+MG2DContext *mg2d_solver_alloc_mpi(MPI_Comm comm, const size_t local_start[2],
+ const size_t local_size[2]);
/**
* Solve the equation, after all the required fields have been filled by the
* caller.
diff --git a/mg2d_mpi_test.c b/mg2d_mpi_test.c
new file mode 100644
index 0000000..1bd2784
--- /dev/null
+++ b/mg2d_mpi_test.c
@@ -0,0 +1,283 @@
+#include <math.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <mpi.h>
+
+#include "mg2d.h"
+#include "mg2d_boundary.h"
+#include "mg2d_constants.h"
+
+#include "components.h"
+
+#define ARRAY_ELEMS(x) (sizeof(x) / sizeof(*x))
+#define MIN(x, y) ((x) > (y) ? (y) : (x))
+
+#define MAXITER 64
+#define TOL 5e-15
+
+#define DOMAIN_SIZE 1.0
+#define FD_STENCIL 2
+
+static const double pde_coeffs[MG2D_DIFF_COEFF_NB] = {
+ [MG2D_DIFF_COEFF_00] = 1.0,
+ [MG2D_DIFF_COEFF_10] = 0.9,
+ [MG2D_DIFF_COEFF_01] = 1.1,
+ [MG2D_DIFF_COEFF_20] = 1.2,
+ [MG2D_DIFF_COEFF_02] = 0.8,
+ [MG2D_DIFF_COEFF_11] = 0.7,
+};
+
+#if 1
+static double sol_00(double x, double y)
+{
+ return sin(M_PI * x) * sin(M_PI * y);
+}
+static double sol_10(double x, double y)
+{
+ return M_PI * cos(M_PI * x) * sin(M_PI * y);
+}
+static double sol_01(double x, double y)
+{
+ return M_PI * sin(M_PI * x) * cos(M_PI * y);
+}
+static double sol_20(double x, double y)
+{
+ return -M_PI * M_PI * sol_00(x, y);
+}
+static double sol_02(double x, double y)
+{
+ return -M_PI * M_PI * sol_00(x, y);
+}
+static double sol_11(double x, double y)
+{
+ return M_PI * M_PI * cos(M_PI * x) * cos(M_PI * y);
+}
+#define BC_TYPE MG2D_BC_TYPE_FIXVAL
+#else
+static double sol_00(double x, double y)
+{
+ return cos(M_PI * x) * cos(M_PI * y);
+}
+static double sol_10(double x, double y)
+{
+ return -M_PI * sin(M_PI * x) * cos(M_PI * y);
+}
+static double sol_01(double x, double y)
+{
+ return -M_PI * cos(M_PI * x) * sin(M_PI * y);
+}
+static double sol_20(double x, double y)
+{
+ return -M_PI * M_PI * sol_00(x, y);
+}
+static double sol_02(double x, double y)
+{
+ return -M_PI * M_PI * sol_00(x, y);
+}
+static double sol_11(double x, double y)
+{
+ return M_PI * M_PI * sin(M_PI * x) * sin(M_PI * y);
+}
+#define BC_TYPE MG2D_BC_TYPE_REFLECT
+#endif
+
+static double (*sol[MG2D_DIFF_COEFF_NB])(double x, double y) = {
+ [MG2D_DIFF_COEFF_00] = sol_00,
+ [MG2D_DIFF_COEFF_10] = sol_10,
+ [MG2D_DIFF_COEFF_01] = sol_01,
+ [MG2D_DIFF_COEFF_20] = sol_20,
+ [MG2D_DIFF_COEFF_02] = sol_02,
+ [MG2D_DIFF_COEFF_11] = sol_11,
+};
+
+int main(int argc, char **argv)
+{
+ MG2DContext *ctx = NULL;
+ long int gridsize;
+ int ret = 0;
+
+ DomainGeometry *dg = NULL;
+ DomainComponent *dc = NULL;
+
+ size_t patch_start, patch_end, patch_size_y;
+
+ char processor_name[MPI_MAX_PROCESSOR_NAME];
+ int nb_processes, rank, processor_name_len;
+
+ if (argc < 2) {
+ fprintf(stderr, "Usage: %s <N>\n", argv[0]);
+ return 1;
+ }
+ gridsize = strtol(argv[1], NULL, 0);
+ if (gridsize <= 0) {
+ fprintf(stderr, "Invalid parameters: %ld\n", gridsize);
+ return 1;
+ }
+
+ MPI_Init(NULL, NULL);
+
+ MPI_Comm_size(MPI_COMM_WORLD, &nb_processes);
+ MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+ MPI_Get_processor_name(processor_name, &processor_name_len);
+
+ fprintf(stderr, "This is process %d out of %d, running on %s\n",
+ rank, nb_processes, processor_name);
+
+ dg = mg2di_dg_alloc(nb_processes);
+ if (!dg) {
+ fprintf(stderr, "Error allocating domain geometry\n");
+ ret = 1;
+ goto fail;
+ }
+
+ dg->domain_size[0] = gridsize;
+ dg->domain_size[1] = gridsize;
+
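+ /* 1D decomposition along y: each rank owns a contiguous band of rows */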
+ for (unsigned int i = 0; i < dg->nb_components; i++) {
+ size_t patch_start, patch_end, patch_size_y;
+
+ patch_size_y = (gridsize + nb_processes - 1) / nb_processes;
+ patch_start = i * patch_size_y;
+ patch_end = MIN((i + 1) * patch_size_y, gridsize);
+ if (patch_end <= patch_start) {
+ fprintf(stderr, "Too many processes for grid size %ld: %d\n", gridsize, nb_processes);
+ ret = 1;
+ goto fail;
+ }
+ patch_size_y = patch_end - patch_start;
+
+ dg->components[i].interior.start[0] = 0;
+ dg->components[i].interior.start[1] = patch_start;
+ dg->components[i].interior.size[0] = gridsize;
+ dg->components[i].interior.size[1] = patch_size_y;
+
+ dg->components[i].exterior.start[0] = -FD_STENCIL;
+ dg->components[i].exterior.start[1] = i ? patch_start : -FD_STENCIL;
+ dg->components[i].exterior.size[0] = gridsize + 2 * FD_STENCIL;
+ dg->components[i].exterior.size[1] = patch_size_y + ((i == 0) * FD_STENCIL) + ((i == nb_processes - 1) * FD_STENCIL);
+
+ dg->components[i].bnd_is_outer[MG2D_BOUNDARY_0L] = 1;
+ dg->components[i].bnd_is_outer[MG2D_BOUNDARY_0U] = 1;
+ dg->components[i].bnd_is_outer[MG2D_BOUNDARY_1L] = i == 0;
+ dg->components[i].bnd_is_outer[MG2D_BOUNDARY_1U] = i == nb_processes - 1;
+ }
+ dc = &dg->components[rank];
+
+ patch_size_y = (gridsize + nb_processes - 1) / nb_processes;
+ patch_start = rank * patch_size_y;
+ patch_end = MIN((rank + 1) * patch_size_y, gridsize);
+ if (patch_end <= patch_start) {
+ fprintf(stderr, "Too many processes for grid size %ld: %d\n", gridsize, nb_processes);
+ ret = 1;
+ goto fail;
+ }
+ patch_size_y = patch_end - patch_start;
+
+ //while (ret == 0)
+ // sleep(1);
+
+ ctx = mg2d_solver_alloc_mpi(MPI_COMM_WORLD, (size_t [2]){dc->interior.start[0], dc->interior.start[1]},
+ dc->interior.size);
+ if (!ctx) {
+ fprintf(stderr, "Error allocating the solver context\n");
+ ret = 1;
+ goto fail;
+ }
+
+ ctx->step[0] = DOMAIN_SIZE / (gridsize - 1);
+ ctx->step[1] = DOMAIN_SIZE / (gridsize - 1);
+
+ ctx->fd_stencil = FD_STENCIL;
+
+ ctx->maxiter = MAXITER;
+ ctx->nb_relax_pre = 2;
+ ctx->nb_cycles = 1;
+ ctx->nb_relax_post = 2;
+ ctx->tol = TOL / (ctx->step[0] * ctx->step[1]);
+ ctx->nb_threads = 1;
+ ctx->log_level = MG2D_LOG_INFO;
+
+ for (int bnd_loc = 0; bnd_loc < ARRAY_ELEMS(ctx->boundaries); bnd_loc++) {
+ MG2DBoundary *bnd = ctx->boundaries[bnd_loc];
+ const int ci = mg2d_bnd_coord_idx(bnd_loc);
+ const int bnd_dir = mg2d_bnd_out_dir(bnd_loc);
+
+ double coord[2];
+
+ if (!dc->bnd_is_outer[bnd_loc])
+ continue;
+
+ bnd->type = BC_TYPE;
+
+ memset(bnd->val, 0, dc->interior.size[!ci] * sizeof(*bnd->val));
+
+ if (bnd->type == MG2D_BC_TYPE_FIXVAL) {
+ for (int j = 1; j < ctx->fd_stencil; j++) {
+ double *dst = bnd->val + j * bnd->val_stride;
+
+ coord[ci] = mg2d_bnd_is_upper(bnd_loc) * DOMAIN_SIZE + bnd_dir * j * ctx->step[ci];
+
+ for (ptrdiff_t k = -j; k < (ptrdiff_t)dc->interior.size[!ci] + j; k++) {
+ coord[!ci] = (k + dc->interior.start[!ci]) * ctx->step[!ci];
+ dst[k] = sol[MG2D_DIFF_COEFF_00](coord[0], coord[1]);
+ }
+ }
+ }
+ }
+
+ for (size_t y = 0; y < dc->interior.size[1]; y++) {
+ const double y_coord = (y + dc->interior.start[1]) * ctx->step[1];
+
+ memset(ctx->u + y * ctx->u_stride, 0, sizeof(*ctx->u) * dc->interior.size[0]);
+
+ for (size_t x = 0; x < dc->interior.size[0]; x++) {
+ const double x_coord = x * ctx->step[0];
+ double rhs = 0.0;
+
+ for (int i = 0; i < MG2D_DIFF_COEFF_NB; i++) {
+ ctx->diff_coeffs[i][ctx->diff_coeffs_stride * y + x] = pde_coeffs[i];
+ rhs += pde_coeffs[i] * sol[i](x_coord, y_coord);
+ }
+
+ ctx->rhs[y * ctx->rhs_stride + x] = rhs;
+ }
+ }
+
+ ret = mg2d_solve(ctx);
+ if (ret < 0) {
+ fprintf(stderr, "Error solving the equation\n");
+ ret = 1;
+ goto fail;
+ }
+
+ mg2d_print_stats(ctx, NULL);
+
+ {
+ double max_err = 0.0;
+
+ for (size_t y = 0; y < dc->interior.size[1]; y++) {
+ const double y_coord = (y + dc->interior.start[1]) * ctx->step[1];
+
+ for (size_t x = 0; x < dc->interior.size[0]; x++) {
+ const double x_coord = x * ctx->step[0];
+ double err = fabs(ctx->u[y * ctx->u_stride + x] - sol[MG2D_DIFF_COEFF_00](x_coord, y_coord));
+ if (err > max_err)
+ max_err = err;
+ }
+ }
+ MPI_Reduce(rank ? &max_err : MPI_IN_PLACE, &max_err, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
+ if (rank == 0) {
+ fprintf(stderr, "max(|solution - exact|): %g\n", max_err);
+ fprintf(stdout, "%ld %g\n", gridsize, max_err);
+ }
+ }
+
+fail:
+ mg2d_solver_free(&ctx);
+ mg2di_dg_free(&dg);
+ MPI_Finalize();
+ return ret;
+}