From 4c972cfc352ae5ba851cae142ca6fe594d88bc04 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Thu, 23 May 2019 11:01:00 +0200 Subject: egs: add support for MPI-based multi-component solves --- components.c | 251 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ components.h | 73 ++++++++++++++++ ell_grid_solve.c | 253 ++++++++++++++++++++++++++++++++++++++++++++++++------- ell_grid_solve.h | 26 +++++- meson.build | 7 +- mg2d.c | 2 +- relax_mpi_test.c | 236 +++++++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 813 insertions(+), 35 deletions(-) create mode 100644 components.c create mode 100644 components.h create mode 100644 relax_mpi_test.c diff --git a/components.c b/components.c new file mode 100644 index 0000000..ef41653 --- /dev/null +++ b/components.c @@ -0,0 +1,251 @@ +/* + * Multi-component utility code + * Copyright 2019 Anton Khirnov + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include + +#include "common.h" +#include "components.h" +#include "mg2d_boundary.h" + +int mg2di_dg_copy(DomainGeometry **pdst, const DomainGeometry *src) +{ + DomainGeometry *dst; + + dst = mg2di_dg_alloc(src->nb_components); + if (!dst) + return -ENOMEM; + + memcpy(dst->components, src->components, + src->nb_components * sizeof(*src->components)); + dst->domain_size[0] = src->domain_size[0]; + dst->domain_size[1] = src->domain_size[1]; + + *pdst = dst; + + return 0; +} + +DomainGeometry *mg2di_dg_alloc(unsigned int nb_components) +{ + DomainGeometry *dg; + + dg = calloc(1, sizeof(*dg)); + if (!dg) + return NULL; + + dg->components = calloc(nb_components, sizeof(*dg->components)); + if (!dg->components) + goto fail; + dg->nb_components = nb_components; + + return dg; +fail: + mg2di_dg_free(&dg); + return NULL; +} + +void mg2di_dg_free(DomainGeometry **pdg) +{ + DomainGeometry *dg = *pdg; + + if (!dg) + return; + + free(dg->components); + + free(dg); + *pdg = NULL; +} + +int mg2di_rect_intersect(Rect *dst, const Rect *src1, const Rect *src2) +{ + ptrdiff_t intersect_start0 = MAX(src1->start[0], src2->start[0]); + ptrdiff_t intersect_start1 = MAX(src1->start[1], src2->start[1]); + ptrdiff_t intersect_end0 = MIN(src1->start[0] + src1->size[0], src2->start[0] + src2->size[0]); + ptrdiff_t intersect_end1 = MIN(src1->start[1] + src1->size[1], src2->start[1] + src2->size[1]); + + if (intersect_start0 < intersect_end0 && intersect_start1 < intersect_end1) { + dst->start[0] = intersect_start0; + dst->start[1] = intersect_start1; + dst->size[0] = intersect_end0 - intersect_start0; + dst->size[1] = intersect_end1 - intersect_start1; + + return 1; + } + + dst->size[0] = 0; + dst->size[1] = 0; + + return 0; +} + +/* merge adjacent/partially overlapping src into dst, + * if the result is rectangular */ +static int rect_merge(Rect *dst, const Rect *src) +{ + /* if dst is empty, copy src */ + if (!dst->size[0] && !dst->size[1]) { + *dst = *src; + return 0; + } + + /* if dst is fully inside src, copy src */ + if (dst->start[0] >= src->start[0] && + dst->start[0] + dst->size[0] <= src->start[0] + src->size[0] && + dst->start[1] >= src->start[1] && + dst->start[1] + dst->size[1] <= src->start[1] + src->size[1]) { + *dst = *src; + return 0; + } + + /* if src is fully inside dst, do nothing */ + if (src->start[0] >= dst->start[0] && + src->start[0] + src->size[0] <= dst->start[0] + dst->size[0] && + src->start[1] >= dst->start[1] && + src->start[1] + src->size[1] <= dst->start[1] + dst->size[1]) + return 0; + + /* if src is adjacent or partially overlaps dst, merge them */ + for (int dir = 0; dir < 2; dir++) { + if (dst->start[dir] != src->start[dir] || + dst->size[dir] != src->size[dir]) + continue; + + /* merge from above */ + if (src->start[!dir] >= dst->start[!dir] && + src->start[!dir] <= dst->start[!dir] + dst->size[!dir]) { + dst->size[!dir] = MAX(dst->size[!dir], src->start[!dir] + src->size[!dir] - dst->start[!dir]); + return 0; + } + + /* merge from below */ + if (dst->start[!dir] >= src->start[!dir] && + dst->start[!dir] <= src->start[!dir] + src->size[!dir]) { + + dst->size[!dir] = MAX(src->size[!dir], dst->start[!dir] + dst->size[!dir] - src->start[!dir]); + dst->start[!dir] = src->start[!dir]; + return 0; + } + } + + /* src and dst do not match */ + return -EINVAL; +} + +static int calc_overlaps_component(Rect *overlaps, const DomainGeometry *dg, + unsigned int comp, int edge_width) +{ + const DomainComponent *c = &dg->components[comp]; + int ret = 0; + + Rect (*overlaps_tmp)[4]; + int *nb_overlaps_tmp; + + overlaps_tmp = calloc(dg->nb_components, sizeof(*overlaps_tmp)); + nb_overlaps_tmp = calloc(dg->nb_components, sizeof(*nb_overlaps_tmp)); + if (!overlaps_tmp || !nb_overlaps_tmp) + return -ENOMEM; + + /* calculate the overlaps for each boundary layer */ + for (int bnd_idx = 0; bnd_idx < 4; bnd_idx++) { + const int ci = mg2d_bnd_coord_idx(bnd_idx); + const int upper = mg2d_bnd_is_upper(bnd_idx); + Rect bnd; + + bnd.start[ci] = c->interior.start[ci] + (upper ? c->interior.size[ci] : -edge_width); + bnd.size[ci] = edge_width; + + bnd.start[!ci] = c->interior.start[!ci] - edge_width; + bnd.size[!ci] = c->interior.size[!ci] + 2 * edge_width; + + for (unsigned int comp_idx = 0; comp_idx < dg->nb_components; comp_idx++) { + const DomainComponent *c1 = &dg->components[comp_idx]; + Rect *dst = &overlaps_tmp[comp_idx][nb_overlaps_tmp[comp_idx]]; + + if (comp_idx == comp) + continue; + + mg2di_rect_intersect(dst, &bnd, &c1->exterior); + if (dst->size[0] && dst->size[1]) + nb_overlaps_tmp[comp_idx]++; + } + } + + /* merge all the overlaps, we should get a single rectangle for each component */ + for (unsigned int comp_idx = 0; comp_idx < dg->nb_components; comp_idx++) { + while (nb_overlaps_tmp[comp_idx] > 1) { + Rect *dst = &overlaps_tmp[comp_idx][0]; + int merged = 0; + + for (int i = 1; i < nb_overlaps_tmp[comp_idx]; i++) { + Rect *src = &overlaps_tmp[comp_idx][i]; + ret = rect_merge(dst, src); + if (ret < 0) + continue; + + memmove(src, src + 1, nb_overlaps_tmp[comp_idx] - i - 1); + nb_overlaps_tmp[comp_idx]--; + merged = 1; + break; + } + + if (!merged) + goto fail; + } + overlaps[comp_idx] = overlaps_tmp[comp_idx][0]; + } + +fail: + free(overlaps_tmp); + free(nb_overlaps_tmp); + + return ret; +} + +int mg2di_dg_edge_overlaps(Rect *overlaps_recv, Rect *overlaps_send, + const DomainGeometry *dg, unsigned int comp, + unsigned int edge_width) +{ + Rect *overlaps = NULL; + int ret; + + overlaps = calloc(SQR(dg->nb_components), sizeof(*overlaps)); + if (!overlaps) + return -ENOMEM; + + for (unsigned int i = 0; i < dg->nb_components; i++) { + ret = calc_overlaps_component(overlaps + i * dg->nb_components, + dg, i, edge_width); + if (ret < 0) + goto fail; + } + + memcpy(overlaps_recv, overlaps + comp * dg->nb_components, + dg->nb_components * sizeof(*overlaps_recv)); + + for (unsigned int i = 0; i < dg->nb_components; i++) + overlaps_send[i] = overlaps[i * dg->nb_components + comp]; + +fail: + free(overlaps); + return ret; +} diff --git a/components.h b/components.h new file mode 100644 index 0000000..bc3c38f --- /dev/null +++ b/components.h @@ -0,0 +1,73 @@ +/* + * Multi-component utility code + * Copyright 2019 Anton Khirnov + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef MG2D_COMPONENTS_H +#define MG2D_COMPONENTS_H + +#include +#include + +typedef struct Rect { + ptrdiff_t start[2]; + size_t size[2]; +} Rect; + +typedef struct DomainComponent { + /* points that belong to this component */ + Rect interior; + /* interior + ghost points on the physical boundaries */ + Rect exterior; + int bnd_is_outer[4]; +} DomainComponent; + +typedef struct DomainGeometry { + size_t domain_size[2]; + + unsigned int nb_components; + DomainComponent *components; +} DomainGeometry; + +DomainGeometry *mg2di_dg_alloc(unsigned int nb_components); +void mg2di_dg_free(DomainGeometry **dg); + +int mg2di_dg_copy(DomainGeometry **dst, const DomainGeometry *src); + +/** + * Compute the intersection of src1 and src2, store the result in dst. + */ +int mg2di_rect_intersect(Rect *dst, const Rect *src1, const Rect *src2); + +/** + * For the component in domain geometry , internal edges are + * rectangles along each non-outer boundary. Edge overlap with component + * is the intersection of internal edges of and exterior of (must + * necessarily be a rectangle). + * + * NB: edge overlap of a component with itself is empty. + * + * This function computes, for the given component + * - edge overlaps of with every component in dg (i.e. the ghost points + * to be received from each remote component) - written into overlaps_recv + * - edge overlaps of every component in dg with (i.e. the ghost points + * to be set to each remote component) - written into overlaps_send + */ +int mg2di_dg_edge_overlaps(Rect *overlaps_recv, Rect *overlaps_send, + const DomainGeometry *dg, unsigned int comp, + unsigned int edge_width); + +#endif // MG2D_COMPONENTS_H diff --git a/ell_grid_solve.c b/ell_grid_solve.c index 7dcd41e..8c37799 100644 --- a/ell_grid_solve.c +++ b/ell_grid_solve.c @@ -26,9 +26,11 @@ #include #include +#include #include "bicgstab.h" #include "common.h" +#include "components.h" #include "cpu.h" #include "ell_grid_solve.h" #include "log.h" @@ -76,7 +78,10 @@ struct EGSInternal { NDArray *rhs_base; NDArray *residual_base; + NDArray *u_base; + NDArray *u_next_base; + NDArray *u_next_exterior; NDArray *u_next; int u_next_valid; @@ -98,6 +103,17 @@ struct EGSInternal { EGSExactInternal e; TPContext *tp_internal; + + MPI_Comm comm; + DomainGeometry *dg; + unsigned int local_component; + + int *sync_sendcounts; + int *sync_senddispl; + MPI_Datatype *sync_sendtypes; + int *sync_recvcounts; + int *sync_recvdispl; + MPI_Datatype *sync_recvtypes; }; static const double fd_denoms[][MG2D_DIFF_COEFF_NB] = { @@ -119,6 +135,19 @@ static const double fd_denoms[][MG2D_DIFF_COEFF_NB] = { }, }; +static void boundaries_sync(EGSContext *ctx, NDArray *a_dst) +{ + EGSInternal *priv = ctx->priv; + + mg2di_timer_start(&ctx->timer_mpi_sync); + + MPI_Alltoallw(a_dst->data, priv->sync_sendcounts, priv->sync_senddispl, priv->sync_sendtypes, + a_dst->data, priv->sync_recvcounts, priv->sync_recvdispl, priv->sync_recvtypes, + priv->comm); + + mg2di_timer_stop(&ctx->timer_mpi_sync); +} + static void residual_calc(EGSContext *ctx, int export_res) { EGSInternal *priv = ctx->priv; @@ -230,7 +259,8 @@ static void boundaries_apply(EGSContext *ctx, NDArray *a_dst, int init) double *dst = a_dst->data + mg2d_bnd_is_upper(i) * ((size_offset - 1) * stride_offset); const ptrdiff_t dst_strides[] = { stride_boundary, mg2d_bnd_out_dir(i) * stride_offset }; - if (bnd->type != bnd_type_order[order_idx]) + if (bnd->type != bnd_type_order[order_idx] || + !priv->dg->components[priv->local_component].bnd_is_outer[i]) continue; switch (bnd->type) { @@ -291,6 +321,10 @@ static void boundaries_apply(EGSContext *ctx, NDArray *a_dst, int init) } } mg2di_timer_stop(&ctx->timer_bnd_corners); + + if (priv->dg->nb_components > 1) + boundaries_sync(ctx, a_dst); + mg2di_timer_stop(&ctx->timer_bnd); } @@ -331,12 +365,15 @@ static int solve_relax_step(EGSContext *ctx, int export_res) int u_next_valid = priv->u_next_valid; if (u_next_valid) { - NDArray *tmp = ctx->u; - NDArray *tmp_base = ctx->u_base; - ctx->u = priv->u_next; - ctx->u_base = priv->u_next_base; - priv->u_next = tmp; - priv->u_next_base = tmp_base; + NDArray *tmp = ctx->u; + NDArray *tmp_ext = ctx->u_exterior; + NDArray *tmp_base = priv->u_base; + ctx->u = priv->u_next; + ctx->u_exterior = priv->u_next_exterior; + priv->u_base = priv->u_next_base; + priv->u_next = tmp; + priv->u_next_exterior = tmp_ext; + priv->u_next_base = tmp_base; priv->u_next_valid = 0; } @@ -352,6 +389,8 @@ static int solve_relax_step(EGSContext *ctx, int export_res) } residual_calc(ctx, 1); + if (priv->dg->nb_components > 1) + boundaries_sync(ctx, ctx->residual); } else { mg2di_assert(u_next_valid); residual_calc(ctx, 0); @@ -799,6 +838,7 @@ int mg2di_egs_init(EGSContext *ctx, int flags) { EGSInternal *priv = ctx->priv; EGSRelaxContext *r = ctx->relax; + DomainComponent *dc = &priv->dg->components[priv->local_component]; int ret = 0; mg2di_timer_start(&ctx->timer_solve); @@ -837,22 +877,22 @@ int mg2di_egs_init(EGSContext *ctx, int flags) arg.dc = MG2D_DIFF_COEFF_00; arg.fact = 1.0 / fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_00]; - tp_execute(ctx->tp, ctx->domain_size[0], init_diff_coeffs_task, &arg); + tp_execute(ctx->tp, ctx->domain_size[1], init_diff_coeffs_task, &arg); arg.dc = MG2D_DIFF_COEFF_10; arg.fact = 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_10] * ctx->step[0]); - tp_execute(ctx->tp, ctx->domain_size[0], init_diff_coeffs_task, &arg); + tp_execute(ctx->tp, ctx->domain_size[1], init_diff_coeffs_task, &arg); arg.dc = MG2D_DIFF_COEFF_01; arg.fact = 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_01] * ctx->step[1]); - tp_execute(ctx->tp, ctx->domain_size[0], init_diff_coeffs_task, &arg); + tp_execute(ctx->tp, ctx->domain_size[1], init_diff_coeffs_task, &arg); arg.dc = MG2D_DIFF_COEFF_20; arg.fact = 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_20] * SQR(ctx->step[0])); - tp_execute(ctx->tp, ctx->domain_size[0], init_diff_coeffs_task, &arg); + tp_execute(ctx->tp, ctx->domain_size[1], init_diff_coeffs_task, &arg); arg.dc = MG2D_DIFF_COEFF_02; arg.fact = 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_02] * SQR(ctx->step[1])); - tp_execute(ctx->tp, ctx->domain_size[0], init_diff_coeffs_task, &arg); + tp_execute(ctx->tp, ctx->domain_size[1], init_diff_coeffs_task, &arg); arg.dc = MG2D_DIFF_COEFF_11; arg.fact = 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_11] * ctx->step[0] * ctx->step[1]); - tp_execute(ctx->tp, ctx->domain_size[0], init_diff_coeffs_task, &arg); + tp_execute(ctx->tp, ctx->domain_size[1], init_diff_coeffs_task, &arg); } if (!(flags & EGS_INIT_FLAG_SAME_DIFF_COEFFS)) @@ -870,13 +910,18 @@ int mg2di_egs_init(EGSContext *ctx, int flags) priv->residual_calc_size[0] = ctx->domain_size[0]; priv->residual_calc_size[1] = ctx->domain_size[1]; - priv->residual_calc_offset[0] = ctx->boundaries[MG2D_BOUNDARY_1L]->type == MG2D_BC_TYPE_FIXVAL; - priv->residual_calc_offset[1] = ctx->boundaries[MG2D_BOUNDARY_0L]->type == MG2D_BC_TYPE_FIXVAL; + priv->residual_calc_offset[0] = ctx->boundaries[MG2D_BOUNDARY_1L]->type == MG2D_BC_TYPE_FIXVAL && + dc->bnd_is_outer[MG2D_BOUNDARY_1L]; + priv->residual_calc_offset[1] = ctx->boundaries[MG2D_BOUNDARY_0L]->type == MG2D_BC_TYPE_FIXVAL && + dc->bnd_is_outer[MG2D_BOUNDARY_0L]; for (int bnd_idx = 0; bnd_idx < ARRAY_ELEMS(ctx->boundaries); bnd_idx++) { MG2DBoundary *bnd = ctx->boundaries[bnd_idx]; const int ci = mg2d_bnd_coord_idx(bnd_idx); + if (!dc->bnd_is_outer[bnd_idx]) + continue; + if (bnd->type == MG2D_BC_TYPE_FIXVAL) { double maxval = 0.0; @@ -923,6 +968,7 @@ finish: static int arrays_alloc(EGSContext *ctx, const size_t domain_size[2]) { EGSInternal *priv = ctx->priv; + const DomainComponent *dc = &priv->dg->components[priv->local_component]; const size_t ghosts = FD_STENCIL_MAX; const size_t size_padded[2] = { @@ -935,15 +981,23 @@ static int arrays_alloc(EGSContext *ctx, const size_t domain_size[2]) }; const Slice slice[2] = { SLICE(ghosts, -ghosts, 1), SLICE(ghosts, -ghosts, 1) }; + Slice slice_exterior[2] = { + SLICE(dc->bnd_is_outer[MG2D_BOUNDARY_1L] ? 0 : ghosts, + dc->bnd_is_outer[MG2D_BOUNDARY_1U] ? 0 : -ghosts, 1), + SLICE(dc->bnd_is_outer[MG2D_BOUNDARY_0L] ? 0 : ghosts, + dc->bnd_is_outer[MG2D_BOUNDARY_0U] ? 0 : -ghosts, 1), + }; int ret; - ret = mg2di_ndarray_alloc(&ctx->u_base, 2, size_padded, NDARRAY_ALLOC_ZERO); + ret = mg2di_ndarray_alloc(&priv->u_base, 2, size_padded, NDARRAY_ALLOC_ZERO); if (ret < 0) return ret; - ret = mg2di_ndarray_slice(&ctx->u, ctx->u_base, - (Slice [2]){ SLICE(ghosts, size_padded[0] - ghosts, 1), - SLICE(ghosts, -ghosts, 1) }); + ret = mg2di_ndarray_slice(&ctx->u, priv->u_base, slice); + if (ret < 0) + return ret; + + ret = mg2di_ndarray_slice(&ctx->u_exterior, priv->u_base, slice_exterior); if (ret < 0) return ret; @@ -951,9 +1005,11 @@ static int arrays_alloc(EGSContext *ctx, const size_t domain_size[2]) if (ret < 0) return ret; - ret = mg2di_ndarray_slice(&priv->u_next, priv->u_next_base, - (Slice [2]){ SLICE(ghosts, size_padded[0] - ghosts, 1), - SLICE(ghosts, -ghosts, 1) }); + ret = mg2di_ndarray_slice(&priv->u_next, priv->u_next_base, slice); + if (ret < 0) + return ret; + + ret = mg2di_ndarray_slice(&priv->u_next_exterior, priv->u_next_base, slice_exterior); if (ret < 0) return ret; @@ -1010,7 +1066,7 @@ static int arrays_alloc(EGSContext *ctx, const size_t domain_size[2]) return 0; } -EGSContext *mg2di_egs_alloc(size_t domain_size[2]) +static EGSContext *egs_alloc(const DomainGeometry *dg, unsigned int local_component) { EGSContext *ctx; int ret; @@ -1032,21 +1088,28 @@ EGSContext *mg2di_egs_alloc(size_t domain_size[2]) if (!ctx->exact) goto fail; + ret = mg2di_dg_copy(&ctx->priv->dg, dg); + if (ret < 0) + goto fail; + + ctx->priv->local_component = local_component; + ctx->priv->comm = MPI_COMM_NULL; + mg2di_timer_init(&ctx->exact->timer_mat_construct); mg2di_timer_init(&ctx->exact->timer_bicgstab); mg2di_timer_init(&ctx->exact->timer_lu_solve); mg2di_timer_init(&ctx->exact->timer_export); - if (!domain_size[0] || !domain_size[1] || - domain_size[0] > SIZE_MAX / domain_size[1]) + if (!dg->domain_size[0] || !dg->domain_size[1] || + dg->domain_size[0] > SIZE_MAX / dg->domain_size[1]) goto fail; - ret = arrays_alloc(ctx, domain_size); + ret = arrays_alloc(ctx, dg->components[local_component].interior.size); if (ret < 0) goto fail; - ctx->domain_size[0] = domain_size[0]; - ctx->domain_size[1] = domain_size[1]; + ctx->domain_size[0] = dg->components[local_component].interior.size[0]; + ctx->domain_size[1] = dg->components[local_component].interior.size[1]; ctx->priv->rescalc = mg2di_residual_calc_alloc(); if (!ctx->priv->rescalc) @@ -1060,6 +1123,7 @@ EGSContext *mg2di_egs_alloc(size_t domain_size[2]) mg2di_timer_init(&ctx->timer_res_calc); mg2di_timer_init(&ctx->timer_init); mg2di_timer_init(&ctx->timer_solve); + mg2di_timer_init(&ctx->timer_mpi_sync); return ctx; fail: @@ -1067,6 +1131,113 @@ fail: return NULL; } +EGSContext *mg2di_egs_alloc(size_t domain_size[2]) +{ + EGSContext *ctx; + DomainGeometry *dg; + + dg = mg2di_dg_alloc(1); + if (!dg) + return NULL; + + dg->domain_size[0] = domain_size[0]; + dg->domain_size[1] = domain_size[1]; + + dg->components[0].interior.start[0] = 0; + dg->components[0].interior.start[1] = 0; + dg->components[0].interior.size[0] = domain_size[0]; + dg->components[0].interior.size[1] = domain_size[1]; + + dg->components[0].exterior.start[0] = -FD_STENCIL_MAX; + dg->components[0].exterior.start[1] = -FD_STENCIL_MAX; + dg->components[0].exterior.size[0] = domain_size[0] + 2 * FD_STENCIL_MAX; + dg->components[0].exterior.size[1] = domain_size[1] + 2 * FD_STENCIL_MAX; + + for (int i = 0; i < ARRAY_ELEMS(dg->components[0].bnd_is_outer); i++) + dg->components[0].bnd_is_outer[i] = 1; + + ctx = egs_alloc(dg, 0); + mg2di_dg_free(&dg); + if (!ctx) + return NULL; + + return ctx; +} + +EGSContext *mg2di_egs_alloc_mpi(MPI_Comm comm, const DomainGeometry *dg) +{ + EGSContext *ctx = NULL; + Rect *overlaps_recv = NULL, *overlaps_send = NULL; + ptrdiff_t *lo; + int local_component; + int ret; + + MPI_Comm_rank(comm, &local_component); + + overlaps_recv = calloc(dg->nb_components, sizeof(*overlaps_recv)); + overlaps_send = calloc(dg->nb_components, sizeof(*overlaps_send)); + if (!overlaps_recv || !overlaps_send) + goto fail; + + ret = mg2di_dg_edge_overlaps(overlaps_recv, overlaps_send, + dg, local_component, FD_STENCIL_MAX); + if (ret < 0) + goto fail; + + ctx = egs_alloc(dg, local_component); + + ctx->priv->comm = comm; + + ctx->priv->sync_sendtypes = calloc(dg->nb_components, sizeof(*ctx->priv->sync_sendtypes)); + ctx->priv->sync_senddispl = calloc(dg->nb_components, sizeof(*ctx->priv->sync_senddispl)); + ctx->priv->sync_sendcounts = calloc(dg->nb_components, sizeof(*ctx->priv->sync_sendcounts)); + ctx->priv->sync_recvtypes = calloc(dg->nb_components, sizeof(*ctx->priv->sync_recvtypes)); + ctx->priv->sync_recvdispl = calloc(dg->nb_components, sizeof(*ctx->priv->sync_recvdispl)); + ctx->priv->sync_recvcounts = calloc(dg->nb_components, sizeof(*ctx->priv->sync_recvcounts)); + if (!ctx->priv->sync_sendtypes || !ctx->priv->sync_senddispl || !ctx->priv->sync_sendcounts || + !ctx->priv->sync_recvtypes || !ctx->priv->sync_recvdispl || !ctx->priv->sync_recvcounts) + goto fail; + + lo = dg->components[local_component].interior.start; + + /* construct the send/receive parameters */ + for (unsigned int i = 0; i < dg->nb_components; i++) { + if (i == local_component) { + MPI_Type_dup(MPI_INT, &ctx->priv->sync_sendtypes[i]); + MPI_Type_dup(MPI_INT, &ctx->priv->sync_recvtypes[i]); + ctx->priv->sync_sendcounts[i] = 0; + ctx->priv->sync_recvcounts[i] = 0; + ctx->priv->sync_senddispl[i] = 0; + ctx->priv->sync_recvdispl[i] = 0; + + continue; + } + + /* receive */ + MPI_Type_vector(overlaps_recv[i].size[1], overlaps_recv[i].size[0], + ctx->u->stride[0], MPI_DOUBLE, &ctx->priv->sync_recvtypes[i]); + MPI_Type_commit(&ctx->priv->sync_recvtypes[i]); + ctx->priv->sync_recvcounts[i] = 1; + ctx->priv->sync_recvdispl[i] = ((overlaps_recv[i].start[1] - lo[1]) * ctx->u->stride[0] + + (overlaps_recv[i].start[0] - lo[0])) * sizeof(*ctx->u->data); + + /* send */ + MPI_Type_vector(overlaps_send[i].size[1], overlaps_send[i].size[0], + ctx->u->stride[0], MPI_DOUBLE, &ctx->priv->sync_sendtypes[i]); + MPI_Type_commit(&ctx->priv->sync_sendtypes[i]); + ctx->priv->sync_sendcounts[i] = 1; + ctx->priv->sync_senddispl[i] = ((overlaps_send[i].start[1] - lo[1]) * ctx->u->stride[0] + + (overlaps_send[i].start[0] - lo[0])) * sizeof(*ctx->u->data); + } + + return ctx; +fail: + free(overlaps_recv); + free(overlaps_send); + mg2di_egs_free(&ctx); + return NULL; +} + void mg2di_egs_free(EGSContext **pctx) { EGSContext *ctx = *pctx; @@ -1082,8 +1253,10 @@ void mg2di_egs_free(EGSContext **pctx) exact_arrays_free(ctx); mg2di_ndarray_free(&ctx->u); - mg2di_ndarray_free(&ctx->u_base); + mg2di_ndarray_free(&ctx->u_exterior); + mg2di_ndarray_free(&ctx->priv->u_base); mg2di_ndarray_free(&ctx->priv->u_next); + mg2di_ndarray_free(&ctx->priv->u_next_exterior); mg2di_ndarray_free(&ctx->priv->u_next_base); mg2di_ndarray_free(&ctx->rhs); @@ -1105,6 +1278,28 @@ void mg2di_egs_free(EGSContext **pctx) for (int i = 0; i < ARRAY_ELEMS(ctx->boundaries); i++) mg2di_bc_free(&ctx->boundaries[i]); + if (ctx->priv->sync_sendtypes) { + for (int i = 0; i < ctx->priv->dg->nb_components; i++) { + if (ctx->priv->sync_sendtypes[i]) + MPI_Type_free(&ctx->priv->sync_sendtypes[i]); + } + } + if (ctx->priv->sync_recvtypes) { + for (int i = 0; i < ctx->priv->dg->nb_components; i++) { + if (ctx->priv->sync_recvtypes[i]) + MPI_Type_free(&ctx->priv->sync_recvtypes[i]); + } + } + free(ctx->priv->sync_sendtypes); + free(ctx->priv->sync_recvtypes); + + free(ctx->priv->sync_sendcounts); + free(ctx->priv->sync_senddispl); + free(ctx->priv->sync_recvcounts); + free(ctx->priv->sync_recvdispl); + + mg2di_dg_free(&ctx->priv->dg); + tp_free(&ctx->priv->tp_internal); free(ctx->priv); diff --git a/ell_grid_solve.h b/ell_grid_solve.h index 7ce61a0..2893ce5 100644 --- a/ell_grid_solve.h +++ b/ell_grid_solve.h @@ -40,6 +40,9 @@ #include #include +#include + +#include "components.h" #include "log.h" #include "mg2d_boundary.h" #include "mg2d_constants.h" @@ -122,7 +125,8 @@ typedef struct EGSContext { int cpuflags; /** - * Size of the solver grid, set by mg2di_egs_alloc(). + * Size of the solver grid, set by mg2di_egs_alloc[_mpi](). For + * multi-component runs, this contains the size of this component only. * Read-only for the caller. */ size_t domain_size[2]; @@ -142,6 +146,9 @@ typedef struct EGSContext { /** * Boundary specification, indexed by MG2DBoundaryLoc. * To be filled by the caller before mg2di_egs_init(). + * + * For multi-component runs, only the outer (not inter-component) boundary + * specifications are accessed by the solver. */ MG2DBoundary *boundaries[4]; @@ -156,9 +163,9 @@ typedef struct EGSContext { NDArray *u; /** - * u including the ghost zones. + * u including the outer boundary ghost zones. */ - NDArray *u_base; + NDArray *u_exterior; /** * Values of the right-hand side. @@ -199,6 +206,7 @@ typedef struct EGSContext { Timer timer_res_calc; Timer timer_init; Timer timer_solve; + Timer timer_mpi_sync; } EGSContext; #define EGS_INIT_FLAG_SAME_DIFF_COEFFS (1 << 0) @@ -211,6 +219,18 @@ typedef struct EGSContext { * @return The solver context on success, NULL on failure. */ EGSContext *mg2di_egs_alloc(size_t domain_size[2]); + +/** + * Allocate a solver component in a multi-component MPI-based solve. + * + * @param comm The MPI communicator used to communicate with the other + * components + * @dg The geometry of the full computational domain. This component is indexed + * by its MPI rank in this geometry. + * + * @return The solver context on success, NULL on failure. + */ +EGSContext *mg2di_egs_alloc_mpi(MPI_Comm comm, const DomainGeometry *dg); /** * Initialize the solver for use, after all the required fields are filled by * the caller. diff --git a/meson.build b/meson.build index 0c55f2e..8f09386 100644 --- a/meson.build +++ b/meson.build @@ -6,6 +6,7 @@ add_project_arguments('-D_XOPEN_SOURCE=700', language : 'c') lib_src = [ 'bicgstab.c', 'boundary.c', + 'components.c', 'cpu.c', 'ell_grid_solve.c', 'log.c', @@ -27,7 +28,9 @@ liblapacke = cc.find_library('lapacke') dep_tp = declare_dependency(link_args : '-lthreadpool') -deps = [dep_tp, libm, libcblas, liblapacke] +dep_mpi = dependency('mpi', language: 'c') + +deps = [dep_tp, libm, libcblas, liblapacke, dep_mpi] cdata = configuration_data() cdata.set10('ARCH_X86', false) @@ -94,7 +97,7 @@ endif library('mg2d', lib_src, lib_obj, link_args : ver_flag, dependencies : deps, link_depends : verscript) # test programs -test_progs = ['relax', 'mg2d'] +test_progs = ['relax', 'relax_mpi', 'mg2d'] foreach t : test_progs target = t + '_test' executable(target, [target + '.c'] + lib_src, lib_obj, dependencies : deps + [libm]) diff --git a/mg2d.c b/mg2d.c index 25a1759..d09796b 100644 --- a/mg2d.c +++ b/mg2d.c @@ -192,7 +192,7 @@ static int mg_solve_subgrid(MG2DContext *ctx, MG2DLevel *level, int allow_exact) /* prolongate the coarser-level correction */ mg2di_timer_start(&level->timer_prolong); ret = mg2di_gt_transfer(level->transfer_prolong, level->prolong_tmp, - level->child->solver->u_base); + level->child->solver->u_exterior); mg2di_timer_stop(&level->timer_prolong); if (ret < 0) return ret; diff --git a/relax_mpi_test.c b/relax_mpi_test.c new file mode 100644 index 0000000..65888f6 --- /dev/null +++ b/relax_mpi_test.c @@ -0,0 +1,236 @@ +#include +#include +#include +#include +#include + +#include + +#include "components.h" +#include "ell_grid_solve.h" +#include "log.h" +#include "mg2d_boundary.h" +#include "mg2d_constants.h" +#include "ndarray.h" + +#define DOMAIN_SIZE 1.0 +#define FD_STENCIL 2 + +#if 1 +static double sol(double x, double y) +{ + return sin(M_PI * x) * sin(M_PI * y); +} +static double sol_dx(double x, double y) +{ + return M_PI * cos(M_PI * x) * sin(M_PI * y); +} +static double sol_dy(double x, double y) +{ + return M_PI * sin(M_PI * x) * cos(M_PI * y); +} +static double sol_dxx(double x, double y) +{ + return -M_PI * M_PI * sol(x, y); +} +static double sol_dyy(double x, double y) +{ + return -M_PI * M_PI * sol(x, y); +} +static double sol_dxy(double x, double y) +{ + return M_PI * M_PI * cos(M_PI * x) * cos(M_PI * y); +} +#endif + +int main(int argc, char **argv) +{ + EGSContext *ctx; + DomainGeometry *dg = NULL; + DomainComponent *dc = NULL; + long int log2N, log2maxiter; + long int N, maxiter; + double res_old, res_new; + int ret = 0; + + char processor_name[MPI_MAX_PROCESSOR_NAME]; + int nb_processes, rank, processor_name_len; + + if (argc < 3) { + fprintf(stderr, "Usage: %s \n", argv[0]); + return 1; + } + log2N = strtol(argv[1], NULL, 0); + log2maxiter = strtol(argv[2], NULL, 0); + if (log2N <= 0 || log2maxiter < 0 || + log2N >= sizeof(N) * 8 || log2maxiter >= sizeof(maxiter) * 8) { + fprintf(stderr, "Invalid log2N/log2maxiter values: %ld/%ld\n", + log2N, log2maxiter); + return 1; + } + + N = (1L << log2N) + 1; + maxiter = 1L << log2maxiter; + + MPI_Init(NULL, NULL); + + MPI_Comm_size(MPI_COMM_WORLD, &nb_processes); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Get_processor_name(processor_name, &processor_name_len); + + fprintf(stderr, "This is process %d out of %d, running on %s\n", + rank, nb_processes, processor_name); + + dg = mg2di_dg_alloc(nb_processes); + if (!dg) { + fprintf(stderr, "Error allocating domain geometry\n"); + ret = 1; + goto fail; + } + + dg->domain_size[0] = N; + dg->domain_size[1] = N; + + for (unsigned int i = 0; i < dg->nb_components; i++) { + size_t patch_start, patch_end, patch_size_y; + + patch_size_y = (N + nb_processes - 1) / nb_processes; + patch_start = i * patch_size_y; + patch_end = MIN((i + 1) * patch_size_y, N); + patch_size_y = patch_end - patch_start; + if (patch_size_y <= 0) { + fprintf(stderr, "Too many processes for grid size %ld: %d\n", N, nb_processes); + ret = 1; + goto fail; + } + + dg->components[i].interior.start[0] = 0; + dg->components[i].interior.start[1] = patch_start; + dg->components[i].interior.size[0] = N; + dg->components[i].interior.size[1] = patch_size_y; + + dg->components[i].exterior.start[0] = -FD_STENCIL; + dg->components[i].exterior.start[1] = i ? patch_start : -FD_STENCIL; + dg->components[i].exterior.size[0] = N + 2 * FD_STENCIL; + dg->components[i].exterior.size[1] = patch_size_y + ((i == 0) * FD_STENCIL) + ((i == nb_processes - 1) * FD_STENCIL); + + dg->components[i].bnd_is_outer[MG2D_BOUNDARY_0L] = 1; + dg->components[i].bnd_is_outer[MG2D_BOUNDARY_0U] = 1; + dg->components[i].bnd_is_outer[MG2D_BOUNDARY_1L] = i == 0; + dg->components[i].bnd_is_outer[MG2D_BOUNDARY_1U] = i == nb_processes - 1; + } + dc = &dg->components[rank]; + + ctx = mg2di_egs_alloc_mpi(MPI_COMM_WORLD, dg); + if (!ctx) { + fprintf(stderr, "Error allocating the solver context\n"); + return 1; + } + + ctx->logger.log = mg2di_log_default_callback; + + ctx->step[0] = DOMAIN_SIZE / (N - 1.0); + ctx->step[1] = DOMAIN_SIZE / (N - 1.0); + + ctx->fd_stencil = FD_STENCIL; + + for (int bnd_loc = 0; bnd_loc < ARRAY_ELEMS(ctx->boundaries); bnd_loc++) { + MG2DBoundary *bnd = ctx->boundaries[bnd_loc]; + const int ci = mg2d_bnd_coord_idx(bnd_loc); + const int bnd_dir = mg2d_bnd_out_dir(bnd_loc); + + double coord[2]; + + if (!dc->bnd_is_outer[bnd_loc]) + continue; + + bnd->type = MG2D_BC_TYPE_FIXVAL; + + memset(bnd->val, 0, ctx->domain_size[!ci] * sizeof(*bnd->val)); + + for (int j = 1; j < ctx->fd_stencil; j++) { + double *dst = bnd->val + j * bnd->val_stride; + + coord[ci] = mg2d_bnd_is_upper(bnd_loc) * DOMAIN_SIZE + bnd_dir * j * ctx->step[ci]; + + for (ptrdiff_t k = -j; k < (ptrdiff_t)ctx->domain_size[!ci] + j; k++) { + coord[!ci] = (k + dc->interior.start[!ci]) * ctx->step[!ci]; + dst[k] = sol(coord[0], coord[1]); + } + } + } + + for (size_t y = 0; y < ctx->domain_size[1]; y++) { + const double y_coord = (y + dc->interior.start[1]) * ctx->step[1]; + + memset(NDPTR2D(ctx->u, 0, y), 0, sizeof(*ctx->u->data) * ctx->domain_size[0]); + + for (size_t x = 1; x < ctx->domain_size[0]; x++) { + const double x_coord = x * ctx->step[0]; + + *NDPTR2D(ctx->diff_coeffs[MG2D_DIFF_COEFF_02], x, y) = 1.0; + *NDPTR2D(ctx->diff_coeffs[MG2D_DIFF_COEFF_20], x, y) = 1.0; + *NDPTR2D(ctx->diff_coeffs[MG2D_DIFF_COEFF_11], x, y) = 1.0; + + *NDPTR2D(ctx->rhs, x, y) = sol_dxx(x_coord, y_coord) + sol_dyy(x_coord, y_coord) + sol_dxy(x_coord, y_coord); + } + + memset(NDPTR2D(ctx->diff_coeffs[MG2D_DIFF_COEFF_00], 0, y), 0, + sizeof(*ctx->diff_coeffs[0]->data) * ctx->domain_size[0]); + memset(NDPTR2D(ctx->diff_coeffs[MG2D_DIFF_COEFF_01], 0, y), 0, + sizeof(*ctx->diff_coeffs[0]->data) * ctx->domain_size[0]); + memset(NDPTR2D(ctx->diff_coeffs[MG2D_DIFF_COEFF_10], 0, y), 0, + sizeof(*ctx->diff_coeffs[0]->data) * ctx->domain_size[0]); + } + + ret = mg2di_egs_init(ctx, 0); + if (ret < 0) { + fprintf(stderr, "Error initializing the solver context: %d\n", ret); + ret = 1; + goto fail; + } + + MPI_Reduce(&ctx->residual_max, &res_old, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); + + for (int i = 0; i < maxiter; i++) { + ret = mg2di_egs_solve(ctx, EGS_SOLVE_RELAXATION, 0); + if (ret < 0) { + fprintf(stderr, "Error during relaxation\n"); + ret = 1; + goto fail; + } + MPI_Reduce(&ctx->residual_max, &res_new, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); + if (rank == 0) { + if (res_new > 1e3) { + fprintf(stderr, "Diverged at step %d: %g -> %g\n", i, res_old, res_new); + goto fail; + } + fprintf(stderr, "Step %d: %g -> %g (%g)\n", i, res_old, res_new, res_old / res_new); + res_old = res_new; + } + } + + { + double max_err = 0.0; + + for (size_t y = 0; y < ctx->domain_size[1]; y++) { + const double y_coord = (y + dc->interior.start[1]) * ctx->step[1]; + + for (size_t x = 0; x < ctx->domain_size[0]; x++) { + const double x_coord = x * ctx->step[0]; + double err = fabs(ctx->u->data[y * ctx->u->stride[0] + x] - sol(x_coord, y_coord)); + if (err > max_err) + max_err = err; + } + } + MPI_Reduce(rank ? &max_err : MPI_IN_PLACE, &max_err, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); + if (rank == 0) + fprintf(stderr, "max(|solution - exact|): %g\n", max_err); + } + +fail: + mg2di_egs_free(&ctx); + mg2di_dg_free(&dg); + MPI_Finalize(); + return ret; +} -- cgit v1.2.3