From 04cdac670f351fc7b256e7064d15fd644494a95b Mon Sep 17 00:00:00 2001
From: Anton Khirnov
Date: Fri, 16 Aug 2019 10:51:20 +0200
Subject: Add support for multi-component runs.

---
 schedule.ccl |   8 +-
 src/qms.c    | 329 ++++++++++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 311 insertions(+), 26 deletions(-)

diff --git a/schedule.ccl b/schedule.ccl
index e8241c1..cbea68c 100644
--- a/schedule.ccl
+++ b/schedule.ccl
@@ -7,11 +7,11 @@ if (CCTK_EQUALS(lapse_source, "QMS_MG")) {
   SCHEDULE qms_mg_eval IN ML_BSSN_evolCalcGroup BEFORE ML_BSSN_lapse_evol {
     LANG: C
-    SYNC: ML_lapse
+    SYNC: ML_W
   } "Quasimaximal slicing eval W"
 
   SCHEDULE qms_mg_eval IN ML_CCZ4_evolCalcGroup BEFORE ML_CCZ4_lapse_evol {
     LANG: C
-    SYNC: ML_lapse
+    SYNC: ML_W
   } "Quasimaximal slicing eval W"
 
   SCHEDULE qms_mg_solve IN CCTK_POSTSTEP {
@@ -40,10 +40,6 @@ if (CCTK_EQUALS(lapse_source, "QMS_MG")) {
     LANG: C
   } ""
 
-  #SCHEDULE quasimaximal_slicing_axi IN MoL_PseudoEvolution {
-  #  LANG: C
-  #} "Quasimaximal slicing"
-
   STORAGE: W_mg[2]
   STORAGE: W_pred0
   STORAGE: W_pred1
diff --git a/src/qms.c b/src/qms.c
index cfad019..646c7b9 100644
--- a/src/qms.c
+++ b/src/qms.c
@@ -17,6 +17,8 @@
 #include "cctk_Timers.h"
 #include "util_Table.h"
 
+#include <mpi.h>
+
 #define SQR(x) ((x) * (x))
 #define SGN(x) ((x) >= 0.0 ? 1.0 : -1.0)
 #define MAX(x, y) ((x) > (y) ? (x) : (y))
@@ -32,6 +34,7 @@ typedef struct CoordPatch {
     ptrdiff_t grid_size[3];
 
     MG2DContext *solver;
+    MPI_Comm solver_comm;
 
     int y_idx;
 
@@ -40,6 +43,22 @@ typedef struct CoordPatch {
 
     double *filter;
     ptrdiff_t filter_stride;
+
+    /* MPI sync for the initial guess */
+    int nb_components;
+    ptrdiff_t (*solver_start)[2];
+    size_t (*solver_size)[2];
+    double *u_guess;
+    ptrdiff_t u_guess_stride;
+    ptrdiff_t u_guess_start[2];
+    size_t u_guess_size[2];
+
+    int          *u_guess_sendcounts;
+    int          *u_guess_senddispl;
+    MPI_Datatype *u_guess_sendtypes;
+    int          *u_guess_recvcounts;
+    int          *u_guess_recvdispl;
+    MPI_Datatype *u_guess_recvtypes;
 } CoordPatch;
 
 typedef struct QMSMGContext {
@@ -138,6 +157,69 @@ static void log_callback(const MG2DContext *ctx, int level,
     fputs(buf, stderr);
 }
 
+/**
+ * Compute the refinement level geometry.
+ *
+ * The level is assumed to be a single square patch that is composed of
+ * nProcs components. The lower x and z boundaries are assumed to be
+ * reflection boundaries. For our purposes, both the physical outer boundaries
+ * and the refinement boundaries are equivalent, while the intercomponent
+ * boundaries are treated differently. Since Cactus only allows distinguishing
+ * between "physical" and "all other" boundaries, we need to compute the extents manually.
+ *
+ * @param pextents on return, points to a newly allocated array holding four
+ * numbers per component: the x/z indices of the component origin in the
+ * refinement level, followed by the x/z number of points owned by that
+ * component; when the component lies on the outer level boundary, this
+ * includes all the ghost zones up to that boundary. Must be freed by the caller.
+ *
+ * @param level_size will contain the number of points in x/z directions on the
+ * complete refinement level, including all the ghost zones up to the outer
+ * level boundary.
+ */
+static void get_extents(size_t **pextents, int *level_size, cGH *gh)
+{
+    int nb_procs   = CCTK_nProcs(gh);
+    int local_proc = CCTK_MyProc(gh);
+
+    size_t *extents;
+
+    extents = calloc(nb_procs, 4 * sizeof(*extents));
+    if (!extents)
+        CCTK_WARN(0, "Error allocating the extents");
+
+    for (int dir = 0; dir < 2; dir++) {
+        extents[4 * local_proc + 0 + dir] = gh->cctk_lbnd[2 * dir];
+        extents[4 * local_proc + 2 + dir] = gh->cctk_lsh[2 * dir] - gh->cctk_nghostzones[2 * dir];
+    }
+
+    if (nb_procs > 1)
+        MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, extents, 4 * sizeof(*extents), MPI_BYTE, MPI_COMM_WORLD);
+
+    level_size[0] = 0;
+    level_size[1] = 0;
+
+    for (int dir = 0; dir < 2; dir++) {
+        for (int comp = 0; comp < nb_procs; comp++) {
+            size_t start = extents[4 * comp + 0 + dir];
+            size_t size = extents[4 * comp + 2 + dir];
+            if (start + size > level_size[dir])
+                level_size[dir] = start + size;
+        }
+
+        // if the upper boundary is an inter-component boundary, subtract the ghost zones
+        for (int comp = 0; comp < nb_procs; comp++) {
+            size_t start = extents[4 * comp + 0 + dir];
+            size_t size = extents[4 * comp + 2 + dir];
+
+            if (start + size < level_size[dir])
+                extents[4 * comp + 2 + dir] -= gh->cctk_nghostzones[2 * dir];
+        }
+    }
+
+    *pextents = extents;
+}
+
 static CoordPatch *get_coord_patch(QMSMGContext *ms, int level)
 {
     cGH *gh = ms->gh;
@@ -146,11 +228,20 @@ static CoordPatch *get_coord_patch(QMSMGContext *ms, int level)
 
     const int integrator_substeps = MoLNumIntegratorSubsteps();
 
+    const int nb_procs   = CCTK_nProcs(gh);
+    const int local_proc = CCTK_MyProc(gh);
+
     const double *a_x = CCTK_VarDataPtr(gh, 0, "grid::x");
     const double *a_y = CCTK_VarDataPtr(gh, 0, "grid::y");
    const double *a_z = CCTK_VarDataPtr(gh, 0, "grid::z");
 
-    size_t solver_size[2];
+    size_t *extents;
+    size_t component_start[2], component_size[2];
+
+    int level_size[2];
+
+    int *solver_ranks;
+    int nb_solver_ranks;
 
     CoordPatch *cp;
     MG2DContext *solver;
@@ -182,24 +273,78 @@ static CoordPatch *get_coord_patch(QMSMGContext *ms, int level)
     if (i == gh->cctk_lsh[1])
         CCTK_WARN(0, "The grid does not include y==0");
 
+    get_extents(&extents, level_size, gh);
+    component_start[0] = extents[4 * local_proc + 0 + 0];
+    component_start[1] = extents[4 * local_proc + 0 + 1];
+    component_size[0] = extents[4 * local_proc + 2 + 0];
+    component_size[1] = extents[4 * local_proc + 2 + 1];
+
     for (int dir = 0; dir < 2; dir++) {
-        ptrdiff_t offset_right;
+        cp->bnd_intercomp[dir][0] = component_start[dir] > 0;
+        cp->bnd_intercomp[dir][1] = (component_start[dir] + component_size[dir]) < level_size[dir];
+        cp->offset_left[dir] = gh->cctk_nghostzones[2 * dir];
+    }
+
+    if (level_size[0] != level_size[1])
+        CCTK_WARN(0, "Non-square domain");
+
+    solver_ranks = calloc(nb_procs, sizeof(*solver_ranks));
+    if (!solver_ranks)
+        CCTK_WARN(0, "Error allocating solver ranks");
+
+    cp->nb_components = nb_procs;
+    cp->solver_size  = calloc(nb_procs, sizeof(*cp->solver_size));
+    cp->solver_start = calloc(nb_procs, sizeof(*cp->solver_start));
+    if (!cp->solver_size || !cp->solver_start)
+        CCTK_WARN(0, "Error allocating solver sizes/starts");
+
+    nb_solver_ranks = 0;
+    for (int proc = 0; proc < nb_procs; proc++) {
+        for (int dir = 0; dir < 2; dir++) {
+            size_t start = extents[4 * proc + 0 + dir];
+            size_t end = start + extents[4 * proc + 2 + dir];
+            ptrdiff_t offset_right;
+
+            offset_right = gh->cctk_nghostzones[2 * dir];
+            if (cp->level > 0) {
+                offset_right *= integrator_substeps * 2;
+                offset_right -= ms->boundary_offset;
+            }
 
-        cp->offset_left[dir] = gh->cctk_nghostzones[2 * dir];
+            end = MIN(end, level_size[dir] - offset_right);
 
-        offset_right = gh->cctk_nghostzones[2 * dir];
-        if (cp->level > 0) {
-            offset_right *= integrator_substeps * 2;
-            offset_right -= ms->boundary_offset;
+            if (end > start) {
+                cp->solver_start[proc][dir] = start;
+                cp->solver_size[proc][dir] = end - start;
+            } else {
+                cp->solver_start[proc][dir] = 0;
+                cp->solver_size[proc][dir] = 0;
+            }
         }
 
-        solver_size[dir] = cp->grid_size[2 * dir] - cp->offset_left[dir] - offset_right;
+        if (cp->solver_size[proc][0] > 0 && cp->solver_size[proc][1] > 0)
+            solver_ranks[nb_solver_ranks++] = proc;
     }
 
-    if (solver_size[0] != solver_size[1])
-        CCTK_WARN(0, "Non-square domain");
+    if (nb_solver_ranks > 1) {
+        MPI_Group grp_world, grp_solver;
+
+        MPI_Comm_group(MPI_COMM_WORLD, &grp_world);
+        MPI_Group_incl(grp_world, nb_solver_ranks, solver_ranks, &grp_solver);
+        MPI_Comm_create(MPI_COMM_WORLD, grp_solver, &cp->solver_comm);
+        MPI_Group_free(&grp_solver);
+        MPI_Group_free(&grp_world);
+    } else {
+        cp->solver_comm = MPI_COMM_NULL;
+    }
+
+    if (!(cp->solver_size[local_proc][0] > 0 && cp->solver_size[local_proc][1] > 0))
+        goto finish;
+
+    if (cp->solver_comm == MPI_COMM_NULL)
+        solver = mg2d_solver_alloc(cp->solver_size[local_proc][0]);
+    else
+        solver = mg2d_solver_alloc_mpi(cp->solver_comm, component_start, cp->solver_size[local_proc]);
 
-    solver = mg2d_solver_alloc(solver_size[0]);
     if (!solver)
         CCTK_WARN(0, "Error allocating the solver");
 
@@ -237,7 +382,7 @@ static CoordPatch *get_coord_patch(QMSMGContext *ms, int level)
         int ci = mg2d_bnd_coord_idx(i);
         for (size_t j = 0; j < solver->fd_stencil; j++) {
             memset(solver->boundaries[i]->val + j * solver->boundaries[i]->val_stride - j,
-                   0, sizeof(*solver->boundaries[i]->val) * (solver_size[!ci] + 2 * j));
+                   0, sizeof(*solver->boundaries[i]->val) * (solver->local_size[!ci] + 2 * j));
         }
     }
 
@@ -247,15 +392,15 @@ static CoordPatch *get_coord_patch(QMSMGContext *ms, int level)
     }
 
     if ((solver->domain_size - 1) * solver->step[0] >= ms->filter_start) {
-        cp->filter_stride = solver->domain_size;
-        cp->filter = calloc(solver->domain_size * cp->filter_stride, sizeof(*cp->filter));
+        cp->filter_stride = solver->local_size[0];
+        cp->filter = calloc(solver->local_size[1] * cp->filter_stride, sizeof(*cp->filter));
         if (!cp->filter)
             CCTK_WARN(0, "Error allocating the filter");
 
-        for (int idx_z = 0; idx_z < solver->domain_size; idx_z++) {
-            const double z = solver->step[1] * idx_z;
-            for (int idx_x = 0; idx_x < solver->domain_size; idx_x++) {
-                const double x = solver->step[0] * idx_x;
+        for (int idx_z = 0; idx_z < solver->local_size[1]; idx_z++) {
+            const double z = solver->step[1] * (solver->local_start[1] + idx_z);
+            for (int idx_x = 0; idx_x < solver->local_size[0]; idx_x++) {
+                const double x = solver->step[0] * (solver->local_start[0] + idx_x);
                 const double r = sqrt(SQR(x) + SQR(z));
 
                 double fact = exp(-36.0 * (pow((r - ms->filter_start) / (ms->filter_end - ms->filter_start), 6.0)));
@@ -266,6 +411,10 @@ static CoordPatch *get_coord_patch(QMSMGContext *ms, int level)
         }
     }
 
+finish:
+    free(solver_ranks);
+    free(extents);
+
     ms->nb_patches++;
     return cp;
 }
@@ -274,6 +423,9 @@ static void coord_patch_free(CoordPatch *cp)
 {
     mg2d_solver_free(&cp->solver);
 
+    if (cp->solver_comm != MPI_COMM_NULL)
+        MPI_Comm_free(&cp->solver_comm);
+
     free(cp->filter);
 }
 
@@ -971,6 +1123,144 @@ static void solution_to_grid(CoordPatch *cp, MG2DContext *solver, double *dst)
     }
 }
 
+typedef struct Rect {
+    ptrdiff_t start[2];
+    size_t size[2];
+} Rect;
+
+static int rect_intersect(Rect *dst, const Rect *src1, const Rect *src2)
+{
+    ptrdiff_t intersect_start0 = MAX(src1->start[0], src2->start[0]);
+    ptrdiff_t intersect_start1 = MAX(src1->start[1], src2->start[1]);
+    ptrdiff_t intersect_end0 = MIN(src1->start[0] + src1->size[0], src2->start[0] + src2->size[0]);
+    ptrdiff_t intersect_end1 = MIN(src1->start[1] + src1->size[1], src2->start[1] + src2->size[1]);
+
+    if (intersect_start0 < intersect_end0 && intersect_start1 < intersect_end1) {
+        dst->start[0] = intersect_start0;
+        dst->start[1] = intersect_start1;
+        dst->size[0] = intersect_end0 - intersect_start0;
+        dst->size[1] = intersect_end1 - intersect_start1;
+
+        return 1;
+    }
+
+    dst->size[0] = 0;
+    dst->size[1] = 0;
+
+    return 0;
+}
+
+static int guess_from_coarser(QMSMGContext *ms, CoordPatch *cp, CoordPatch *cp_coarse)
+{
+    int ret;
+
+    if (cp->nb_components > 1) {
+        int comp_fine, comp_coarse = 0;
+
+        // FIXME skip levels with skipped components,
+        // those require somewhat more complex logic to implement
+        MPI_Comm_size(cp->solver_comm, &comp_fine);
+        if (cp_coarse->solver_comm != MPI_COMM_NULL)
+            MPI_Comm_size(cp_coarse->solver_comm, &comp_coarse);
+        if (comp_fine != cp->nb_components || comp_coarse != cp->nb_components)
+            return 0;
+
+        if (!cp->u_guess) {
+            const int ghosts = cp->solver->fd_stencil + 1;
+            int local_proc = CCTK_MyProc(ms->gh);
+
+            cp->u_guess_sendcounts = calloc(cp->nb_components, sizeof(*cp->u_guess_sendcounts));
+            cp->u_guess_senddispl  = calloc(cp->nb_components, sizeof(*cp->u_guess_senddispl));
+            cp->u_guess_sendtypes  = calloc(cp->nb_components, sizeof(*cp->u_guess_sendtypes));
+            cp->u_guess_recvcounts = calloc(cp->nb_components, sizeof(*cp->u_guess_recvcounts));
+            cp->u_guess_recvdispl  = calloc(cp->nb_components, sizeof(*cp->u_guess_recvdispl));
+            cp->u_guess_recvtypes  = calloc(cp->nb_components, sizeof(*cp->u_guess_recvtypes));
+            if (!cp->u_guess_sendcounts || !cp->u_guess_senddispl || !cp->u_guess_sendtypes ||
+                !cp->u_guess_recvcounts || !cp->u_guess_recvdispl || !cp->u_guess_recvtypes)
+                return -ENOMEM;
+
+            cp->u_guess_size[0] = cp->solver_size[local_proc][0] / 2 + 2 * ghosts;
+            cp->u_guess_size[1] = cp->solver_size[local_proc][1] / 2 + 2 * ghosts;
+            cp->u_guess_start[0] = cp->solver_start[local_proc][0] / 2 - ghosts;
+            cp->u_guess_start[1] = cp->solver_start[local_proc][1] / 2 - ghosts;
+            cp->u_guess_stride = cp->u_guess_size[0];
+            cp->u_guess = calloc(cp->u_guess_stride * cp->u_guess_size[1],
+                                 sizeof(*cp->u_guess));
+            if (!cp->u_guess)
+                return -ENOMEM;
+
+            for (int comp_fine = 0; comp_fine < cp->nb_components; comp_fine++) {
+                Rect dst_fine = {
+                    .start = { cp->solver_start[comp_fine][0], cp->solver_start[comp_fine][1]},
+                    .size  = { cp->solver_size[comp_fine][0], cp->solver_size[comp_fine][1]},
+                };
+                Rect dst_coarse = {
+                    .start = { dst_fine.start[0] / 2 - ghosts, dst_fine.start[1] / 2 - ghosts },
+                    .size  = { dst_fine.size[0] / 2 + ghosts * 2, dst_fine.size[1] / 2 + ghosts * 2 },
+                };
+
+                for (int comp_coarse = 0; comp_coarse < cp->nb_components; comp_coarse++) {
+                    Rect overlap;
+                    Rect src = {
+                        .start = { cp_coarse->solver_start[comp_coarse][0], cp_coarse->solver_start[comp_coarse][1]},
+                        .size  = { cp_coarse->solver_size[comp_coarse][0], cp_coarse->solver_size[comp_coarse][1]},
+                    };
+                    for (int dim = 0; dim < 2; dim++) {
+                        if (src.start[dim] == 0) {
+                            src.start[dim] -= ghosts;
+                            src.size[dim] += ghosts;
+                        }
+                    }
+
+                    rect_intersect(&overlap, &dst_coarse, &src);
+                    if (comp_fine == local_proc) {
+                        if (overlap.size[0] > 0 && overlap.size[1] > 0) {
+                            cp->u_guess_recvcounts[comp_coarse] = 1;
+                            cp->u_guess_recvdispl[comp_coarse] = ((overlap.start[1] - dst_coarse.start[1]) * cp->u_guess_stride +
+                                                                  (overlap.start[0] - dst_coarse.start[0])) * sizeof(double);
+                            MPI_Type_vector(overlap.size[1], overlap.size[0], cp->u_guess_stride,
+                                            MPI_DOUBLE, &cp->u_guess_recvtypes[comp_coarse]);
+                            MPI_Type_commit(&cp->u_guess_recvtypes[comp_coarse]);
+                        } else {
+                            cp->u_guess_recvcounts[comp_coarse] = 0;
+                            cp->u_guess_recvdispl[comp_coarse] = 0;
+                            MPI_Type_dup(MPI_BYTE, &cp->u_guess_recvtypes[comp_coarse]);
+                        }
+                    }
+                    if (comp_coarse == local_proc) {
+                        if (overlap.size[0] > 0 && overlap.size[1] > 0) {
+                            ptrdiff_t src_origin[2] = { cp_coarse->solver_start[comp_coarse][0], cp_coarse->solver_start[comp_coarse][1]};
+                            cp->u_guess_sendcounts[comp_fine] = 1;
+                            cp->u_guess_senddispl[comp_fine] = ((overlap.start[1] - src_origin[1]) * cp_coarse->solver->u_stride +
+                                                                (overlap.start[0] - src_origin[0])) * sizeof(double);
+                            MPI_Type_vector(overlap.size[1], overlap.size[0], cp_coarse->solver->u_stride,
+                                            MPI_DOUBLE, &cp->u_guess_sendtypes[comp_fine]);
+                            MPI_Type_commit(&cp->u_guess_sendtypes[comp_fine]);
+                        } else {
+                            cp->u_guess_sendcounts[comp_fine] = 0;
+                            cp->u_guess_senddispl[comp_fine] = 0;
+                            MPI_Type_dup(MPI_BYTE, &cp->u_guess_sendtypes[comp_fine]);
+                        }
+                    }
+                }
+            }
+        }
+
+        MPI_Alltoallw(cp_coarse->solver->u, cp->u_guess_sendcounts, cp->u_guess_senddispl, cp->u_guess_sendtypes,
+                      cp->u_guess, cp->u_guess_recvcounts, cp->u_guess_recvdispl, cp->u_guess_recvtypes,
+                      MPI_COMM_WORLD);
+        ret = mg2d_init_guess(cp->solver,
+                              cp->u_guess, cp->u_guess_stride,
+                              cp->u_guess_start, cp->u_guess_size, cp_coarse->solver->step);
+    } else {
+        ret = mg2d_init_guess(cp->solver, cp_coarse->solver->u, cp_coarse->solver->u_stride,
+                              cp_coarse->solver->local_start, cp_coarse->solver->local_size, cp_coarse->solver->step);
+    }
+
+    return ret;
+}
+
+
 void qms_mg_solve(CCTK_ARGUMENTS)
 {
     QMSMGContext *ms = qms_context;
@@ -1039,8 +1329,7 @@ void qms_mg_solve(CCTK_ARGUMENTS)
 
         /* use the solution from the coarser level as the initial guess */
         CoordPatch *cp_coarse = get_coord_patch(ms, reflevel - 1);
-        ret = mg2d_init_guess(cp->solver, cp_coarse->solver->u, cp_coarse->solver->u_stride,
-                              cp_coarse->solver->local_start, cp_coarse->solver->local_size, cp_coarse->solver->step);
+        ret = guess_from_coarser(ms, cp, cp_coarse);
        if (ret < 0)
            CCTK_WARN(0, "Error setting the initial guess");
     }
-- 
cgit v1.2.3
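
Note on the exchange used by guess_from_coarser() above: it is an instance of a generic MPI pattern, one derived datatype per peer component plus a single MPI_Alltoallw call, with zero counts and dummy MPI_BYTE types for peers that contribute nothing. The self-contained sketch below shows only that pattern in isolation; every name in it (exchange_blocks, src_off, dst_off, sizes, nb_peers) is made up for illustration and is not part of the patch, and it assumes row-major arrays of doubles with strides and offsets given in elements.

/* Illustrative sketch only, not part of the patch: exchange one rectangular
 * sub-block of a strided 2D double array with each peer in comm, using one
 * MPI_Type_vector per peer and a single MPI_Alltoallw call.
 * nb_peers must equal the size of comm; error handling is omitted. */
#include <stdlib.h>
#include <mpi.h>

static void exchange_blocks(MPI_Comm comm, int nb_peers,
                            const double *src, int src_stride,
                            double *dst, int dst_stride,
                            const int (*src_off)[2], const int (*dst_off)[2],
                            const int (*sizes)[2])
{
    int          *scounts = calloc(nb_peers, sizeof(*scounts));
    int          *rcounts = calloc(nb_peers, sizeof(*rcounts));
    int          *sdispl  = calloc(nb_peers, sizeof(*sdispl));
    int          *rdispl  = calloc(nb_peers, sizeof(*rdispl));
    MPI_Datatype *stypes  = malloc(nb_peers * sizeof(*stypes));
    MPI_Datatype *rtypes  = malloc(nb_peers * sizeof(*rtypes));

    for (int p = 0; p < nb_peers; p++) {
        if (sizes[p][0] > 0 && sizes[p][1] > 0) {
            /* a sizes[p][1] x sizes[p][0] block out of a strided row-major array */
            MPI_Type_vector(sizes[p][1], sizes[p][0], src_stride, MPI_DOUBLE, &stypes[p]);
            MPI_Type_vector(sizes[p][1], sizes[p][0], dst_stride, MPI_DOUBLE, &rtypes[p]);
            MPI_Type_commit(&stypes[p]);
            MPI_Type_commit(&rtypes[p]);
            scounts[p] = rcounts[p] = 1;
            /* MPI_Alltoallw displacements are in bytes */
            sdispl[p] = (src_off[p][1] * src_stride + src_off[p][0]) * sizeof(double);
            rdispl[p] = (dst_off[p][1] * dst_stride + dst_off[p][0]) * sizeof(double);
        } else {
            /* nothing exchanged with this peer: zero count, dummy type */
            MPI_Type_dup(MPI_BYTE, &stypes[p]);
            MPI_Type_dup(MPI_BYTE, &rtypes[p]);
        }
    }

    MPI_Alltoallw(src, scounts, sdispl, stypes,
                  dst, rcounts, rdispl, rtypes, comm);

    for (int p = 0; p < nb_peers; p++) {
        MPI_Type_free(&stypes[p]);
        MPI_Type_free(&rtypes[p]);
    }
    free(scounts); free(rcounts); free(sdispl); free(rdispl);
    free(stypes);  free(rtypes);
}

A single collective with per-peer datatypes avoids explicit pack/unpack buffers and per-peer point-to-point bookkeeping, which is the same trade-off the patch makes for the u_guess exchange.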