From fe88665e50aac0cdecc6d0b1e27a58bbdf937950 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Wed, 26 Jun 2019 17:47:06 +0200 Subject: Make initial guess interpolation work with MPI. --- src/maximal_slicing_axi_mg.c | 222 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 202 insertions(+), 20 deletions(-) diff --git a/src/maximal_slicing_axi_mg.c b/src/maximal_slicing_axi_mg.c index 9d32120..d9d1508 100644 --- a/src/maximal_slicing_axi_mg.c +++ b/src/maximal_slicing_axi_mg.c @@ -22,6 +22,7 @@ #define SQR(x) ((x) * (x)) #define ABS(x) ((x >= 0) ? (x) : -(x)) #define MIN(x, y) ((x) > (y) ? (y) : (x)) +#define MAX(x, y) ((x) > (y) ? (x) : (y)) #define ARRAY_ELEMS(x) (sizeof(x) / sizeof(*x)) #define CPINDEX(cp, i, j, k) ((k * cp->grid_size[1] + j) * cp->grid_size[0] + i) @@ -75,6 +76,9 @@ typedef struct CoordPatch { ptrdiff_t y_idx; + size_t *extents; + int nb_extents; + /* number of x/z grid points by which the elliptic solver domain is offset * from the cactus grid */ ptrdiff_t offset_left[2]; @@ -89,6 +93,21 @@ typedef struct CoordPatch { int *recvcounts; int *recvdispl; MPI_Datatype *recvtypes; + + /* MPI sync for the initial guess */ + ptrdiff_t (*solver_start)[2]; + size_t (*solver_size)[2]; + double *u_guess; + ptrdiff_t u_guess_stride; + ptrdiff_t u_guess_start[2]; + size_t u_guess_size[2]; + + int *u_guess_sendcounts; + int *u_guess_senddispl; + MPI_Datatype *u_guess_sendtypes; + int *u_guess_recvcounts; + int *u_guess_recvdispl; + MPI_Datatype *u_guess_recvtypes; } CoordPatch; typedef struct MSMGContext { @@ -173,6 +192,8 @@ static void coord_patch_free(CoordPatch *cp) cp->solver_eval = NULL; cp->nb_solver_eval = 0; + free(cp->extents); + if (cp->synctype) MPI_Type_free(&cp->synctype); free(cp->sendcounts); @@ -181,6 +202,26 @@ static void coord_patch_free(CoordPatch *cp) free(cp->recvcounts); free(cp->recvdispl); free(cp->recvtypes); + + if (cp->u_guess_sendtypes) { + for (int i = 0; i < cp->nb_components; i++) + if (cp->u_guess_sendtypes[i] != MPI_DATATYPE_NULL) + MPI_Type_free(&cp->u_guess_sendtypes[i]); + } + if (cp->u_guess_recvtypes) { + for (int i = 0; i < cp->nb_components; i++) + if (cp->u_guess_recvtypes[i] != MPI_DATATYPE_NULL) + MPI_Type_free(&cp->u_guess_recvtypes[i]); + } + free(cp->u_guess); + free(cp->u_guess_sendcounts); + free(cp->u_guess_senddispl); + free(cp->u_guess_sendtypes); + free(cp->u_guess_recvcounts); + free(cp->u_guess_recvdispl); + free(cp->u_guess_recvtypes); + free(cp->solver_size); + free(cp->solver_start); } static void log_callback(const MG2DContext *ctx, int level, @@ -342,7 +383,7 @@ static CoordPatch *get_coord_patch(MSMGContext *ms, int level) CoordPatch *cp; size_t *extents; - size_t interior_size[2], component_start[2], component_size[2]; + size_t component_start[2], component_size[2]; int *solver_ranks; int nb_solver_ranks; @@ -396,6 +437,12 @@ static CoordPatch *get_coord_patch(MSMGContext *ms, int level) if (!solver_ranks) CCTK_WARN(0, "Error allocating solver ranks"); + cp->nb_components = nb_procs; + cp->solver_size = calloc(nb_procs, sizeof(*cp->solver_size)); + cp->solver_start = calloc(nb_procs, sizeof(*cp->solver_start)); + if (!cp->solver_size || !cp->solver_start) + CCTK_WARN(0, "Error allocating solver sizes/starts"); + /* on the finest level we allocate a solver for each of the substeps * between the coarser-grid timelevels */ if (level == ms->nb_levels - 1) { @@ -470,7 +517,6 @@ static CoordPatch *get_coord_patch(MSMGContext *ms, int level) if (!cp->sendcounts || !cp->senddispl || !cp->sendtypes || !cp->recvcounts || !cp->recvdispl || !cp->recvtypes) CCTK_WARN(0, "Error allocating sync arrays"); - cp->nb_components = nb_procs; MPI_Type_vector(gz, gx, ms->gh->cctk_lsh[0], MPI_DOUBLE, &cp->synctype); MPI_Type_commit(&cp->synctype); @@ -599,19 +645,17 @@ static CoordPatch *get_coord_patch(MSMGContext *ms, int level) offset_right--; end = MIN(end, level_size[dir] - offset_right); - if (end > start) - size[dir] = end - start; - else - size[dir] = 0; + if (end > start) { + cp->solver_start[proc][dir] = start; + cp->solver_size[proc][dir] = end - start; + } else { + cp->solver_start[proc][dir] = 0; + cp->solver_size[proc][dir] = 0; + } } - if (size[0] > 0 && size[1] > 0) + if (cp->solver_size[proc][0] > 0 && cp->solver_size[proc][1] > 0) solver_ranks[nb_solver_ranks++] = proc; - - if (proc == local_proc) { - interior_size[0] = size[0]; - interior_size[1] = size[1]; - } } if (nb_solver_ranks > 1) { @@ -625,15 +669,17 @@ static CoordPatch *get_coord_patch(MSMGContext *ms, int level) } else cp->solver_comm = MPI_COMM_NULL; - if (interior_size[0] > 0 && interior_size[1] > 0) { - cp->solver = solver_alloc(ms, level, component_start, interior_size, + if (cp->solver_size[local_proc][0] > 0 && cp->solver_size[local_proc][1] > 0) { + cp->solver = solver_alloc(ms, level, cp->solver_start[local_proc], cp->solver_size[local_proc], a_x[1] - a_x[0], cp->solver_comm); if (!cp->solver) CCTK_WARN(0, "Error allocating the solver"); } free(solver_ranks); - free(extents); + + cp->extents = extents; + cp->nb_extents = nb_procs; ms->nb_patches++; return cp; @@ -1104,6 +1150,143 @@ finish: ms->count_eval++; } +typedef struct Rect { + ptrdiff_t start[2]; + size_t size[2]; +} Rect; + +static int rect_intersect(Rect *dst, const Rect *src1, const Rect *src2) +{ + ptrdiff_t intersect_start0 = MAX(src1->start[0], src2->start[0]); + ptrdiff_t intersect_start1 = MAX(src1->start[1], src2->start[1]); + ptrdiff_t intersect_end0 = MIN(src1->start[0] + src1->size[0], src2->start[0] + src2->size[0]); + ptrdiff_t intersect_end1 = MIN(src1->start[1] + src1->size[1], src2->start[1] + src2->size[1]); + + if (intersect_start0 < intersect_end0 && intersect_start1 < intersect_end1) { + dst->start[0] = intersect_start0; + dst->start[1] = intersect_start1; + dst->size[0] = intersect_end0 - intersect_start0; + dst->size[1] = intersect_end1 - intersect_start1; + + return 1; + } + + dst->size[0] = 0; + dst->size[1] = 0; + + return 0; +} + +static int guess_from_coarser(MSMGContext *ms, CoordPatch *cp, CoordPatch *cp_coarse) +{ + int ret; + + if (cp->nb_components > 1) { + int comp_fine, comp_coarse = 0; + + // FIXME skip levels with skipped components, + // those require somewhat more complex logic to implement + MPI_Comm_size(cp->solver_comm, &comp_fine); + if (cp_coarse->solver_comm != MPI_COMM_NULL) + MPI_Comm_size(cp_coarse->solver_comm, &comp_coarse); + if (comp_fine != cp->nb_components || comp_coarse != cp->nb_components) + return 0; + + if (!cp->u_guess) { + const int ghosts = cp->solver->fd_stencil + 1; + int local_proc = CCTK_MyProc(ms->gh); + + cp->u_guess_sendcounts = calloc(cp->nb_components, sizeof(*cp->u_guess_sendcounts)); + cp->u_guess_senddispl = calloc(cp->nb_components, sizeof(*cp->u_guess_senddispl)); + cp->u_guess_sendtypes = calloc(cp->nb_components, sizeof(*cp->u_guess_sendtypes)); + cp->u_guess_recvcounts = calloc(cp->nb_components, sizeof(*cp->u_guess_recvcounts)); + cp->u_guess_recvdispl = calloc(cp->nb_components, sizeof(*cp->u_guess_recvdispl)); + cp->u_guess_recvtypes = calloc(cp->nb_components, sizeof(*cp->u_guess_recvtypes)); + if (!cp->u_guess_sendcounts || !cp->u_guess_senddispl || !cp->u_guess_sendtypes || + !cp->u_guess_recvcounts || !cp->u_guess_recvdispl || !cp->u_guess_recvtypes) + return -ENOMEM; + + cp->u_guess_size[0] = cp->solver_size[local_proc][0] / 2 + 2 * ghosts; + cp->u_guess_size[1] = cp->solver_size[local_proc][1] / 2 + 2 * ghosts; + cp->u_guess_start[0] = cp->solver_start[local_proc][0] / 2 - ghosts; + cp->u_guess_start[1] = cp->solver_start[local_proc][1] / 2 - ghosts; + cp->u_guess_stride = cp->u_guess_size[0]; + cp->u_guess= calloc(cp->u_guess_stride * cp->u_guess_size[1], + sizeof(*cp->u_guess)); + if (!cp->u_guess) + return -ENOMEM; + + for (int comp_fine = 0; comp_fine < cp->nb_components; comp_fine++) { + Rect dst_fine = { + .start = { cp->solver_start[comp_fine][0], cp->solver_start[comp_fine][1]}, + .size = { cp->solver_size[comp_fine][0], cp->solver_size[comp_fine][1]}, + }; + Rect dst_coarse = { + .start = { dst_fine.start[0] / 2 - ghosts, dst_fine.start[1] / 2 - ghosts }, + .size = { dst_fine.size[0] / 2 + ghosts * 2, dst_fine.size[1] / 2 + ghosts * 2 }, + }; + + for (int comp_coarse = 0; comp_coarse < cp->nb_components; comp_coarse++) { + Rect overlap; + Rect src = { + .start = { cp_coarse->solver_start[comp_coarse][0], cp_coarse->solver_start[comp_coarse][1]}, + .size = { cp_coarse->solver_size[comp_coarse][0], cp_coarse->solver_size[comp_coarse][1]}, + }; + for (int dim = 0; dim < 2; dim++) { + if (src.start[dim] == 0) { + src.start[dim] -= ghosts; + src.size[dim] += ghosts; + } + } + + rect_intersect(&overlap, &dst_coarse, &src); + if (comp_fine == local_proc) { + if (overlap.size[0] > 0 && overlap.size[1] > 0) { + cp->u_guess_recvcounts[comp_coarse] = 1; + cp->u_guess_recvdispl[comp_coarse] = ((overlap.start[1] - dst_coarse.start[1]) * cp->u_guess_stride + + (overlap.start[0] - dst_coarse.start[0])) * sizeof(double); + MPI_Type_vector(overlap.size[1], overlap.size[0], cp->u_guess_stride, + MPI_DOUBLE, &cp->u_guess_recvtypes[comp_coarse]); + MPI_Type_commit(&cp->u_guess_recvtypes[comp_coarse]); + } else { + cp->u_guess_recvcounts[comp_coarse] = 0; + cp->u_guess_recvdispl[comp_coarse] = 0; + cp->u_guess_recvtypes[comp_coarse] = MPI_BYTE; + } + } + if (comp_coarse == local_proc) { + if (overlap.size[0] > 0 && overlap.size[1] > 0) { + ptrdiff_t src_origin[2] = { cp_coarse->solver_start[comp_coarse][0], cp_coarse->solver_start[comp_coarse][1]}; + cp->u_guess_sendcounts[comp_fine] = 1; + cp->u_guess_senddispl[comp_fine] = ((overlap.start[1] - src_origin[1]) * cp_coarse->solver->u_stride + + (overlap.start[0] - src_origin[0])) * sizeof(double); + MPI_Type_vector(overlap.size[1], overlap.size[0], cp_coarse->solver->u_stride, + MPI_DOUBLE, &cp->u_guess_sendtypes[comp_fine]); + MPI_Type_commit(&cp->u_guess_sendtypes[comp_fine]); + } else { + cp->u_guess_sendcounts[comp_fine] = 0; + cp->u_guess_senddispl[comp_fine] = 0; + cp->u_guess_sendtypes[comp_fine] = MPI_BYTE; + } + } + } + } + } + + MPI_Alltoallw(cp_coarse->solver->u, cp->u_guess_sendcounts, cp->u_guess_senddispl, cp->u_guess_sendtypes, + cp->u_guess, cp->u_guess_recvcounts, cp->u_guess_recvdispl, cp->u_guess_recvtypes, + MPI_COMM_WORLD); + ret = mg2d_init_guess(cp->solver, + cp->u_guess, cp->u_guess_stride, + cp->u_guess_start, cp->u_guess_size, cp_coarse->solver->step); + } else { + ret = mg2d_init_guess(cp->solver, cp_coarse->solver->u, cp_coarse->solver->u_stride, + cp_coarse->solver->local_start, cp_coarse->solver->local_size, cp_coarse->solver->step); + } + + return ret; +} + void msa_mg_solve(CCTK_ARGUMENTS) { DECLARE_CCTK_ARGUMENTS; @@ -1194,12 +1377,11 @@ void msa_mg_solve(CCTK_ARGUMENTS) } } else { /* use the solution from the coarser level as the initial guess */ - //CoordPatch *cp1 = get_coord_patch(ms, reflevel - 1); + CoordPatch *cp1 = get_coord_patch(ms, reflevel - 1); - //ret = mg2d_init_guess(cp->solver, cp1->solver->u, cp1->solver->u_stride, - // cp1->solver->local_size, cp1->solver->step); - //if (ret < 0) - // CCTK_WARN(0, "Error setting the initial guess"); + ret = guess_from_coarser(ms, cp, cp1); + if (ret < 0) + CCTK_WARN(0, "Error setting the initial guess"); } /* if the condition above was false, then lapse_mg should be filled by -- cgit v1.2.3