From dcae690175ce7f552a7cfc12dd96b85bf2bb08eb Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Fri, 27 Sep 2019 10:07:10 +0200 Subject: Revert "Make MPI work again after f60e4c9dfa675dd849ccb5830fabdbc5561562f3." This reverts commit 1d1492cf2fa724d3e8ab358ac3c7c70d4a85ed0f. --- schedule.ccl | 4 -- src/qms.c | 213 +++++++++++------------------------------------------------ 2 files changed, 38 insertions(+), 179 deletions(-) diff --git a/schedule.ccl b/schedule.ccl index 8ed8a1c..1f2b6d0 100644 --- a/schedule.ccl +++ b/schedule.ccl @@ -25,10 +25,6 @@ if (CCTK_EQUALS(lapse_source, "QMS_MG")) { SCHEDULE qms_mg_sync IN CCTK_POSTSTEP BEFORE qms_mg_solve { SYNC: W_val - SYNC: W_pred0 - SYNC: W_pred1 - SYNC: W_val0 - SYNC: W_val1 LANG: C } "" diff --git a/src/qms.c b/src/qms.c index 902492c..a82cc01 100644 --- a/src/qms.c +++ b/src/qms.c @@ -49,10 +49,8 @@ typedef struct CoordPatch { ptrdiff_t offset_left[2]; int bnd_intercomp[2][2]; - size_t *extents; - int nb_extents; - - size_t interior_size[2]; + double *interp_tmp0; + double *interp_tmp1; /* MPI sync for the initial guess */ int nb_components; @@ -69,20 +67,6 @@ typedef struct CoordPatch { int *u_guess_recvcounts; int *u_guess_recvdispl; MPI_Datatype *u_guess_recvtypes; - - /* MPI sync for the coarser-level prediction */ - double *interp_tmp[2]; - double *interp_tmp_sync; - ptrdiff_t interp_tmp_stride; - ptrdiff_t interp_tmp_start[2]; - size_t interp_tmp_size[2]; - - int *interp_tmp_sendcounts; - int *interp_tmp_senddispl; - MPI_Datatype *interp_tmp_sendtypes; - int *interp_tmp_recvcounts; - int *interp_tmp_recvdispl; - MPI_Datatype *interp_tmp_recvtypes; } CoordPatch; typedef struct QMSMGContext { @@ -189,14 +173,15 @@ static void log_callback(const MG2DContext *ctx, int level, * differently. Since Cactus only allows distinguishing between "physical" and * "all other" boundaries, we need to compute the extents manually. * - * @param pextents On return will contain a nProcs*4-sized array, where for each - * i from 0 to nProcs, elements [i * 4 + 0/1] contain the index of the x/z - * component origin, while elements [i * 4 + 2/3] contain the number of points - * owned by that component along x/z dimension. - * * @param level_size will contain the number of points in x/z directions on the * complete refinement level, including all the ghost zones up to the outer * level boundary. + * + * @param component_start indices of the component origin in the refinement level + * + * @param component_size number of points owned by this component; when the + * component is on the outer level boundary, this includes all the ghost zones + * up to the outer boundary. */ static void get_extents(size_t **pextents, int *level_size, cGH *gh) { @@ -412,21 +397,16 @@ static int alloc_coord_patch(QMSMGContext *ms, int level) } } - cp->interp_tmp[0] = calloc(cp->grid_size[0] * cp->grid_size[2], - sizeof(*cp->interp_tmp[0])); - cp->interp_tmp[1] = calloc(cp->grid_size[0] * cp->grid_size[2], - sizeof(*cp->interp_tmp[1])); - if (!cp->interp_tmp[0] || !cp->interp_tmp[1]) - CCTK_WARN(0, "Error allocating interp arrays"); - - cp->extents = extents; - cp->nb_extents = nb_procs; - - cp->interior_size[0] = level_size[0]; - cp->interior_size[1] = level_size[1]; + if (level > 0) { + cp->interp_tmp0 = calloc(cp->grid_size[0] * cp->grid_size[2], sizeof(*cp->interp_tmp0)); + cp->interp_tmp1 = calloc(cp->grid_size[0] * cp->grid_size[2], sizeof(*cp->interp_tmp1)); + if (!cp->interp_tmp0 || !cp->interp_tmp1) + CCTK_WARN(0, "Error allocating arrays"); + } finish: free(solver_ranks); + free(extents); ms->nb_patches++; return 0; @@ -444,8 +424,6 @@ static void coord_patch_free(CoordPatch **pcp) if (cp->solver_comm != MPI_COMM_NULL) MPI_Comm_free(&cp->solver_comm); - free(cp->extents); - free(cp); *pcp = NULL; } @@ -1275,121 +1253,6 @@ static int guess_from_coarser(QMSMGContext *ms, CoordPatch *cp, CoordPatch *cp_c return ret; } -static int interp_coarse(QMSMGContext *ms, CoordPatch *cp, double *dst, - CoordPatch *cp_coarse, const double *src) -{ - int ret; - - if (cp->nb_components > 1) { - const int local_proc = CCTK_MyProc(ms->gh); - int comp_fine, comp_coarse = 1; - - MPI_Comm_size(cp->solver_comm, &comp_fine); - if (cp_coarse->solver_comm != MPI_COMM_NULL) - MPI_Comm_size(cp_coarse->solver_comm, &comp_coarse); - - qms_assert(comp_fine == cp->nb_components && comp_coarse == cp->nb_components); - - if (!cp->interp_tmp_sync) { - const int ghosts = ms->gh->cctk_nghostzones[0]; - - cp->interp_tmp_sendcounts = calloc(cp->nb_components, sizeof(*cp->interp_tmp_sendcounts)); - cp->interp_tmp_senddispl = calloc(cp->nb_components, sizeof(*cp->interp_tmp_senddispl)); - cp->interp_tmp_sendtypes = calloc(cp->nb_components, sizeof(*cp->interp_tmp_sendtypes)); - cp->interp_tmp_recvcounts = calloc(cp->nb_components, sizeof(*cp->interp_tmp_recvcounts)); - cp->interp_tmp_recvdispl = calloc(cp->nb_components, sizeof(*cp->interp_tmp_recvdispl)); - cp->interp_tmp_recvtypes = calloc(cp->nb_components, sizeof(*cp->interp_tmp_recvtypes)); - if (!cp->interp_tmp_sendcounts || !cp->interp_tmp_senddispl || !cp->interp_tmp_sendtypes || - !cp->interp_tmp_recvcounts || !cp->interp_tmp_recvdispl || !cp->interp_tmp_recvtypes) - return -ENOMEM; - - cp->interp_tmp_start[0] = (ptrdiff_t)cp->extents[4 * local_proc + 0] / 2 - ghosts; - cp->interp_tmp_start[1] = (ptrdiff_t)cp->extents[4 * local_proc + 1] / 2 - ghosts; - cp->interp_tmp_size[0] = cp->extents[4 * local_proc + 2] / 2 + 2 * ghosts; - cp->interp_tmp_size[1] = cp->extents[4 * local_proc + 3] / 2 + 2 * ghosts; - cp->interp_tmp_stride = cp->interp_tmp_size[0]; - - cp->interp_tmp_sync = calloc(cp->interp_tmp_stride * cp->interp_tmp_size[1], - sizeof(*cp->interp_tmp_sync)); - if (!cp->interp_tmp_sync) - return -ENOMEM; - - for (int comp_fine = 0; comp_fine < cp->nb_components; comp_fine++) { - Rect dst_fine = { - .start = { cp->extents[4 * comp_fine + 0], cp->extents[4 * comp_fine + 1]}, - .size = { cp->extents[4 * comp_fine + 2], cp->extents[4 * comp_fine + 3]}, - }; - Rect dst_coarse = { - .start = { dst_fine.start[0] / 2 - ghosts, dst_fine.start[1] / 2 - ghosts }, - .size = { dst_fine.size[0] / 2 + ghosts * 2, dst_fine.size[1] / 2 + ghosts * 2 }, - }; - - for (int comp_coarse = 0; comp_coarse < cp_coarse->nb_components; comp_coarse++) { - Rect overlap; - Rect src = { - .start = { cp_coarse->extents[4 * comp_coarse + 0], cp_coarse->extents[4 * comp_coarse + 1]}, - .size = { cp_coarse->extents[4 * comp_coarse + 2], cp_coarse->extents[4 * comp_coarse + 3]}, - }; - for (int dim = 0; dim < 2; dim++) { - if (src.start[dim] == 0) { - src.start[dim] -= ghosts; - src.size[dim] += ghosts; - } - } - - rect_intersect(&overlap, &dst_coarse, &src); - if (comp_fine == local_proc) { - if (overlap.size[0] > 0 && overlap.size[1] > 0) { - cp->interp_tmp_recvcounts[comp_coarse] = 1; - cp->interp_tmp_recvdispl[comp_coarse] = ((overlap.start[1] - dst_coarse.start[1]) * cp->interp_tmp_stride + - (overlap.start[0] - dst_coarse.start[0])) * sizeof(double); - MPI_Type_vector(overlap.size[1], overlap.size[0], cp->interp_tmp_stride, - MPI_DOUBLE, &cp->interp_tmp_recvtypes[comp_coarse]); - MPI_Type_commit(&cp->interp_tmp_recvtypes[comp_coarse]); - } else { - cp->interp_tmp_recvcounts[comp_coarse] = 0; - cp->interp_tmp_recvdispl[comp_coarse] = 0; - MPI_Type_dup(MPI_BYTE, &cp->interp_tmp_recvtypes[comp_coarse]); - } - } - if (comp_coarse == local_proc) { - if (overlap.size[0] > 0 && overlap.size[1] > 0) { - ptrdiff_t src_origin[2] = { cp_coarse->extents[4 * comp_coarse + 0] - cp_coarse->offset_left[0], - cp_coarse->extents[4 * comp_coarse + 1] - cp_coarse->offset_left[1] }; - cp->interp_tmp_sendcounts[comp_fine] = 1; - cp->interp_tmp_senddispl[comp_fine] = ((overlap.start[1] - src_origin[1]) * cp_coarse->grid_size[0] + - (overlap.start[0] - src_origin[0])) * sizeof(double); - MPI_Type_vector(overlap.size[1], overlap.size[0], cp_coarse->grid_size[0], - MPI_DOUBLE, &cp->interp_tmp_sendtypes[comp_fine]); - MPI_Type_commit(&cp->interp_tmp_sendtypes[comp_fine]); - } else { - cp->interp_tmp_sendcounts[comp_fine] = 0; - cp->interp_tmp_senddispl[comp_fine] = 0; - MPI_Type_dup(MPI_BYTE, &cp->interp_tmp_sendtypes[comp_fine]); - } - } - } - } - } - - MPI_Alltoallw(src, cp->interp_tmp_sendcounts, cp->interp_tmp_senddispl, cp->interp_tmp_sendtypes, - cp->interp_tmp_sync, cp->interp_tmp_recvcounts, cp->interp_tmp_recvdispl, cp->interp_tmp_recvtypes, - MPI_COMM_WORLD); - ret = mg2d_interp(cp->solver, dst + CCTK_GFINDEX3D(ms->gh, cp->offset_left[0], 0, cp->offset_left[1]), - cp->grid_size[0], (ptrdiff_t [2]){ cp->extents[4 * local_proc + 0], cp->extents[4 * local_proc + 1] }, - (size_t [2]){cp->extents[4 * local_proc + 2], cp->extents[4 * local_proc + 3] }, cp->solver->step, - cp->interp_tmp_sync, cp->interp_tmp_stride, cp->interp_tmp_start, cp->interp_tmp_size, cp_coarse->solver->step); - } else { - ret = mg2d_interp(cp->solver, dst + CCTK_GFINDEX3D(ms->gh, cp->offset_left[0], 0, cp->offset_left[1]), - cp->grid_size[0], (ptrdiff_t [2]){ 0, 0 }, (size_t [2]){cp->grid_size[0] - cp->offset_left[0], cp->grid_size[2] - cp->offset_left[1] }, - cp->solver->step, - src, cp_coarse->grid_size[0], (ptrdiff_t [2]){ -cp_coarse->offset_left[0], -cp_coarse->offset_left[1] }, - (size_t [2]){cp_coarse->grid_size[0], cp_coarse->grid_size[2] }, cp_coarse->solver->step); - } - - return ret; -} - static double smooth_step(double x) { if (x <= 0.0) @@ -1506,14 +1369,9 @@ void qms_mg_solve(CCTK_ARGUMENTS) skip_solve: start = gettime(); - { + if (reflevel >= ms->solve_level_max) { double *W_val_tl1 = CCTK_VarDataPtr(cctkGH, 1, "QuasiMaximalSlicingMG::W_val"); - - if (reflevel >= ms->solve_level_max) - solution_to_grid(cp, cp->solver, W_val); - else - memset(W_val, 0, sizeof(*W_val) * grid_size); - + solution_to_grid(cp, cp->solver, W_val); memcpy(W_val_tl1, W_val, grid_size * sizeof(*W_val_tl1)); } @@ -1543,8 +1401,6 @@ skip_solve: /* linearly extrapolate the past two solution to predict the next one */ { - const int local_proc = CCTK_MyProc(ms->gh); - const double time_src_0 = W_val0_time[reflevel]; const double time_src_1 = W_val1_time[reflevel]; const double pred_time = time_src_1 + dt; @@ -1557,16 +1413,15 @@ skip_solve: if (reflevel > 0 && reflevel <= ms->solve_level) { CoordPatch *cp_coarse = get_coord_patch(ms, reflevel - 1); + double *W_pred0_coarse = VarDataPtrI(ms->gh, 0, reflevel - 1, 0, 0, CCTK_VarIndex("QuasiMaximalSlicingMG::W_pred0")); + double *W_pred1_coarse = VarDataPtrI(ms->gh, 0, reflevel - 1, 0, 0, CCTK_VarIndex("QuasiMaximalSlicingMG::W_pred1")); + const int integrator_substeps = MoLNumIntegratorSubsteps(); + const int interior_size = cctk_lsh[0] - cp->offset_left[0] - cctk_nghostzones[0] * integrator_substeps * 2; - const ptrdiff_t bnd_idx = cp->interior_size[0] - cctk_nghostzones[0] * integrator_substeps * 2 - 1; - const double bnd_loc = bnd_idx * cp->solver->step[0]; + const double bnd_loc = x[CCTK_GFINDEX3D(ms->gh, cp->offset_left[0] + interior_size, 0, 0)]; const double transition_start = bnd_loc * 0.7; - const ptrdiff_t smooth_end[2] = { MIN(cp->extents[local_proc * 4 + 0] + cp->extents[local_proc * 4 + 2], bnd_idx), - MIN(cp->extents[local_proc * 4 + 1] + cp->extents[local_proc * 4 + 3], bnd_idx) }; - const ptrdiff_t smooth_size[2] = { smooth_end[0] - cp->extents[local_proc * 4 + 0], smooth_end[1] - cp->extents[local_proc * 4 + 1] }; - double tp0 = W_pred0_time[reflevel - 1]; double tp1 = W_pred1_time[reflevel - 1]; @@ -1575,18 +1430,24 @@ skip_solve: qms_assert(pred_time >= tp0 && pred_time <= tp1); - interp_coarse(ms, cp, cp->interp_tmp[0], cp_coarse, - VarDataPtrI(ms->gh, 0, reflevel - 1, local_proc, 0, CCTK_VarIndex("QuasiMaximalSlicingMG::W_pred0"))); - interp_coarse(ms, cp, cp->interp_tmp[1], cp_coarse, - VarDataPtrI(ms->gh, 0, reflevel - 1, local_proc, 0, CCTK_VarIndex("QuasiMaximalSlicingMG::W_pred1"))); + mg2d_interp(cp->solver, cp->interp_tmp0 + CCTK_GFINDEX3D(ms->gh, cp->offset_left[0], 0, cp->offset_left[1]), + cctk_lsh[0], (ptrdiff_t [2]){ 0, 0 }, (size_t [2]){cctk_lsh[0] - cp->offset_left[0], cctk_lsh[2] - cp->offset_left[1] }, + cp->solver->step, + W_pred0_coarse, cp_coarse->grid_size[0], (ptrdiff_t [2]){ -cp_coarse->offset_left[0], -cp_coarse->offset_left[1] }, + (size_t [2]){cp_coarse->grid_size[0], cp_coarse->grid_size[2] }, cp_coarse->solver->step); + mg2d_interp(cp->solver, cp->interp_tmp1 + CCTK_GFINDEX3D(ms->gh, cp->offset_left[0], 0, cp->offset_left[1]), + cctk_lsh[0], (ptrdiff_t [2]){ 0, 0 }, (size_t [2]){cctk_lsh[0] - cp->offset_left[0], cctk_lsh[2] - cp->offset_left[1] }, + cp->solver->step, + W_pred1_coarse, cp_coarse->grid_size[0], (ptrdiff_t [2]){ -cp_coarse->offset_left[0], -cp_coarse->offset_left[1] }, + (size_t [2]){cp_coarse->grid_size[0], cp_coarse->grid_size[2] }, cp_coarse->solver->step); #pragma omp parallel for for (int i = 0; i < grid_size; i++) - W_pred1[i] = factp0 * cp->interp_tmp[0][i] + factp1 * cp->interp_tmp[1][i]; + W_pred1[i] = factp0 * cp->interp_tmp0[i] + factp1 * cp->interp_tmp1[i]; #pragma omp parallel for - for (int idx_z = 0; idx_z < smooth_size[1]; idx_z++) - for (int idx_x = 0; idx_x < smooth_size[0]; idx_x++) { + for (int idx_z = 0; idx_z < interior_size; idx_z++) + for (int idx_x = 0; idx_x < interior_size; idx_x++) { const ptrdiff_t idx = CCTK_GFINDEX3D(ms->gh, cp->offset_left[0] + idx_x, 0, cp->offset_left[1] + idx_z); @@ -1603,7 +1464,9 @@ skip_solve: } mirror(cp, W_pred1); } else if (reflevel == 0) { - const double bnd_loc = (cp->interior_size[0] - 1) * cp->solver->step[0]; + const int interior_size = cctk_lsh[0] - cp->offset_left[0] - cctk_nghostzones[0]; + + const double bnd_loc = x[CCTK_GFINDEX3D(ms->gh, cp->offset_left[0] + interior_size, 0, 0)]; const double transition_start = bnd_loc * 0.7; #pragma omp parallel for -- cgit v1.2.3