From 279b9f44f376c46927424c4f8cbd5197e4bd1575 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Thu, 13 Jun 2019 15:05:59 +0200 Subject: Handle components that lie entirely in the buffer zone. --- src/maximal_slicing_axi_mg.c | 196 +++++++++++++++++++++++++++++++------------ 1 file changed, 142 insertions(+), 54 deletions(-) diff --git a/src/maximal_slicing_axi_mg.c b/src/maximal_slicing_axi_mg.c index 100f3c0..a3f8578 100644 --- a/src/maximal_slicing_axi_mg.c +++ b/src/maximal_slicing_axi_mg.c @@ -21,6 +21,7 @@ #define SQR(x) ((x) * (x)) #define ABS(x) ((x >= 0) ? (x) : -(x)) +#define MIN(x, y) ((x) > (y) ? (y) : (x)) #define ARRAY_ELEMS(x) (sizeof(x) / sizeof(*x)) #define CPINDEX(cp, i, j, k) ((k * cp->grid_size[1] + j) * cp->grid_size[0] + i) @@ -65,9 +66,11 @@ typedef struct CoordPatch { ptrdiff_t grid_size[3]; MG2DContext *solver; + MPI_Comm solver_comm; int cur_step; MG2DContext **solver_eval; + MPI_Comm *solver_eval_comm; int nb_solver_eval; ptrdiff_t y_idx; @@ -158,9 +161,14 @@ static int ctz(int a) static void coord_patch_free(CoordPatch *cp) { mg2d_solver_free(&cp->solver); + if (cp->solver_comm != MPI_COMM_NULL) + MPI_Comm_free(&cp->solver_comm); - for (int i = 0; i < cp->nb_solver_eval; i++) + for (int i = 0; i < cp->nb_solver_eval; i++) { mg2d_solver_free(&cp->solver_eval[i]); + if (cp->solver_eval_comm[i] != MPI_COMM_NULL) + MPI_Comm_free(&cp->solver_eval_comm[i]); + } free(cp->solver_eval); cp->solver_eval = NULL; cp->nb_solver_eval = 0; @@ -186,19 +194,21 @@ static void log_callback(const MG2DContext *ctx, int level, vfprintf(stderr, fmt, vl); } -static MG2DContext *solver_alloc(MSMGContext *ms, int level, size_t domain_size, +static MG2DContext *solver_alloc(MSMGContext *ms, int level, size_t component_start[2], size_t component_size[2], - double step) + double step, MPI_Comm comm) { const char *omp_threads = getenv("OMP_NUM_THREADS"); MG2DContext *solver; - int multi_component = component_size[0] < domain_size || component_size[1] < domain_size; - if (multi_component) - solver = mg2d_solver_alloc_mpi(MPI_COMM_WORLD, component_start, component_size); + mg_assert(comm != MPI_COMM_NULL || + (component_start[0] == 0 && component_start[1] == 0 && component_size[0] == component_size[1])); + + if (comm != MPI_COMM_NULL) + solver = mg2d_solver_alloc_mpi(comm, component_start, component_size); else - solver = mg2d_solver_alloc(domain_size); + solver = mg2d_solver_alloc(component_size[0]); if (!solver) return NULL; @@ -326,6 +336,10 @@ static CoordPatch *get_coord_patch(MSMGContext *ms, int level) size_t *extents; size_t interior_size[2], component_start[2], component_size[2]; + + int *solver_ranks; + int nb_solver_ranks; + int level_size[2]; int bnd_intercomp[2][2]; int integrator_substeps = MoLNumIntegratorSubsteps(); @@ -371,53 +385,63 @@ static CoordPatch *get_coord_patch(MSMGContext *ms, int level) if (level_size[0] != level_size[1]) CCTK_WARN(0, "The refinement level is non-square, only square levels are supported"); - for (int dir = 0; dir < 2; dir++) { - ptrdiff_t offset_right; - - interior_size[dir] = component_size[dir]; - if (cp->bnd_intercomp[dir][1]) - continue; - - offset_right = gh->cctk_nghostzones[2 * dir]; - /* account for grid tapering on refined levels */ - if (cp->level > 0) - offset_right *= integrator_substeps * 2; - ///* the outer boundary layer overlaps with the first ghost zone */ - //if (cp->level > 0) - // offset_right++; - if (cp->level > 0) - offset_right--; - - interior_size[dir] = component_size[dir] - offset_right; - /* the outer boundary layer overlaps with the first ghost zone */ - if (cp->level > 0) - interior_size[dir]++; - } + solver_ranks = calloc(nb_procs, sizeof(*solver_ranks)); + if (!solver_ranks) + CCTK_WARN(0, "Error allocating solver ranks"); /* on the finest level we allocate a solver for each of the substeps * between the coarser-grid timelevels */ if (level == ms->nb_levels - 1) { cp->nb_solver_eval = 2 * integrator_substeps; cp->solver_eval = calloc(cp->nb_solver_eval, sizeof(*cp->solver_eval)); - if (!cp->solver_eval) + cp->solver_eval_comm = calloc(cp->nb_solver_eval, sizeof(*cp->solver_eval_comm)); + if (!cp->solver_eval || !cp->solver_eval_comm) CCTK_WARN(0, "Error allocating solvers"); for (int step = 0; step < 2; step++) for (int substep = 0; substep < integrator_substeps; substep++) { - int solver_idx = step * integrator_substeps + substep; + int solver_idx = step * integrator_substeps + substep; MG2DContext *s; size_t size[2]; - for (int dir = 0; dir < 2; dir++) { - size[dir] = component_size[dir]; - if (!cp->bnd_intercomp[dir][1]) - size[dir] -= (ms->fd_stencil - 1) + ms->gh->cctk_nghostzones[2 * dir] * solver_idx; + nb_solver_ranks = 0; + for (int proc = 0; proc < nb_procs; proc++) { + size_t size_tmp[2]; + for (int dir = 0; dir < 2; dir++) { + size_t offset = (ms->fd_stencil - 1) + ms->gh->cctk_nghostzones[2 * dir] * solver_idx; + size_t start = extents[4 * proc + 0 + dir]; + size_t end = MIN(start + extents[4 * proc + 2 + dir], level_size[dir] - offset); + size_tmp[dir] = end > start ? end - start : 0; + } + + if (size_tmp[0] > 0 && size_tmp[1] > 0) + solver_ranks[nb_solver_ranks++] = proc; + + if (proc == local_proc) { + size[0] = size_tmp[0]; + size[1] = size_tmp[1]; + } } - s = solver_alloc(ms, level, level_size[0], component_start, size, a_x[1] - a_x[0]); - if (!s) - CCTK_WARN(0, "Error allocating the solver"); - cp->solver_eval[solver_idx] = s; + if (nb_solver_ranks > 1) { + MPI_Group grp_world, grp_solver; + + MPI_Comm_group(MPI_COMM_WORLD, &grp_world); + MPI_Group_incl(grp_world, nb_solver_ranks, solver_ranks, &grp_solver); + MPI_Comm_create(MPI_COMM_WORLD, grp_solver, &cp->solver_eval_comm[solver_idx]); + MPI_Group_free(&grp_solver); + MPI_Group_free(&grp_world); + } else + cp->solver_eval_comm[solver_idx] = MPI_COMM_NULL; + + if (size[0] > 0 && size[1] > 0) { + s = solver_alloc(ms, level, component_start, size, + a_x[1] - a_x[0], cp->solver_eval_comm[solver_idx]); + if (!s) + CCTK_WARN(0, "Error allocating the solver"); + + cp->solver_eval[solver_idx] = s; + } } /* carpet does only syncs the domain interior, which includes the @@ -546,11 +570,62 @@ static CoordPatch *get_coord_patch(MSMGContext *ms, int level) } - cp->solver = solver_alloc(ms, level, level_size[0], component_start, - interior_size, a_x[1] - a_x[0]); - if (!cp->solver) - CCTK_WARN(0, "Error allocating the solver"); + nb_solver_ranks = 0; + for (int proc = 0; proc < nb_procs; proc++) { + size_t size[2]; + + for (int dir = 0; dir < 2; dir++) { + size_t start = extents[4 * proc + 0 + dir]; + size_t end = start + extents[4 * proc + 2 + dir]; + ptrdiff_t offset_right; + + offset_right = gh->cctk_nghostzones[2 * dir]; + /* account for grid tapering on refined levels */ + if (cp->level > 0) + offset_right *= integrator_substeps * 2; + ///* the outer boundary layer overlaps with the first ghost zone */ + //if (cp->level > 0) + // offset_right++; + if (cp->level > 0) + offset_right--; + if (cp->level > 0) + offset_right--; + + end = MIN(end, level_size[dir] - offset_right); + if (end > start) + size[dir] = end - start; + else + size[dir] = 0; + } + if (size[0] > 0 && size[1] > 0) + solver_ranks[nb_solver_ranks++] = proc; + + if (proc == local_proc) { + interior_size[0] = size[0]; + interior_size[1] = size[1]; + } + } + + if (nb_solver_ranks > 1) { + MPI_Group grp_world, grp_solver; + + MPI_Comm_group(MPI_COMM_WORLD, &grp_world); + MPI_Group_incl(grp_world, nb_solver_ranks, solver_ranks, &grp_solver); + MPI_Comm_create(MPI_COMM_WORLD, grp_solver, &cp->solver_comm); + MPI_Group_free(&grp_solver); + MPI_Group_free(&grp_world); + } else + cp->solver_comm = MPI_COMM_NULL; + + if (interior_size[0] > 0 && interior_size[1] > 0) { + cp->solver = solver_alloc(ms, level, component_start, interior_size, + a_x[1] - a_x[0], cp->solver_comm); + if (!cp->solver) + CCTK_WARN(0, "Error allocating the solver"); + } + + free(solver_ranks); free(extents); ms->nb_patches++; @@ -599,9 +674,12 @@ static void print_stats(MSMGContext *ms) snprintf(indent_buf, sizeof(indent_buf), " [%d] ", i); - mg2d_print_stats(cp->solver, indent_buf); + if (cp->solver) + mg2d_print_stats(cp->solver, indent_buf); for (int j = 0; j < cp->nb_solver_eval; j++) { + if (!cp->solver_eval[j]) + continue; snprintf(indent_buf, sizeof(indent_buf), " [%d/%d] ", i, j); mg2d_print_stats(cp->solver_eval[j], indent_buf); } @@ -898,6 +976,8 @@ void msa_mg_eval(CCTK_ARGUMENTS) solver_idx = (cp->cur_step & 1) * mol_substeps + (mol_substeps - *mol_step); solver = cp->solver_eval[solver_idx]; + if (!solver) + goto finish; start = gettime(); fill_eq_coeffs(ms, cp, solver); @@ -926,14 +1006,15 @@ void msa_mg_eval(CCTK_ARGUMENTS) LOGDEBUG( "extrapolating BCs from t1=%g (%g) and t0=%g (%g)\n", lapse_prev1_time[reflevel], fact1, lapse_prev0_time[reflevel], fact0); start = gettime(); - if (!cp->bnd_intercomp[1][1]) { + + if (solver->local_start[1] + solver->local_size[1] == solver->domain_size) { for (ptrdiff_t idx_z = cp->offset_left[1] + solver->local_size[1] - 1; idx_z < cctkGH->cctk_lsh[2]; idx_z++) for (ptrdiff_t idx_x = 0; idx_x < cctkGH->cctk_lsh[0]; idx_x++) { const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, idx_x, cp->y_idx, idx_z); lapse_mg_eval[idx] = fact0 * lapse_prev0[idx] + fact1 * lapse_prev1[idx]; } } - if (!cp->bnd_intercomp[0][1]) { + if (solver->local_start[0] + solver->local_size[0] == solver->domain_size) { for (ptrdiff_t idx_z = 0; idx_z < cctkGH->cctk_lsh[2]; idx_z++) for (ptrdiff_t idx_x = cp->offset_left[0] + solver->local_size[0] - 1; idx_x < cctkGH->cctk_lsh[0]; idx_x++) { const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, idx_x, cp->y_idx, idx_z); @@ -941,7 +1022,7 @@ void msa_mg_eval(CCTK_ARGUMENTS) } } - if (!cp->bnd_intercomp[1][1]) { + if (solver->local_start[1] + solver->local_size[1] == solver->domain_size) { for (int j = 0; j < solver->fd_stencil; j++) { MG2DBoundary *bnd = solver->boundaries[MG2D_BOUNDARY_1U]; double *dst = bnd->val + j * bnd->val_stride; @@ -955,7 +1036,7 @@ void msa_mg_eval(CCTK_ARGUMENTS) } } } - if (!cp->bnd_intercomp[0][1]) { + if (solver->local_start[0] + solver->local_size[0] == solver->domain_size) { for (int j = 0; j < solver->fd_stencil; j++) { MG2DBoundary *bnd = solver->boundaries[MG2D_BOUNDARY_0U]; double *dst = bnd->val + j * bnd->val_stride; @@ -1042,6 +1123,12 @@ void msa_mg_solve(CCTK_ARGUMENTS) /* fill in the equation coefficients */ cp = get_coord_patch(ms, reflevel); + + // this happens when this component contains only the buffer points, + // so we skip the solve and only fill in the extrapolation values + if (!cp->solver) + goto skip_solve; + start = gettime(); fill_eq_coeffs(ms, cp, cp->solver); ms->time_solve_fill_coeffs += gettime() - start; @@ -1068,7 +1155,7 @@ void msa_mg_solve(CCTK_ARGUMENTS) LOGDEBUG( "extrapolating BCs from t1=%g (%g) and t0=%g (%g)\n", lapse_prev1_time[reflevel], fact1, lapse_prev0_time[reflevel], fact0); - if (!cp->bnd_intercomp[1][1]) { + if (cp->solver->local_start[1] + cp->solver->local_size[1] == cp->solver->domain_size) { for (int j = 0; j < cp->solver->fd_stencil; j++) { for (ptrdiff_t i = -j; i < (ptrdiff_t)cp->solver->local_size[0] + j; i++) { const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, i + cp->offset_left[0], cp->y_idx, cp->offset_left[1] + cp->solver->local_size[1] - 1 + j); @@ -1076,7 +1163,7 @@ void msa_mg_solve(CCTK_ARGUMENTS) } } } - if (!cp->bnd_intercomp[0][1]) { + if (cp->solver->local_start[0] + cp->solver->local_size[0] == cp->solver->domain_size) { for (int j = 0; j < cp->solver->fd_stencil; j++) { for (ptrdiff_t i = -j; i < (ptrdiff_t)cp->solver->local_size[1] + j; i++) { const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, cp->offset_left[0] + cp->solver->local_size[0] - 1 + j, cp->y_idx, cp->offset_left[1] + i); @@ -1100,7 +1187,7 @@ void msa_mg_solve(CCTK_ARGUMENTS) */ /* fill the solver boundary conditions */ - if (!cp->bnd_intercomp[1][1]) { + if (cp->solver->local_start[1] + cp->solver->local_size[1] == cp->solver->domain_size) { for (int j = 0; j < cp->solver->fd_stencil; j++) { MG2DBoundary *bnd = cp->solver->boundaries[MG2D_BOUNDARY_1U]; double *dst = bnd->val + j * bnd->val_stride; @@ -1110,7 +1197,7 @@ void msa_mg_solve(CCTK_ARGUMENTS) } } } - if (!cp->bnd_intercomp[0][1]) { + if (cp->solver->local_start[0] + cp->solver->local_size[0] == cp->solver->domain_size) { for (int j = 0; j < cp->solver->fd_stencil; j++) { MG2DBoundary *bnd = cp->solver->boundaries[MG2D_BOUNDARY_0U]; double *dst = bnd->val + j * bnd->val_stride; @@ -1138,6 +1225,7 @@ void msa_mg_solve(CCTK_ARGUMENTS) memcpy(lapse_mg1, lapse_mg, grid_size * sizeof(*lapse_mg1)); ms->time_solve_export += gettime() - start; +skip_solve: /* update lapse history for extrapolation */ if (reflevel == 0 || !(timestep % 2)) { const double vel_fact = 1.0 / (1 << (reflevel - reflevel_top)); @@ -1249,7 +1337,7 @@ void maximal_slicing_axi_mg_modify_diss(CCTK_ARGUMENTS) if (!cp->bnd_intercomp[0][1]) { for (int idx_z = 0; idx_z < cp->grid_size[2]; idx_z++) - for (int idx_x = cp->offset_left[0] + cp->solver->local_size[0] - (ms->fd_stencil + 1); idx_x < cp->grid_size[0]; idx_x++) { + for (int idx_x = cp->offset_left[0] + (cp->solver ? cp->solver->local_size[0] - (ms->fd_stencil + 1) : 0); idx_x < cp->grid_size[0]; idx_x++) { const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z); epsdis[idx_dst] = 0.0; } @@ -1257,7 +1345,7 @@ void maximal_slicing_axi_mg_modify_diss(CCTK_ARGUMENTS) if (!cp->bnd_intercomp[1][1]) { for (int idx_x = 0; idx_x < cp->grid_size[0]; idx_x++) - for (int idx_z = cp->offset_left[0] + cp->solver->local_size[1] - (ms->fd_stencil + 1); idx_z < cp->grid_size[2]; idx_z++) { + for (int idx_z = cp->offset_left[0] + (cp->solver ? cp->solver->local_size[1] - (ms->fd_stencil + 1) : 0); idx_z < cp->grid_size[2]; idx_z++) { const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z); epsdis[idx_dst] = 0.0; } -- cgit v1.2.3