summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnton Khirnov <anton@khirnov.net>2019-06-13 15:05:59 +0200
committerAnton Khirnov <anton@khirnov.net>2019-06-13 15:05:59 +0200
commit279b9f44f376c46927424c4f8cbd5197e4bd1575 (patch)
tree9df25cba4734775fec1ffa40afa3e13d5459172c
parentac042bca49f5b269ac3cb4d30c4a191158dc4f25 (diff)
Handle components that lie entirely in the buffer zone.
-rw-r--r--src/maximal_slicing_axi_mg.c196
1 files changed, 142 insertions, 54 deletions
diff --git a/src/maximal_slicing_axi_mg.c b/src/maximal_slicing_axi_mg.c
index 100f3c0..a3f8578 100644
--- a/src/maximal_slicing_axi_mg.c
+++ b/src/maximal_slicing_axi_mg.c
@@ -21,6 +21,7 @@
#define SQR(x) ((x) * (x))
#define ABS(x) ((x >= 0) ? (x) : -(x))
+#define MIN(x, y) ((x) > (y) ? (y) : (x))
#define ARRAY_ELEMS(x) (sizeof(x) / sizeof(*x))
#define CPINDEX(cp, i, j, k) ((k * cp->grid_size[1] + j) * cp->grid_size[0] + i)
@@ -65,9 +66,11 @@ typedef struct CoordPatch {
ptrdiff_t grid_size[3];
MG2DContext *solver;
+ MPI_Comm solver_comm;
int cur_step;
MG2DContext **solver_eval;
+ MPI_Comm *solver_eval_comm;
int nb_solver_eval;
ptrdiff_t y_idx;
@@ -158,9 +161,14 @@ static int ctz(int a)
static void coord_patch_free(CoordPatch *cp)
{
mg2d_solver_free(&cp->solver);
+ if (cp->solver_comm != MPI_COMM_NULL)
+ MPI_Comm_free(&cp->solver_comm);
- for (int i = 0; i < cp->nb_solver_eval; i++)
+ for (int i = 0; i < cp->nb_solver_eval; i++) {
mg2d_solver_free(&cp->solver_eval[i]);
+ if (cp->solver_eval_comm[i] != MPI_COMM_NULL)
+ MPI_Comm_free(&cp->solver_eval_comm[i]);
+ }
free(cp->solver_eval);
cp->solver_eval = NULL;
cp->nb_solver_eval = 0;
@@ -186,19 +194,21 @@ static void log_callback(const MG2DContext *ctx, int level,
vfprintf(stderr, fmt, vl);
}
-static MG2DContext *solver_alloc(MSMGContext *ms, int level, size_t domain_size,
+static MG2DContext *solver_alloc(MSMGContext *ms, int level,
size_t component_start[2], size_t component_size[2],
- double step)
+ double step, MPI_Comm comm)
{
const char *omp_threads = getenv("OMP_NUM_THREADS");
MG2DContext *solver;
- int multi_component = component_size[0] < domain_size || component_size[1] < domain_size;
- if (multi_component)
- solver = mg2d_solver_alloc_mpi(MPI_COMM_WORLD, component_start, component_size);
+ mg_assert(comm != MPI_COMM_NULL ||
+ (component_start[0] == 0 && component_start[1] == 0 && component_size[0] == component_size[1]));
+
+ if (comm != MPI_COMM_NULL)
+ solver = mg2d_solver_alloc_mpi(comm, component_start, component_size);
else
- solver = mg2d_solver_alloc(domain_size);
+ solver = mg2d_solver_alloc(component_size[0]);
if (!solver)
return NULL;
@@ -326,6 +336,10 @@ static CoordPatch *get_coord_patch(MSMGContext *ms, int level)
size_t *extents;
size_t interior_size[2], component_start[2], component_size[2];
+
+ int *solver_ranks;
+ int nb_solver_ranks;
+
int level_size[2];
int bnd_intercomp[2][2];
int integrator_substeps = MoLNumIntegratorSubsteps();
@@ -371,53 +385,63 @@ static CoordPatch *get_coord_patch(MSMGContext *ms, int level)
if (level_size[0] != level_size[1])
CCTK_WARN(0, "The refinement level is non-square, only square levels are supported");
- for (int dir = 0; dir < 2; dir++) {
- ptrdiff_t offset_right;
-
- interior_size[dir] = component_size[dir];
- if (cp->bnd_intercomp[dir][1])
- continue;
-
- offset_right = gh->cctk_nghostzones[2 * dir];
- /* account for grid tapering on refined levels */
- if (cp->level > 0)
- offset_right *= integrator_substeps * 2;
- ///* the outer boundary layer overlaps with the first ghost zone */
- //if (cp->level > 0)
- // offset_right++;
- if (cp->level > 0)
- offset_right--;
-
- interior_size[dir] = component_size[dir] - offset_right;
- /* the outer boundary layer overlaps with the first ghost zone */
- if (cp->level > 0)
- interior_size[dir]++;
- }
+ solver_ranks = calloc(nb_procs, sizeof(*solver_ranks));
+ if (!solver_ranks)
+ CCTK_WARN(0, "Error allocating solver ranks");
/* on the finest level we allocate a solver for each of the substeps
* between the coarser-grid timelevels */
if (level == ms->nb_levels - 1) {
cp->nb_solver_eval = 2 * integrator_substeps;
cp->solver_eval = calloc(cp->nb_solver_eval, sizeof(*cp->solver_eval));
- if (!cp->solver_eval)
+ cp->solver_eval_comm = calloc(cp->nb_solver_eval, sizeof(*cp->solver_eval_comm));
+ if (!cp->solver_eval || !cp->solver_eval_comm)
CCTK_WARN(0, "Error allocating solvers");
for (int step = 0; step < 2; step++)
for (int substep = 0; substep < integrator_substeps; substep++) {
- int solver_idx = step * integrator_substeps + substep;
+ int solver_idx = step * integrator_substeps + substep;
MG2DContext *s;
size_t size[2];
- for (int dir = 0; dir < 2; dir++) {
- size[dir] = component_size[dir];
- if (!cp->bnd_intercomp[dir][1])
- size[dir] -= (ms->fd_stencil - 1) + ms->gh->cctk_nghostzones[2 * dir] * solver_idx;
+ nb_solver_ranks = 0;
+ for (int proc = 0; proc < nb_procs; proc++) {
+ size_t size_tmp[2];
+ for (int dir = 0; dir < 2; dir++) {
+ size_t offset = (ms->fd_stencil - 1) + ms->gh->cctk_nghostzones[2 * dir] * solver_idx;
+ size_t start = extents[4 * proc + 0 + dir];
+ size_t end = MIN(start + extents[4 * proc + 2 + dir], level_size[dir] - offset);
+ size_tmp[dir] = end > start ? end - start : 0;
+ }
+
+ if (size_tmp[0] > 0 && size_tmp[1] > 0)
+ solver_ranks[nb_solver_ranks++] = proc;
+
+ if (proc == local_proc) {
+ size[0] = size_tmp[0];
+ size[1] = size_tmp[1];
+ }
}
- s = solver_alloc(ms, level, level_size[0], component_start, size, a_x[1] - a_x[0]);
- if (!s)
- CCTK_WARN(0, "Error allocating the solver");
- cp->solver_eval[solver_idx] = s;
+ if (nb_solver_ranks > 1) {
+ MPI_Group grp_world, grp_solver;
+
+ MPI_Comm_group(MPI_COMM_WORLD, &grp_world);
+ MPI_Group_incl(grp_world, nb_solver_ranks, solver_ranks, &grp_solver);
+ MPI_Comm_create(MPI_COMM_WORLD, grp_solver, &cp->solver_eval_comm[solver_idx]);
+ MPI_Group_free(&grp_solver);
+ MPI_Group_free(&grp_world);
+ } else
+ cp->solver_eval_comm[solver_idx] = MPI_COMM_NULL;
+
+ if (size[0] > 0 && size[1] > 0) {
+ s = solver_alloc(ms, level, component_start, size,
+ a_x[1] - a_x[0], cp->solver_eval_comm[solver_idx]);
+ if (!s)
+ CCTK_WARN(0, "Error allocating the solver");
+
+ cp->solver_eval[solver_idx] = s;
+ }
}
/* carpet does only syncs the domain interior, which includes the
@@ -546,11 +570,62 @@ static CoordPatch *get_coord_patch(MSMGContext *ms, int level)
}
- cp->solver = solver_alloc(ms, level, level_size[0], component_start,
- interior_size, a_x[1] - a_x[0]);
- if (!cp->solver)
- CCTK_WARN(0, "Error allocating the solver");
+ nb_solver_ranks = 0;
+ for (int proc = 0; proc < nb_procs; proc++) {
+ size_t size[2];
+
+ for (int dir = 0; dir < 2; dir++) {
+ size_t start = extents[4 * proc + 0 + dir];
+ size_t end = start + extents[4 * proc + 2 + dir];
+ ptrdiff_t offset_right;
+
+ offset_right = gh->cctk_nghostzones[2 * dir];
+ /* account for grid tapering on refined levels */
+ if (cp->level > 0)
+ offset_right *= integrator_substeps * 2;
+ ///* the outer boundary layer overlaps with the first ghost zone */
+ //if (cp->level > 0)
+ // offset_right++;
+ if (cp->level > 0)
+ offset_right--;
+ if (cp->level > 0)
+ offset_right--;
+
+ end = MIN(end, level_size[dir] - offset_right);
+ if (end > start)
+ size[dir] = end - start;
+ else
+ size[dir] = 0;
+ }
+ if (size[0] > 0 && size[1] > 0)
+ solver_ranks[nb_solver_ranks++] = proc;
+
+ if (proc == local_proc) {
+ interior_size[0] = size[0];
+ interior_size[1] = size[1];
+ }
+ }
+
+ if (nb_solver_ranks > 1) {
+ MPI_Group grp_world, grp_solver;
+
+ MPI_Comm_group(MPI_COMM_WORLD, &grp_world);
+ MPI_Group_incl(grp_world, nb_solver_ranks, solver_ranks, &grp_solver);
+ MPI_Comm_create(MPI_COMM_WORLD, grp_solver, &cp->solver_comm);
+ MPI_Group_free(&grp_solver);
+ MPI_Group_free(&grp_world);
+ } else
+ cp->solver_comm = MPI_COMM_NULL;
+
+ if (interior_size[0] > 0 && interior_size[1] > 0) {
+ cp->solver = solver_alloc(ms, level, component_start, interior_size,
+ a_x[1] - a_x[0], cp->solver_comm);
+ if (!cp->solver)
+ CCTK_WARN(0, "Error allocating the solver");
+ }
+
+ free(solver_ranks);
free(extents);
ms->nb_patches++;
@@ -599,9 +674,12 @@ static void print_stats(MSMGContext *ms)
snprintf(indent_buf, sizeof(indent_buf), " [%d] ", i);
- mg2d_print_stats(cp->solver, indent_buf);
+ if (cp->solver)
+ mg2d_print_stats(cp->solver, indent_buf);
for (int j = 0; j < cp->nb_solver_eval; j++) {
+ if (!cp->solver_eval[j])
+ continue;
snprintf(indent_buf, sizeof(indent_buf), " [%d/%d] ", i, j);
mg2d_print_stats(cp->solver_eval[j], indent_buf);
}
@@ -898,6 +976,8 @@ void msa_mg_eval(CCTK_ARGUMENTS)
solver_idx = (cp->cur_step & 1) * mol_substeps + (mol_substeps - *mol_step);
solver = cp->solver_eval[solver_idx];
+ if (!solver)
+ goto finish;
start = gettime();
fill_eq_coeffs(ms, cp, solver);
@@ -926,14 +1006,15 @@ void msa_mg_eval(CCTK_ARGUMENTS)
LOGDEBUG( "extrapolating BCs from t1=%g (%g) and t0=%g (%g)\n", lapse_prev1_time[reflevel], fact1, lapse_prev0_time[reflevel], fact0);
start = gettime();
- if (!cp->bnd_intercomp[1][1]) {
+
+ if (solver->local_start[1] + solver->local_size[1] == solver->domain_size) {
for (ptrdiff_t idx_z = cp->offset_left[1] + solver->local_size[1] - 1; idx_z < cctkGH->cctk_lsh[2]; idx_z++)
for (ptrdiff_t idx_x = 0; idx_x < cctkGH->cctk_lsh[0]; idx_x++) {
const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, idx_x, cp->y_idx, idx_z);
lapse_mg_eval[idx] = fact0 * lapse_prev0[idx] + fact1 * lapse_prev1[idx];
}
}
- if (!cp->bnd_intercomp[0][1]) {
+ if (solver->local_start[0] + solver->local_size[0] == solver->domain_size) {
for (ptrdiff_t idx_z = 0; idx_z < cctkGH->cctk_lsh[2]; idx_z++)
for (ptrdiff_t idx_x = cp->offset_left[0] + solver->local_size[0] - 1; idx_x < cctkGH->cctk_lsh[0]; idx_x++) {
const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, idx_x, cp->y_idx, idx_z);
@@ -941,7 +1022,7 @@ void msa_mg_eval(CCTK_ARGUMENTS)
}
}
- if (!cp->bnd_intercomp[1][1]) {
+ if (solver->local_start[1] + solver->local_size[1] == solver->domain_size) {
for (int j = 0; j < solver->fd_stencil; j++) {
MG2DBoundary *bnd = solver->boundaries[MG2D_BOUNDARY_1U];
double *dst = bnd->val + j * bnd->val_stride;
@@ -955,7 +1036,7 @@ void msa_mg_eval(CCTK_ARGUMENTS)
}
}
}
- if (!cp->bnd_intercomp[0][1]) {
+ if (solver->local_start[0] + solver->local_size[0] == solver->domain_size) {
for (int j = 0; j < solver->fd_stencil; j++) {
MG2DBoundary *bnd = solver->boundaries[MG2D_BOUNDARY_0U];
double *dst = bnd->val + j * bnd->val_stride;
@@ -1042,6 +1123,12 @@ void msa_mg_solve(CCTK_ARGUMENTS)
/* fill in the equation coefficients */
cp = get_coord_patch(ms, reflevel);
+
+ // this happens when this component contains only the buffer points,
+ // so we skip the solve and only fill in the extrapolation values
+ if (!cp->solver)
+ goto skip_solve;
+
start = gettime();
fill_eq_coeffs(ms, cp, cp->solver);
ms->time_solve_fill_coeffs += gettime() - start;
@@ -1068,7 +1155,7 @@ void msa_mg_solve(CCTK_ARGUMENTS)
LOGDEBUG( "extrapolating BCs from t1=%g (%g) and t0=%g (%g)\n", lapse_prev1_time[reflevel], fact1, lapse_prev0_time[reflevel], fact0);
- if (!cp->bnd_intercomp[1][1]) {
+ if (cp->solver->local_start[1] + cp->solver->local_size[1] == cp->solver->domain_size) {
for (int j = 0; j < cp->solver->fd_stencil; j++) {
for (ptrdiff_t i = -j; i < (ptrdiff_t)cp->solver->local_size[0] + j; i++) {
const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, i + cp->offset_left[0], cp->y_idx, cp->offset_left[1] + cp->solver->local_size[1] - 1 + j);
@@ -1076,7 +1163,7 @@ void msa_mg_solve(CCTK_ARGUMENTS)
}
}
}
- if (!cp->bnd_intercomp[0][1]) {
+ if (cp->solver->local_start[0] + cp->solver->local_size[0] == cp->solver->domain_size) {
for (int j = 0; j < cp->solver->fd_stencil; j++) {
for (ptrdiff_t i = -j; i < (ptrdiff_t)cp->solver->local_size[1] + j; i++) {
const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, cp->offset_left[0] + cp->solver->local_size[0] - 1 + j, cp->y_idx, cp->offset_left[1] + i);
@@ -1100,7 +1187,7 @@ void msa_mg_solve(CCTK_ARGUMENTS)
*/
/* fill the solver boundary conditions */
- if (!cp->bnd_intercomp[1][1]) {
+ if (cp->solver->local_start[1] + cp->solver->local_size[1] == cp->solver->domain_size) {
for (int j = 0; j < cp->solver->fd_stencil; j++) {
MG2DBoundary *bnd = cp->solver->boundaries[MG2D_BOUNDARY_1U];
double *dst = bnd->val + j * bnd->val_stride;
@@ -1110,7 +1197,7 @@ void msa_mg_solve(CCTK_ARGUMENTS)
}
}
}
- if (!cp->bnd_intercomp[0][1]) {
+ if (cp->solver->local_start[0] + cp->solver->local_size[0] == cp->solver->domain_size) {
for (int j = 0; j < cp->solver->fd_stencil; j++) {
MG2DBoundary *bnd = cp->solver->boundaries[MG2D_BOUNDARY_0U];
double *dst = bnd->val + j * bnd->val_stride;
@@ -1138,6 +1225,7 @@ void msa_mg_solve(CCTK_ARGUMENTS)
memcpy(lapse_mg1, lapse_mg, grid_size * sizeof(*lapse_mg1));
ms->time_solve_export += gettime() - start;
+skip_solve:
/* update lapse history for extrapolation */
if (reflevel == 0 || !(timestep % 2)) {
const double vel_fact = 1.0 / (1 << (reflevel - reflevel_top));
@@ -1249,7 +1337,7 @@ void maximal_slicing_axi_mg_modify_diss(CCTK_ARGUMENTS)
if (!cp->bnd_intercomp[0][1]) {
for (int idx_z = 0; idx_z < cp->grid_size[2]; idx_z++)
- for (int idx_x = cp->offset_left[0] + cp->solver->local_size[0] - (ms->fd_stencil + 1); idx_x < cp->grid_size[0]; idx_x++) {
+ for (int idx_x = cp->offset_left[0] + (cp->solver ? cp->solver->local_size[0] - (ms->fd_stencil + 1) : 0); idx_x < cp->grid_size[0]; idx_x++) {
const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z);
epsdis[idx_dst] = 0.0;
}
@@ -1257,7 +1345,7 @@ void maximal_slicing_axi_mg_modify_diss(CCTK_ARGUMENTS)
if (!cp->bnd_intercomp[1][1]) {
for (int idx_x = 0; idx_x < cp->grid_size[0]; idx_x++)
- for (int idx_z = cp->offset_left[0] + cp->solver->local_size[1] - (ms->fd_stencil + 1); idx_z < cp->grid_size[2]; idx_z++) {
+ for (int idx_z = cp->offset_left[0] + (cp->solver ? cp->solver->local_size[1] - (ms->fd_stencil + 1) : 0); idx_z < cp->grid_size[2]; idx_z++) {
const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z);
epsdis[idx_dst] = 0.0;
}