summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnton Khirnov <anton@khirnov.net>2019-05-24 13:08:08 +0200
committerAnton Khirnov <anton@khirnov.net>2019-06-02 17:34:48 +0200
commit1afea75597177b736429f8f6368bb5ecd0f0a389 (patch)
tree9074692724c4c28e98e5e18b7b39183b30f35f0c
parente6a0b26fdbbf2d7669de937c67c8da48ce07cc56 (diff)
Add support for MPI.
-rw-r--r--configuration.ccl1
-rw-r--r--interface.ccl4
-rw-r--r--schedule.ccl3
-rw-r--r--src/maximal_slicing_axi_mg.c557
4 files changed, 436 insertions, 129 deletions
diff --git a/configuration.ccl b/configuration.ccl
index f124283..4a3a32b 100644
--- a/configuration.ccl
+++ b/configuration.ccl
@@ -1,2 +1,3 @@
# Configuration definition for thorn MaximalSlicingAxiMG
+REQUIRES MPI
diff --git a/interface.ccl b/interface.ccl
index 124823e..8ebef12 100644
--- a/interface.ccl
+++ b/interface.ccl
@@ -19,5 +19,5 @@ REAL lapse_mg TYPE=GF TIMELEVELS=2
REAL lapse_mg_eval TYPE=GF tags='Prolongation="None"'
REAL lapse_prev0 TYPE=GF tags='Prolongation="None"'
REAL lapse_prev1 TYPE=GF tags='Prolongation="None"'
-REAL lapse_prev0_time TYPE=ARRAY DIM=1 SIZE=32
-REAL lapse_prev1_time TYPE=ARRAY DIM=1 SIZE=32
+REAL lapse_prev0_time TYPE=ARRAY DIM=1 SIZE=32 DISTRIB=constant
+REAL lapse_prev1_time TYPE=ARRAY DIM=1 SIZE=32 DISTRIB=constant
diff --git a/schedule.ccl b/schedule.ccl
index 673a151..08327e9 100644
--- a/schedule.ccl
+++ b/schedule.ccl
@@ -14,6 +14,7 @@ if (CCTK_Equals(lapse_evolution_method, "maximal_axi_mg")) {
} ""
SCHEDULE msa_mg_eval IN MoL_CalcRHS BEFORE ML_BSSN_evolCalcGroup {
+ SYNC: ML_lapse
LANG: C
} ""
@@ -23,6 +24,8 @@ if (CCTK_Equals(lapse_evolution_method, "maximal_axi_mg")) {
} ""
SCHEDULE msa_mg_solve IN CCTK_POSTSTEP {
+ SYNC: ML_lapse
+ SYNC: lapse_mg
LANG: C
} ""
diff --git a/src/maximal_slicing_axi_mg.c b/src/maximal_slicing_axi_mg.c
index 69832cf..cf56fb9 100644
--- a/src/maximal_slicing_axi_mg.c
+++ b/src/maximal_slicing_axi_mg.c
@@ -9,6 +9,7 @@
#include <string.h>
#include <mg2d.h>
+#include <mg2d_boundary.h>
#include "cctk.h"
#include "cctk_Arguments.h"
@@ -16,6 +17,8 @@
#include "cctk_Timers.h"
#include "util_Table.h"
+#include <mpi.h>
+
#define SQR(x) ((x) * (x))
#define ABS(x) ((x >= 0) ? (x) : -(x))
#define ARRAY_ELEMS(x) (sizeof(x) / sizeof(*x))
@@ -28,6 +31,14 @@
#define LOGDEBUG
#endif
+#define mg_assert(x) \
+do { \
+ if (!(x)) { \
+ fprintf(stderr, "Assertion " #x " failed\n"); \
+ abort(); \
+ } \
+} while (0)
+
static const struct {
const char *str;
enum MG2DLogLevel level;
@@ -64,6 +75,17 @@ typedef struct CoordPatch {
/* number of x/z grid points by which the elliptic solver domain is offset
* from the cactus grid */
ptrdiff_t offset_left[2];
+ int bnd_intercomp[2][2];
+
+ /* MPI sync for the outer ghostzones */
+ MPI_Datatype synctype;
+ int nb_components;
+ int *sendcounts;
+ int *senddispl;
+ MPI_Datatype *sendtypes;
+ int *recvcounts;
+ int *recvdispl;
+ MPI_Datatype *recvtypes;
} CoordPatch;
typedef struct MSMGContext {
@@ -112,6 +134,8 @@ typedef struct MSMGContext {
int64_t time_fine_mg2d;
int64_t time_fine_export;
int64_t time_init_guess_eval;
+
+ int64_t time_mpi_sync;
} MSMGContext;
static MSMGContext *ms;
@@ -140,6 +164,15 @@ static void coord_patch_free(CoordPatch *cp)
free(cp->solver_eval);
cp->solver_eval = NULL;
cp->nb_solver_eval = 0;
+
+ if (cp->synctype)
+ MPI_Type_free(&cp->synctype);
+ free(cp->sendcounts);
+ free(cp->senddispl);
+ free(cp->sendtypes);
+ free(cp->recvcounts);
+ free(cp->recvdispl);
+ free(cp->recvtypes);
}
static void log_callback(const MG2DContext *ctx, int level,
@@ -154,13 +187,18 @@ static void log_callback(const MG2DContext *ctx, int level,
}
static MG2DContext *solver_alloc(MSMGContext *ms, int level, size_t domain_size,
- double step)
+ size_t component_start[2], size_t component_size[2],
+ double step)
{
const char *omp_threads = getenv("OMP_NUM_THREADS");
MG2DContext *solver;
+ int multi_component = component_size[0] < domain_size || component_size[1] < domain_size;
- solver = mg2d_solver_alloc(domain_size);
+ if (multi_component)
+ solver = mg2d_solver_alloc_mpi(MPI_COMM_WORLD, component_start, component_size);
+ else
+ solver = mg2d_solver_alloc(domain_size);
if (!solver)
return NULL;
@@ -196,25 +234,100 @@ static MG2DContext *solver_alloc(MSMGContext *ms, int level, size_t domain_size,
/* initialize boundary values to zero,
* non-zero values on the outer boundaries of refined levels are filled in elsewhere */
for (int i = 0; i < 4; i++) {
- for (size_t j = 0; j < solver->fd_stencil; j++)
+ int ci = mg2d_bnd_coord_idx(i);
+ for (size_t j = 0; j < solver->fd_stencil; j++) {
memset(solver->boundaries[i]->val + j * solver->boundaries[i]->val_stride - j,
- 0, sizeof(*solver->boundaries[i]->val) * (domain_size + 2 * j));
+ 0, sizeof(*solver->boundaries[i]->val) * (component_size[!ci] + 2 * j));
+ }
}
return solver;
}
+/**
+ * Compute the refinement level geometry.
+ *
+ * The level is assumed to be a single square patch that is composed of
+ * nProcs components. The lower x and z boundaries are assumed to be reflection.
+ * For our purposes, both the physical outer boundaries and the refinement
+ * boundaries are equivalent, while the intercomponent boundaries are treated
+ * differently. Since Cactus only allows distinguishing between "physical" and
+ * "all other" boundaries, we need to compute the extents manually.
+ *
+ * @param level_size will contain the number of points in x/z directions on the
+ * complete refinement level, including all the ghost zones up to the outer
+ * level boundary.
+ *
+ * @param component_start indices of the component origin in the refinement level
+ *
+ * @param component_size number of points owned by this component; when the
+ * component is on the outer level boundary, this includes all the ghost zones
+ * up to the outer boundary.
+ */
+static void get_extents(size_t **pextents, int *level_size, cGH *gh)
+{
+ int nb_procs = CCTK_nProcs(gh);
+ int local_proc = CCTK_MyProc(gh);
+
+ size_t *extents;
+
+ extents = calloc(nb_procs, 4 * sizeof(*extents));
+ if (!extents)
+ CCTK_WARN(0, "Error allocating the extents");
+
+ for (int dir = 0; dir < 2; dir++) {
+ extents[4 * local_proc + 0 + dir] = gh->cctk_lbnd[2 * dir];
+ extents[4 * local_proc + 2 + dir] = gh->cctk_lsh[2 * dir] - gh->cctk_nghostzones[2 * dir];
+ }
+
+ if (nb_procs > 1)
+ MPI_Allgather(MPI_IN_PLACE, 0, NULL, extents, 4 * sizeof(*extents), MPI_BYTE, MPI_COMM_WORLD);
+
+ level_size[0] = 0;
+ level_size[1] = 0;
+
+ for (int dir = 0; dir < 2; dir++) {
+ for (int comp = 0; comp < nb_procs; comp++) {
+ size_t start = extents[4 * comp + 0 + dir];
+ size_t size = extents[4 * comp + 2 + dir];
+ if (start + size > level_size[dir])
+ level_size[dir] = start + size;
+ }
+
+ // if the upper boundary is an inter-component boundary, substract the ghost zones
+ for (int comp = 0; comp < nb_procs; comp++) {
+ size_t start = extents[4 * comp + 0 + dir];
+ size_t size = extents[4 * comp + 2 + dir];
+
+ if (start + size < level_size[dir])
+ extents[4 * comp + 2 + dir] -= gh->cctk_nghostzones[2 * dir];
+ }
+ }
+
+ *pextents = extents;
+}
+
static CoordPatch *get_coord_patch(MSMGContext *ms, int level)
{
cGH *gh = ms->gh;
+ const int nb_procs = CCTK_nProcs(gh);
+ const int local_proc = CCTK_MyProc(gh);
+
const size_t grid_size = gh->cctk_lsh[2] * gh->cctk_lsh[1] * gh->cctk_lsh[0];
const double *a_x = CCTK_VarDataPtr(gh, 0, "grid::x");
const double *a_y = CCTK_VarDataPtr(gh, 0, "grid::y");
const double *a_z = CCTK_VarDataPtr(gh, 0, "grid::z");
+ const int gx = gh->cctk_nghostzones[0];
+ const int gz = gh->cctk_nghostzones[2];
+
CoordPatch *cp;
- size_t domain_size[2];
+
+ size_t *extents;
+ size_t interior_size[2], component_start[2], component_size[2];
+ int level_size[2];
+ int bnd_intercomp[2][2];
int integrator_substeps = MoLNumIntegratorSubsteps();
int i, ret;
@@ -244,10 +357,28 @@ static CoordPatch *get_coord_patch(MSMGContext *ms, int level)
if (i == gh->cctk_lsh[1])
CCTK_WARN(0, "The grid does not include y==0");
- for (int i = 0; i < 2; i++) {
- ptrdiff_t offset_left = ms->gh->cctk_nghostzones[2 * i];
- ptrdiff_t offset_right = offset_left;
+ get_extents(&extents, level_size, gh);
+ component_start[0] = extents[4 * local_proc + 0 + 0];
+ component_start[1] = extents[4 * local_proc + 0 + 1];
+ component_size[0] = extents[4 * local_proc + 2 + 0];
+ component_size[1] = extents[4 * local_proc + 2 + 1];
+ for (int dir = 0; dir < 2; dir++) {
+ cp->bnd_intercomp[dir][0] = component_start[dir] > 0;
+ cp->bnd_intercomp[dir][1] = (component_start[dir] + component_size[dir]) < level_size[dir];
+ cp->offset_left[dir] = gh->cctk_nghostzones[2 * dir];
+ }
+
+ if (level_size[0] != level_size[1])
+ CCTK_WARN(0, "The refinement level is non-square, only square levels are supported");
+
+ for (int dir = 0; dir < 2; dir++) {
+ ptrdiff_t offset_right;
+ interior_size[dir] = component_size[dir];
+ if (cp->bnd_intercomp[dir][1])
+ continue;
+
+ offset_right = gh->cctk_nghostzones[2 * dir];
/* account for grid tapering on refined levels */
if (cp->level > 0)
offset_right *= integrator_substeps * 2;
@@ -257,41 +388,171 @@ static CoordPatch *get_coord_patch(MSMGContext *ms, int level)
if (cp->level > 0)
offset_right--;
- domain_size[i] = ms->gh->cctk_lsh[2 * i] - offset_left - offset_right;
+ interior_size[dir] = component_size[dir] - offset_right;
/* the outer boundary layer overlaps with the first ghost zone */
if (cp->level > 0)
- domain_size[i]++;
-
- cp->offset_left[i] = offset_left;
+ interior_size[dir]++;
}
- if (domain_size[0] != domain_size[1])
- CCTK_WARN(0, "The grid is non-square, only square grids are supported");
-
- cp->solver = solver_alloc(ms, level, domain_size[0], a_x[1] - a_x[0]);
- if (!cp->solver)
- CCTK_WARN(0, "Error allocating the solver");
-
- /* on the coarsest levels we allocate a solver for each of the substeps
+ /* on the finest level we allocate a solver for each of the substeps
* between the coarser-grid timelevels */
if (level == ms->nb_levels - 1) {
- cp->nb_solver_eval = 2 * integrator_substeps;
- cp->solver_eval = calloc(cp->nb_solver_eval, sizeof(*cp->solver_eval));
+ cp->nb_solver_eval = 2 * integrator_substeps;
+ cp->solver_eval = calloc(cp->nb_solver_eval, sizeof(*cp->solver_eval));
if (!cp->solver_eval)
CCTK_WARN(0, "Error allocating solvers");
for (int step = 0; step < 2; step++)
for (int substep = 0; substep < integrator_substeps; substep++) {
- size_t size = cp->grid_size[0] - cp->offset_left[0] - (ms->fd_stencil - 1)
- - ms->gh->cctk_nghostzones[0] * (step * integrator_substeps + substep);
- MG2DContext *s = solver_alloc(ms, level, size, a_x[1] - a_x[0]);
+ int solver_idx = step * integrator_substeps + substep;
+ MG2DContext *s;
+ size_t size[2];
+
+ for (int dir = 0; dir < 2; dir++) {
+ size[dir] = component_size[dir];
+ if (!cp->bnd_intercomp[dir][1])
+ size[dir] -= (ms->fd_stencil - 1) + ms->gh->cctk_nghostzones[2 * dir] * solver_idx;
+ }
+ s = solver_alloc(ms, level, level_size[0], component_start, size, a_x[1] - a_x[0]);
if (!s)
CCTK_WARN(0, "Error allocating the solver");
- cp->solver_eval[step * integrator_substeps + substep] = s;
+ cp->solver_eval[solver_idx] = s;
+ }
+
+ /* carpet does only syncs the domain interior, which includes the
+ * buffer points, but excludes the outermost layer of ghost points,
+ * which are normally filled by prolongation from the coarser grid.
+ * since this solver sets those points after the solve for the first MoL
+ * step, we need to sync them manually */
+ if (nb_procs > 1) {
+ int is_upper_x = component_start[0] + component_size[0] == level_size[0];
+ int is_upper_z = component_start[1] + component_size[1] == level_size[1];
+ int comp;
+
+ cp->sendcounts = calloc(nb_procs, sizeof(*cp->sendcounts));
+ cp->senddispl = calloc(nb_procs, sizeof(*cp->senddispl));
+ cp->sendtypes = calloc(nb_procs, sizeof(*cp->sendtypes));
+ cp->recvcounts = calloc(nb_procs, sizeof(*cp->recvcounts));
+ cp->recvdispl = calloc(nb_procs, sizeof(*cp->recvdispl));
+ cp->recvtypes = calloc(nb_procs, sizeof(*cp->recvtypes));
+ if (!cp->sendcounts || !cp->senddispl || !cp->sendtypes ||
+ !cp->recvcounts || !cp->recvdispl || !cp->recvtypes)
+ CCTK_WARN(0, "Error allocating sync arrays");
+ cp->nb_components = nb_procs;
+
+ MPI_Type_vector(gz, gx, ms->gh->cctk_lsh[0], MPI_DOUBLE, &cp->synctype);
+ MPI_Type_commit(&cp->synctype);
+
+ /* this component touches the upper z boundary */
+ if (is_upper_z) {
+ /* there is a component to the left of us */
+ if (component_start[0] > 0) {
+ for (comp = 0; comp < nb_procs; comp++) {
+ size_t comp_start_x = extents[4 * comp + 0 + 0];
+ size_t comp_start_z = extents[4 * comp + 0 + 1];
+ size_t comp_size_x = extents[4 * comp + 2 + 0];
+ size_t comp_size_z = extents[4 * comp + 2 + 1];
+
+ if (comp_start_z + comp_size_z == level_size[1] &&
+ comp_start_x + comp_size_x == component_start[0])
+ break;
+ }
+ mg_assert(comp < nb_procs);
+ cp->sendcounts[comp] = 1;
+ cp->recvcounts[comp] = 1;
+ cp->sendtypes[comp] = cp->synctype;
+ cp->recvtypes[comp] = cp->synctype;
+ cp->senddispl[comp] = ((gh->cctk_lsh[2] - gz) * gh->cctk_lsh[0] + gx) * sizeof(double);
+ cp->recvdispl[comp] = ((gh->cctk_lsh[2] - gz) * gh->cctk_lsh[0] + 0) * sizeof(double);
+ }
+ /* there is a component to the right of us */
+ if (!is_upper_x) {
+ for (comp = 0; comp < nb_procs; comp++) {
+ size_t comp_start_x = extents[4 * comp + 0 + 0];
+ size_t comp_start_z = extents[4 * comp + 0 + 1];
+ size_t comp_size_x = extents[4 * comp + 2 + 0];
+ size_t comp_size_z = extents[4 * comp + 2 + 1];
+
+ if (comp_start_z + comp_size_z == level_size[1] &&
+ comp_start_x == component_start[0] + component_size[0])
+ break;
+ }
+ mg_assert(comp < nb_procs);
+ cp->sendcounts[comp] = 1;
+ cp->recvcounts[comp] = 1;
+ cp->sendtypes[comp] = cp->synctype;
+ cp->recvtypes[comp] = cp->synctype;
+ cp->senddispl[comp] = ((gh->cctk_lsh[2] - gz) * gh->cctk_lsh[0] +
+ (gh->cctk_lsh[0] - gx * 2)) * sizeof(double);
+ cp->recvdispl[comp] = ((gh->cctk_lsh[2] - gz) * gh->cctk_lsh[0] +
+ (gh->cctk_lsh[0] - gx)) * sizeof(double);
+ }
+ }
+ /* this component touches the upper x boundary */
+ if (is_upper_x) {
+ /* there is a component below us */
+ if (component_start[1] > 0) {
+ for (comp = 0; comp < nb_procs; comp++) {
+ size_t comp_start_x = extents[4 * comp + 0 + 0];
+ size_t comp_start_z = extents[4 * comp + 0 + 1];
+ size_t comp_size_x = extents[4 * comp + 2 + 0];
+ size_t comp_size_z = extents[4 * comp + 2 + 1];
+
+ if (comp_start_x + comp_size_x == level_size[0] &&
+ comp_start_z + comp_size_z == component_start[1])
+ break;
+ }
+ mg_assert(comp < nb_procs);
+ cp->sendcounts[comp] = 1;
+ cp->recvcounts[comp] = 1;
+ cp->sendtypes[comp] = cp->synctype;
+ cp->recvtypes[comp] = cp->synctype;
+ cp->senddispl[comp] = (gz * gh->cctk_lsh[0] + gh->cctk_lsh[0] - gx) * sizeof(double);
+ cp->recvdispl[comp] = ( + gh->cctk_lsh[0] - gx) * sizeof(double);
+ }
+ /* there is a component above us */
+ if (!is_upper_z) {
+ for (comp = 0; comp < nb_procs; comp++) {
+ size_t comp_start_x = extents[4 * comp + 0 + 0];
+ size_t comp_start_z = extents[4 * comp + 0 + 1];
+ size_t comp_size_x = extents[4 * comp + 2 + 0];
+ size_t comp_size_z = extents[4 * comp + 2 + 1];
+
+ if (comp_start_x + comp_size_x == level_size[0] &&
+ comp_start_z == component_start[1] + component_size[1])
+ break;
+ }
+ mg_assert(comp < nb_procs);
+ cp->sendcounts[comp] = 1;
+ cp->recvcounts[comp] = 1;
+ cp->sendtypes[comp] = cp->synctype;
+ cp->recvtypes[comp] = cp->synctype;
+ cp->senddispl[comp] = ((gh->cctk_lsh[2] - gz * 2) * gh->cctk_lsh[0] +
+ (gh->cctk_lsh[0] - gx)) * sizeof(double);
+ cp->recvdispl[comp] = ((gh->cctk_lsh[2] - gz) * gh->cctk_lsh[0] +
+ (gh->cctk_lsh[0] - gx)) * sizeof(double);
+ }
+ }
+
+ for (int comp = 0; comp < nb_procs; comp++) {
+ mg_assert(!!cp->sendtypes[comp] == !!cp->recvtypes[comp]);
+ if (!cp->sendtypes[comp]) {
+ cp->sendtypes[comp] = MPI_BYTE;
+ cp->recvtypes[comp] = MPI_BYTE;
+ }
}
+ }
+
}
+ cp->solver = solver_alloc(ms, level, level_size[0], component_start,
+ interior_size, a_x[1] - a_x[0]);
+ if (!cp->solver)
+ CCTK_WARN(0, "Error allocating the solver");
+
+ free(extents);
+
ms->nb_patches++;
return cp;
}
@@ -436,13 +697,14 @@ static void fill_eq_coeffs(MSMGContext *ctx, CoordPatch *cp, MG2DContext *solver
double *a_Xtx = CCTK_VarDataPtr(gh, 0, "ML_BSSN::Xt1");
double *a_Xtz = CCTK_VarDataPtr(gh, 0, "ML_BSSN::Xt3");
- for (int idx_z = 0; idx_z < solver->domain_size; idx_z++)
- for (int idx_x = 0; idx_x < solver->domain_size; idx_x++) {
+ for (int idx_z = 0; idx_z < solver->local_size[1]; idx_z++)
+ for (int idx_x = 0; idx_x < solver->local_size[0]; idx_x++) {
const int idx_src = CCTK_GFINDEX3D(gh, idx_x + cp->offset_left[0], cp->y_idx, idx_z + cp->offset_left[1]);
const int idx_dc = idx_z * solver->diff_coeffs_stride + idx_x;
const int idx_rhs = idx_z * solver->rhs_stride + idx_x;
const double x = a_x[idx_src];
+ const int on_axis = fabs(x) < 1e-13;
const double gtxx = a_gtxx[idx_src];
const double gtyy = a_gtyy[idx_src];
@@ -508,10 +770,10 @@ static void fill_eq_coeffs(MSMGContext *ctx, CoordPatch *cp, MG2DContext *solver
Xx = SQR(phi) * (Xtx + (phi_dx * gtu[0][0] + phi_dz * gtu[0][2]) / phi);
Xz = SQR(phi) * (Xtz + (phi_dx * gtu[0][2] + phi_dz * gtu[2][2]) / phi);
- solver->diff_coeffs[MG2D_DIFF_COEFF_20][idx_dc] = SQR(phi) * (gtu[0][0] + ((idx_x == 0) ? gtu[1][1] : 0.0));
+ solver->diff_coeffs[MG2D_DIFF_COEFF_20][idx_dc] = SQR(phi) * (gtu[0][0] + (on_axis ? gtu[1][1] : 0.0));
solver->diff_coeffs[MG2D_DIFF_COEFF_02][idx_dc] = SQR(phi) * gtu[2][2];
solver->diff_coeffs[MG2D_DIFF_COEFF_11][idx_dc] = SQR(phi) * gtu[0][2] * 2;
- solver->diff_coeffs[MG2D_DIFF_COEFF_10][idx_dc] = -Xx + (idx_x ? SQR(phi) * gtu[1][1] / x : 0.0);
+ solver->diff_coeffs[MG2D_DIFF_COEFF_10][idx_dc] = -Xx + (on_axis ? 0.0 : SQR(phi) * gtu[1][1] / x);
solver->diff_coeffs[MG2D_DIFF_COEFF_01][idx_dc] = -Xz;
solver->diff_coeffs[MG2D_DIFF_COEFF_00][idx_dc] = -k2;
solver->rhs[idx_rhs] = k2;
@@ -520,8 +782,8 @@ static void fill_eq_coeffs(MSMGContext *ctx, CoordPatch *cp, MG2DContext *solver
static void solution_to_grid(CoordPatch *cp, MG2DContext *solver, double *dst)
{
- for (int j = 0; j < solver->domain_size; j++)
- for (int i = 0; i < solver->domain_size; i++) {
+ for (int j = 0; j < solver->local_size[1]; j++)
+ for (int i = 0; i < solver->local_size[0]; i++) {
const ptrdiff_t idx_dst = CPINDEX(cp, i + cp->offset_left[0], cp->y_idx, j + cp->offset_left[1]);
const int idx_src = j * solver->u_stride + i;
dst[idx_dst] = 1.0 + solver->u[idx_src];
@@ -529,34 +791,42 @@ static void solution_to_grid(CoordPatch *cp, MG2DContext *solver, double *dst)
if (cp->level == 0) {
/* on the coarsest level, extrapolate the outer boundary ghost points */
- for (int idx_z = cp->offset_left[1]; idx_z < cp->offset_left[1] + solver->domain_size; idx_z++)
- for (int idx_x = cp->offset_left[1] + solver->domain_size; idx_x < cp->grid_size[0]; idx_x++) {
- const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z);
- dst[idx_dst] = 2.0 * dst[idx_dst - 1] - dst[idx_dst - 2];
- }
+ if (!cp->bnd_intercomp[0][1]) {
+ for (int idx_z = cp->offset_left[1]; idx_z < cp->offset_left[1] + solver->local_size[1]; idx_z++)
+ for (int idx_x = cp->offset_left[0] + solver->local_size[0]; idx_x < cp->grid_size[0]; idx_x++) {
+ const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z);
+ dst[idx_dst] = 2.0 * dst[idx_dst - 1] - dst[idx_dst - 2];
+ }
+ }
- for (int idx_x = cp->offset_left[0]; idx_x < cp->grid_size[0]; idx_x++)
- for (int idx_z = cp->offset_left[1] + solver->domain_size; idx_z < cp->grid_size[2]; idx_z++) {
- const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z);
- const ptrdiff_t idx_src0 = CPINDEX(cp, idx_x, cp->y_idx, idx_z - 1);
- const ptrdiff_t idx_src1 = CPINDEX(cp, idx_x, cp->y_idx, idx_z - 2);
- dst[idx_dst] = 2.0 * dst[idx_src0] - dst[idx_src1];
- }
+ if (!cp->bnd_intercomp[1][1]) {
+ for (int idx_x = cp->offset_left[0]; idx_x < cp->grid_size[0]; idx_x++)
+ for (int idx_z = cp->offset_left[1] + solver->local_size[1]; idx_z < cp->grid_size[2]; idx_z++) {
+ const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z);
+ const ptrdiff_t idx_src0 = CPINDEX(cp, idx_x, cp->y_idx, idx_z - 1);
+ const ptrdiff_t idx_src1 = CPINDEX(cp, idx_x, cp->y_idx, idx_z - 2);
+ dst[idx_dst] = 2.0 * dst[idx_src0] - dst[idx_src1];
+ }
+ }
}
/* fill in the axial symmetry ghostpoints by mirroring */
- for (int idx_z = cp->offset_left[1]; idx_z < cp->grid_size[2]; idx_z++)
- for (int idx_x = 0; idx_x < cp->offset_left[0]; idx_x++) {
- const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z);
- const ptrdiff_t idx_src = CPINDEX(cp, 2 * cp->offset_left[0] - idx_x, cp->y_idx, idx_z);
- dst[idx_dst] = dst[idx_src];
- }
- for (int idx_z = 0; idx_z < cp->offset_left[1]; idx_z++)
- for (int idx_x = 0; idx_x < cp->grid_size[0]; idx_x++) {
- const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z);
- const ptrdiff_t idx_src = CPINDEX(cp, idx_x, cp->y_idx, 2 * cp->offset_left[1] - idx_z);
- dst[idx_dst] = dst[idx_src];
- }
+ if (!cp->bnd_intercomp[0][0]) {
+ for (int idx_z = cp->offset_left[1]; idx_z < cp->grid_size[2]; idx_z++)
+ for (int idx_x = 0; idx_x < cp->offset_left[0]; idx_x++) {
+ const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z);
+ const ptrdiff_t idx_src = CPINDEX(cp, 2 * cp->offset_left[0] - idx_x, cp->y_idx, idx_z);
+ dst[idx_dst] = dst[idx_src];
+ }
+ }
+ if (!cp->bnd_intercomp[1][0]) {
+ for (int idx_z = 0; idx_z < cp->offset_left[1]; idx_z++)
+ for (int idx_x = 0; idx_x < cp->grid_size[0]; idx_x++) {
+ const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z);
+ const ptrdiff_t idx_src = CPINDEX(cp, idx_x, cp->y_idx, 2 * cp->offset_left[1] - idx_z);
+ dst[idx_dst] = dst[idx_src];
+ }
+ }
}
void msa_mg_eval(CCTK_ARGUMENTS)
@@ -631,13 +901,13 @@ void msa_mg_eval(CCTK_ARGUMENTS)
if (solver_idx) {
MG2DContext *solver_src = cp->solver_eval[solver_idx - 1];
- for (int line = 0; line < solver->domain_size; line++) {
+ for (int line = 0; line < solver->local_size[1] ; line++) {
memcpy(solver->u + line * solver->u_stride, solver_src->u + line * solver_src->u_stride,
- sizeof(*solver->u) * solver->domain_size);
+ sizeof(*solver->u) * solver->local_size[0]);
}
} else {
- for (int line = 0; line < solver->domain_size; line++)
- for (int i = 0; i < solver->domain_size; i++)
+ for (int line = 0; line < solver->local_size[1]; line++)
+ for (int i = 0; i < solver->local_size[0]; i++)
solver->u[line * solver->u_stride + i] = lapse_mg[(cp->offset_left[1] + line) * cctk_lsh[0] + cp->offset_left[0] + i] - 1.0;
}
@@ -648,40 +918,48 @@ void msa_mg_eval(CCTK_ARGUMENTS)
LOGDEBUG( "extrapolating BCs from t1=%g (%g) and t0=%g (%g)\n", lapse_prev1_time[reflevel], fact1, lapse_prev0_time[reflevel], fact0);
start = gettime();
- for (ptrdiff_t idx_z = cp->offset_left[1] + solver->domain_size - 1; idx_z < cctkGH->cctk_lsh[2]; idx_z++)
- for (ptrdiff_t idx_x = 0; idx_x < cctkGH->cctk_lsh[0]; idx_x++) {
- const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, idx_x, cp->y_idx, idx_z);
- lapse_mg_eval[idx] = fact0 * lapse_prev0[idx] + fact1 * lapse_prev1[idx];
- }
- for (ptrdiff_t idx_z = 0; idx_z < cctkGH->cctk_lsh[2]; idx_z++)
- for (ptrdiff_t idx_x = cp->offset_left[0] + solver->domain_size - 1; idx_x < cctkGH->cctk_lsh[0]; idx_x++) {
- const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, idx_x, cp->y_idx, idx_z);
- lapse_mg_eval[idx] = fact0 * lapse_prev0[idx] + fact1 * lapse_prev1[idx];
- }
+ if (!cp->bnd_intercomp[1][1]) {
+ for (ptrdiff_t idx_z = cp->offset_left[1] + solver->local_size[1] - 1; idx_z < cctkGH->cctk_lsh[2]; idx_z++)
+ for (ptrdiff_t idx_x = 0; idx_x < cctkGH->cctk_lsh[0]; idx_x++) {
+ const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, idx_x, cp->y_idx, idx_z);
+ lapse_mg_eval[idx] = fact0 * lapse_prev0[idx] + fact1 * lapse_prev1[idx];
+ }
+ }
+ if (!cp->bnd_intercomp[0][1]) {
+ for (ptrdiff_t idx_z = 0; idx_z < cctkGH->cctk_lsh[2]; idx_z++)
+ for (ptrdiff_t idx_x = cp->offset_left[0] + solver->local_size[0] - 1; idx_x < cctkGH->cctk_lsh[0]; idx_x++) {
+ const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, idx_x, cp->y_idx, idx_z);
+ lapse_mg_eval[idx] = fact0 * lapse_prev0[idx] + fact1 * lapse_prev1[idx];
+ }
+ }
- for (int j = 0; j < solver->fd_stencil; j++) {
- MG2DBoundary *bnd = solver->boundaries[MG2D_BOUNDARY_1U];
- double *dst = bnd->val + j * bnd->val_stride;
- for (ptrdiff_t i = -j; i < (ptrdiff_t)solver->domain_size + j; i++) {
- const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, i + cp->offset_left[0], cp->y_idx, cp->offset_left[1] + solver->domain_size - 1 + j);
- dst[i] = lapse_mg_eval[idx] - 1.0;
- }
- if (j == 0) {
- dst = solver->u + solver->u_stride * (solver->domain_size - 1);
- memcpy(dst, bnd->val, sizeof(*dst) * solver->domain_size);
+ if (!cp->bnd_intercomp[1][1]) {
+ for (int j = 0; j < solver->fd_stencil; j++) {
+ MG2DBoundary *bnd = solver->boundaries[MG2D_BOUNDARY_1U];
+ double *dst = bnd->val + j * bnd->val_stride;
+ for (ptrdiff_t i = -j; i < (ptrdiff_t)solver->local_size[0] + j; i++) {
+ const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, i + cp->offset_left[0], cp->y_idx, cp->offset_left[1] + solver->local_size[1] - 1 + j);
+ dst[i] = lapse_mg_eval[idx] - 1.0;
+ }
+ if (j == 0) {
+ dst = solver->u + solver->u_stride * (solver->local_size[1] - 1);
+ memcpy(dst, bnd->val, sizeof(*dst) * solver->local_size[0]);
+ }
}
}
- for (int j = 0; j < solver->fd_stencil; j++) {
- MG2DBoundary *bnd = solver->boundaries[MG2D_BOUNDARY_0U];
- double *dst = bnd->val + j * bnd->val_stride;
- for (ptrdiff_t i = -j; i < (ptrdiff_t)solver->domain_size + j; i++) {
- const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, cp->offset_left[0] + solver->domain_size - 1 + j, cp->y_idx, cp->offset_left[1] + i);
- dst[i] = lapse_mg_eval[idx] - 1.0;
- }
- if (j == 0) {
- dst = solver->u + solver->domain_size - 1;
- for (ptrdiff_t i = 0; i < solver->domain_size; i++)
- dst[i * solver->u_stride] = bnd->val[i];
+ if (!cp->bnd_intercomp[0][1]) {
+ for (int j = 0; j < solver->fd_stencil; j++) {
+ MG2DBoundary *bnd = solver->boundaries[MG2D_BOUNDARY_0U];
+ double *dst = bnd->val + j * bnd->val_stride;
+ for (ptrdiff_t i = -j; i < (ptrdiff_t)solver->local_size[1] + j; i++) {
+ const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, cp->offset_left[0] + solver->local_size[0] - 1 + j, cp->y_idx, cp->offset_left[1] + i);
+ dst[i] = lapse_mg_eval[idx] - 1.0;
+ }
+ if (j == 0) {
+ dst = solver->u + solver->local_size[0] - 1;
+ for (ptrdiff_t i = 0; i < solver->local_size[1]; i++)
+ dst[i * solver->u_stride] = bnd->val[i];
+ }
}
}
ms->time_fine_boundaries += gettime() - start;
@@ -698,6 +976,16 @@ void msa_mg_eval(CCTK_ARGUMENTS)
solution_to_grid(cp, solver, lapse_mg_eval);
ms->time_fine_export = gettime() - start;
+ if (cp->nb_components && solver_idx == 0) {
+ start = gettime();
+
+ MPI_Alltoallw(lapse_mg_eval, cp->sendcounts, cp->senddispl, cp->sendtypes,
+ lapse_mg_eval, cp->recvcounts, cp->recvdispl, cp->recvtypes,
+ MPI_COMM_WORLD);
+
+ ms->time_mpi_sync = gettime() - start;
+ }
+
ms->time_fine_solve += gettime() - fine_solve_start;
ms->count_fine_solve++;
}
@@ -772,27 +1060,30 @@ void msa_mg_solve(CCTK_ARGUMENTS)
LOGDEBUG( "extrapolating BCs from t1=%g (%g) and t0=%g (%g)\n", lapse_prev1_time[reflevel], fact1, lapse_prev0_time[reflevel], fact0);
- for (int j = 0; j < cp->solver->fd_stencil; j++) {
- for (ptrdiff_t i = -j; i < (ptrdiff_t)cp->solver->domain_size + j; i++) {
- const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, i + cp->offset_left[0], cp->y_idx, cp->offset_left[1] + cp->solver->domain_size - 1 + j);
- lapse_mg[idx] = fact0 * lapse_prev0[idx] + fact1 * lapse_prev1[idx];
+ if (!cp->bnd_intercomp[1][1]) {
+ for (int j = 0; j < cp->solver->fd_stencil; j++) {
+ for (ptrdiff_t i = -j; i < (ptrdiff_t)cp->solver->local_size[0] + j; i++) {
+ const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, i + cp->offset_left[0], cp->y_idx, cp->offset_left[1] + cp->solver->local_size[1] - 1 + j);
+ lapse_mg[idx] = fact0 * lapse_prev0[idx] + fact1 * lapse_prev1[idx];
+ }
}
}
- for (int j = 0; j < cp->solver->fd_stencil; j++) {
- for (ptrdiff_t i = -j; i < (ptrdiff_t)cp->solver->domain_size + j; i++) {
- const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, cp->offset_left[0] + cp->solver->domain_size - 1 + j, cp->y_idx, cp->offset_left[1] + i);
- lapse_mg[idx] = fact0 * lapse_prev0[idx] + fact1 * lapse_prev1[idx];
+ if (!cp->bnd_intercomp[0][1]) {
+ for (int j = 0; j < cp->solver->fd_stencil; j++) {
+ for (ptrdiff_t i = -j; i < (ptrdiff_t)cp->solver->local_size[1] + j; i++) {
+ const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, cp->offset_left[0] + cp->solver->local_size[0] - 1 + j, cp->y_idx, cp->offset_left[1] + i);
+ lapse_mg[idx] = fact0 * lapse_prev0[idx] + fact1 * lapse_prev1[idx];
+ }
}
}
} else {
/* use the solution from the coarser level as the initial guess */
- CoordPatch *cp1 = get_coord_patch(ms, reflevel - 1);
+ //CoordPatch *cp1 = get_coord_patch(ms, reflevel - 1);
- ret = mg2d_init_guess(cp->solver, cp1->solver->u, cp1->solver->u_stride,
- (size_t [2]){ cp1->solver->domain_size, cp1->solver->domain_size },
- cp1->solver->step);
- if (ret < 0)
- CCTK_WARN(0, "Error setting the initial guess");
+ //ret = mg2d_init_guess(cp->solver, cp1->solver->u, cp1->solver->u_stride,
+ // cp1->solver->local_size, cp1->solver->step);
+ //if (ret < 0)
+ // CCTK_WARN(0, "Error setting the initial guess");
}
/* if the condition above was false, then lapse_mg should be filled by
@@ -801,20 +1092,24 @@ void msa_mg_solve(CCTK_ARGUMENTS)
*/
/* fill the solver boundary conditions */
- for (int j = 0; j < cp->solver->fd_stencil; j++) {
- MG2DBoundary *bnd = cp->solver->boundaries[MG2D_BOUNDARY_1U];
- double *dst = bnd->val + j * bnd->val_stride;
- for (ptrdiff_t i = -j; i < (ptrdiff_t)cp->solver->domain_size + j; i++) {
- const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, ABS(i) + cp->offset_left[0], cp->y_idx, cp->offset_left[1] + cp->solver->domain_size - 1 + j);
- dst[i] = lapse_mg[idx] - 1.0;
+ if (!cp->bnd_intercomp[1][1]) {
+ for (int j = 0; j < cp->solver->fd_stencil; j++) {
+ MG2DBoundary *bnd = cp->solver->boundaries[MG2D_BOUNDARY_1U];
+ double *dst = bnd->val + j * bnd->val_stride;
+ for (ptrdiff_t i = -j; i < (ptrdiff_t)cp->solver->local_size[0] + j; i++) {
+ const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, ABS(i) + cp->offset_left[0], cp->y_idx, cp->offset_left[1] + cp->solver->local_size[1] - 1 + j);
+ dst[i] = lapse_mg[idx] - 1.0;
+ }
}
}
- for (int j = 0; j < cp->solver->fd_stencil; j++) {
- MG2DBoundary *bnd = cp->solver->boundaries[MG2D_BOUNDARY_0U];
- double *dst = bnd->val + j * bnd->val_stride;
- for (ptrdiff_t i = -j; i < (ptrdiff_t)cp->solver->domain_size + j; i++) {
- const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, cp->offset_left[1] + cp->solver->domain_size - 1 + j, cp->y_idx, cp->offset_left[1] + ABS(i));
- dst[i] = lapse_mg[idx] - 1.0;
+ if (!cp->bnd_intercomp[0][1]) {
+ for (int j = 0; j < cp->solver->fd_stencil; j++) {
+ MG2DBoundary *bnd = cp->solver->boundaries[MG2D_BOUNDARY_0U];
+ double *dst = bnd->val + j * bnd->val_stride;
+ for (ptrdiff_t i = -j; i < (ptrdiff_t)cp->solver->local_size[1] + j; i++) {
+ const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, cp->offset_left[1] + cp->solver->local_size[0] - 1 + j, cp->y_idx, cp->offset_left[1] + ABS(i));
+ dst[i] = lapse_mg[idx] - 1.0;
+ }
}
}
}
@@ -882,6 +1177,10 @@ void msa_mg_init(CCTK_ARGUMENTS)
int ret;
int nb_levels_type;
int nb_levels = *(int*)CCTK_ParameterGet("num_levels_1", "CarpetRegrid2", &nb_levels_type);
+ int use_tapered_grids = *(int*)CCTK_ParameterGet("use_tapered_grids", "Carpet", &use_tapered_grids);
+
+ if (!use_tapered_grids)
+ CCTK_WARN(0, "MaximalSlicingAxiMG only works with use_tapered_grids=1");
if (!ms) {
ret = context_init(cctkGH, fd_stencil, maxiter, exact_size, nb_cycles,
@@ -940,17 +1239,21 @@ void maximal_slicing_axi_mg_modify_diss(CCTK_ARGUMENTS)
cp = get_coord_patch(ms, reflevel);
- for (int idx_z = 0; idx_z < cp->grid_size[2]; idx_z++)
- for (int idx_x = cp->offset_left[0] + cp->solver->domain_size - (ms->fd_stencil + 1); idx_x < cp->grid_size[0]; idx_x++) {
- const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z);
- epsdis[idx_dst] = 0.0;
- }
+ if (!cp->bnd_intercomp[0][1]) {
+ for (int idx_z = 0; idx_z < cp->grid_size[2]; idx_z++)
+ for (int idx_x = cp->offset_left[0] + cp->solver->local_size[0] - (ms->fd_stencil + 1); idx_x < cp->grid_size[0]; idx_x++) {
+ const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z);
+ epsdis[idx_dst] = 0.0;
+ }
+ }
- for (int idx_x = 0; idx_x < cp->grid_size[0]; idx_x++)
- for (int idx_z = cp->offset_left[0] + cp->solver->domain_size - (ms->fd_stencil + 1); idx_z < cp->grid_size[2]; idx_z++) {
- const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z);
- epsdis[idx_dst] = 0.0;
- }
+ if (!cp->bnd_intercomp[1][1]) {
+ for (int idx_x = 0; idx_x < cp->grid_size[0]; idx_x++)
+ for (int idx_z = cp->offset_left[0] + cp->solver->local_size[1] - (ms->fd_stencil + 1); idx_z < cp->grid_size[2]; idx_z++) {
+ const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z);
+ epsdis[idx_dst] = 0.0;
+ }
+ }
}
void msa_mg_terminate_print_stats(CCTK_ARGUMENTS)