From 1afea75597177b736429f8f6368bb5ecd0f0a389 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Fri, 24 May 2019 13:08:08 +0200 Subject: Add support for MPI. --- configuration.ccl | 1 + interface.ccl | 4 +- schedule.ccl | 3 + src/maximal_slicing_axi_mg.c | 557 +++++++++++++++++++++++++++++++++---------- 4 files changed, 436 insertions(+), 129 deletions(-) diff --git a/configuration.ccl b/configuration.ccl index f124283..4a3a32b 100644 --- a/configuration.ccl +++ b/configuration.ccl @@ -1,2 +1,3 @@ # Configuration definition for thorn MaximalSlicingAxiMG +REQUIRES MPI diff --git a/interface.ccl b/interface.ccl index 124823e..8ebef12 100644 --- a/interface.ccl +++ b/interface.ccl @@ -19,5 +19,5 @@ REAL lapse_mg TYPE=GF TIMELEVELS=2 REAL lapse_mg_eval TYPE=GF tags='Prolongation="None"' REAL lapse_prev0 TYPE=GF tags='Prolongation="None"' REAL lapse_prev1 TYPE=GF tags='Prolongation="None"' -REAL lapse_prev0_time TYPE=ARRAY DIM=1 SIZE=32 -REAL lapse_prev1_time TYPE=ARRAY DIM=1 SIZE=32 +REAL lapse_prev0_time TYPE=ARRAY DIM=1 SIZE=32 DISTRIB=constant +REAL lapse_prev1_time TYPE=ARRAY DIM=1 SIZE=32 DISTRIB=constant diff --git a/schedule.ccl b/schedule.ccl index 673a151..08327e9 100644 --- a/schedule.ccl +++ b/schedule.ccl @@ -14,6 +14,7 @@ if (CCTK_Equals(lapse_evolution_method, "maximal_axi_mg")) { } "" SCHEDULE msa_mg_eval IN MoL_CalcRHS BEFORE ML_BSSN_evolCalcGroup { + SYNC: ML_lapse LANG: C } "" @@ -23,6 +24,8 @@ if (CCTK_Equals(lapse_evolution_method, "maximal_axi_mg")) { } "" SCHEDULE msa_mg_solve IN CCTK_POSTSTEP { + SYNC: ML_lapse + SYNC: lapse_mg LANG: C } "" diff --git a/src/maximal_slicing_axi_mg.c b/src/maximal_slicing_axi_mg.c index 69832cf..cf56fb9 100644 --- a/src/maximal_slicing_axi_mg.c +++ b/src/maximal_slicing_axi_mg.c @@ -9,6 +9,7 @@ #include #include +#include #include "cctk.h" #include "cctk_Arguments.h" @@ -16,6 +17,8 @@ #include "cctk_Timers.h" #include "util_Table.h" +#include + #define SQR(x) ((x) * (x)) #define ABS(x) ((x >= 0) ? (x) : -(x)) #define ARRAY_ELEMS(x) (sizeof(x) / sizeof(*x)) @@ -28,6 +31,14 @@ #define LOGDEBUG #endif +#define mg_assert(x) \ +do { \ + if (!(x)) { \ + fprintf(stderr, "Assertion " #x " failed\n"); \ + abort(); \ + } \ +} while (0) + static const struct { const char *str; enum MG2DLogLevel level; @@ -64,6 +75,17 @@ typedef struct CoordPatch { /* number of x/z grid points by which the elliptic solver domain is offset * from the cactus grid */ ptrdiff_t offset_left[2]; + int bnd_intercomp[2][2]; + + /* MPI sync for the outer ghostzones */ + MPI_Datatype synctype; + int nb_components; + int *sendcounts; + int *senddispl; + MPI_Datatype *sendtypes; + int *recvcounts; + int *recvdispl; + MPI_Datatype *recvtypes; } CoordPatch; typedef struct MSMGContext { @@ -112,6 +134,8 @@ typedef struct MSMGContext { int64_t time_fine_mg2d; int64_t time_fine_export; int64_t time_init_guess_eval; + + int64_t time_mpi_sync; } MSMGContext; static MSMGContext *ms; @@ -140,6 +164,15 @@ static void coord_patch_free(CoordPatch *cp) free(cp->solver_eval); cp->solver_eval = NULL; cp->nb_solver_eval = 0; + + if (cp->synctype) + MPI_Type_free(&cp->synctype); + free(cp->sendcounts); + free(cp->senddispl); + free(cp->sendtypes); + free(cp->recvcounts); + free(cp->recvdispl); + free(cp->recvtypes); } static void log_callback(const MG2DContext *ctx, int level, @@ -154,13 +187,18 @@ static void log_callback(const MG2DContext *ctx, int level, } static MG2DContext *solver_alloc(MSMGContext *ms, int level, size_t domain_size, - double step) + size_t component_start[2], size_t component_size[2], + double step) { const char *omp_threads = getenv("OMP_NUM_THREADS"); MG2DContext *solver; + int multi_component = component_size[0] < domain_size || component_size[1] < domain_size; - solver = mg2d_solver_alloc(domain_size); + if (multi_component) + solver = mg2d_solver_alloc_mpi(MPI_COMM_WORLD, component_start, component_size); + else + solver = mg2d_solver_alloc(domain_size); if (!solver) return NULL; @@ -196,25 +234,100 @@ static MG2DContext *solver_alloc(MSMGContext *ms, int level, size_t domain_size, /* initialize boundary values to zero, * non-zero values on the outer boundaries of refined levels are filled in elsewhere */ for (int i = 0; i < 4; i++) { - for (size_t j = 0; j < solver->fd_stencil; j++) + int ci = mg2d_bnd_coord_idx(i); + for (size_t j = 0; j < solver->fd_stencil; j++) { memset(solver->boundaries[i]->val + j * solver->boundaries[i]->val_stride - j, - 0, sizeof(*solver->boundaries[i]->val) * (domain_size + 2 * j)); + 0, sizeof(*solver->boundaries[i]->val) * (component_size[!ci] + 2 * j)); + } } return solver; } +/** + * Compute the refinement level geometry. + * + * The level is assumed to be a single square patch that is composed of + * nProcs components. The lower x and z boundaries are assumed to be reflection. + * For our purposes, both the physical outer boundaries and the refinement + * boundaries are equivalent, while the intercomponent boundaries are treated + * differently. Since Cactus only allows distinguishing between "physical" and + * "all other" boundaries, we need to compute the extents manually. + * + * @param level_size will contain the number of points in x/z directions on the + * complete refinement level, including all the ghost zones up to the outer + * level boundary. + * + * @param component_start indices of the component origin in the refinement level + * + * @param component_size number of points owned by this component; when the + * component is on the outer level boundary, this includes all the ghost zones + * up to the outer boundary. + */ +static void get_extents(size_t **pextents, int *level_size, cGH *gh) +{ + int nb_procs = CCTK_nProcs(gh); + int local_proc = CCTK_MyProc(gh); + + size_t *extents; + + extents = calloc(nb_procs, 4 * sizeof(*extents)); + if (!extents) + CCTK_WARN(0, "Error allocating the extents"); + + for (int dir = 0; dir < 2; dir++) { + extents[4 * local_proc + 0 + dir] = gh->cctk_lbnd[2 * dir]; + extents[4 * local_proc + 2 + dir] = gh->cctk_lsh[2 * dir] - gh->cctk_nghostzones[2 * dir]; + } + + if (nb_procs > 1) + MPI_Allgather(MPI_IN_PLACE, 0, NULL, extents, 4 * sizeof(*extents), MPI_BYTE, MPI_COMM_WORLD); + + level_size[0] = 0; + level_size[1] = 0; + + for (int dir = 0; dir < 2; dir++) { + for (int comp = 0; comp < nb_procs; comp++) { + size_t start = extents[4 * comp + 0 + dir]; + size_t size = extents[4 * comp + 2 + dir]; + if (start + size > level_size[dir]) + level_size[dir] = start + size; + } + + // if the upper boundary is an inter-component boundary, substract the ghost zones + for (int comp = 0; comp < nb_procs; comp++) { + size_t start = extents[4 * comp + 0 + dir]; + size_t size = extents[4 * comp + 2 + dir]; + + if (start + size < level_size[dir]) + extents[4 * comp + 2 + dir] -= gh->cctk_nghostzones[2 * dir]; + } + } + + *pextents = extents; +} + static CoordPatch *get_coord_patch(MSMGContext *ms, int level) { cGH *gh = ms->gh; + const int nb_procs = CCTK_nProcs(gh); + const int local_proc = CCTK_MyProc(gh); + const size_t grid_size = gh->cctk_lsh[2] * gh->cctk_lsh[1] * gh->cctk_lsh[0]; const double *a_x = CCTK_VarDataPtr(gh, 0, "grid::x"); const double *a_y = CCTK_VarDataPtr(gh, 0, "grid::y"); const double *a_z = CCTK_VarDataPtr(gh, 0, "grid::z"); + const int gx = gh->cctk_nghostzones[0]; + const int gz = gh->cctk_nghostzones[2]; + CoordPatch *cp; - size_t domain_size[2]; + + size_t *extents; + size_t interior_size[2], component_start[2], component_size[2]; + int level_size[2]; + int bnd_intercomp[2][2]; int integrator_substeps = MoLNumIntegratorSubsteps(); int i, ret; @@ -244,10 +357,28 @@ static CoordPatch *get_coord_patch(MSMGContext *ms, int level) if (i == gh->cctk_lsh[1]) CCTK_WARN(0, "The grid does not include y==0"); - for (int i = 0; i < 2; i++) { - ptrdiff_t offset_left = ms->gh->cctk_nghostzones[2 * i]; - ptrdiff_t offset_right = offset_left; + get_extents(&extents, level_size, gh); + component_start[0] = extents[4 * local_proc + 0 + 0]; + component_start[1] = extents[4 * local_proc + 0 + 1]; + component_size[0] = extents[4 * local_proc + 2 + 0]; + component_size[1] = extents[4 * local_proc + 2 + 1]; + for (int dir = 0; dir < 2; dir++) { + cp->bnd_intercomp[dir][0] = component_start[dir] > 0; + cp->bnd_intercomp[dir][1] = (component_start[dir] + component_size[dir]) < level_size[dir]; + cp->offset_left[dir] = gh->cctk_nghostzones[2 * dir]; + } + + if (level_size[0] != level_size[1]) + CCTK_WARN(0, "The refinement level is non-square, only square levels are supported"); + + for (int dir = 0; dir < 2; dir++) { + ptrdiff_t offset_right; + interior_size[dir] = component_size[dir]; + if (cp->bnd_intercomp[dir][1]) + continue; + + offset_right = gh->cctk_nghostzones[2 * dir]; /* account for grid tapering on refined levels */ if (cp->level > 0) offset_right *= integrator_substeps * 2; @@ -257,41 +388,171 @@ static CoordPatch *get_coord_patch(MSMGContext *ms, int level) if (cp->level > 0) offset_right--; - domain_size[i] = ms->gh->cctk_lsh[2 * i] - offset_left - offset_right; + interior_size[dir] = component_size[dir] - offset_right; /* the outer boundary layer overlaps with the first ghost zone */ if (cp->level > 0) - domain_size[i]++; - - cp->offset_left[i] = offset_left; + interior_size[dir]++; } - if (domain_size[0] != domain_size[1]) - CCTK_WARN(0, "The grid is non-square, only square grids are supported"); - - cp->solver = solver_alloc(ms, level, domain_size[0], a_x[1] - a_x[0]); - if (!cp->solver) - CCTK_WARN(0, "Error allocating the solver"); - - /* on the coarsest levels we allocate a solver for each of the substeps + /* on the finest level we allocate a solver for each of the substeps * between the coarser-grid timelevels */ if (level == ms->nb_levels - 1) { - cp->nb_solver_eval = 2 * integrator_substeps; - cp->solver_eval = calloc(cp->nb_solver_eval, sizeof(*cp->solver_eval)); + cp->nb_solver_eval = 2 * integrator_substeps; + cp->solver_eval = calloc(cp->nb_solver_eval, sizeof(*cp->solver_eval)); if (!cp->solver_eval) CCTK_WARN(0, "Error allocating solvers"); for (int step = 0; step < 2; step++) for (int substep = 0; substep < integrator_substeps; substep++) { - size_t size = cp->grid_size[0] - cp->offset_left[0] - (ms->fd_stencil - 1) - - ms->gh->cctk_nghostzones[0] * (step * integrator_substeps + substep); - MG2DContext *s = solver_alloc(ms, level, size, a_x[1] - a_x[0]); + int solver_idx = step * integrator_substeps + substep; + MG2DContext *s; + size_t size[2]; + + for (int dir = 0; dir < 2; dir++) { + size[dir] = component_size[dir]; + if (!cp->bnd_intercomp[dir][1]) + size[dir] -= (ms->fd_stencil - 1) + ms->gh->cctk_nghostzones[2 * dir] * solver_idx; + } + s = solver_alloc(ms, level, level_size[0], component_start, size, a_x[1] - a_x[0]); if (!s) CCTK_WARN(0, "Error allocating the solver"); - cp->solver_eval[step * integrator_substeps + substep] = s; + cp->solver_eval[solver_idx] = s; + } + + /* carpet does only syncs the domain interior, which includes the + * buffer points, but excludes the outermost layer of ghost points, + * which are normally filled by prolongation from the coarser grid. + * since this solver sets those points after the solve for the first MoL + * step, we need to sync them manually */ + if (nb_procs > 1) { + int is_upper_x = component_start[0] + component_size[0] == level_size[0]; + int is_upper_z = component_start[1] + component_size[1] == level_size[1]; + int comp; + + cp->sendcounts = calloc(nb_procs, sizeof(*cp->sendcounts)); + cp->senddispl = calloc(nb_procs, sizeof(*cp->senddispl)); + cp->sendtypes = calloc(nb_procs, sizeof(*cp->sendtypes)); + cp->recvcounts = calloc(nb_procs, sizeof(*cp->recvcounts)); + cp->recvdispl = calloc(nb_procs, sizeof(*cp->recvdispl)); + cp->recvtypes = calloc(nb_procs, sizeof(*cp->recvtypes)); + if (!cp->sendcounts || !cp->senddispl || !cp->sendtypes || + !cp->recvcounts || !cp->recvdispl || !cp->recvtypes) + CCTK_WARN(0, "Error allocating sync arrays"); + cp->nb_components = nb_procs; + + MPI_Type_vector(gz, gx, ms->gh->cctk_lsh[0], MPI_DOUBLE, &cp->synctype); + MPI_Type_commit(&cp->synctype); + + /* this component touches the upper z boundary */ + if (is_upper_z) { + /* there is a component to the left of us */ + if (component_start[0] > 0) { + for (comp = 0; comp < nb_procs; comp++) { + size_t comp_start_x = extents[4 * comp + 0 + 0]; + size_t comp_start_z = extents[4 * comp + 0 + 1]; + size_t comp_size_x = extents[4 * comp + 2 + 0]; + size_t comp_size_z = extents[4 * comp + 2 + 1]; + + if (comp_start_z + comp_size_z == level_size[1] && + comp_start_x + comp_size_x == component_start[0]) + break; + } + mg_assert(comp < nb_procs); + cp->sendcounts[comp] = 1; + cp->recvcounts[comp] = 1; + cp->sendtypes[comp] = cp->synctype; + cp->recvtypes[comp] = cp->synctype; + cp->senddispl[comp] = ((gh->cctk_lsh[2] - gz) * gh->cctk_lsh[0] + gx) * sizeof(double); + cp->recvdispl[comp] = ((gh->cctk_lsh[2] - gz) * gh->cctk_lsh[0] + 0) * sizeof(double); + } + /* there is a component to the right of us */ + if (!is_upper_x) { + for (comp = 0; comp < nb_procs; comp++) { + size_t comp_start_x = extents[4 * comp + 0 + 0]; + size_t comp_start_z = extents[4 * comp + 0 + 1]; + size_t comp_size_x = extents[4 * comp + 2 + 0]; + size_t comp_size_z = extents[4 * comp + 2 + 1]; + + if (comp_start_z + comp_size_z == level_size[1] && + comp_start_x == component_start[0] + component_size[0]) + break; + } + mg_assert(comp < nb_procs); + cp->sendcounts[comp] = 1; + cp->recvcounts[comp] = 1; + cp->sendtypes[comp] = cp->synctype; + cp->recvtypes[comp] = cp->synctype; + cp->senddispl[comp] = ((gh->cctk_lsh[2] - gz) * gh->cctk_lsh[0] + + (gh->cctk_lsh[0] - gx * 2)) * sizeof(double); + cp->recvdispl[comp] = ((gh->cctk_lsh[2] - gz) * gh->cctk_lsh[0] + + (gh->cctk_lsh[0] - gx)) * sizeof(double); + } + } + /* this component touches the upper x boundary */ + if (is_upper_x) { + /* there is a component below us */ + if (component_start[1] > 0) { + for (comp = 0; comp < nb_procs; comp++) { + size_t comp_start_x = extents[4 * comp + 0 + 0]; + size_t comp_start_z = extents[4 * comp + 0 + 1]; + size_t comp_size_x = extents[4 * comp + 2 + 0]; + size_t comp_size_z = extents[4 * comp + 2 + 1]; + + if (comp_start_x + comp_size_x == level_size[0] && + comp_start_z + comp_size_z == component_start[1]) + break; + } + mg_assert(comp < nb_procs); + cp->sendcounts[comp] = 1; + cp->recvcounts[comp] = 1; + cp->sendtypes[comp] = cp->synctype; + cp->recvtypes[comp] = cp->synctype; + cp->senddispl[comp] = (gz * gh->cctk_lsh[0] + gh->cctk_lsh[0] - gx) * sizeof(double); + cp->recvdispl[comp] = ( + gh->cctk_lsh[0] - gx) * sizeof(double); + } + /* there is a component above us */ + if (!is_upper_z) { + for (comp = 0; comp < nb_procs; comp++) { + size_t comp_start_x = extents[4 * comp + 0 + 0]; + size_t comp_start_z = extents[4 * comp + 0 + 1]; + size_t comp_size_x = extents[4 * comp + 2 + 0]; + size_t comp_size_z = extents[4 * comp + 2 + 1]; + + if (comp_start_x + comp_size_x == level_size[0] && + comp_start_z == component_start[1] + component_size[1]) + break; + } + mg_assert(comp < nb_procs); + cp->sendcounts[comp] = 1; + cp->recvcounts[comp] = 1; + cp->sendtypes[comp] = cp->synctype; + cp->recvtypes[comp] = cp->synctype; + cp->senddispl[comp] = ((gh->cctk_lsh[2] - gz * 2) * gh->cctk_lsh[0] + + (gh->cctk_lsh[0] - gx)) * sizeof(double); + cp->recvdispl[comp] = ((gh->cctk_lsh[2] - gz) * gh->cctk_lsh[0] + + (gh->cctk_lsh[0] - gx)) * sizeof(double); + } + } + + for (int comp = 0; comp < nb_procs; comp++) { + mg_assert(!!cp->sendtypes[comp] == !!cp->recvtypes[comp]); + if (!cp->sendtypes[comp]) { + cp->sendtypes[comp] = MPI_BYTE; + cp->recvtypes[comp] = MPI_BYTE; + } } + } + } + cp->solver = solver_alloc(ms, level, level_size[0], component_start, + interior_size, a_x[1] - a_x[0]); + if (!cp->solver) + CCTK_WARN(0, "Error allocating the solver"); + + free(extents); + ms->nb_patches++; return cp; } @@ -436,13 +697,14 @@ static void fill_eq_coeffs(MSMGContext *ctx, CoordPatch *cp, MG2DContext *solver double *a_Xtx = CCTK_VarDataPtr(gh, 0, "ML_BSSN::Xt1"); double *a_Xtz = CCTK_VarDataPtr(gh, 0, "ML_BSSN::Xt3"); - for (int idx_z = 0; idx_z < solver->domain_size; idx_z++) - for (int idx_x = 0; idx_x < solver->domain_size; idx_x++) { + for (int idx_z = 0; idx_z < solver->local_size[1]; idx_z++) + for (int idx_x = 0; idx_x < solver->local_size[0]; idx_x++) { const int idx_src = CCTK_GFINDEX3D(gh, idx_x + cp->offset_left[0], cp->y_idx, idx_z + cp->offset_left[1]); const int idx_dc = idx_z * solver->diff_coeffs_stride + idx_x; const int idx_rhs = idx_z * solver->rhs_stride + idx_x; const double x = a_x[idx_src]; + const int on_axis = fabs(x) < 1e-13; const double gtxx = a_gtxx[idx_src]; const double gtyy = a_gtyy[idx_src]; @@ -508,10 +770,10 @@ static void fill_eq_coeffs(MSMGContext *ctx, CoordPatch *cp, MG2DContext *solver Xx = SQR(phi) * (Xtx + (phi_dx * gtu[0][0] + phi_dz * gtu[0][2]) / phi); Xz = SQR(phi) * (Xtz + (phi_dx * gtu[0][2] + phi_dz * gtu[2][2]) / phi); - solver->diff_coeffs[MG2D_DIFF_COEFF_20][idx_dc] = SQR(phi) * (gtu[0][0] + ((idx_x == 0) ? gtu[1][1] : 0.0)); + solver->diff_coeffs[MG2D_DIFF_COEFF_20][idx_dc] = SQR(phi) * (gtu[0][0] + (on_axis ? gtu[1][1] : 0.0)); solver->diff_coeffs[MG2D_DIFF_COEFF_02][idx_dc] = SQR(phi) * gtu[2][2]; solver->diff_coeffs[MG2D_DIFF_COEFF_11][idx_dc] = SQR(phi) * gtu[0][2] * 2; - solver->diff_coeffs[MG2D_DIFF_COEFF_10][idx_dc] = -Xx + (idx_x ? SQR(phi) * gtu[1][1] / x : 0.0); + solver->diff_coeffs[MG2D_DIFF_COEFF_10][idx_dc] = -Xx + (on_axis ? 0.0 : SQR(phi) * gtu[1][1] / x); solver->diff_coeffs[MG2D_DIFF_COEFF_01][idx_dc] = -Xz; solver->diff_coeffs[MG2D_DIFF_COEFF_00][idx_dc] = -k2; solver->rhs[idx_rhs] = k2; @@ -520,8 +782,8 @@ static void fill_eq_coeffs(MSMGContext *ctx, CoordPatch *cp, MG2DContext *solver static void solution_to_grid(CoordPatch *cp, MG2DContext *solver, double *dst) { - for (int j = 0; j < solver->domain_size; j++) - for (int i = 0; i < solver->domain_size; i++) { + for (int j = 0; j < solver->local_size[1]; j++) + for (int i = 0; i < solver->local_size[0]; i++) { const ptrdiff_t idx_dst = CPINDEX(cp, i + cp->offset_left[0], cp->y_idx, j + cp->offset_left[1]); const int idx_src = j * solver->u_stride + i; dst[idx_dst] = 1.0 + solver->u[idx_src]; @@ -529,34 +791,42 @@ static void solution_to_grid(CoordPatch *cp, MG2DContext *solver, double *dst) if (cp->level == 0) { /* on the coarsest level, extrapolate the outer boundary ghost points */ - for (int idx_z = cp->offset_left[1]; idx_z < cp->offset_left[1] + solver->domain_size; idx_z++) - for (int idx_x = cp->offset_left[1] + solver->domain_size; idx_x < cp->grid_size[0]; idx_x++) { - const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z); - dst[idx_dst] = 2.0 * dst[idx_dst - 1] - dst[idx_dst - 2]; - } + if (!cp->bnd_intercomp[0][1]) { + for (int idx_z = cp->offset_left[1]; idx_z < cp->offset_left[1] + solver->local_size[1]; idx_z++) + for (int idx_x = cp->offset_left[0] + solver->local_size[0]; idx_x < cp->grid_size[0]; idx_x++) { + const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z); + dst[idx_dst] = 2.0 * dst[idx_dst - 1] - dst[idx_dst - 2]; + } + } - for (int idx_x = cp->offset_left[0]; idx_x < cp->grid_size[0]; idx_x++) - for (int idx_z = cp->offset_left[1] + solver->domain_size; idx_z < cp->grid_size[2]; idx_z++) { - const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z); - const ptrdiff_t idx_src0 = CPINDEX(cp, idx_x, cp->y_idx, idx_z - 1); - const ptrdiff_t idx_src1 = CPINDEX(cp, idx_x, cp->y_idx, idx_z - 2); - dst[idx_dst] = 2.0 * dst[idx_src0] - dst[idx_src1]; - } + if (!cp->bnd_intercomp[1][1]) { + for (int idx_x = cp->offset_left[0]; idx_x < cp->grid_size[0]; idx_x++) + for (int idx_z = cp->offset_left[1] + solver->local_size[1]; idx_z < cp->grid_size[2]; idx_z++) { + const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z); + const ptrdiff_t idx_src0 = CPINDEX(cp, idx_x, cp->y_idx, idx_z - 1); + const ptrdiff_t idx_src1 = CPINDEX(cp, idx_x, cp->y_idx, idx_z - 2); + dst[idx_dst] = 2.0 * dst[idx_src0] - dst[idx_src1]; + } + } } /* fill in the axial symmetry ghostpoints by mirroring */ - for (int idx_z = cp->offset_left[1]; idx_z < cp->grid_size[2]; idx_z++) - for (int idx_x = 0; idx_x < cp->offset_left[0]; idx_x++) { - const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z); - const ptrdiff_t idx_src = CPINDEX(cp, 2 * cp->offset_left[0] - idx_x, cp->y_idx, idx_z); - dst[idx_dst] = dst[idx_src]; - } - for (int idx_z = 0; idx_z < cp->offset_left[1]; idx_z++) - for (int idx_x = 0; idx_x < cp->grid_size[0]; idx_x++) { - const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z); - const ptrdiff_t idx_src = CPINDEX(cp, idx_x, cp->y_idx, 2 * cp->offset_left[1] - idx_z); - dst[idx_dst] = dst[idx_src]; - } + if (!cp->bnd_intercomp[0][0]) { + for (int idx_z = cp->offset_left[1]; idx_z < cp->grid_size[2]; idx_z++) + for (int idx_x = 0; idx_x < cp->offset_left[0]; idx_x++) { + const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z); + const ptrdiff_t idx_src = CPINDEX(cp, 2 * cp->offset_left[0] - idx_x, cp->y_idx, idx_z); + dst[idx_dst] = dst[idx_src]; + } + } + if (!cp->bnd_intercomp[1][0]) { + for (int idx_z = 0; idx_z < cp->offset_left[1]; idx_z++) + for (int idx_x = 0; idx_x < cp->grid_size[0]; idx_x++) { + const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z); + const ptrdiff_t idx_src = CPINDEX(cp, idx_x, cp->y_idx, 2 * cp->offset_left[1] - idx_z); + dst[idx_dst] = dst[idx_src]; + } + } } void msa_mg_eval(CCTK_ARGUMENTS) @@ -631,13 +901,13 @@ void msa_mg_eval(CCTK_ARGUMENTS) if (solver_idx) { MG2DContext *solver_src = cp->solver_eval[solver_idx - 1]; - for (int line = 0; line < solver->domain_size; line++) { + for (int line = 0; line < solver->local_size[1] ; line++) { memcpy(solver->u + line * solver->u_stride, solver_src->u + line * solver_src->u_stride, - sizeof(*solver->u) * solver->domain_size); + sizeof(*solver->u) * solver->local_size[0]); } } else { - for (int line = 0; line < solver->domain_size; line++) - for (int i = 0; i < solver->domain_size; i++) + for (int line = 0; line < solver->local_size[1]; line++) + for (int i = 0; i < solver->local_size[0]; i++) solver->u[line * solver->u_stride + i] = lapse_mg[(cp->offset_left[1] + line) * cctk_lsh[0] + cp->offset_left[0] + i] - 1.0; } @@ -648,40 +918,48 @@ void msa_mg_eval(CCTK_ARGUMENTS) LOGDEBUG( "extrapolating BCs from t1=%g (%g) and t0=%g (%g)\n", lapse_prev1_time[reflevel], fact1, lapse_prev0_time[reflevel], fact0); start = gettime(); - for (ptrdiff_t idx_z = cp->offset_left[1] + solver->domain_size - 1; idx_z < cctkGH->cctk_lsh[2]; idx_z++) - for (ptrdiff_t idx_x = 0; idx_x < cctkGH->cctk_lsh[0]; idx_x++) { - const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, idx_x, cp->y_idx, idx_z); - lapse_mg_eval[idx] = fact0 * lapse_prev0[idx] + fact1 * lapse_prev1[idx]; - } - for (ptrdiff_t idx_z = 0; idx_z < cctkGH->cctk_lsh[2]; idx_z++) - for (ptrdiff_t idx_x = cp->offset_left[0] + solver->domain_size - 1; idx_x < cctkGH->cctk_lsh[0]; idx_x++) { - const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, idx_x, cp->y_idx, idx_z); - lapse_mg_eval[idx] = fact0 * lapse_prev0[idx] + fact1 * lapse_prev1[idx]; - } + if (!cp->bnd_intercomp[1][1]) { + for (ptrdiff_t idx_z = cp->offset_left[1] + solver->local_size[1] - 1; idx_z < cctkGH->cctk_lsh[2]; idx_z++) + for (ptrdiff_t idx_x = 0; idx_x < cctkGH->cctk_lsh[0]; idx_x++) { + const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, idx_x, cp->y_idx, idx_z); + lapse_mg_eval[idx] = fact0 * lapse_prev0[idx] + fact1 * lapse_prev1[idx]; + } + } + if (!cp->bnd_intercomp[0][1]) { + for (ptrdiff_t idx_z = 0; idx_z < cctkGH->cctk_lsh[2]; idx_z++) + for (ptrdiff_t idx_x = cp->offset_left[0] + solver->local_size[0] - 1; idx_x < cctkGH->cctk_lsh[0]; idx_x++) { + const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, idx_x, cp->y_idx, idx_z); + lapse_mg_eval[idx] = fact0 * lapse_prev0[idx] + fact1 * lapse_prev1[idx]; + } + } - for (int j = 0; j < solver->fd_stencil; j++) { - MG2DBoundary *bnd = solver->boundaries[MG2D_BOUNDARY_1U]; - double *dst = bnd->val + j * bnd->val_stride; - for (ptrdiff_t i = -j; i < (ptrdiff_t)solver->domain_size + j; i++) { - const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, i + cp->offset_left[0], cp->y_idx, cp->offset_left[1] + solver->domain_size - 1 + j); - dst[i] = lapse_mg_eval[idx] - 1.0; - } - if (j == 0) { - dst = solver->u + solver->u_stride * (solver->domain_size - 1); - memcpy(dst, bnd->val, sizeof(*dst) * solver->domain_size); + if (!cp->bnd_intercomp[1][1]) { + for (int j = 0; j < solver->fd_stencil; j++) { + MG2DBoundary *bnd = solver->boundaries[MG2D_BOUNDARY_1U]; + double *dst = bnd->val + j * bnd->val_stride; + for (ptrdiff_t i = -j; i < (ptrdiff_t)solver->local_size[0] + j; i++) { + const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, i + cp->offset_left[0], cp->y_idx, cp->offset_left[1] + solver->local_size[1] - 1 + j); + dst[i] = lapse_mg_eval[idx] - 1.0; + } + if (j == 0) { + dst = solver->u + solver->u_stride * (solver->local_size[1] - 1); + memcpy(dst, bnd->val, sizeof(*dst) * solver->local_size[0]); + } } } - for (int j = 0; j < solver->fd_stencil; j++) { - MG2DBoundary *bnd = solver->boundaries[MG2D_BOUNDARY_0U]; - double *dst = bnd->val + j * bnd->val_stride; - for (ptrdiff_t i = -j; i < (ptrdiff_t)solver->domain_size + j; i++) { - const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, cp->offset_left[0] + solver->domain_size - 1 + j, cp->y_idx, cp->offset_left[1] + i); - dst[i] = lapse_mg_eval[idx] - 1.0; - } - if (j == 0) { - dst = solver->u + solver->domain_size - 1; - for (ptrdiff_t i = 0; i < solver->domain_size; i++) - dst[i * solver->u_stride] = bnd->val[i]; + if (!cp->bnd_intercomp[0][1]) { + for (int j = 0; j < solver->fd_stencil; j++) { + MG2DBoundary *bnd = solver->boundaries[MG2D_BOUNDARY_0U]; + double *dst = bnd->val + j * bnd->val_stride; + for (ptrdiff_t i = -j; i < (ptrdiff_t)solver->local_size[1] + j; i++) { + const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, cp->offset_left[0] + solver->local_size[0] - 1 + j, cp->y_idx, cp->offset_left[1] + i); + dst[i] = lapse_mg_eval[idx] - 1.0; + } + if (j == 0) { + dst = solver->u + solver->local_size[0] - 1; + for (ptrdiff_t i = 0; i < solver->local_size[1]; i++) + dst[i * solver->u_stride] = bnd->val[i]; + } } } ms->time_fine_boundaries += gettime() - start; @@ -698,6 +976,16 @@ void msa_mg_eval(CCTK_ARGUMENTS) solution_to_grid(cp, solver, lapse_mg_eval); ms->time_fine_export = gettime() - start; + if (cp->nb_components && solver_idx == 0) { + start = gettime(); + + MPI_Alltoallw(lapse_mg_eval, cp->sendcounts, cp->senddispl, cp->sendtypes, + lapse_mg_eval, cp->recvcounts, cp->recvdispl, cp->recvtypes, + MPI_COMM_WORLD); + + ms->time_mpi_sync = gettime() - start; + } + ms->time_fine_solve += gettime() - fine_solve_start; ms->count_fine_solve++; } @@ -772,27 +1060,30 @@ void msa_mg_solve(CCTK_ARGUMENTS) LOGDEBUG( "extrapolating BCs from t1=%g (%g) and t0=%g (%g)\n", lapse_prev1_time[reflevel], fact1, lapse_prev0_time[reflevel], fact0); - for (int j = 0; j < cp->solver->fd_stencil; j++) { - for (ptrdiff_t i = -j; i < (ptrdiff_t)cp->solver->domain_size + j; i++) { - const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, i + cp->offset_left[0], cp->y_idx, cp->offset_left[1] + cp->solver->domain_size - 1 + j); - lapse_mg[idx] = fact0 * lapse_prev0[idx] + fact1 * lapse_prev1[idx]; + if (!cp->bnd_intercomp[1][1]) { + for (int j = 0; j < cp->solver->fd_stencil; j++) { + for (ptrdiff_t i = -j; i < (ptrdiff_t)cp->solver->local_size[0] + j; i++) { + const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, i + cp->offset_left[0], cp->y_idx, cp->offset_left[1] + cp->solver->local_size[1] - 1 + j); + lapse_mg[idx] = fact0 * lapse_prev0[idx] + fact1 * lapse_prev1[idx]; + } } } - for (int j = 0; j < cp->solver->fd_stencil; j++) { - for (ptrdiff_t i = -j; i < (ptrdiff_t)cp->solver->domain_size + j; i++) { - const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, cp->offset_left[0] + cp->solver->domain_size - 1 + j, cp->y_idx, cp->offset_left[1] + i); - lapse_mg[idx] = fact0 * lapse_prev0[idx] + fact1 * lapse_prev1[idx]; + if (!cp->bnd_intercomp[0][1]) { + for (int j = 0; j < cp->solver->fd_stencil; j++) { + for (ptrdiff_t i = -j; i < (ptrdiff_t)cp->solver->local_size[1] + j; i++) { + const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, cp->offset_left[0] + cp->solver->local_size[0] - 1 + j, cp->y_idx, cp->offset_left[1] + i); + lapse_mg[idx] = fact0 * lapse_prev0[idx] + fact1 * lapse_prev1[idx]; + } } } } else { /* use the solution from the coarser level as the initial guess */ - CoordPatch *cp1 = get_coord_patch(ms, reflevel - 1); + //CoordPatch *cp1 = get_coord_patch(ms, reflevel - 1); - ret = mg2d_init_guess(cp->solver, cp1->solver->u, cp1->solver->u_stride, - (size_t [2]){ cp1->solver->domain_size, cp1->solver->domain_size }, - cp1->solver->step); - if (ret < 0) - CCTK_WARN(0, "Error setting the initial guess"); + //ret = mg2d_init_guess(cp->solver, cp1->solver->u, cp1->solver->u_stride, + // cp1->solver->local_size, cp1->solver->step); + //if (ret < 0) + // CCTK_WARN(0, "Error setting the initial guess"); } /* if the condition above was false, then lapse_mg should be filled by @@ -801,20 +1092,24 @@ void msa_mg_solve(CCTK_ARGUMENTS) */ /* fill the solver boundary conditions */ - for (int j = 0; j < cp->solver->fd_stencil; j++) { - MG2DBoundary *bnd = cp->solver->boundaries[MG2D_BOUNDARY_1U]; - double *dst = bnd->val + j * bnd->val_stride; - for (ptrdiff_t i = -j; i < (ptrdiff_t)cp->solver->domain_size + j; i++) { - const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, ABS(i) + cp->offset_left[0], cp->y_idx, cp->offset_left[1] + cp->solver->domain_size - 1 + j); - dst[i] = lapse_mg[idx] - 1.0; + if (!cp->bnd_intercomp[1][1]) { + for (int j = 0; j < cp->solver->fd_stencil; j++) { + MG2DBoundary *bnd = cp->solver->boundaries[MG2D_BOUNDARY_1U]; + double *dst = bnd->val + j * bnd->val_stride; + for (ptrdiff_t i = -j; i < (ptrdiff_t)cp->solver->local_size[0] + j; i++) { + const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, ABS(i) + cp->offset_left[0], cp->y_idx, cp->offset_left[1] + cp->solver->local_size[1] - 1 + j); + dst[i] = lapse_mg[idx] - 1.0; + } } } - for (int j = 0; j < cp->solver->fd_stencil; j++) { - MG2DBoundary *bnd = cp->solver->boundaries[MG2D_BOUNDARY_0U]; - double *dst = bnd->val + j * bnd->val_stride; - for (ptrdiff_t i = -j; i < (ptrdiff_t)cp->solver->domain_size + j; i++) { - const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, cp->offset_left[1] + cp->solver->domain_size - 1 + j, cp->y_idx, cp->offset_left[1] + ABS(i)); - dst[i] = lapse_mg[idx] - 1.0; + if (!cp->bnd_intercomp[0][1]) { + for (int j = 0; j < cp->solver->fd_stencil; j++) { + MG2DBoundary *bnd = cp->solver->boundaries[MG2D_BOUNDARY_0U]; + double *dst = bnd->val + j * bnd->val_stride; + for (ptrdiff_t i = -j; i < (ptrdiff_t)cp->solver->local_size[1] + j; i++) { + const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, cp->offset_left[1] + cp->solver->local_size[0] - 1 + j, cp->y_idx, cp->offset_left[1] + ABS(i)); + dst[i] = lapse_mg[idx] - 1.0; + } } } } @@ -882,6 +1177,10 @@ void msa_mg_init(CCTK_ARGUMENTS) int ret; int nb_levels_type; int nb_levels = *(int*)CCTK_ParameterGet("num_levels_1", "CarpetRegrid2", &nb_levels_type); + int use_tapered_grids = *(int*)CCTK_ParameterGet("use_tapered_grids", "Carpet", &use_tapered_grids); + + if (!use_tapered_grids) + CCTK_WARN(0, "MaximalSlicingAxiMG only works with use_tapered_grids=1"); if (!ms) { ret = context_init(cctkGH, fd_stencil, maxiter, exact_size, nb_cycles, @@ -940,17 +1239,21 @@ void maximal_slicing_axi_mg_modify_diss(CCTK_ARGUMENTS) cp = get_coord_patch(ms, reflevel); - for (int idx_z = 0; idx_z < cp->grid_size[2]; idx_z++) - for (int idx_x = cp->offset_left[0] + cp->solver->domain_size - (ms->fd_stencil + 1); idx_x < cp->grid_size[0]; idx_x++) { - const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z); - epsdis[idx_dst] = 0.0; - } + if (!cp->bnd_intercomp[0][1]) { + for (int idx_z = 0; idx_z < cp->grid_size[2]; idx_z++) + for (int idx_x = cp->offset_left[0] + cp->solver->local_size[0] - (ms->fd_stencil + 1); idx_x < cp->grid_size[0]; idx_x++) { + const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z); + epsdis[idx_dst] = 0.0; + } + } - for (int idx_x = 0; idx_x < cp->grid_size[0]; idx_x++) - for (int idx_z = cp->offset_left[0] + cp->solver->domain_size - (ms->fd_stencil + 1); idx_z < cp->grid_size[2]; idx_z++) { - const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z); - epsdis[idx_dst] = 0.0; - } + if (!cp->bnd_intercomp[1][1]) { + for (int idx_x = 0; idx_x < cp->grid_size[0]; idx_x++) + for (int idx_z = cp->offset_left[0] + cp->solver->local_size[1] - (ms->fd_stencil + 1); idx_z < cp->grid_size[2]; idx_z++) { + const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z); + epsdis[idx_dst] = 0.0; + } + } } void msa_mg_terminate_print_stats(CCTK_ARGUMENTS) -- cgit v1.2.3