#include #include #include #include #include #include #include #include #include #include #include #include "cctk.h" #include "cctk_Arguments.h" #include "cctk_Parameters.h" #include "cctk_Timers.h" #include "util_Table.h" #include #define SQR(x) ((x) * (x)) #define ABS(x) ((x >= 0) ? (x) : -(x)) #define MIN(x, y) ((x) > (y) ? (y) : (x)) #define ARRAY_ELEMS(x) (sizeof(x) / sizeof(*x)) #define CPINDEX(cp, i, j, k) ((k * cp->grid_size[1] + j) * cp->grid_size[0] + i) #if 0 #define LOGDEBUG(...) fprintf(stderr, __VA_ARGS__) #else #define LOGDEBUG #endif #define mg_assert(x) \ do { \ if (!(x)) { \ fprintf(stderr, "Assertion " #x " failed\n"); \ abort(); \ } \ } while (0) static const struct { const char *str; enum MG2DLogLevel level; } log_levels[] = { { "fatal", MG2D_LOG_FATAL }, { "error", MG2D_LOG_ERROR }, { "warning", MG2D_LOG_WARNING }, { "info", MG2D_LOG_INFO }, { "verbose", MG2D_LOG_VERBOSE }, { "debug", MG2D_LOG_DEBUG }, }; #include static inline int64_t gettime(void) { struct timeval tv; gettimeofday(&tv, NULL); return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec; } /* precomputed values for a given refined grid */ typedef struct CoordPatch { int level; ptrdiff_t grid_size[3]; MG2DContext *solver; MPI_Comm solver_comm; int cur_step; MG2DContext **solver_eval; MPI_Comm *solver_eval_comm; int nb_solver_eval; ptrdiff_t y_idx; /* number of x/z grid points by which the elliptic solver domain is offset * from the cactus grid */ ptrdiff_t offset_left[2]; int bnd_intercomp[2][2]; /* MPI sync for the outer ghostzones */ MPI_Datatype synctype; int nb_components; int *sendcounts; int *senddispl; MPI_Datatype *sendtypes; int *recvcounts; int *recvdispl; MPI_Datatype *recvtypes; } CoordPatch; typedef struct MSMGContext { cGH *gh; int fd_stencil; int maxiter; int max_exact_size; int nb_cycles; int nb_relax_post; int nb_relax_pre; double tol_residual; double tol_residual_base; double cfl_factor; double base_dt; int nb_levels; CoordPatch *patches; int nb_patches; int log_level; /* timings */ int64_t time_solve; int64_t count_solve; int64_t time_solve_mg; int64_t count_solve_mg; int64_t time_solve_fill_coeffs; int64_t time_solve_boundaries; int64_t time_solve_mg2d; int64_t time_solve_export; int64_t time_solve_history; int64_t time_eval; int64_t count_eval; int64_t time_extrapolate; int64_t count_extrapolate; int64_t time_fine_solve; int64_t count_fine_solve; int64_t time_fine_fill_coeffs; int64_t time_fine_boundaries; int64_t time_fine_mg2d; int64_t time_fine_export; int64_t time_init_guess_eval; int64_t time_mpi_sync; } MSMGContext; static MSMGContext *ms; static int ctz(int a) { int ret = 0; if (!a) return INT_MAX; while (!(a & 1)) { a >>= 1; ret++; } return ret; } static void coord_patch_free(CoordPatch *cp) { mg2d_solver_free(&cp->solver); if (cp->solver_comm != MPI_COMM_NULL) MPI_Comm_free(&cp->solver_comm); for (int i = 0; i < cp->nb_solver_eval; i++) { mg2d_solver_free(&cp->solver_eval[i]); if (cp->solver_eval_comm[i] != MPI_COMM_NULL) MPI_Comm_free(&cp->solver_eval_comm[i]); } free(cp->solver_eval); cp->solver_eval = NULL; cp->nb_solver_eval = 0; if (cp->synctype) MPI_Type_free(&cp->synctype); free(cp->sendcounts); free(cp->senddispl); free(cp->sendtypes); free(cp->recvcounts); free(cp->recvdispl); free(cp->recvtypes); } static void log_callback(const MG2DContext *ctx, int level, const char *fmt, va_list vl) { MSMGContext *ms = ctx->opaque; int target_level = ms->log_level; uint8_t buf[1024]; int ret; if (level > target_level) return; ret = snprintf(buf, sizeof(buf), "[%d] t=%g ", ctz(ms->gh->cctk_levfac[0]), ms->gh->cctk_time); if (ret >= sizeof(buf)) return; vsnprintf(buf + ret, sizeof(buf) - ret, fmt, vl); fputs(buf, stderr); } static MG2DContext *solver_alloc(MSMGContext *ms, int level, size_t component_start[2], size_t component_size[2], double step, MPI_Comm comm) { const char *omp_threads = getenv("OMP_NUM_THREADS"); MG2DContext *solver; mg_assert(comm != MPI_COMM_NULL || (component_start[0] == 0 && component_start[1] == 0 && component_size[0] == component_size[1])); if (comm != MPI_COMM_NULL) solver = mg2d_solver_alloc_mpi(comm, component_start, component_size); else solver = mg2d_solver_alloc(component_size[0]); if (!solver) return NULL; solver->step[0] = step; solver->step[1] = solver->step[0]; solver->fd_stencil = ms->fd_stencil; solver->boundaries[MG2D_BOUNDARY_0L]->type = MG2D_BC_TYPE_REFLECT; solver->boundaries[MG2D_BOUNDARY_1L]->type = MG2D_BC_TYPE_REFLECT; solver->boundaries[MG2D_BOUNDARY_0U]->type = level ? MG2D_BC_TYPE_FIXVAL : MG2D_BC_TYPE_FALLOFF; solver->boundaries[MG2D_BOUNDARY_1U]->type = level ? MG2D_BC_TYPE_FIXVAL : MG2D_BC_TYPE_FALLOFF; solver->maxiter = ms->maxiter; if (ms->tol_residual_base > 0.0) solver->tol = ms->tol_residual_base / SQR(solver->step[0]); else solver->tol = ms->tol_residual; solver->nb_cycles = ms->nb_cycles; solver->nb_relax_post = ms->nb_relax_post; solver->nb_relax_pre = ms->nb_relax_pre; solver->cfl_factor = ms->cfl_factor; solver->max_exact_size = ms->max_exact_size; solver->opaque = ms; solver->log_callback = log_callback; if (omp_threads) solver->nb_threads = strtol(omp_threads, NULL, 0); if (solver->nb_threads <= 0) solver->nb_threads = 1; /* initialize boundary values to zero, * non-zero values on the outer boundaries of refined levels are filled in elsewhere */ for (int i = 0; i < 4; i++) { int ci = mg2d_bnd_coord_idx(i); for (size_t j = 0; j < solver->fd_stencil; j++) { memset(solver->boundaries[i]->val + j * solver->boundaries[i]->val_stride - j, 0, sizeof(*solver->boundaries[i]->val) * (component_size[!ci] + 2 * j)); } } return solver; } /** * Compute the refinement level geometry. * * The level is assumed to be a single square patch that is composed of * nProcs components. The lower x and z boundaries are assumed to be reflection. * For our purposes, both the physical outer boundaries and the refinement * boundaries are equivalent, while the intercomponent boundaries are treated * differently. Since Cactus only allows distinguishing between "physical" and * "all other" boundaries, we need to compute the extents manually. * * @param level_size will contain the number of points in x/z directions on the * complete refinement level, including all the ghost zones up to the outer * level boundary. * * @param component_start indices of the component origin in the refinement level * * @param component_size number of points owned by this component; when the * component is on the outer level boundary, this includes all the ghost zones * up to the outer boundary. */ static void get_extents(size_t **pextents, int *level_size, cGH *gh) { int nb_procs = CCTK_nProcs(gh); int local_proc = CCTK_MyProc(gh); size_t *extents; extents = calloc(nb_procs, 4 * sizeof(*extents)); if (!extents) CCTK_WARN(0, "Error allocating the extents"); for (int dir = 0; dir < 2; dir++) { extents[4 * local_proc + 0 + dir] = gh->cctk_lbnd[2 * dir]; extents[4 * local_proc + 2 + dir] = gh->cctk_lsh[2 * dir] - gh->cctk_nghostzones[2 * dir]; } if (nb_procs > 1) MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, extents, 4 * sizeof(*extents), MPI_BYTE, MPI_COMM_WORLD); level_size[0] = 0; level_size[1] = 0; for (int dir = 0; dir < 2; dir++) { for (int comp = 0; comp < nb_procs; comp++) { size_t start = extents[4 * comp + 0 + dir]; size_t size = extents[4 * comp + 2 + dir]; if (start + size > level_size[dir]) level_size[dir] = start + size; } // if the upper boundary is an inter-component boundary, substract the ghost zones for (int comp = 0; comp < nb_procs; comp++) { size_t start = extents[4 * comp + 0 + dir]; size_t size = extents[4 * comp + 2 + dir]; if (start + size < level_size[dir]) extents[4 * comp + 2 + dir] -= gh->cctk_nghostzones[2 * dir]; } } *pextents = extents; } static CoordPatch *get_coord_patch(MSMGContext *ms, int level) { cGH *gh = ms->gh; const int nb_procs = CCTK_nProcs(gh); const int local_proc = CCTK_MyProc(gh); const size_t grid_size = gh->cctk_lsh[2] * gh->cctk_lsh[1] * gh->cctk_lsh[0]; const double *a_x = CCTK_VarDataPtr(gh, 0, "grid::x"); const double *a_y = CCTK_VarDataPtr(gh, 0, "grid::y"); const double *a_z = CCTK_VarDataPtr(gh, 0, "grid::z"); const int gx = gh->cctk_nghostzones[0]; const int gz = gh->cctk_nghostzones[2]; CoordPatch *cp; size_t *extents; size_t interior_size[2], component_start[2], component_size[2]; int *solver_ranks; int nb_solver_ranks; int level_size[2]; int bnd_intercomp[2][2]; int integrator_substeps = MoLNumIntegratorSubsteps(); int i, ret; for (int i = 0; i < ms->nb_patches; i++) { cp = &ms->patches[i]; if (cp->level == level) return cp; } /* create a new patch */ ms->patches = realloc(ms->patches, sizeof(*ms->patches) * (ms->nb_patches + 1)); cp = &ms->patches[ms->nb_patches]; memset(cp, 0, sizeof(*cp)); cp->level = ctz(ms->gh->cctk_levfac[0]); cp->grid_size[0] = gh->cctk_lsh[0]; cp->grid_size[1] = gh->cctk_lsh[1]; cp->grid_size[2] = gh->cctk_lsh[2]; for (i = 0; i < gh->cctk_lsh[1]; i++) if (fabs(a_y[CCTK_GFINDEX3D(gh, 0, i, 0)]) < 1e-8) { cp->y_idx = i; break; } if (i == gh->cctk_lsh[1]) CCTK_WARN(0, "The grid does not include y==0"); get_extents(&extents, level_size, gh); component_start[0] = extents[4 * local_proc + 0 + 0]; component_start[1] = extents[4 * local_proc + 0 + 1]; component_size[0] = extents[4 * local_proc + 2 + 0]; component_size[1] = extents[4 * local_proc + 2 + 1]; for (int dir = 0; dir < 2; dir++) { cp->bnd_intercomp[dir][0] = component_start[dir] > 0; cp->bnd_intercomp[dir][1] = (component_start[dir] + component_size[dir]) < level_size[dir]; cp->offset_left[dir] = gh->cctk_nghostzones[2 * dir]; } if (level_size[0] != level_size[1]) CCTK_WARN(0, "The refinement level is non-square, only square levels are supported"); solver_ranks = calloc(nb_procs, sizeof(*solver_ranks)); if (!solver_ranks) CCTK_WARN(0, "Error allocating solver ranks"); /* on the finest level we allocate a solver for each of the substeps * between the coarser-grid timelevels */ if (level == ms->nb_levels - 1) { cp->nb_solver_eval = 2 * integrator_substeps; cp->solver_eval = calloc(cp->nb_solver_eval, sizeof(*cp->solver_eval)); cp->solver_eval_comm = calloc(cp->nb_solver_eval, sizeof(*cp->solver_eval_comm)); if (!cp->solver_eval || !cp->solver_eval_comm) CCTK_WARN(0, "Error allocating solvers"); for (int step = 0; step < 2; step++) for (int substep = 0; substep < integrator_substeps; substep++) { int solver_idx = step * integrator_substeps + substep; MG2DContext *s; size_t size[2]; nb_solver_ranks = 0; for (int proc = 0; proc < nb_procs; proc++) { size_t size_tmp[2]; for (int dir = 0; dir < 2; dir++) { size_t offset = (ms->fd_stencil - 1) + ms->gh->cctk_nghostzones[2 * dir] * solver_idx; size_t start = extents[4 * proc + 0 + dir]; size_t end = MIN(start + extents[4 * proc + 2 + dir], level_size[dir] - offset); size_tmp[dir] = end > start ? end - start : 0; } if (size_tmp[0] > 0 && size_tmp[1] > 0) solver_ranks[nb_solver_ranks++] = proc; if (proc == local_proc) { size[0] = size_tmp[0]; size[1] = size_tmp[1]; } } if (nb_solver_ranks > 1) { MPI_Group grp_world, grp_solver; MPI_Comm_group(MPI_COMM_WORLD, &grp_world); MPI_Group_incl(grp_world, nb_solver_ranks, solver_ranks, &grp_solver); MPI_Comm_create(MPI_COMM_WORLD, grp_solver, &cp->solver_eval_comm[solver_idx]); MPI_Group_free(&grp_solver); MPI_Group_free(&grp_world); } else cp->solver_eval_comm[solver_idx] = MPI_COMM_NULL; if (size[0] > 0 && size[1] > 0) { s = solver_alloc(ms, level, component_start, size, a_x[1] - a_x[0], cp->solver_eval_comm[solver_idx]); if (!s) CCTK_WARN(0, "Error allocating the solver"); cp->solver_eval[solver_idx] = s; } } /* carpet does only syncs the domain interior, which includes the * buffer points, but excludes the outermost layer of ghost points, * which are normally filled by prolongation from the coarser grid. * since this solver sets those points after the solve for the first MoL * step, we need to sync them manually */ if (nb_procs > 1) { int is_upper_x = component_start[0] + component_size[0] == level_size[0]; int is_upper_z = component_start[1] + component_size[1] == level_size[1]; int comp; cp->sendcounts = calloc(nb_procs, sizeof(*cp->sendcounts)); cp->senddispl = calloc(nb_procs, sizeof(*cp->senddispl)); cp->sendtypes = calloc(nb_procs, sizeof(*cp->sendtypes)); cp->recvcounts = calloc(nb_procs, sizeof(*cp->recvcounts)); cp->recvdispl = calloc(nb_procs, sizeof(*cp->recvdispl)); cp->recvtypes = calloc(nb_procs, sizeof(*cp->recvtypes)); if (!cp->sendcounts || !cp->senddispl || !cp->sendtypes || !cp->recvcounts || !cp->recvdispl || !cp->recvtypes) CCTK_WARN(0, "Error allocating sync arrays"); cp->nb_components = nb_procs; MPI_Type_vector(gz, gx, ms->gh->cctk_lsh[0], MPI_DOUBLE, &cp->synctype); MPI_Type_commit(&cp->synctype); /* this component touches the upper z boundary */ if (is_upper_z) { /* there is a component to the left of us */ if (component_start[0] > 0) { for (comp = 0; comp < nb_procs; comp++) { size_t comp_start_x = extents[4 * comp + 0 + 0]; size_t comp_start_z = extents[4 * comp + 0 + 1]; size_t comp_size_x = extents[4 * comp + 2 + 0]; size_t comp_size_z = extents[4 * comp + 2 + 1]; if (comp_start_z + comp_size_z == level_size[1] && comp_start_x + comp_size_x == component_start[0]) break; } mg_assert(comp < nb_procs); cp->sendcounts[comp] = 1; cp->recvcounts[comp] = 1; cp->sendtypes[comp] = cp->synctype; cp->recvtypes[comp] = cp->synctype; cp->senddispl[comp] = ((gh->cctk_lsh[2] - gz) * gh->cctk_lsh[0] + gx) * sizeof(double); cp->recvdispl[comp] = ((gh->cctk_lsh[2] - gz) * gh->cctk_lsh[0] + 0) * sizeof(double); } /* there is a component to the right of us */ if (!is_upper_x) { for (comp = 0; comp < nb_procs; comp++) { size_t comp_start_x = extents[4 * comp + 0 + 0]; size_t comp_start_z = extents[4 * comp + 0 + 1]; size_t comp_size_x = extents[4 * comp + 2 + 0]; size_t comp_size_z = extents[4 * comp + 2 + 1]; if (comp_start_z + comp_size_z == level_size[1] && comp_start_x == component_start[0] + component_size[0]) break; } mg_assert(comp < nb_procs); cp->sendcounts[comp] = 1; cp->recvcounts[comp] = 1; cp->sendtypes[comp] = cp->synctype; cp->recvtypes[comp] = cp->synctype; cp->senddispl[comp] = ((gh->cctk_lsh[2] - gz) * gh->cctk_lsh[0] + (gh->cctk_lsh[0] - gx * 2)) * sizeof(double); cp->recvdispl[comp] = ((gh->cctk_lsh[2] - gz) * gh->cctk_lsh[0] + (gh->cctk_lsh[0] - gx)) * sizeof(double); } } /* this component touches the upper x boundary */ if (is_upper_x) { /* there is a component below us */ if (component_start[1] > 0) { for (comp = 0; comp < nb_procs; comp++) { size_t comp_start_x = extents[4 * comp + 0 + 0]; size_t comp_start_z = extents[4 * comp + 0 + 1]; size_t comp_size_x = extents[4 * comp + 2 + 0]; size_t comp_size_z = extents[4 * comp + 2 + 1]; if (comp_start_x + comp_size_x == level_size[0] && comp_start_z + comp_size_z == component_start[1]) break; } mg_assert(comp < nb_procs); cp->sendcounts[comp] = 1; cp->recvcounts[comp] = 1; cp->sendtypes[comp] = cp->synctype; cp->recvtypes[comp] = cp->synctype; cp->senddispl[comp] = (gz * gh->cctk_lsh[0] + gh->cctk_lsh[0] - gx) * sizeof(double); cp->recvdispl[comp] = ( + gh->cctk_lsh[0] - gx) * sizeof(double); } /* there is a component above us */ if (!is_upper_z) { for (comp = 0; comp < nb_procs; comp++) { size_t comp_start_x = extents[4 * comp + 0 + 0]; size_t comp_start_z = extents[4 * comp + 0 + 1]; size_t comp_size_x = extents[4 * comp + 2 + 0]; size_t comp_size_z = extents[4 * comp + 2 + 1]; if (comp_start_x + comp_size_x == level_size[0] && comp_start_z == component_start[1] + component_size[1]) break; } mg_assert(comp < nb_procs); cp->sendcounts[comp] = 1; cp->recvcounts[comp] = 1; cp->sendtypes[comp] = cp->synctype; cp->recvtypes[comp] = cp->synctype; cp->senddispl[comp] = ((gh->cctk_lsh[2] - gz * 2) * gh->cctk_lsh[0] + (gh->cctk_lsh[0] - gx)) * sizeof(double); cp->recvdispl[comp] = ((gh->cctk_lsh[2] - gz) * gh->cctk_lsh[0] + (gh->cctk_lsh[0] - gx)) * sizeof(double); } } for (int comp = 0; comp < nb_procs; comp++) { mg_assert(!!cp->sendtypes[comp] == !!cp->recvtypes[comp]); if (!cp->sendtypes[comp]) { cp->sendtypes[comp] = MPI_BYTE; cp->recvtypes[comp] = MPI_BYTE; } } } } nb_solver_ranks = 0; for (int proc = 0; proc < nb_procs; proc++) { size_t size[2]; for (int dir = 0; dir < 2; dir++) { size_t start = extents[4 * proc + 0 + dir]; size_t end = start + extents[4 * proc + 2 + dir]; ptrdiff_t offset_right; offset_right = gh->cctk_nghostzones[2 * dir]; /* account for grid tapering on refined levels */ if (cp->level > 0) offset_right *= integrator_substeps * 2; ///* the outer boundary layer overlaps with the first ghost zone */ //if (cp->level > 0) // offset_right++; if (cp->level > 0) offset_right--; if (cp->level > 0) offset_right--; end = MIN(end, level_size[dir] - offset_right); if (end > start) size[dir] = end - start; else size[dir] = 0; } if (size[0] > 0 && size[1] > 0) solver_ranks[nb_solver_ranks++] = proc; if (proc == local_proc) { interior_size[0] = size[0]; interior_size[1] = size[1]; } } if (nb_solver_ranks > 1) { MPI_Group grp_world, grp_solver; MPI_Comm_group(MPI_COMM_WORLD, &grp_world); MPI_Group_incl(grp_world, nb_solver_ranks, solver_ranks, &grp_solver); MPI_Comm_create(MPI_COMM_WORLD, grp_solver, &cp->solver_comm); MPI_Group_free(&grp_solver); MPI_Group_free(&grp_world); } else cp->solver_comm = MPI_COMM_NULL; if (interior_size[0] > 0 && interior_size[1] > 0) { cp->solver = solver_alloc(ms, level, component_start, interior_size, a_x[1] - a_x[0], cp->solver_comm); if (!cp->solver) CCTK_WARN(0, "Error allocating the solver"); } free(solver_ranks); free(extents); ms->nb_patches++; return cp; } static void print_stats(MSMGContext *ms) { int orig_log_level; int64_t total = ms->time_solve + ms->time_eval; fprintf(stderr, "%2.2f%% eval: %ld runs; %g s total; %g ms avg per run\n", (double)ms->time_eval * 100.0 / total, ms->count_eval, ms->time_eval / 1e6, ms->time_eval / 1e3 / ms->count_eval); fprintf(stderr, " %2.2f%% eval extrapolate: %ld runs; %g s total; %g ms avg per run\n", (double)ms->time_extrapolate * 100.0 / total, ms->count_extrapolate, ms->time_extrapolate / 1e6, ms->time_extrapolate / 1e3 / ms->count_extrapolate); fprintf(stderr, " %2.2f%% fine solve: %ld runs; %g s total; %g ms avg per run || " "%2.2f%% fill coeffs %2.2f%% boundaries %2.2f%% mg2d %2.2f%% export\n", (double)ms->time_fine_solve * 100.0 / total, ms->count_fine_solve, ms->time_fine_solve / 1e6, ms->time_fine_solve / 1e3 / ms->count_fine_solve, (double)ms->time_fine_fill_coeffs * 100.0 / ms->time_fine_solve, (double)ms->time_fine_boundaries * 100.0 / ms->time_fine_solve, (double)ms->time_fine_mg2d * 100.0 / ms->time_fine_solve, (double)ms->time_fine_export * 100.0 / ms->time_fine_solve); fprintf(stderr, "%2.2f%% solve: %ld runs; %g s total; %g ms avg per run\n", (double)ms->time_solve * 100.0 / total, ms->count_solve, ms->time_solve / 1e6, ms->time_solve / 1e3 / ms->count_solve); fprintf(stderr, " %2.2f%% solve mg2d: %ld runs; %g s total; %g ms avg per run || " "%2.2f%% fill coeffs %2.2f%% boundaries %2.2f%% mg2d %2.2f%% export %2.2f%% history\n", (double)ms->time_solve_mg * 100.0 / total, ms->count_solve_mg, ms->time_solve_mg / 1e6, ms->time_solve_mg / 1e3 / ms->count_solve_mg, (double)ms->time_solve_fill_coeffs * 100.0 / ms->time_solve_mg, (double)ms->time_solve_boundaries * 100.0 / ms->time_solve_mg, (double)ms->time_solve_mg2d * 100.0 / ms->time_solve_mg, (double)ms->time_solve_export * 100.0 / ms->time_solve_mg, (double)ms->time_solve_history * 100.0 / ms->time_solve_mg); orig_log_level = ms->log_level; ms->log_level = MG2D_LOG_VERBOSE; for (int i = 0; i < ms->nb_patches; i++) { CoordPatch *cp = get_coord_patch(ms, i); char indent_buf[64]; snprintf(indent_buf, sizeof(indent_buf), " [%d] ", i); if (cp->solver) mg2d_print_stats(cp->solver, indent_buf); for (int j = 0; j < cp->nb_solver_eval; j++) { if (!cp->solver_eval[j]) continue; snprintf(indent_buf, sizeof(indent_buf), " [%d/%d] ", i, j); mg2d_print_stats(cp->solver_eval[j], indent_buf); } } ms->log_level = orig_log_level; } static int context_init(cGH *gh, int fd_stencil, int maxiter, int exact_size, int nb_cycles, int nb_relax_pre, int nb_relax_post, double tol_residual, double tol_residual_base, double cfl_factor, int nb_levels, const char *loglevel_str, MSMGContext **ctx) { MSMGContext *ms; int loglevel = -1; ms = calloc(1, sizeof(*ms)); if (!ms) return -ENOMEM; ms->gh = gh; ms->fd_stencil = fd_stencil; ms->maxiter = maxiter; ms->max_exact_size = exact_size; ms->nb_cycles = nb_cycles; ms->nb_relax_pre = nb_relax_pre; ms->nb_relax_post = nb_relax_post; ms->tol_residual = tol_residual; ms->tol_residual_base = tol_residual_base; ms->cfl_factor = cfl_factor; ms->base_dt = gh->cctk_delta_time; ms->nb_levels = nb_levels; for (int i = 0; i < ARRAY_ELEMS(log_levels); i++) { if (!strcmp(loglevel_str, log_levels[i].str)) { loglevel = log_levels[i].level; break; } } if (loglevel < 0) { fprintf(stderr, "Invalid loglevel: %s\n", loglevel_str); return -EINVAL; } ms->log_level = loglevel; *ctx = ms; return 0; } static void context_free(MSMGContext **pms) { MSMGContext *ms = *pms; if (!ms) return; for (int i = 0; i < ms->nb_patches; i++) coord_patch_free(&ms->patches[i]); free(ms->patches); free(ms); *pms = NULL; } static void fill_eq_coeffs(MSMGContext *ctx, CoordPatch *cp, MG2DContext *solver) { cGH *gh = ctx->gh; int ret; const ptrdiff_t stride_z = CCTK_GFINDEX3D(gh, 0, 0, 1); double *a_x = CCTK_VarDataPtr(gh, 0, "grid::x"); double *a_z = CCTK_VarDataPtr(gh, 0, "grid::z"); const double dx = a_x[1] - a_x[0]; const double dz = a_z[stride_z] - a_z[0]; double *a_gtxx = CCTK_VarDataPtr(gh, 0, "ML_BSSN::gt11"); double *a_gtyy = CCTK_VarDataPtr(gh, 0, "ML_BSSN::gt22"); double *a_gtzz = CCTK_VarDataPtr(gh, 0, "ML_BSSN::gt33"); double *a_gtxy = CCTK_VarDataPtr(gh, 0, "ML_BSSN::gt12"); double *a_gtxz = CCTK_VarDataPtr(gh, 0, "ML_BSSN::gt13"); double *a_gtyz = CCTK_VarDataPtr(gh, 0, "ML_BSSN::gt23"); double *a_Atxx = CCTK_VarDataPtr(gh, 0, "ML_BSSN::At11"); double *a_Atyy = CCTK_VarDataPtr(gh, 0, "ML_BSSN::At22"); double *a_Atzz = CCTK_VarDataPtr(gh, 0, "ML_BSSN::At33"); double *a_Atxy = CCTK_VarDataPtr(gh, 0, "ML_BSSN::At12"); double *a_Atxz = CCTK_VarDataPtr(gh, 0, "ML_BSSN::At13"); double *a_Atyz = CCTK_VarDataPtr(gh, 0, "ML_BSSN::At23"); double *a_phi = CCTK_VarDataPtr(gh, 0, "ML_BSSN::phi"); double *a_Xtx = CCTK_VarDataPtr(gh, 0, "ML_BSSN::Xt1"); double *a_Xtz = CCTK_VarDataPtr(gh, 0, "ML_BSSN::Xt3"); solver->diff_coeffs[MG2D_DIFF_COEFF_20]->boundaries[MG2D_BOUNDARY_0L].flags = MG2D_DC_FLAG_DISCONT; solver->diff_coeffs[MG2D_DIFF_COEFF_10]->boundaries[MG2D_BOUNDARY_0L].flags = MG2D_DC_FLAG_POLE; #pragma omp parallel for for (int idx_z = 0; idx_z < solver->local_size[1]; idx_z++) for (int idx_x = 0; idx_x < solver->local_size[0]; idx_x++) { const int idx_src = CCTK_GFINDEX3D(gh, idx_x + cp->offset_left[0], cp->y_idx, idx_z + cp->offset_left[1]); const int idx_dc = idx_z * solver->diff_coeffs[0]->stride + idx_x; const int idx_rhs = idx_z * solver->rhs_stride + idx_x; const double x = a_x[idx_src]; const int on_axis = fabs(x) < 1e-13; const double gtxx = a_gtxx[idx_src]; const double gtyy = a_gtyy[idx_src]; const double gtzz = a_gtzz[idx_src]; const double gtxy = a_gtxy[idx_src]; const double gtxz = a_gtxz[idx_src]; const double gtyz = a_gtyz[idx_src]; const double Atxx = a_Atxx[idx_src]; const double Atyy = a_Atyy[idx_src]; const double Atzz = a_Atzz[idx_src]; const double Atxy = a_Atxy[idx_src]; const double Atxz = a_Atxz[idx_src]; const double Atyz = a_Atyz[idx_src]; const double phi = a_phi[idx_src]; const double Xtx = a_Xtx[idx_src]; const double Xtz = a_Xtz[idx_src]; const double det = gtxx * gtyy * gtzz + 2 * gtxy * gtyz * gtxz - gtzz * SQR(gtxy) - SQR(gtxz) * gtyy - gtxx * SQR(gtyz); const double At[3][3] = { { Atxx, Atxy, Atxz }, { Atxy, Atyy, Atyz }, { Atxz, Atyz, Atzz }}; double Xx, Xz, k2; double phi_dx, phi_dz; double Am[3][3], gtu[3][3]; if (ctx->fd_stencil == 1) { phi_dx = (a_phi[idx_src + 1] - a_phi[idx_src - 1]) / (2.0 * dx); phi_dz = (a_phi[idx_src + stride_z] - a_phi[idx_src - stride_z]) / (2.0 * dz); } else { phi_dx = (-1.0 * a_phi[idx_src + 2] + 8.0 * a_phi[idx_src + 1] - 8.0 * a_phi[idx_src - 1] + a_phi[idx_src - 2]) / (12.0 * dx); phi_dz = (-1.0 * a_phi[idx_src + 2 * stride_z] + 8.0 * a_phi[idx_src + 1 * stride_z] - 8.0 * a_phi[idx_src - 1 * stride_z] + a_phi[idx_src - 2 * stride_z]) / (12.0 * dz); } gtu[0][0] = (gtyy * gtzz - SQR(gtyz)) / det; gtu[1][1] = (gtxx * gtzz - SQR(gtxz)) / det; gtu[2][2] = (gtxx * gtyy - SQR(gtxy)) / det; gtu[0][1] = -(gtxy * gtzz - gtyz * gtxz) / det; gtu[0][2] = (gtxy * gtyz - gtyy * gtxz) / det; gtu[1][2] = -(gtxx * gtyz - gtxy * gtxz) / det; gtu[1][0] = gtu[0][1]; gtu[2][0] = gtu[0][2]; gtu[2][1] = gtu[1][2]; for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) { double val = 0.0; for (int l = 0; l < 3; l++) val += gtu[j][l] * At[l][k]; Am[j][k] = val; } // K_{ij} K^{ij} k2 = 0.0; for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) k2 += Am[j][k] * Am[k][j]; Xx = SQR(phi) * (Xtx + (phi_dx * gtu[0][0] + phi_dz * gtu[0][2]) / phi); Xz = SQR(phi) * (Xtz + (phi_dx * gtu[0][2] + phi_dz * gtu[2][2]) / phi); solver->diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_dc] = SQR(phi) * (gtu[0][0] + (on_axis ? gtu[1][1] : 0.0)); solver->diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_dc] = SQR(phi) * gtu[2][2]; solver->diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_dc] = SQR(phi) * gtu[0][2] * 2; solver->diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_dc] = -Xx + (on_axis ? 0.0 : SQR(phi) * gtu[1][1] / x); solver->diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_dc] = -Xz; solver->diff_coeffs[MG2D_DIFF_COEFF_00]->data[idx_dc] = -k2; solver->rhs[idx_rhs] = k2; if (on_axis) { solver->diff_coeffs[MG2D_DIFF_COEFF_20]->boundaries[MG2D_BOUNDARY_0L].val[idx_z] = SQR(phi) * gtu[1][1]; solver->diff_coeffs[MG2D_DIFF_COEFF_10]->boundaries[MG2D_BOUNDARY_0L].val[idx_z] = SQR(phi) * gtu[1][1]; } } } static void solution_to_grid(CoordPatch *cp, MG2DContext *solver, double *dst) { #pragma omp parallel for for (int j = 0; j < solver->local_size[1]; j++) for (int i = 0; i < solver->local_size[0]; i++) { const ptrdiff_t idx_dst = CPINDEX(cp, i + cp->offset_left[0], cp->y_idx, j + cp->offset_left[1]); const int idx_src = j * solver->u_stride + i; dst[idx_dst] = 1.0 + solver->u[idx_src]; } if (cp->level == 0) { /* on the coarsest level, extrapolate the outer boundary ghost points */ if (!cp->bnd_intercomp[0][1]) { #pragma omp parallel for for (int idx_z = cp->offset_left[1]; idx_z < cp->offset_left[1] + solver->local_size[1]; idx_z++) for (int idx_x = cp->offset_left[0] + solver->local_size[0]; idx_x < cp->grid_size[0]; idx_x++) { const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z); dst[idx_dst] = 2.0 * dst[idx_dst - 1] - dst[idx_dst - 2]; } } if (!cp->bnd_intercomp[1][1]) { #pragma omp parallel for for (int idx_x = cp->offset_left[0]; idx_x < cp->grid_size[0]; idx_x++) for (int idx_z = cp->offset_left[1] + solver->local_size[1]; idx_z < cp->grid_size[2]; idx_z++) { const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z); const ptrdiff_t idx_src0 = CPINDEX(cp, idx_x, cp->y_idx, idx_z - 1); const ptrdiff_t idx_src1 = CPINDEX(cp, idx_x, cp->y_idx, idx_z - 2); dst[idx_dst] = 2.0 * dst[idx_src0] - dst[idx_src1]; } } } /* fill in the axial symmetry ghostpoints by mirroring */ if (!cp->bnd_intercomp[0][0]) { #pragma omp parallel for for (int idx_z = cp->offset_left[1]; idx_z < cp->grid_size[2]; idx_z++) for (int idx_x = 0; idx_x < cp->offset_left[0]; idx_x++) { const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z); const ptrdiff_t idx_src = CPINDEX(cp, 2 * cp->offset_left[0] - idx_x, cp->y_idx, idx_z); dst[idx_dst] = dst[idx_src]; } } if (!cp->bnd_intercomp[1][0]) { #pragma omp parallel for for (int idx_z = 0; idx_z < cp->offset_left[1]; idx_z++) for (int idx_x = 0; idx_x < cp->grid_size[0]; idx_x++) { const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z); const ptrdiff_t idx_src = CPINDEX(cp, idx_x, cp->y_idx, 2 * cp->offset_left[1] - idx_z); dst[idx_dst] = dst[idx_src]; } } } void msa_mg_eval(CCTK_ARGUMENTS) { DECLARE_CCTK_ARGUMENTS; DECLARE_CCTK_PARAMETERS; int64_t total_start; int ret; const size_t grid_size = cctkGH->cctk_lsh[2] * cctkGH->cctk_lsh[1] * cctkGH->cctk_lsh[0]; const double t = cctkGH->cctk_time; const int reflevel = ctz(cctkGH->cctk_levfac[0]); double time_interp_step, fact0, fact1; total_start = gettime(); LOGDEBUG( "evaluating lapse at rl=%d, t=%g\n", reflevel, t); time_interp_step = lapse_prev1_time[reflevel] - lapse_prev0_time[reflevel]; if (lapse_prev0_time[reflevel] == DBL_MAX && lapse_prev1_time[reflevel] == DBL_MAX) { fact0 = 0.0; fact1 = 0.0; } else if (lapse_prev0_time[reflevel] == DBL_MAX) { fact0 = 0.0; fact1 = 1.0; } else { fact0 = (lapse_prev1_time[reflevel] - t) / time_interp_step; fact1 = (t - lapse_prev0_time[reflevel]) / time_interp_step; } if (!fine_solve || reflevel < ms->nb_levels - 1) { /* on coarse levels use extrapolated past solutions */ int64_t extrap_start = gettime(); LOGDEBUG( "extrapolating from t1=%g (%g) and t0=%g (%g)\n", lapse_prev1_time[reflevel], fact1, lapse_prev0_time[reflevel], fact0); #pragma omp parallel for for (size_t i = 0; i < grid_size; i++) lapse_mg_eval[i] = fact0 * lapse_prev0[i] + fact1 * lapse_prev1[i]; ms->time_extrapolate += gettime() - extrap_start; ms->count_extrapolate++; } else { /* on the finest level, use the extrapolation only for the boundary * values and solve for the interior */ int64_t fine_solve_start = gettime(); int64_t start; CoordPatch *cp = get_coord_patch(ms, reflevel); const int timestep = lrint(t * cctkGH->cctk_levfac[0] / ms->base_dt); int mol_substeps = MoLNumIntegratorSubsteps(); int *mol_step = CCTK_VarDataPtr(cctkGH, 0, "MoL::MoL_Intermediate_Step"); int solver_idx; MG2DContext *solver; if (!mol_step || *mol_step <= 0) CCTK_WARN(0, "Invalid MoL step"); solver_idx = (cp->cur_step & 1) * mol_substeps + (mol_substeps - *mol_step); solver = cp->solver_eval[solver_idx]; if (!solver) goto finish; start = gettime(); fill_eq_coeffs(ms, cp, solver); ms->time_fine_fill_coeffs += gettime() - start; { start = gettime(); if (solver_idx) { MG2DContext *solver_src = cp->solver_eval[solver_idx - 1]; #pragma omp parallel for for (int line = 0; line < solver->local_size[1] ; line++) { memcpy(solver->u + line * solver->u_stride, solver_src->u + line * solver_src->u_stride, sizeof(*solver->u) * solver->local_size[0]); } } else { #pragma omp parallel for for (int line = 0; line < solver->local_size[1]; line++) for (int i = 0; i < solver->local_size[0]; i++) solver->u[line * solver->u_stride + i] = lapse_mg[(cp->offset_left[1] + line) * cctk_lsh[0] + cp->offset_left[0] + i] - 1.0; } ms->time_init_guess_eval += gettime() - start; } LOGDEBUG( "extrapolating BCs from t1=%g (%g) and t0=%g (%g)\n", lapse_prev1_time[reflevel], fact1, lapse_prev0_time[reflevel], fact0); start = gettime(); if (solver->local_start[1] + solver->local_size[1] == solver->domain_size) { #pragma omp parallel for for (ptrdiff_t idx_z = cp->offset_left[1] + solver->local_size[1] - 1; idx_z < cctkGH->cctk_lsh[2]; idx_z++) for (ptrdiff_t idx_x = 0; idx_x < cctkGH->cctk_lsh[0]; idx_x++) { const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, idx_x, cp->y_idx, idx_z); lapse_mg_eval[idx] = fact0 * lapse_prev0[idx] + fact1 * lapse_prev1[idx]; } } if (solver->local_start[0] + solver->local_size[0] == solver->domain_size) { #pragma omp parallel for for (ptrdiff_t idx_z = 0; idx_z < cctkGH->cctk_lsh[2]; idx_z++) for (ptrdiff_t idx_x = cp->offset_left[0] + solver->local_size[0] - 1; idx_x < cctkGH->cctk_lsh[0]; idx_x++) { const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, idx_x, cp->y_idx, idx_z); lapse_mg_eval[idx] = fact0 * lapse_prev0[idx] + fact1 * lapse_prev1[idx]; } } if (solver->local_start[1] + solver->local_size[1] == solver->domain_size) { #pragma omp parallel for for (int j = 0; j < solver->fd_stencil; j++) { MG2DBoundary *bnd = solver->boundaries[MG2D_BOUNDARY_1U]; double *dst = bnd->val + j * bnd->val_stride; for (ptrdiff_t i = -j; i < (ptrdiff_t)solver->local_size[0] + j; i++) { const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, i + cp->offset_left[0], cp->y_idx, cp->offset_left[1] + solver->local_size[1] - 1 + j); dst[i] = lapse_mg_eval[idx] - 1.0; } if (j == 0) { dst = solver->u + solver->u_stride * (solver->local_size[1] - 1); memcpy(dst, bnd->val, sizeof(*dst) * solver->local_size[0]); } } } if (solver->local_start[0] + solver->local_size[0] == solver->domain_size) { #pragma omp parallel for for (int j = 0; j < solver->fd_stencil; j++) { MG2DBoundary *bnd = solver->boundaries[MG2D_BOUNDARY_0U]; double *dst = bnd->val + j * bnd->val_stride; for (ptrdiff_t i = -j; i < (ptrdiff_t)solver->local_size[1] + j; i++) { const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, cp->offset_left[0] + solver->local_size[0] - 1 + j, cp->y_idx, cp->offset_left[1] + i); dst[i] = lapse_mg_eval[idx] - 1.0; } if (j == 0) { dst = solver->u + solver->local_size[0] - 1; for (ptrdiff_t i = 0; i < solver->local_size[1]; i++) dst[i * solver->u_stride] = bnd->val[i]; } } } ms->time_fine_boundaries += gettime() - start; LOGDEBUG( "mg solve\n"); start = gettime(); ret = mg2d_solve(solver); if (ret < 0) CCTK_WARN(0, "Error solving the maximal slicing equation"); ms->time_fine_mg2d += gettime() - start; start = gettime(); solution_to_grid(cp, solver, lapse_mg_eval); ms->time_fine_export = gettime() - start; if (cp->nb_components && solver_idx == 0) { start = gettime(); MPI_Alltoallw(lapse_mg_eval, cp->sendcounts, cp->senddispl, cp->sendtypes, lapse_mg_eval, cp->recvcounts, cp->recvdispl, cp->recvtypes, MPI_COMM_WORLD); ms->time_mpi_sync = gettime() - start; } ms->time_fine_solve += gettime() - fine_solve_start; ms->count_fine_solve++; } memcpy(alpha, lapse_mg_eval, sizeof(*alpha) * grid_size); finish: ms->time_eval += gettime() - total_start; ms->count_eval++; } void msa_mg_solve(CCTK_ARGUMENTS) { DECLARE_CCTK_ARGUMENTS; DECLARE_CCTK_PARAMETERS; CoordPatch *cp; int64_t total_start, mg_start, start; int reflevel_top, ts_tmp; int ret; const size_t grid_size = cctkGH->cctk_lsh[2] * cctkGH->cctk_lsh[1] * cctkGH->cctk_lsh[0]; const double t = cctkGH->cctk_time; const int timestep = lrint(t * cctkGH->cctk_levfac[0] / cctkGH->cctk_delta_time); const int reflevel = ctz(cctkGH->cctk_levfac[0]); double *lapse_mg1; if (reflevel == ms->nb_levels - 1 && timestep % 2) return; total_start = gettime(); LOGDEBUG( "solve lapse at rl=%d, t=%g; step %d\n", reflevel, t, timestep); reflevel_top = reflevel; ts_tmp = timestep; while (reflevel_top > 0 && !(ts_tmp % 2)) { ts_tmp /= 2; reflevel_top--; } mg_start = gettime(); LOGDEBUG( "mg solve cur %d top %d\n", reflevel, reflevel_top); /* fill in the equation coefficients */ cp = get_coord_patch(ms, reflevel); // this happens when this component contains only the buffer points, // so we skip the solve and only fill in the extrapolation values if (!cp->solver) goto skip_solve; start = gettime(); fill_eq_coeffs(ms, cp, cp->solver); ms->time_solve_fill_coeffs += gettime() - start; start = gettime(); if (reflevel > 0) { if (timestep % 2) { /* outer-most level for this solve, use extrapolated lapse as the * boundary condition */ double time_interp_step = lapse_prev1_time[reflevel] - lapse_prev0_time[reflevel]; double fact0, fact1; if (lapse_prev0_time[reflevel] == DBL_MAX && lapse_prev1_time[reflevel] == DBL_MAX) { fact0 = 0.0; fact1 = 0.0; } else if (lapse_prev0_time[reflevel] == DBL_MAX) { fact0 = 0.0; fact1 = 1.0; } else { fact0 = (lapse_prev1_time[reflevel] - t) / time_interp_step; fact1 = (t - lapse_prev0_time[reflevel]) / time_interp_step; } LOGDEBUG( "extrapolating BCs from t1=%g (%g) and t0=%g (%g)\n", lapse_prev1_time[reflevel], fact1, lapse_prev0_time[reflevel], fact0); if (cp->solver->local_start[1] + cp->solver->local_size[1] == cp->solver->domain_size) { #pragma omp parallel for for (int j = 0; j < cp->solver->fd_stencil; j++) { for (ptrdiff_t i = -j; i < (ptrdiff_t)cp->solver->local_size[0] + j; i++) { const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, i + cp->offset_left[0], cp->y_idx, cp->offset_left[1] + cp->solver->local_size[1] - 1 + j); lapse_mg[idx] = fact0 * lapse_prev0[idx] + fact1 * lapse_prev1[idx]; } } } if (cp->solver->local_start[0] + cp->solver->local_size[0] == cp->solver->domain_size) { #pragma omp parallel for for (int j = 0; j < cp->solver->fd_stencil; j++) { for (ptrdiff_t i = -j; i < (ptrdiff_t)cp->solver->local_size[1] + j; i++) { const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, cp->offset_left[0] + cp->solver->local_size[0] - 1 + j, cp->y_idx, cp->offset_left[1] + i); lapse_mg[idx] = fact0 * lapse_prev0[idx] + fact1 * lapse_prev1[idx]; } } } } else { /* use the solution from the coarser level as the initial guess */ //CoordPatch *cp1 = get_coord_patch(ms, reflevel - 1); //ret = mg2d_init_guess(cp->solver, cp1->solver->u, cp1->solver->u_stride, // cp1->solver->local_size, cp1->solver->step); //if (ret < 0) // CCTK_WARN(0, "Error setting the initial guess"); } /* if the condition above was false, then lapse_mg should be filled by * prolongation from the coarser level * note that the reflection-boundary ghost points are not filled */ /* fill the solver boundary conditions */ if (cp->solver->local_start[1] + cp->solver->local_size[1] == cp->solver->domain_size) { #pragma omp parallel for for (int j = 0; j < cp->solver->fd_stencil; j++) { MG2DBoundary *bnd = cp->solver->boundaries[MG2D_BOUNDARY_1U]; double *dst = bnd->val + j * bnd->val_stride; for (ptrdiff_t i = -j; i < (ptrdiff_t)cp->solver->local_size[0] + j; i++) { const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, ABS(i) + cp->offset_left[0], cp->y_idx, cp->offset_left[1] + cp->solver->local_size[1] - 1 + j); dst[i] = lapse_mg[idx] - 1.0; } } } if (cp->solver->local_start[0] + cp->solver->local_size[0] == cp->solver->domain_size) { #pragma omp parallel for for (int j = 0; j < cp->solver->fd_stencil; j++) { MG2DBoundary *bnd = cp->solver->boundaries[MG2D_BOUNDARY_0U]; double *dst = bnd->val + j * bnd->val_stride; for (ptrdiff_t i = -j; i < (ptrdiff_t)cp->solver->local_size[1] + j; i++) { const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, cp->offset_left[1] + cp->solver->local_size[0] - 1 + j, cp->y_idx, cp->offset_left[1] + ABS(i)); dst[i] = lapse_mg[idx] - 1.0; } } } } ms->time_solve_boundaries += gettime() - start; /* do the elliptic solve */ start = gettime(); ret = mg2d_solve(cp->solver); if (ret < 0) CCTK_WARN(0, "Error solving the maximal slicing equation"); ms->time_solve_mg2d += gettime() - start; /* export the result */ start = gettime(); solution_to_grid(cp, cp->solver, lapse_mg); lapse_mg1 = CCTK_VarDataPtr(cctkGH, 1, "MaximalSlicingAxiMG::lapse_mg"); memcpy(lapse_mg1, lapse_mg, grid_size * sizeof(*lapse_mg1)); ms->time_solve_export += gettime() - start; skip_solve: /* update lapse history for extrapolation */ if (reflevel == 0 || !(timestep % 2)) { const double vel_fact = 1.0 / (1 << (reflevel - reflevel_top)); start = gettime(); #pragma omp parallel for for (size_t j = 0; j < grid_size; j++) { const double sol_new = lapse_mg[j]; const double delta = sol_new - lapse_mg_eval[j]; lapse_prev0[j] = lapse_prev1[j] + delta - delta * vel_fact; lapse_prev1[j] = sol_new; alpha[j] = sol_new; } lapse_prev0_time[reflevel] = lapse_prev1_time[reflevel]; lapse_prev1_time[reflevel] = cctkGH->cctk_time; ms->time_solve_history += gettime() - start; } ms->time_solve_mg += gettime() - mg_start; ms->count_solve_mg++; finish: ms->time_solve += gettime() - total_start; ms->count_solve++; if (stats_every > 0 && reflevel == 0 && ms->count_solve > 0 && ms->count_eval > 0 && !(ms->count_solve % stats_every)) print_stats(ms); } void msa_mg_sync(CCTK_ARGUMENTS) { DECLARE_CCTK_ARGUMENTS; DECLARE_CCTK_PARAMETERS; const int reflevel = ctz(cctkGH->cctk_levfac[0]); LOGDEBUG( "\nsync %g %d\n\n", cctkGH->cctk_time, reflevel); return; } void msa_mg_init(CCTK_ARGUMENTS) { DECLARE_CCTK_ARGUMENTS; DECLARE_CCTK_PARAMETERS; int ret; int nb_levels_type; int nb_levels = *(int*)CCTK_ParameterGet("num_levels_1", "CarpetRegrid2", &nb_levels_type); int use_tapered_grids = *(int*)CCTK_ParameterGet("use_tapered_grids", "Carpet", &use_tapered_grids); if (!use_tapered_grids) CCTK_WARN(0, "MaximalSlicingAxiMG only works with use_tapered_grids=1"); if (!ms) { ret = context_init(cctkGH, fd_stencil, maxiter, exact_size, nb_cycles, nb_relax_pre, nb_relax_post, tol_residual, tol_residual_base, cfl_factor, nb_levels, loglevel, &ms); if (ret < 0) CCTK_WARN(0, "Error initializing the solver context"); } } void msa_mg_prestep(CCTK_ARGUMENTS) { DECLARE_CCTK_ARGUMENTS; DECLARE_CCTK_PARAMETERS; CoordPatch *cp; const double t = cctkGH->cctk_time; const int timestep = lrint(t * cctkGH->cctk_levfac[0] / cctkGH->cctk_delta_time); const int reflevel = ctz(cctkGH->cctk_levfac[0]); cp = get_coord_patch(ms, reflevel); cp->cur_step = timestep; } void msa_mg_inithist(CCTK_ARGUMENTS) { DECLARE_CCTK_ARGUMENTS; DECLARE_CCTK_PARAMETERS; const size_t grid_size = cctkGH->cctk_lsh[2] * cctkGH->cctk_lsh[1] * cctkGH->cctk_lsh[0]; for (size_t i = 0; i < grid_size; i++) lapse_mg[i] = 1.0; for (int i = 0; i < 32; i++) { lapse_prev0_time[i] = DBL_MAX; lapse_prev1_time[i] = DBL_MAX; } } void maximal_slicing_axi_mg_modify_diss(CCTK_ARGUMENTS) { DECLARE_CCTK_ARGUMENTS; DECLARE_CCTK_PARAMETERS; CoordPatch *cp; const int reflevel = ctz(cctkGH->cctk_levfac[0]); double *epsdis; epsdis = CCTK_VarDataPtr(cctkGH, 0, "Dissipation::epsdisA"); if (!epsdis) abort(); cp = get_coord_patch(ms, reflevel); if (!cp->bnd_intercomp[0][1]) { for (int idx_z = 0; idx_z < cp->grid_size[2]; idx_z++) for (int idx_x = cp->offset_left[0] + (cp->solver ? cp->solver->local_size[0] - (ms->fd_stencil + 1) : 0); idx_x < cp->grid_size[0]; idx_x++) { const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z); epsdis[idx_dst] = 0.0; } } if (!cp->bnd_intercomp[1][1]) { for (int idx_x = 0; idx_x < cp->grid_size[0]; idx_x++) for (int idx_z = cp->offset_left[0] + (cp->solver ? cp->solver->local_size[1] - (ms->fd_stencil + 1) : 0); idx_z < cp->grid_size[2]; idx_z++) { const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z); epsdis[idx_dst] = 0.0; } } } void msa_mg_terminate_print_stats(CCTK_ARGUMENTS) { const int reflevel = ctz(cctkGH->cctk_levfac[0]); if (reflevel == 0 && ms) { print_stats(ms); context_free(&ms); } }