From eb8da3aa993939881a3bbd28e2c040cdd54b7183 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Mon, 18 Mar 2019 09:45:20 +0100 Subject: Use per-substep solvers on the finest level. --- schedule.ccl | 6 +- src/maximal_slicing_axi_mg.c | 240 ++++++++++++++++++++++++++++--------------- 2 files changed, 160 insertions(+), 86 deletions(-) diff --git a/schedule.ccl b/schedule.ccl index 7297aa7..88a30cb 100644 --- a/schedule.ccl +++ b/schedule.ccl @@ -5,7 +5,11 @@ if (CCTK_Equals(lapse_evolution_method, "maximal_axi_mg")) { LANG: C } "" - SCHEDULE msa_mg_init IN CCTK_BASEGRID { + SCHEDULE msa_mg_init IN CCTK_BASEGRID AFTER TemporalSpacings { + LANG: C + } "" + + SCHEDULE msa_mg_prestep IN MoL_PreStep { LANG: C } "" diff --git a/src/maximal_slicing_axi_mg.c b/src/maximal_slicing_axi_mg.c index 3d082a3..595f4b0 100644 --- a/src/maximal_slicing_axi_mg.c +++ b/src/maximal_slicing_axi_mg.c @@ -55,6 +55,10 @@ typedef struct CoordPatch { MG2DContext *solver; + int cur_step; + MG2DContext **solver_eval; + int nb_solver_eval; + ptrdiff_t y_idx; /* number of x/z grid points by which the elliptic solver domain is offset @@ -75,6 +79,9 @@ typedef struct MSMGContext { double tol_residual_base; double cfl_factor; + double base_dt; + int nb_levels; + CoordPatch *patches; int nb_patches; @@ -126,6 +133,12 @@ static int ctz(int a) static void coord_patch_free(CoordPatch *cp) { mg2d_solver_free(&cp->solver); + + for (int i = 0; i < cp->nb_solver_eval; i++) + mg2d_solver_free(&cp->solver_eval[i]); + free(cp->solver_eval); + cp->solver_eval = NULL; + cp->nb_solver_eval = 0; } static void log_callback(const MG2DContext *ctx, int level, @@ -139,9 +152,59 @@ static void log_callback(const MG2DContext *ctx, int level, vfprintf(stderr, fmt, vl); } -static CoordPatch *get_coord_patch(MSMGContext *ms, int level) +static MG2DContext *solver_alloc(MSMGContext *ms, int level, size_t domain_size, + double step) { const char *omp_threads = getenv("OMP_NUM_THREADS"); + + MG2DContext *solver; + + solver = mg2d_solver_alloc(domain_size); + if (!solver) + return NULL; + + solver->step[0] = step; + solver->step[1] = solver->step[0]; + + solver->fd_stencil = ms->fd_stencil; + + solver->boundaries[MG2D_BOUNDARY_0L]->type = MG2D_BC_TYPE_REFLECT; + solver->boundaries[MG2D_BOUNDARY_1L]->type = MG2D_BC_TYPE_REFLECT; + solver->boundaries[MG2D_BOUNDARY_0U]->type = level ? MG2D_BC_TYPE_FIXVAL : MG2D_BC_TYPE_FALLOFF; + solver->boundaries[MG2D_BOUNDARY_1U]->type = level ? MG2D_BC_TYPE_FIXVAL : MG2D_BC_TYPE_FALLOFF; + + solver->maxiter = ms->maxiter; + if (ms->tol_residual_base > 0.0) + solver->tol = ms->tol_residual_base / SQR(solver->step[0]); + else + solver->tol = ms->tol_residual; + solver->nb_cycles = ms->nb_cycles; + solver->nb_relax_post = ms->nb_relax_post; + solver->nb_relax_pre = ms->nb_relax_pre; + solver->cfl_factor = ms->cfl_factor; + solver->max_exact_size = ms->max_exact_size; + + solver->opaque = ms; + solver->log_callback = log_callback; + + if (omp_threads) + solver->nb_threads = strtol(omp_threads, NULL, 0); + if (solver->nb_threads <= 0) + solver->nb_threads = 1; + + /* initialize boundary values to zero, + * non-zero values on the outer boundaries of refined levels are filled in elsewhere */ + for (int i = 0; i < 4; i++) { + for (size_t j = 0; j < solver->fd_stencil; j++) + memset(solver->boundaries[i]->val + j * solver->boundaries[i]->val_stride - j, + 0, sizeof(*solver->boundaries[i]->val) * (domain_size + 2 * j)); + } + + return solver; +} + +static CoordPatch *get_coord_patch(MSMGContext *ms, int level) +{ cGH *gh = ms->gh; const size_t grid_size = gh->cctk_lsh[2] * gh->cctk_lsh[1] * gh->cctk_lsh[0]; @@ -204,45 +267,28 @@ static CoordPatch *get_coord_patch(MSMGContext *ms, int level) if (domain_size[0] != domain_size[1]) CCTK_WARN(0, "The grid is non-square, only square grids are supported"); - cp->solver = mg2d_solver_alloc(domain_size[0]); + cp->solver = solver_alloc(ms, level, domain_size[0], a_x[1] - a_x[0]); if (!cp->solver) CCTK_WARN(0, "Error allocating the solver"); - cp->solver->step[0] = a_x[1] - a_x[0]; - cp->solver->step[1] = cp->solver->step[0]; - - cp->solver->fd_stencil = ms->fd_stencil; - - cp->solver->boundaries[MG2D_BOUNDARY_0L]->type = MG2D_BC_TYPE_REFLECT; - cp->solver->boundaries[MG2D_BOUNDARY_1L]->type = MG2D_BC_TYPE_REFLECT; - cp->solver->boundaries[MG2D_BOUNDARY_0U]->type = level ? MG2D_BC_TYPE_FIXVAL : MG2D_BC_TYPE_FALLOFF; - cp->solver->boundaries[MG2D_BOUNDARY_1U]->type = level ? MG2D_BC_TYPE_FIXVAL : MG2D_BC_TYPE_FALLOFF; - - cp->solver->maxiter = ms->maxiter; - if (ms->tol_residual_base > 0.0) - cp->solver->tol = ms->tol_residual_base / SQR(cp->solver->step[0]); - else - cp->solver->tol = ms->tol_residual; - cp->solver->nb_cycles = ms->nb_cycles; - cp->solver->nb_relax_post = ms->nb_relax_post; - cp->solver->nb_relax_pre = ms->nb_relax_pre; - cp->solver->cfl_factor = ms->cfl_factor; - cp->solver->max_exact_size = ms->max_exact_size; - - cp->solver->opaque = ms; - cp->solver->log_callback = log_callback; - - if (omp_threads) - cp->solver->nb_threads = strtol(omp_threads, NULL, 0); - if (cp->solver->nb_threads <= 0) - cp->solver->nb_threads = 1; - - /* initialize boundary values to zero, - * non-zero values on the outer boundaries of refined levels are filled in elsewhere */ - for (int i = 0; i < 4; i++) { - for (size_t j = 0; j < cp->solver->fd_stencil; j++) - memset(cp->solver->boundaries[i]->val + j * cp->solver->boundaries[i]->val_stride - j, - 0, sizeof(*cp->solver->boundaries[i]->val) * (domain_size[0] + 2 * j)); + /* on the coarsest levels we allocate a solver for each of the substeps + * between the coarser-grid timelevels */ + if (level == ms->nb_levels - 1) { + cp->nb_solver_eval = 2 * integrator_substeps; + cp->solver_eval = calloc(cp->nb_solver_eval, sizeof(*cp->solver_eval)); + if (!cp->solver_eval) + CCTK_WARN(0, "Error allocating solvers"); + + for (int step = 0; step < 2; step++) + for (int substep = 0; substep < integrator_substeps; substep++) { + size_t size = cp->grid_size[0] - cp->offset_left[0] - (ms->fd_stencil - 1) + - ms->gh->cctk_nghostzones[0] * (step * integrator_substeps + substep); + MG2DContext *s = solver_alloc(ms, level, size, a_x[1] - a_x[0]); + if (!s) + CCTK_WARN(0, "Error allocating the solver"); + + cp->solver_eval[step * integrator_substeps + substep] = s; + } } ms->nb_patches++; @@ -298,7 +344,7 @@ static void print_stats(MSMGContext *ms) static int context_init(cGH *gh, int fd_stencil, int maxiter, int exact_size, int nb_cycles, int nb_relax_pre, int nb_relax_post, double tol_residual, double tol_residual_base, - double cfl_factor, + double cfl_factor, int nb_levels, const char *loglevel_str, MSMGContext **ctx) { @@ -319,6 +365,8 @@ static int context_init(cGH *gh, int fd_stencil, int maxiter, int exact_size, in ms->tol_residual = tol_residual; ms->tol_residual_base = tol_residual_base; ms->cfl_factor = cfl_factor; + ms->base_dt = gh->cctk_delta_time; + ms->nb_levels = nb_levels; for (int i = 0; i < ARRAY_ELEMS(log_levels); i++) { if (!strcmp(loglevel_str, log_levels[i].str)) { @@ -353,7 +401,7 @@ static void context_free(MSMGContext **pms) *pms = NULL; } -static void fill_eq_coeffs(MSMGContext *ctx, CoordPatch *cp) +static void fill_eq_coeffs(MSMGContext *ctx, CoordPatch *cp, MG2DContext *solver) { cGH *gh = ctx->gh; int ret; @@ -382,11 +430,11 @@ static void fill_eq_coeffs(MSMGContext *ctx, CoordPatch *cp) double *a_Xtx = CCTK_VarDataPtr(gh, 0, "ML_BSSN::Xt1"); double *a_Xtz = CCTK_VarDataPtr(gh, 0, "ML_BSSN::Xt3"); - for (int idx_z = 0; idx_z < cp->solver->domain_size; idx_z++) - for (int idx_x = 0; idx_x < cp->solver->domain_size; idx_x++) { - const int idx_src = CCTK_GFINDEX3D(gh, idx_x + gh->cctk_nghostzones[0], cp->y_idx, idx_z + gh->cctk_nghostzones[2]); - const int idx_dc = idx_z * cp->solver->diff_coeffs_stride + idx_x; - const int idx_rhs = idx_z * cp->solver->rhs_stride + idx_x; + for (int idx_z = 0; idx_z < solver->domain_size; idx_z++) + for (int idx_x = 0; idx_x < solver->domain_size; idx_x++) { + const int idx_src = CCTK_GFINDEX3D(gh, idx_x + cp->offset_left[0], cp->y_idx, idx_z + cp->offset_left[1]); + const int idx_dc = idx_z * solver->diff_coeffs_stride + idx_x; + const int idx_rhs = idx_z * solver->rhs_stride + idx_x; const double x = a_x[idx_src]; @@ -454,36 +502,36 @@ static void fill_eq_coeffs(MSMGContext *ctx, CoordPatch *cp) Xx = SQR(phi) * (Xtx + (phi_dx * gtu[0][0] + phi_dz * gtu[0][2]) / phi); Xz = SQR(phi) * (Xtz + (phi_dx * gtu[0][2] + phi_dz * gtu[2][2]) / phi); - cp->solver->diff_coeffs[MG2D_DIFF_COEFF_20][idx_dc] = SQR(phi) * (gtu[0][0] + ((idx_x == 0) ? gtu[1][1] : 0.0)); - cp->solver->diff_coeffs[MG2D_DIFF_COEFF_02][idx_dc] = SQR(phi) * gtu[2][2]; - cp->solver->diff_coeffs[MG2D_DIFF_COEFF_11][idx_dc] = SQR(phi) * gtu[0][2] * 2; - cp->solver->diff_coeffs[MG2D_DIFF_COEFF_10][idx_dc] = -Xx + (idx_x ? SQR(phi) * gtu[1][1] / x : 0.0); - cp->solver->diff_coeffs[MG2D_DIFF_COEFF_01][idx_dc] = -Xz; - cp->solver->diff_coeffs[MG2D_DIFF_COEFF_00][idx_dc] = -k2; - cp->solver->rhs[idx_rhs] = k2; + solver->diff_coeffs[MG2D_DIFF_COEFF_20][idx_dc] = SQR(phi) * (gtu[0][0] + ((idx_x == 0) ? gtu[1][1] : 0.0)); + solver->diff_coeffs[MG2D_DIFF_COEFF_02][idx_dc] = SQR(phi) * gtu[2][2]; + solver->diff_coeffs[MG2D_DIFF_COEFF_11][idx_dc] = SQR(phi) * gtu[0][2] * 2; + solver->diff_coeffs[MG2D_DIFF_COEFF_10][idx_dc] = -Xx + (idx_x ? SQR(phi) * gtu[1][1] / x : 0.0); + solver->diff_coeffs[MG2D_DIFF_COEFF_01][idx_dc] = -Xz; + solver->diff_coeffs[MG2D_DIFF_COEFF_00][idx_dc] = -k2; + solver->rhs[idx_rhs] = k2; } } -static void solution_to_grid(CoordPatch *cp, double *dst) +static void solution_to_grid(CoordPatch *cp, MG2DContext *solver, double *dst) { - for (int j = 0; j < cp->solver->domain_size; j++) - for (int i = 0; i < cp->solver->domain_size; i++) { + for (int j = 0; j < solver->domain_size; j++) + for (int i = 0; i < solver->domain_size; i++) { const ptrdiff_t idx_dst = CPINDEX(cp, i + cp->offset_left[0], cp->y_idx, j + cp->offset_left[1]); - const int idx_src = j * cp->solver->u_stride + i; - dst[idx_dst] = 1.0 + cp->solver->u[idx_src]; + const int idx_src = j * solver->u_stride + i; + dst[idx_dst] = 1.0 + solver->u[idx_src]; } if (cp->level == 0) { /* on the coarsest level, extrapolate the outer boundary ghost points */ - for (int idx_z = cp->offset_left[1]; idx_z < cp->offset_left[1] + cp->solver->domain_size; idx_z++) - for (int idx_x = cp->offset_left[1] + cp->solver->domain_size; idx_x < cp->grid_size[0]; idx_x++) { + for (int idx_z = cp->offset_left[1]; idx_z < cp->offset_left[1] + solver->domain_size; idx_z++) + for (int idx_x = cp->offset_left[1] + solver->domain_size; idx_x < cp->grid_size[0]; idx_x++) { const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z); dst[idx_dst] = 2.0 * dst[idx_dst - 1] - dst[idx_dst - 2]; } for (int idx_x = cp->offset_left[0]; idx_x < cp->grid_size[0]; idx_x++) - for (int idx_z = cp->offset_left[1] + cp->solver->domain_size; idx_z < cp->grid_size[2]; idx_z++) { - const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z); + for (int idx_z = cp->offset_left[1] + solver->domain_size; idx_z < cp->grid_size[2]; idx_z++) { + const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z); const ptrdiff_t idx_src0 = CPINDEX(cp, idx_x, cp->y_idx, idx_z - 1); const ptrdiff_t idx_src1 = CPINDEX(cp, idx_x, cp->y_idx, idx_z - 2); dst[idx_dst] = 2.0 * dst[idx_src0] - dst[idx_src1]; @@ -512,8 +560,7 @@ void msa_mg_eval(CCTK_ARGUMENTS) int64_t total_start; - int nb_levels; - int nb_levels_type, ret; + int ret; const size_t grid_size = cctkGH->cctk_lsh[2] * cctkGH->cctk_lsh[1] * cctkGH->cctk_lsh[0]; const double t = cctkGH->cctk_time; @@ -524,8 +571,6 @@ void msa_mg_eval(CCTK_ARGUMENTS) LOGDEBUG( "evaluating lapse at rl=%d, t=%g\n", reflevel, t); - nb_levels = *(int*)CCTK_ParameterGet("num_levels_1", "CarpetRegrid2", &nb_levels_type); - time_interp_step = lapse_prev1_time[reflevel] - lapse_prev0_time[reflevel]; if (lapse_prev0_time[reflevel] == DBL_MAX && @@ -540,7 +585,7 @@ void msa_mg_eval(CCTK_ARGUMENTS) fact1 = (t - lapse_prev0_time[reflevel]) / time_interp_step; } - if (reflevel < nb_levels - 1) { + if (reflevel < ms->nb_levels - 1) { /* on coarse levels use extrapolated past solutions */ int64_t extrap_start = gettime(); @@ -557,38 +602,48 @@ void msa_mg_eval(CCTK_ARGUMENTS) int64_t fine_solve_start = gettime(); int64_t start; CoordPatch *cp = get_coord_patch(ms, reflevel); + const int timestep = lrint(t * cctkGH->cctk_levfac[0] / ms->base_dt); + int mol_substeps = MoLNumIntegratorSubsteps(); + int *mol_step = CCTK_VarDataPtr(cctkGH, 0, "MoL::MoL_Intermediate_Step"); + + MG2DContext *solver; + + if (!mol_step || *mol_step <= 0) + CCTK_WARN(0, "Invalid MoL step"); + + solver = cp->solver_eval[(cp->cur_step & 1) * mol_substeps + (mol_substeps - *mol_step)]; start = gettime(); - fill_eq_coeffs(ms, cp); + fill_eq_coeffs(ms, cp, solver); ms->time_fine_fill_coeffs += gettime() - start; LOGDEBUG( "extrapolating BCs from t1=%g (%g) and t0=%g (%g)\n", lapse_prev1_time[reflevel], fact1, lapse_prev0_time[reflevel], fact0); start = gettime(); - for (ptrdiff_t idx_z = cp->offset_left[1] + cp->solver->domain_size - 1; idx_z < cctkGH->cctk_lsh[2]; idx_z++) + for (ptrdiff_t idx_z = cp->offset_left[1] + solver->domain_size - 1; idx_z < cctkGH->cctk_lsh[2]; idx_z++) for (ptrdiff_t idx_x = 0; idx_x < cctkGH->cctk_lsh[0]; idx_x++) { const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, idx_x, cp->y_idx, idx_z); lapse_mg_eval[idx] = fact0 * lapse_prev0[idx] + fact1 * lapse_prev1[idx]; } for (ptrdiff_t idx_z = 0; idx_z < cctkGH->cctk_lsh[2]; idx_z++) - for (ptrdiff_t idx_x = cp->offset_left[0] + cp->solver->domain_size - 1; idx_x < cctkGH->cctk_lsh[0]; idx_x++) { + for (ptrdiff_t idx_x = cp->offset_left[0] + solver->domain_size - 1; idx_x < cctkGH->cctk_lsh[0]; idx_x++) { const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, idx_x, cp->y_idx, idx_z); lapse_mg_eval[idx] = fact0 * lapse_prev0[idx] + fact1 * lapse_prev1[idx]; } - for (int j = 0; j < cp->solver->fd_stencil; j++) { - MG2DBoundary *bnd = cp->solver->boundaries[MG2D_BOUNDARY_1U]; + for (int j = 0; j < solver->fd_stencil; j++) { + MG2DBoundary *bnd = solver->boundaries[MG2D_BOUNDARY_1U]; double *dst = bnd->val + j * bnd->val_stride; - for (ptrdiff_t i = -j; i < (ptrdiff_t)cp->solver->domain_size + j; i++) { - const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, i + cp->offset_left[0], cp->y_idx, cp->offset_left[1] + cp->solver->domain_size - 1 + j); + for (ptrdiff_t i = -j; i < (ptrdiff_t)solver->domain_size + j; i++) { + const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, i + cp->offset_left[0], cp->y_idx, cp->offset_left[1] + solver->domain_size - 1 + j); dst[i] = lapse_mg_eval[idx] - 1.0; } } - for (int j = 0; j < cp->solver->fd_stencil; j++) { - MG2DBoundary *bnd = cp->solver->boundaries[MG2D_BOUNDARY_0U]; + for (int j = 0; j < solver->fd_stencil; j++) { + MG2DBoundary *bnd = solver->boundaries[MG2D_BOUNDARY_0U]; double *dst = bnd->val + j * bnd->val_stride; - for (ptrdiff_t i = -j; i < (ptrdiff_t)cp->solver->domain_size + j; i++) { - const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, cp->offset_left[0] + cp->solver->domain_size - 1 + j, cp->y_idx, cp->offset_left[1] + i); + for (ptrdiff_t i = -j; i < (ptrdiff_t)solver->domain_size + j; i++) { + const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, cp->offset_left[0] + solver->domain_size - 1 + j, cp->y_idx, cp->offset_left[1] + i); dst[i] = lapse_mg_eval[idx] - 1.0; } } @@ -597,13 +652,13 @@ void msa_mg_eval(CCTK_ARGUMENTS) LOGDEBUG( "mg solve\n"); start = gettime(); - ret = mg2d_solve(cp->solver); + ret = mg2d_solve(solver); if (ret < 0) CCTK_WARN(0, "Error solving the maximal slicing equation"); ms->time_fine_mg2d += gettime() - start; start = gettime(); - solution_to_grid(cp, lapse_mg_eval); + solution_to_grid(cp, solver, lapse_mg_eval); ms->time_fine_export = gettime() - start; ms->time_fine_solve += gettime() - fine_solve_start; @@ -625,8 +680,8 @@ void msa_mg_solve(CCTK_ARGUMENTS) int64_t total_start, mg_start, start; - int nb_levels, reflevel_top, ts_tmp; - int nb_levels_type, ret; + int reflevel_top, ts_tmp; + int ret; const size_t grid_size = cctkGH->cctk_lsh[2] * cctkGH->cctk_lsh[1] * cctkGH->cctk_lsh[0]; const double t = cctkGH->cctk_time; @@ -639,8 +694,6 @@ void msa_mg_solve(CCTK_ARGUMENTS) LOGDEBUG( "solve lapse at rl=%d, t=%g; step %d\n", reflevel, t, timestep); - nb_levels = *(int*)CCTK_ParameterGet("num_levels_1", "CarpetRegrid2", &nb_levels_type); - reflevel_top = reflevel; ts_tmp = timestep; while (reflevel_top > 0 && !(ts_tmp % 2)) { @@ -654,7 +707,7 @@ void msa_mg_solve(CCTK_ARGUMENTS) /* fill in the equation coefficients */ cp = get_coord_patch(ms, reflevel); start = gettime(); - fill_eq_coeffs(ms, cp); + fill_eq_coeffs(ms, cp, cp->solver); ms->time_solve_fill_coeffs += gettime() - start; start = gettime(); @@ -726,7 +779,7 @@ void msa_mg_solve(CCTK_ARGUMENTS) /* export the result */ start = gettime(); - solution_to_grid(cp, lapse_mg); + solution_to_grid(cp, cp->solver, lapse_mg); lapse_mg1 = CCTK_VarDataPtr(cctkGH, 1, "MaximalSlicingAxiMG::lapse_mg"); memcpy(lapse_mg1, lapse_mg, grid_size * sizeof(*lapse_mg1)); @@ -843,15 +896,32 @@ void msa_mg_init(CCTK_ARGUMENTS) DECLARE_CCTK_ARGUMENTS; DECLARE_CCTK_PARAMETERS; int ret; + int nb_levels_type; + int nb_levels = *(int*)CCTK_ParameterGet("num_levels_1", "CarpetRegrid2", &nb_levels_type); ret = context_init(cctkGH, fd_stencil, maxiter, exact_size, nb_cycles, nb_relax_pre, nb_relax_post, tol_residual, tol_residual_base, - cfl_factor, + cfl_factor, nb_levels, loglevel, &ms); if (ret < 0) CCTK_WARN(0, "Error initializing the solver context"); } +void msa_mg_prestep(CCTK_ARGUMENTS) +{ + DECLARE_CCTK_ARGUMENTS; + DECLARE_CCTK_PARAMETERS; + + CoordPatch *cp; + + const double t = cctkGH->cctk_time; + const int timestep = lrint(t * cctkGH->cctk_levfac[0] / cctkGH->cctk_delta_time); + const int reflevel = ctz(cctkGH->cctk_levfac[0]); + + cp = get_coord_patch(ms, reflevel); + cp->cur_step = timestep; +} + void msa_mg_inithist(CCTK_ARGUMENTS) { DECLARE_CCTK_ARGUMENTS; -- cgit v1.2.3