From f60e4c9dfa675dd849ccb5830fabdbc5561562f3 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Mon, 9 Sep 2019 13:08:19 +0200 Subject: Do the solve on the coarse levels as well. Breaks MPI for now. --- interface.ccl | 12 ++- src/qms.c | 230 ++++++++++++++++++++++++++++++++++------------------------ 2 files changed, 146 insertions(+), 96 deletions(-) diff --git a/interface.ccl b/interface.ccl index 5041ea2..10a472d 100644 --- a/interface.ccl +++ b/interface.ccl @@ -1,7 +1,7 @@ # Interface definition for thorn QuasiMaximalSlicingMG implements: QuasiMaximalSlicingMG -INHERITS: ADMBase grid CoordBase MethodOfLines ML_BSSN +INHERITS: ADMBase grid CoordBase MethodOfLines ML_BSSN Driver CCTK_INT FUNCTION MoLRegisterConstrained(CCTK_INT IN idx) CCTK_INT FUNCTION MoLRegisterSaveAndRestore(CCTK_INT IN idx) @@ -14,6 +14,16 @@ REQUIRES FUNCTION MoLRegisterSaveAndRestoreGroup CCTK_INT FUNCTION MoLNumIntegratorSubsteps () USES FUNCTION MoLNumIntegratorSubsteps +CCTK_POINTER FUNCTION \ + VarDataPtrI \ + (CCTK_POINTER_TO_CONST IN cctkGH, \ + CCTK_INT IN map, \ + CCTK_INT IN reflevel, \ + CCTK_INT IN component, \ + CCTK_INT IN timelevel, \ + CCTK_INT IN varindex) +USES FUNCTION VarDataPtrI + public: REAL W_val TYPE=GF TIMELEVELS=2 REAL W_val0 TYPE=GF TIMELEVELS=1 tags='Prolongation="None"' diff --git a/src/qms.c b/src/qms.c index b922840..1005290 100644 --- a/src/qms.c +++ b/src/qms.c @@ -28,6 +28,14 @@ #define CPINDEX(cp, i, j, k) ((k * cp->grid_size[1] + j) * cp->grid_size[0] + i) +#define qms_assert(x) \ +do { \ + if (!(x)) { \ + fprintf(stderr, "Assertion " #x " failed\n"); \ + abort(); \ + } \ +} while (0) + /* precomputed values for a given refined grid */ typedef struct CoordPatch { int level; @@ -41,8 +49,8 @@ typedef struct CoordPatch { ptrdiff_t offset_left[2]; int bnd_intercomp[2][2]; - double *filter; - ptrdiff_t filter_stride; + double *interp_tmp0; + double *interp_tmp1; /* MPI sync for the initial guess */ int nb_components; @@ -76,9 +84,6 @@ typedef struct QMSMGContext { int solve_level; int boundary_offset; - double filter_start; - double filter_end; - CoordPatch *patches; int nb_patches; @@ -357,8 +362,8 @@ static CoordPatch *get_coord_patch(QMSMGContext *ms, int level) solver->boundaries[MG2D_BOUNDARY_0L]->type = MG2D_BC_TYPE_REFLECT; solver->boundaries[MG2D_BOUNDARY_1L]->type = MG2D_BC_TYPE_REFLECT; - solver->boundaries[MG2D_BOUNDARY_0U]->type = MG2D_BC_TYPE_FIXVAL; - solver->boundaries[MG2D_BOUNDARY_1U]->type = MG2D_BC_TYPE_FIXVAL; + solver->boundaries[MG2D_BOUNDARY_0U]->type = level ? MG2D_BC_TYPE_FIXVAL : MG2D_BC_TYPE_FALLOFF; + solver->boundaries[MG2D_BOUNDARY_1U]->type = level ? MG2D_BC_TYPE_FIXVAL : MG2D_BC_TYPE_FALLOFF; solver->maxiter = ms->maxiter; solver->tol = ms->tol_residual_base / (solver->step[0] * solver->step[1]); @@ -386,29 +391,11 @@ static CoordPatch *get_coord_patch(QMSMGContext *ms, int level) } } - if (level == ms->solve_level) { - ms->filter_end = solver->step[0] * (solver->domain_size - 1); - ms->filter_start = 0.7 * ms->filter_end; - } - - if ((solver->domain_size - 1) * solver->step[0] >= ms->filter_start) { - cp->filter_stride = solver->local_size[0]; - cp->filter = calloc(solver->local_size[1] * cp->filter_stride, sizeof(*cp->filter)); - if (!cp->filter) - CCTK_WARN(0, "Error allocating the filter"); - - for (int idx_z = 0; idx_z < solver->local_size[1]; idx_z++) { - const double z = solver->step[1] * (solver->local_start[1] + idx_z); - for (int idx_x = 0; idx_x < solver->local_size[0]; idx_x++) { - const double x = solver->step[0] * (solver->local_start[0] + idx_x); - const double r = sqrt(SQR(x) + SQR(z)); - - double fact = exp(-36.0 * (pow((r - ms->filter_start) / (ms->filter_end - ms->filter_start), 6.0))); - fact = r >= ms->filter_start ? fact : 1.0; - - cp->filter[idx_z * cp->filter_stride + idx_x] = fact; - } - } + if (level > 0) { + cp->interp_tmp0 = calloc(cp->grid_size[0] * cp->grid_size[2], sizeof(*cp->interp_tmp0)); + cp->interp_tmp1 = calloc(cp->grid_size[0] * cp->grid_size[2], sizeof(*cp->interp_tmp1)); + if (!cp->interp_tmp0 || !cp->interp_tmp1) + CCTK_WARN(0, "Error allocating arrays"); } finish: @@ -425,8 +412,6 @@ static void coord_patch_free(CoordPatch *cp) if (cp->solver_comm != MPI_COMM_NULL) MPI_Comm_free(&cp->solver_comm); - - free(cp->filter); } static void print_stats(QMSMGContext *ms) @@ -1081,28 +1066,8 @@ static void fill_eq_coeffs(QMSMGContext *ctx, CoordPatch *cp, MG2DContext *solve } } -static void solution_to_grid(CoordPatch *cp, MG2DContext *solver, double *dst) +static void mirror(CoordPatch *cp, double *dst) { - if (cp->filter) { -#pragma omp parallel for - for (int j = 0; j < solver->local_size[1]; j++) - for (int i = 0; i < solver->local_size[0]; i++) { - const ptrdiff_t idx_dst = CPINDEX(cp, i + cp->offset_left[0], cp->y_idx, j + cp->offset_left[1]); - const int idx_src = j * solver->u_stride + i; - const int idx_filter = j * cp->filter_stride + i; - dst[idx_dst] = solver->u[idx_src] * cp->filter[idx_filter]; - } - } else { -#pragma omp parallel for - for (int j = 0; j < solver->local_size[1]; j++) - for (int i = 0; i < solver->local_size[0]; i++) { - const ptrdiff_t idx_dst = CPINDEX(cp, i + cp->offset_left[0], cp->y_idx, j + cp->offset_left[1]); - const int idx_src = j * solver->u_stride + i; - dst[idx_dst] = solver->u[idx_src]; - } - } - - /* fill in the axial symmetry ghostpoints by mirroring */ if (!cp->bnd_intercomp[0][0]) { #pragma omp parallel for for (int idx_z = cp->offset_left[1]; idx_z < cp->grid_size[2]; idx_z++) @@ -1123,6 +1088,20 @@ static void solution_to_grid(CoordPatch *cp, MG2DContext *solver, double *dst) } } +static void solution_to_grid(CoordPatch *cp, MG2DContext *solver, double *dst) +{ +#pragma omp parallel for + for (int j = 0; j < solver->local_size[1]; j++) + for (int i = 0; i < solver->local_size[0]; i++) { + const ptrdiff_t idx_dst = CPINDEX(cp, i + cp->offset_left[0], cp->y_idx, j + cp->offset_left[1]); + const int idx_src = j * solver->u_stride + i; + dst[idx_dst] = solver->u[idx_src]; + } + + /* fill in the axial symmetry ghostpoints by mirroring */ + mirror(cp, dst); +} + typedef struct Rect { ptrdiff_t start[2]; size_t size[2]; @@ -1260,6 +1239,14 @@ static int guess_from_coarser(QMSMGContext *ms, CoordPatch *cp, CoordPatch *cp_c return ret; } +static double smooth_step(double x) +{ + if (x <= 0.0) + return 0.0; + else if (x >= 1.0) + return 1.0; + return 1.0 - exp(-36.0 * pow(x, 6.0)); +} void qms_mg_solve(CCTK_ARGUMENTS) { @@ -1276,12 +1263,12 @@ void qms_mg_solve(CCTK_ARGUMENTS) int64_t total_start, start; int ret; + const double dt = W_val1_time[reflevel] - W_val0_time[reflevel]; if (cctkGH->cctk_time >= switchoff_time + 1.0) return; - if (reflevel < ms->solve_level) - return; + /* skip the fine time steps */ if (reflevel > ms->solve_level && fabs(time - W_val1_time[ms->solve_level]) > 1e-13) return; @@ -1300,7 +1287,24 @@ void qms_mg_solve(CCTK_ARGUMENTS) ms->count_fill++; start = gettime(); - if (reflevel > ms->solve_level) { + if (reflevel > 0) { + /* outer-most level for this solve, use extrapolated values as boundary condition */ + if (fabs(time - W_val1_time[reflevel - 1]) > 1e-13) { + const double fact0 = (W_val1_time[reflevel] - time) / dt; + const double fact1 = (time - W_val0_time[reflevel]) / dt; + +#pragma omp parallel for + for (size_t i = 0; i < grid_size; i++) + W_val[i] = fact0 * W_val0[i] + fact1 * W_val1[i]; + } else { + /* use the solution from the coarser level as the initial guess */ + CoordPatch *cp_coarse = get_coord_patch(ms, reflevel - 1); + + ret = guess_from_coarser(ms, cp, cp_coarse); + if (ret < 0) + CCTK_WARN(0, "Error setting the initial guess"); + } + /* fill the solver boundary conditions */ if (cp->solver->local_start[1] + cp->solver->local_size[1] == cp->solver->domain_size) { #pragma omp parallel for @@ -1324,15 +1328,6 @@ void qms_mg_solve(CCTK_ARGUMENTS) } } } - - { - /* use the solution from the coarser level as the initial guess */ - CoordPatch *cp_coarse = get_coord_patch(ms, reflevel - 1); - - ret = guess_from_coarser(ms, cp, cp_coarse); - if (ret < 0) - CCTK_WARN(0, "Error setting the initial guess"); - } } ms->time_bnd += gettime() - start; ms->count_bnd++; @@ -1363,32 +1358,74 @@ void qms_mg_solve(CCTK_ARGUMENTS) /* linearly extrapolate the past two solution to predict the next one */ { - double *u0 = W_val0; - double *u1 = W_val1; - double time0 = W_val0_time[reflevel]; - double time1 = W_val1_time[reflevel]; - double time = time1 + ms->gh->cctk_delta_time / (1.0 * (1 << ms->solve_level)); + const double time_src_0 = W_val0_time[reflevel]; + const double time_src_1 = W_val1_time[reflevel]; + const double pred_time = time_src_1 + dt; - double fact0, fact1; - - if (time0 == DBL_MAX && time1 == DBL_MAX) { - fact0 = 0.0; - fact1 = 0.0; - } else if (time0 == DBL_MAX) { - fact0 = 0.0; - fact1 = 0.0; - } else { - fact0 = (time - time1) / (time0 - time1); - fact1 = (time - time0) / (time1 - time0); - } + const double fact0 = (pred_time - time_src_1) / (time_src_0 - time_src_1); + const double fact1 = (pred_time - time_src_0) / (time_src_1 - time_src_0); memcpy(W_pred0, W_pred1, sizeof(*W_pred0) * grid_size); W_pred0_time[reflevel] = W_pred1_time[reflevel]; - for (int i = 0; i < grid_size; i++) - W_pred1[i] = (u1[i] * fact1 + u0[i] * fact0); + if (reflevel > 0 && reflevel <= ms->solve_level) { + CoordPatch *cp_coarse = get_coord_patch(ms, reflevel - 1); + double *W_pred0_coarse = VarDataPtrI(ms->gh, 0, reflevel - 1, 0, 0, CCTK_VarIndex("QuasiMaximalSlicingMG::W_pred0")); + double *W_pred1_coarse = VarDataPtrI(ms->gh, 0, reflevel - 1, 0, 0, CCTK_VarIndex("QuasiMaximalSlicingMG::W_pred1")); - W_pred1_time[reflevel] = time; + const int integrator_substeps = MoLNumIntegratorSubsteps(); + const int interior_size = cctk_lsh[0] - cp->offset_left[0] - cctk_nghostzones[0] * integrator_substeps * 2; + + const double bnd_loc = x[CCTK_GFINDEX3D(ms->gh, cp->offset_left[0] + interior_size, 0, 0)]; + const double transition_start = bnd_loc * 0.7; + + double tp0 = W_pred0_time[reflevel - 1]; + double tp1 = W_pred1_time[reflevel - 1]; + + const double factp0 = (pred_time - tp1) / (tp0 - tp1); + const double factp1 = (pred_time - tp0) / (tp1 - tp0); + + qms_assert(pred_time >= tp0 && pred_time <= tp1); + + mg2d_interp(cp->solver, cp->interp_tmp0 + CCTK_GFINDEX3D(ms->gh, cp->offset_left[0], 0, cp->offset_left[1]), + cctk_lsh[0], (ptrdiff_t [2]){ 0, 0 }, (size_t [2]){cctk_lsh[0] - cp->offset_left[0], cctk_lsh[2] - cp->offset_left[1] }, + cp->solver->step, + W_pred0_coarse, cp_coarse->grid_size[0], (ptrdiff_t [2]){ -cp_coarse->offset_left[0], -cp_coarse->offset_left[1] }, + (size_t [2]){cp_coarse->grid_size[0], cp_coarse->grid_size[2] }, cp_coarse->solver->step); + mg2d_interp(cp->solver, cp->interp_tmp1 + CCTK_GFINDEX3D(ms->gh, cp->offset_left[0], 0, cp->offset_left[1]), + cctk_lsh[0], (ptrdiff_t [2]){ 0, 0 }, (size_t [2]){cctk_lsh[0] - cp->offset_left[0], cctk_lsh[2] - cp->offset_left[1] }, + cp->solver->step, + W_pred1_coarse, cp_coarse->grid_size[0], (ptrdiff_t [2]){ -cp_coarse->offset_left[0], -cp_coarse->offset_left[1] }, + (size_t [2]){cp_coarse->grid_size[0], cp_coarse->grid_size[2] }, cp_coarse->solver->step); + +#pragma omp parallel for + for (int i = 0; i < grid_size; i++) + W_pred1[i] = factp0 * cp->interp_tmp0[i] + factp1 * cp->interp_tmp1[i]; + +#pragma omp parallel for + for (int idx_z = 0; idx_z < interior_size; idx_z++) + for (int idx_x = 0; idx_x < interior_size; idx_x++) { + const ptrdiff_t idx = CCTK_GFINDEX3D(ms->gh, cp->offset_left[0] + idx_x, 0, + cp->offset_left[1] + idx_z); + + const double x_val = x[idx]; + const double z_val = z[idx]; + const double coarse_val = W_pred1[idx]; + + const double r = sqrt(SQR(x_val) + SQR(z_val)); + const double dist = (r - transition_start) / (bnd_loc - transition_start); + + const double bnd_fact = smooth_step(dist); + + W_pred1[idx] = (W_val1[idx] * fact1 + W_val0[idx] * fact0) * (1.0 - bnd_fact) + coarse_val * bnd_fact; + } + mirror(cp, W_pred1); + } else { +#pragma omp parallel for + for (int i = 0; i < grid_size; i++) + W_pred1[i] = (W_val1[i] * fact1 + W_val0[i] * fact0); + } + W_pred1_time[reflevel] = pred_time; } ms->time_export += gettime() - start; ms->count_export++; @@ -1422,7 +1459,7 @@ void qms_mg_eval(CCTK_ARGUMENTS) ms = qms_context; - if (reflevel < ms->solve_level || time >= switchoff_time + 1.0) + if (time >= switchoff_time + 1.0) return; start = gettime(); @@ -1544,21 +1581,24 @@ void qms_mg_inithist(CCTK_ARGUMENTS) DECLARE_CCTK_ARGUMENTS; DECLARE_CCTK_PARAMETERS; + const int reflevel = ctz(cctk_levfac[0]); + const double dt = cctk_delta_time / (1 << MIN(reflevel, solve_level)); + size_t grid_size = cctk_lsh[0] * cctk_lsh[1] * cctk_lsh[2]; - fprintf(stderr, "%d inithist\n", ctz(cctk_levfac[0])); + fprintf(stderr, "%d inithist\n", reflevel); - for (int i = 0; i < 32; i++) { - W_pred0_time[i] = DBL_MAX; - W_pred1_time[i] = DBL_MAX; - W_val0_time[i] = DBL_MAX; - W_val1_time[i] = DBL_MAX; - } + W_val1_time[reflevel] = -dt; + W_val0_time[reflevel] = W_val1_time[reflevel] - dt; + + W_pred1_time[reflevel] = W_val1_time[reflevel] + dt; + W_pred0_time[reflevel] = W_pred1_time[reflevel] - dt; memset(W_pred0, 0, sizeof(double) * grid_size); memset(W_pred1, 0, sizeof(double) * grid_size); - memset(W_val0, 0, sizeof(double) * grid_size); - memset(W_val1, 0, sizeof(double) * grid_size); + memset(W_val0, 0, sizeof(double) * grid_size); + memset(W_val1, 0, sizeof(double) * grid_size); + memset(W_val, 0, sizeof(double) * grid_size); } void qms_mg_terminate_print_stats(CCTK_ARGUMENTS) -- cgit v1.2.3