From 68e2c0b3166527e95b66c024fcc06395d059c6bf Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Mon, 22 Jul 2019 09:59:20 +0200 Subject: Initial commit. --- configuration.ccl | 3 + interface.ccl | 26 ++ param.ccl | 83 ++++ schedule.ccl | 52 +++ src/make.code.defn | 7 + src/qms.c | 1112 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 1283 insertions(+) create mode 100644 configuration.ccl create mode 100644 interface.ccl create mode 100644 param.ccl create mode 100644 schedule.ccl create mode 100644 src/make.code.defn create mode 100644 src/qms.c diff --git a/configuration.ccl b/configuration.ccl new file mode 100644 index 0000000..5fad107 --- /dev/null +++ b/configuration.ccl @@ -0,0 +1,3 @@ +# Configuration definition for thorn QuasiMaximalSlicingMG +REQUIRES MPI + diff --git a/interface.ccl b/interface.ccl new file mode 100644 index 0000000..37e039a --- /dev/null +++ b/interface.ccl @@ -0,0 +1,26 @@ +# Interface definition for thorn QuasiMaximalSlicingMG +implements: QuasiMaximalSlicingMG + +INHERITS: ADMBase grid CoordBase MethodOfLines ML_BSSN + +CCTK_INT FUNCTION MoLRegisterConstrained(CCTK_INT IN idx) +CCTK_INT FUNCTION MoLRegisterSaveAndRestore(CCTK_INT IN idx) +CCTK_INT FUNCTION MoLRegisterSaveAndRestoreGroup(CCTK_INT IN idx) + +REQUIRES FUNCTION MoLRegisterConstrained +REQUIRES FUNCTION MoLRegisterSaveAndRestore +REQUIRES FUNCTION MoLRegisterSaveAndRestoreGroup + +CCTK_INT FUNCTION MoLNumIntegratorSubsteps () +USES FUNCTION MoLNumIntegratorSubsteps + +public: +REAL W_val0 TYPE=GF TIMELEVELS=1 +REAL W_val1 TYPE=GF TIMELEVELS=1 +REAL W_val0_time TYPE=ARRAY DIM=1 SIZE=32 DISTRIB=constant +REAL W_val1_time TYPE=ARRAY DIM=1 SIZE=32 DISTRIB=constant + +REAL W_pred0 TYPE=GF tags='Prolongation="None"' TIMELEVELS=1 +REAL W_pred1 TYPE=GF tags='Prolongation="None"' TIMELEVELS=1 +REAL W_pred0_time TYPE=ARRAY DIM=1 SIZE=32 DISTRIB=constant +REAL W_pred1_time TYPE=ARRAY DIM=1 SIZE=32 DISTRIB=constant diff --git a/param.ccl b/param.ccl new file mode 100644 index 0000000..a738832 --- /dev/null +++ b/param.ccl @@ -0,0 +1,83 @@ +# Parameter definitions for thorn QuasiMaximalSlicing + +shares: ML_BSSN +EXTENDS CCTK_KEYWORD lapse_source "lapse_source" +{ + QMS_MG :: "" +} + +PRIVATE: +CCTK_INT solve_level "" STEERABLE=recover +{ + 0: :: "" +} 0 + +CCTK_REAL switchoff_time "" STEERABLE=recover +{ + : :: "" +} 9999.0 + +CCTK_INT ccz4 "Use CCZ4 or BSSN" STEERABLE=recover +{ + 0:1 :: "" +} 0 + +RESTRICTED: +CCTK_INT fd_stencil "finite differencing stencil" +{ + 1: :: "" +} 1 + +RESTRICTED: +CCTK_INT maxiter "maximum number of solver iterations" STEERABLE=always +{ + 1: :: "" +} 64 + +RESTRICTED: +CCTK_INT nb_cycles "number of coarse-correction cycles per level" STEERABLE=always +{ + 1: :: "" +} 1 + +RESTRICTED: +CCTK_INT nb_relax_pre "number of relaxation steps before coarse-grid correction" STEERABLE=always +{ + 0: :: "" +} 2 + +RESTRICTED: +CCTK_INT nb_relax_post "number of relaxation steps after coarse-grid correction" STEERABLE=always +{ + 0: :: "" +} 2 + +RESTRICTED: +CCTK_INT exact_size "" STEERABLE=always +{ + 0: :: "" +} 5 + +RESTRICTED: +CCTK_REAL tol_residual_base "maximum absolute value of the residual for dx=1.0, scaled by 1/(dx ** 2)" STEERABLE=always +{ + 0: :: "" +} 0.0 + +RESTRICTED: +CCTK_REAL cfl_factor "" STEERABLE=always +{ + 0: :: "" +} 0.0 + +RESTRICTED: +CCTK_INT stats_every "print elliptic solver stats every coarsest-level steps" STEERABLE=always +{ + 0: :: "" +} 0 + +RESTRICTED: +STRING loglevel "logging level" STEERABLE=always +{ + :: "" +} "info" diff --git a/schedule.ccl b/schedule.ccl new file mode 100644 index 0000000..70e90c0 --- /dev/null +++ b/schedule.ccl @@ -0,0 +1,52 @@ +# Schedule definitions for thorn MaximalSlicingAxi +# +if (CCTK_EQUALS(lapse_source, "QMS_MG")) { + SCHEDULE msa_mg_init IN CCTK_BASEGRID AFTER TemporalSpacings { + LANG: C + } "QMS MG init" + + SCHEDULE qms_mg_eval IN ML_BSSN_evolCalcGroup BEFORE ML_BSSN_lapse_evol { + LANG: C + SYNC: ML_lapse + } "Quasimaximal slicing eval W" + SCHEDULE qms_mg_eval IN ML_CCZ4_evolCalcGroup BEFORE ML_CCZ4_lapse_evol { + LANG: C + SYNC: ML_lapse + } "Quasimaximal slicing eval W" + + #SCHEDULE quasimaximal_slicing_axi_solve IN ML_BSSN_evolCalcGroup BEFORE quasimaximal_slicing_axi_eval { + #SCHEDULE quasimaximal_slicing_axi_solve IN MoL_PostStep AFTER ML_BSSN_ApplyBCs { + SCHEDULE qms_mg_solve IN CCTK_POSTSTEP { + LANG: C + SYNC: W_pred0 + SYNC: W_pred1 + SYNC: W_val0 + SYNC: W_val1 + } "Quasimaximal slicing solve W" + + SCHEDULE qms_mg_sync IN CCTK_POSTSTEP BEFORE qms_mg_solve { + SYNC: W_val1 + LANG: C + } "" + + SCHEDULE qms_mg_inithist IN CCTK_INITIAL { + LANG: C + } "" + + SCHEDULE qms_mg_init IN CCTK_BASEGRID AFTER TemporalSpacings { + LANG: C + } "" + + #SCHEDULE quasimaximal_slicing_axi IN MoL_PseudoEvolution { + # LANG: C + #} "Quasimaximal slicing" + + STORAGE: W_pred0 + STORAGE: W_pred1 + STORAGE: W_pred0_time + STORAGE: W_pred1_time + STORAGE: W_val0 + STORAGE: W_val1 + STORAGE: W_val0_time + STORAGE: W_val1_time +} diff --git a/src/make.code.defn b/src/make.code.defn new file mode 100644 index 0000000..7f29e1a --- /dev/null +++ b/src/make.code.defn @@ -0,0 +1,7 @@ +# Main make.code.defn file for thorn MaximalSlicingAxi + +# Source files in this directory +SRCS = qms.c + +# Subdirectories containing source files +SUBDIRS = diff --git a/src/qms.c b/src/qms.c new file mode 100644 index 0000000..de2bb12 --- /dev/null +++ b/src/qms.c @@ -0,0 +1,1112 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "cctk.h" +#include "cctk_Arguments.h" +#include "cctk_Parameters.h" +#include "cctk_Timers.h" +#include "util_Table.h" + +#define SQR(x) ((x) * (x)) +#define SGN(x) ((x) >= 0.0 ? 1.0 : -1.0) +#define MAX(x, y) ((x) > (y) ? (x) : (y)) +#define MIN(x, y) ((x) > (y) ? (y) : (x)) +#define ABS(x) ((x >= 0) ? (x) : -(x)) +#define ARRAY_ELEMS(arr) (sizeof(arr) / sizeof(*arr)) + +#define CPINDEX(cp, i, j, k) ((k * cp->grid_size[1] + j) * cp->grid_size[0] + i) + +/* precomputed values for a given refined grid */ +typedef struct CoordPatch { + int level; + ptrdiff_t grid_size[3]; + + MG2DContext *solver; + + int y_idx; + + ptrdiff_t offset_left[2]; + int bnd_intercomp[2][2]; +} CoordPatch; + +typedef struct QMSMGContext { + cGH *gh; + + int log_level; + int fd_stencil; + int maxiter; + int max_exact_size; + int nb_cycles; + int nb_relax_post; + int nb_relax_pre; + double tol_residual_base; + double cfl_factor; + int solve_level; + + CoordPatch *patches; + int nb_patches; +} QMSMGContext; + +static const struct { + const char *str; + enum MG2DLogLevel level; +} log_levels[] = { + { "fatal", MG2D_LOG_FATAL }, + { "error", MG2D_LOG_ERROR }, + { "warning", MG2D_LOG_WARNING }, + { "info", MG2D_LOG_INFO }, + { "verbose", MG2D_LOG_VERBOSE }, + { "debug", MG2D_LOG_DEBUG }, +}; + +static QMSMGContext *qms_context; + +static int ctz(int a) +{ + int ret = 0; + + if (!a) + return INT_MAX; + + while (!(a & 1)) { + a >>= 1; + ret++; + } + + return ret; +} + +static void log_callback(const MG2DContext *ctx, int level, + const char *fmt, va_list vl) +{ + QMSMGContext *ms = ctx->opaque; + int target_level = ms->log_level; + uint8_t buf[1024]; + int ret; + + if (level > target_level) + return; + + ret = snprintf(buf, sizeof(buf), "[%d] t=%g ", ctz(ms->gh->cctk_levfac[0]), ms->gh->cctk_time); + if (ret >= sizeof(buf)) + return; + vsnprintf(buf + ret, sizeof(buf) - ret, fmt, vl); + fputs(buf, stderr); +} + +static CoordPatch *get_coord_patch(QMSMGContext *ms, int level) +{ + cGH *gh = ms->gh; + + const char *omp_threads = getenv("OMP_NUM_THREADS"); + + const int integrator_substeps = MoLNumIntegratorSubsteps(); + + const double *a_x = CCTK_VarDataPtr(gh, 0, "grid::x"); + const double *a_y = CCTK_VarDataPtr(gh, 0, "grid::y"); + const double *a_z = CCTK_VarDataPtr(gh, 0, "grid::z"); + + size_t solver_size[2]; + + CoordPatch *cp; + MG2DContext *solver; + int i, ret; + + for (int i = 0; i < ms->nb_patches; i++) { + cp = &ms->patches[i]; + + if (cp->level == level) + return cp; + } + + /* create a new patch */ + ms->patches = realloc(ms->patches, sizeof(*ms->patches) * (ms->nb_patches + 1)); + cp = &ms->patches[ms->nb_patches]; + + memset(cp, 0, sizeof(*cp)); + + cp->level = level; + cp->grid_size[0] = gh->cctk_lsh[0]; + cp->grid_size[1] = gh->cctk_lsh[1]; + cp->grid_size[2] = gh->cctk_lsh[2]; + + for (i = 0; i < gh->cctk_lsh[1]; i++) + if (fabs(a_y[CCTK_GFINDEX3D(gh, 0, i, 0)]) < 1e-8) { + cp->y_idx = i; + break; + } + if (i == gh->cctk_lsh[1]) + CCTK_WARN(0, "The grid does not include y==0"); + + for (int dir = 0; dir < 2; dir++) { + ptrdiff_t offset_right; + + cp->offset_left[dir] = gh->cctk_nghostzones[2 * dir]; + + offset_right = gh->cctk_nghostzones[2 * dir]; + if (cp->level > 0) { + offset_right *= integrator_substeps * 2; + offset_right -= 2; + } + + solver_size[dir] = cp->grid_size[2 * dir] - cp->offset_left[dir] - offset_right; + } + + if (solver_size[0] != solver_size[1]) + CCTK_WARN(0, "Non-square domain"); + + solver = mg2d_solver_alloc(solver_size[0]); + if (!solver) + CCTK_WARN(0, "Error allocating the solver"); + + cp->solver = solver; + + solver->step[0] = a_x[1] - a_x[0]; + solver->step[1] = a_z[CCTK_GFINDEX3D(gh, 0, 0, 1)] - a_z[0]; + + solver->fd_stencil = ms->fd_stencil; + + solver->boundaries[MG2D_BOUNDARY_0L]->type = MG2D_BC_TYPE_REFLECT; + solver->boundaries[MG2D_BOUNDARY_1L]->type = MG2D_BC_TYPE_REFLECT; + solver->boundaries[MG2D_BOUNDARY_0U]->type = MG2D_BC_TYPE_FIXVAL; + solver->boundaries[MG2D_BOUNDARY_1U]->type = MG2D_BC_TYPE_FIXVAL; + + solver->maxiter = ms->maxiter; + solver->tol = ms->tol_residual_base / (solver->step[0] * solver->step[1]); + solver->nb_cycles = ms->nb_cycles; + solver->nb_relax_pre = ms->nb_relax_pre; + solver->nb_relax_post = ms->nb_relax_post; + solver->cfl_factor = ms->cfl_factor; + solver->max_exact_size = ms->max_exact_size; + + solver->opaque = ms; + solver->log_callback = log_callback; + + if (omp_threads) + solver->nb_threads = strtol(omp_threads, NULL, 0); + if (solver->nb_threads <= 0) + solver->nb_threads = 1; + + /* initialize boundary values to zero, + * * non-zero values on the outer boundaries of refined levels are filled in elsewhere */ + for (int i = 0; i < 4; i++) { + int ci = mg2d_bnd_coord_idx(i); + for (size_t j = 0; j < solver->fd_stencil; j++) { + memset(solver->boundaries[i]->val + j * solver->boundaries[i]->val_stride - j, + 0, sizeof(*solver->boundaries[i]->val) * (solver_size[!ci] + 2 * j)); + } + } + + + ms->nb_patches++; + return cp; +} + +#define FD4(arr, stride, h_inv) \ + ((-1.0 * arr[2 * stride] + 8.0 * arr[1 * stride] - 8.0 * arr[-1 * stride] + 1.0 * arr[-2 * stride]) * h_inv / 12.0) +#define FD24(arr, stride, h_inv) \ + ((-1.0 * arr[2 * stride] + 16.0 * arr[1 * stride] - 30.0 * arr[0] + 16.0 * arr[-1 * stride] - 1.0 * arr[-2 * stride]) * SQR(h_inv) / 12.0) + +static double diff4_dx(const double *arr, ptrdiff_t stride, double h_inv) +{ + return FD4(arr, 1, h_inv); +} +static double diff4_dz(const double *arr, ptrdiff_t stride, double h_inv) +{ + return FD4(arr, stride, h_inv); +} + +static double diff4_dxx(const double *arr, ptrdiff_t stride, double h_inv) +{ + return FD24(arr, 1, h_inv); +} +static double diff4_dzz(const double *arr, ptrdiff_t stride, double h_inv) +{ + return FD24(arr, stride, h_inv); +} +static double diff4_dxz(const double *arr, ptrdiff_t stride, double dx_inv, double dz_inv) +{ + return (-1.0 * (-1.0 * arr[ 2 * stride + 2] + 8.0 * arr[ 2 * stride + 1] - 8.0 * arr[ 2 * stride - 1] + 1.0 * arr[ 2 * stride - 2]) + +8.0 * (-1.0 * arr[ 1 * stride + 2] + 8.0 * arr[ 1 * stride + 1] - 8.0 * arr[ 1 * stride - 1] + 1.0 * arr[ 1 * stride - 2]) + -8.0 * (-1.0 * arr[-1 * stride + 2] + 8.0 * arr[-1 * stride + 1] - 8.0 * arr[-1 * stride - 1] + 1.0 * arr[-1 * stride - 2]) + +1.0 * (-1.0 * arr[-2 * stride + 2] + 8.0 * arr[-2 * stride + 1] - 8.0 * arr[-2 * stride - 1] + 1.0 * arr[-2 * stride - 2])) * dx_inv * dz_inv / 144.0; +} + +static void fill_eq_coeffs(QMSMGContext *ctx, CoordPatch *cp, MG2DContext *solver) +{ + cGH *gh = ctx->gh; + int ret; + + const ptrdiff_t stride_z = CCTK_GFINDEX3D(gh, 0, 0, 1); + + double *a_x = CCTK_VarDataPtr(gh, 0, "grid::x"); + double *a_z = CCTK_VarDataPtr(gh, 0, "grid::z"); + + const double dx = a_x[1] - a_x[0]; + const double dz = a_z[stride_z] - a_z[0]; + const double dx_inv = 1.0 / dx; + const double dz_inv = 1.0 / dz; + + double *a_gtxx = CCTK_VarDataPtr(gh, 0, "ML_BSSN::gt11"); + double *a_gtyy = CCTK_VarDataPtr(gh, 0, "ML_BSSN::gt22"); + double *a_gtzz = CCTK_VarDataPtr(gh, 0, "ML_BSSN::gt33"); + double *a_gtxy = CCTK_VarDataPtr(gh, 0, "ML_BSSN::gt12"); + double *a_gtxz = CCTK_VarDataPtr(gh, 0, "ML_BSSN::gt13"); + double *a_gtyz = CCTK_VarDataPtr(gh, 0, "ML_BSSN::gt23"); + double *a_Atxx = CCTK_VarDataPtr(gh, 0, "ML_BSSN::At11"); + double *a_Atyy = CCTK_VarDataPtr(gh, 0, "ML_BSSN::At22"); + double *a_Atzz = CCTK_VarDataPtr(gh, 0, "ML_BSSN::At33"); + double *a_Atxy = CCTK_VarDataPtr(gh, 0, "ML_BSSN::At12"); + double *a_Atxz = CCTK_VarDataPtr(gh, 0, "ML_BSSN::At13"); + double *a_Atyz = CCTK_VarDataPtr(gh, 0, "ML_BSSN::At23"); + double *a_trK = CCTK_VarDataPtr(gh, 0, "ML_BSSN::trK"); + double *a_phi = CCTK_VarDataPtr(gh, 0, "ML_BSSN::phi"); + double *a_Xtx = CCTK_VarDataPtr(gh, 0, "ML_BSSN::Xt1"); + double *a_Xtz = CCTK_VarDataPtr(gh, 0, "ML_BSSN::Xt3"); + double *a_kdotxx = CCTK_VarDataPtr(gh, 0, "ML_BSSN::Kdot11"); + double *a_kdotyy = CCTK_VarDataPtr(gh, 0, "ML_BSSN::Kdot22"); + double *a_kdotzz = CCTK_VarDataPtr(gh, 0, "ML_BSSN::Kdot33"); + double *a_kdotxy = CCTK_VarDataPtr(gh, 0, "ML_BSSN::Kdot12"); + double *a_kdotxz = CCTK_VarDataPtr(gh, 0, "ML_BSSN::Kdot13"); + double *a_kdotyz = CCTK_VarDataPtr(gh, 0, "ML_BSSN::Kdot23"); + + double *a_alpha = CCTK_VarDataPtr(gh, 0, "ML_BSSN::alpha"); + + double *a_betax = CCTK_VarDataPtr(gh, 0, "ML_BSSN::beta1"); + double *a_betaz = CCTK_VarDataPtr(gh, 0, "ML_BSSN::beta3"); + + solver->diff_coeffs[MG2D_DIFF_COEFF_20]->boundaries[MG2D_BOUNDARY_0L].flags = MG2D_DC_FLAG_DISCONT; + solver->diff_coeffs[MG2D_DIFF_COEFF_10]->boundaries[MG2D_BOUNDARY_0L].flags = MG2D_DC_FLAG_POLE; + +#pragma omp parallel for + for (int idx_z = 0; idx_z < solver->local_size[1]; idx_z++) + for (int idx_x = 0; idx_x < solver->local_size[0]; idx_x++) { + const int idx_src = CCTK_GFINDEX3D(gh, idx_x + cp->offset_left[0], cp->y_idx, idx_z + cp->offset_left[1]); + const int idx_dc = idx_z * solver->diff_coeffs[0]->stride + idx_x; + const int idx_rhs = idx_z * solver->rhs_stride + idx_x; + + const double x = a_x[idx_src]; + const int on_axis = fabs(x) < 1e-13; + + double K[3][3], Km[3][3], Ku[3][3], dK[3][3][3]; + double gtu[3][3], g[3][3], gu[3][3]; + double dg[3][3][3], dgu[3][3][3], d2g[3][3][3][3], gudot[3][3]; + double G[3][3][3], dG[3][3][3][3], Gdot[3][3][3], X[3]; + double Ric[3][3], Ricm[3][3]; + double dgdot[3][3][3]; + double k2, Kij_Dij_alpha, k_kdot, k3; + double Gammadot_term, beta_term; + + double betal[3], dbetal[3][3], d2betal[3][3][3]; + + const double gtxx = a_gtxx[idx_src]; + const double gtyy = a_gtyy[idx_src]; + const double gtzz = a_gtzz[idx_src]; + const double gtxy = a_gtxy[idx_src]; + const double gtxz = a_gtxz[idx_src]; + const double gtyz = a_gtyz[idx_src]; + + const double gt[3][3] = {{ gtxx, gtxy, gtxz }, + { gtxy, gtyy, gtyz }, + { gtxz, gtyz, gtzz }}; + + const double dx_gt11 = diff4_dx(&a_gtxx[idx_src], 1, dx_inv); + const double dx_gt22 = diff4_dx(&a_gtyy[idx_src], 1, dx_inv); + const double dx_gt33 = diff4_dx(&a_gtzz[idx_src], 1, dx_inv); + const double dx_gt13 = diff4_dx(&a_gtxz[idx_src], 1, dx_inv); + + const double dz_gt11 = diff4_dz(&a_gtxx[idx_src], gh->cctk_lsh[0], dz_inv); + const double dz_gt22 = diff4_dz(&a_gtyy[idx_src], gh->cctk_lsh[0], dz_inv); + const double dz_gt33 = diff4_dz(&a_gtzz[idx_src], gh->cctk_lsh[0], dz_inv); + const double dz_gt13 = diff4_dz(&a_gtxz[idx_src], gh->cctk_lsh[0], dz_inv); + + const double dgt[3][3][3] = { + { + { dx_gt11, 0.0, dx_gt13 }, + { 0.0, dx_gt22, 0.0 }, + { dx_gt13, 0.0, dx_gt33 }, + }, + { + { 0.0, on_axis ? dx_gt11 - dx_gt22 : (gtxx - gtyy) / x, 0.0 }, + { on_axis ? dx_gt11 - dx_gt22 : (gtxx - gtyy) / x, 0.0, on_axis ? dx_gt13 : gtxz / x }, + { 0.0, on_axis ? dx_gt13 : gtxz / x, 0.0 }, + }, + { + { dz_gt11, 0.0, dz_gt13 }, + { 0.0, dz_gt22, 0.0 }, + { dz_gt13, 0.0, dz_gt33 }, + }, + }; + + const double dxx_gt11 = diff4_dxx(&a_gtxx[idx_src], 1, dx_inv); + const double dxx_gt22 = diff4_dxx(&a_gtyy[idx_src], 1, dx_inv); + const double dxx_gt33 = diff4_dxx(&a_gtzz[idx_src], 1, dx_inv); + const double dxx_gt13 = diff4_dxx(&a_gtxz[idx_src], 1, dx_inv); + + const double dxz_gt11 = diff4_dxz(&a_gtxx[idx_src], gh->cctk_lsh[0], dx_inv, dz_inv); + const double dxz_gt22 = diff4_dxz(&a_gtyy[idx_src], gh->cctk_lsh[0], dx_inv, dz_inv); + const double dxz_gt33 = diff4_dxz(&a_gtzz[idx_src], gh->cctk_lsh[0], dx_inv, dz_inv); + const double dxz_gt13 = diff4_dxz(&a_gtxz[idx_src], gh->cctk_lsh[0], dx_inv, dz_inv); + + const double dzz_gt11 = diff4_dzz(&a_gtxx[idx_src], gh->cctk_lsh[0], dz_inv); + const double dzz_gt22 = diff4_dzz(&a_gtyy[idx_src], gh->cctk_lsh[0], dz_inv); + const double dzz_gt33 = diff4_dzz(&a_gtzz[idx_src], gh->cctk_lsh[0], dz_inv); + const double dzz_gt13 = diff4_dzz(&a_gtxz[idx_src], gh->cctk_lsh[0], dz_inv); + + const double d2gt[3][3][3][3] = { + { + { + { dxx_gt11, 0.0, dxx_gt13 }, + { 0.0, dxx_gt22, 0.0 }, + { dxx_gt13, 0.0, dxx_gt33 }, + }, + { + { 0.0, on_axis ? 0.5 * (dxx_gt11 - dxx_gt22) : (dx_gt11 - dx_gt22) / x - (gtxx - gtyy) / SQR(x), 0.0 }, + { on_axis ? 0.5 * (dxx_gt11 - dxx_gt22) : (dx_gt11 - dx_gt22) / x - (gtxx - gtyy) / SQR(x), 0.0, + on_axis ? 0.5 * dxx_gt13 : dx_gt13 / x - gtxz / SQR(x) }, + { 0.0, on_axis ? 0.5 * dxx_gt13 : dx_gt13 / x - gtxz / SQR(x), 0.0 }, + }, + { + { dxz_gt11, 0.0, dxz_gt13 }, + { 0.0, dxz_gt22, 0.0 }, + { dxz_gt13, 0.0, dxz_gt33 }, + }, + + }, + { + { + { 0.0, on_axis ? 0.5 * (dxx_gt11 - dxx_gt22) : (dx_gt11 - dx_gt22) / x - (gtxx - gtyy) / SQR(x), 0.0 }, + { on_axis ? 0.5 * (dxx_gt11 - dxx_gt22) : (dx_gt11 - dx_gt22) / x - (gtxx - gtyy) / SQR(x), 0.0, + on_axis ? 0.5 * dxx_gt13 : dx_gt13 / x - gtxz / SQR(x) }, + { 0.0, on_axis ? 0.5 * dxx_gt13 : dx_gt13 / x - gtxz / SQR(x), 0.0 }, + }, + { + { on_axis ? dxx_gt22 : dx_gt11 / x - 2 * (gtxx - gtyy) / SQR(x), 0.0, + on_axis ? 0.5 * dxx_gt13 : dx_gt13 / x - gtxz / SQR(x) }, + { 0.0, on_axis ? dxx_gt11 : dx_gt22 / x + 2.0 * (gtxx - gtyy) / SQR(x), 0.0 }, + { on_axis ? 0.5 * dxx_gt13 : dx_gt13 / x - gtxz / SQR(x), 0.0, on_axis ? dxx_gt33 : dx_gt33 / x }, + }, + { + { 0.0, on_axis ? dxz_gt11 - dxz_gt22 : (dz_gt11 - dz_gt22) / x, 0.0 }, + { on_axis ? dxz_gt11 - dxz_gt22 : (dz_gt11 - dz_gt22) / x, 0.0, + on_axis ? dxz_gt13 : dz_gt13 / x }, + { 0.0, on_axis ? dxz_gt13 : dz_gt13 / x, 0.0 }, + }, + + }, + { + { + { dxz_gt11, 0.0, dxz_gt13 }, + { 0.0, dxz_gt22, 0.0 }, + { dxz_gt13, 0.0, dxz_gt33 }, + }, + { + { 0.0, on_axis ? dxz_gt11 - dxz_gt22 : (dz_gt11 - dz_gt22) / x, 0.0 }, + { on_axis ? dxz_gt11 - dxz_gt22 : (dz_gt11 - dz_gt22) / x, 0.0, + on_axis ? dxz_gt13 : dz_gt13 / x }, + { 0.0, on_axis ? dxz_gt13 : dz_gt13 / x, 0.0 }, + }, + { + { dzz_gt11, 0.0, dzz_gt13 }, + { 0.0, dzz_gt22, 0.0 }, + { dzz_gt13, 0.0, dzz_gt33 }, + }, + + }, + }; + + const double Atxx = a_Atxx[idx_src]; + const double Atyy = a_Atyy[idx_src]; + const double Atzz = a_Atzz[idx_src]; + const double Atxy = a_Atxy[idx_src]; + const double Atxz = a_Atxz[idx_src]; + const double Atyz = a_Atyz[idx_src]; + + const double At[3][3] = { + { Atxx, Atxy, Atxz }, + { Atxy, Atyy, Atyz }, + { Atxz, Atyz, Atzz }}; + + const double dx_At11 = diff4_dx(&a_Atxx[idx_src], 1, dx_inv); + const double dx_At22 = diff4_dx(&a_Atyy[idx_src], 1, dx_inv); + const double dx_At33 = diff4_dx(&a_Atzz[idx_src], 1, dx_inv); + const double dx_At13 = diff4_dx(&a_Atxz[idx_src], 1, dx_inv); + + const double dz_At11 = diff4_dz(&a_Atxx[idx_src], gh->cctk_lsh[0], dz_inv); + const double dz_At22 = diff4_dz(&a_Atyy[idx_src], gh->cctk_lsh[0], dz_inv); + const double dz_At33 = diff4_dz(&a_Atzz[idx_src], gh->cctk_lsh[0], dz_inv); + const double dz_At13 = diff4_dz(&a_Atxz[idx_src], gh->cctk_lsh[0], dz_inv); + + const double dAt[3][3][3] = { + { + { dx_At11, 0.0, dx_At13 }, + { 0.0, dx_At22, 0.0 }, + { dx_At13, 0.0, dx_At33 }, + }, + { + { 0.0, on_axis ? dx_At11 - dx_At22 : (Atxx - Atyy) / x, 0.0 }, + { on_axis ? dx_At11 - dx_At22 : (Atxx - Atyy) / x, 0.0, on_axis ? dx_At13 : Atxz / x }, + { 0.0, on_axis ? dx_At13 : Atxz / x, 0.0 }, + }, + { + { dz_At11, 0.0, dz_At13 }, + { 0.0, dz_At22, 0.0 }, + { dz_At13, 0.0, dz_At33 }, + }, + }; + + const double phi = a_phi[idx_src]; + + const double dx_phi = diff4_dx(&a_phi[idx_src], 1, dx_inv); + const double dz_phi = diff4_dz(&a_phi[idx_src], gh->cctk_lsh[0], dz_inv); + + const double dphi[3] = { dx_phi, 0.0, dz_phi }; + + const double dxx_phi = diff4_dxx(&a_phi[idx_src], gh->cctk_lsh[0], dx_inv); + const double dzz_phi = diff4_dzz(&a_phi[idx_src], gh->cctk_lsh[0], dz_inv); + const double dxz_phi = diff4_dxz(&a_phi[idx_src], gh->cctk_lsh[0], dx_inv, dz_inv); + + const double d2phi[3][3] = { + { dxx_phi, 0.0, dxz_phi }, + { 0.0, on_axis ? dxx_phi : dx_phi / x, 0.0 }, + { dxz_phi, 0.0, dzz_phi }, + }; + + const double trK = a_trK[idx_src]; + + const double dx_trk = diff4_dx(&a_trK[idx_src], 1, dx_inv); + const double dz_trk = diff4_dz(&a_trK[idx_src], gh->cctk_lsh[0], dz_inv); + + const double dtrK[3] = { dx_trk, 0.0, dz_trk }; + + const double kdot_xx = a_kdotxx[idx_src]; + const double kdot_yy = a_kdotyy[idx_src]; + const double kdot_zz = a_kdotzz[idx_src]; + const double kdot_xy = a_kdotxy[idx_src]; + const double kdot_xz = a_kdotxz[idx_src]; + const double kdot_yz = a_kdotyz[idx_src]; + + const double kdot[3][3] = {{ kdot_xx, kdot_xy, kdot_xz }, + { kdot_xy, kdot_yy, kdot_yz }, + { kdot_xz, kdot_yz, kdot_zz }}; + + const double Xtx = a_Xtx[idx_src]; + const double Xtz = a_Xtz[idx_src]; + + const double alpha = a_alpha[idx_src]; + const double dx_alpha = diff4_dx(&a_alpha[idx_src], 1, dx_inv); + const double dz_alpha = diff4_dz(&a_alpha[idx_src], gh->cctk_lsh[0], dz_inv); + + const double dalpha[3] = { dx_alpha, 0.0, dz_alpha }; + + const double dxx_alpha = diff4_dxx(&a_alpha[idx_src], 1, dx_inv); + const double dzz_alpha = diff4_dzz(&a_alpha[idx_src], gh->cctk_lsh[0], dz_inv); + const double dxz_alpha = diff4_dxz(&a_alpha[idx_src], gh->cctk_lsh[0], dx_inv, dz_inv); + + const double d2alpha[3][3] = {{ dxx_alpha, 0, dxz_alpha }, + { 0, on_axis ? dxx_alpha : dx_alpha / x, 0 }, + { dxz_alpha, 0, dzz_alpha }}; + + const double betax = a_betax[idx_src]; + const double betaz = a_betaz[idx_src]; + + const double beta[3] = { betax, 0.0, betaz }; + + const double dx_betax = diff4_dx(&a_betax[idx_src], 1, dx_inv); + const double dz_betax = diff4_dz(&a_betax[idx_src], gh->cctk_lsh[0], dz_inv); + + const double dx_betaz = diff4_dx(&a_betaz[idx_src], 1, dx_inv); + const double dz_betaz = diff4_dz(&a_betaz[idx_src], gh->cctk_lsh[0], dz_inv); + + const double dbeta[3][3] = {{ dx_betax, 0.0, dx_betaz }, + { 0.0, on_axis ? dx_betax : betax / x, 0.0 }, + { dz_betax, 0.0, dz_betaz }}; + + const double dxx_betax = diff4_dxx(&a_betax[idx_src], gh->cctk_lsh[0], dx_inv); + const double dxz_betax = diff4_dxz(&a_betax[idx_src], gh->cctk_lsh[0], dx_inv, dz_inv); + const double dzz_betax = diff4_dzz(&a_betax[idx_src], gh->cctk_lsh[0], dz_inv); + + const double dxx_betaz = diff4_dxx(&a_betaz[idx_src], gh->cctk_lsh[0], dx_inv); + const double dxz_betaz = diff4_dxz(&a_betaz[idx_src], gh->cctk_lsh[0], dx_inv, dz_inv); + const double dzz_betaz = diff4_dzz(&a_betaz[idx_src], gh->cctk_lsh[0], dz_inv); + + const double d2beta[3][3][3] = { + { + { dxx_betax, 0.0, dxx_betaz }, + { 0.0, on_axis ? 0.5 * dxx_betax : dx_betax / x - betax / SQR(x), 0.0 }, + { dxz_betax, 0.0, dxz_betaz }, + }, + { + { 0.0, on_axis ? 0.5 * dxx_betax : dx_betax / x - betax / SQR(x), 0.0 }, + { on_axis ? 0.5 * dxx_betax : dx_betax / x - betax / SQR(x), 0.0, on_axis ? dxx_betaz : dx_betaz / x }, + { 0.0, on_axis ? dxz_betax : dz_betax / x, 0.0 }, + }, + { + { dxz_betax, 0.0, dxz_betaz }, + { 0.0, on_axis ? dxz_betax : dz_betax / x, 0.0 }, + { dzz_betax, 0.0, dzz_betaz }, + }, + }; + + const double det = gtxx * gtyy * gtzz + 2 * gtxy * gtyz * gtxz - gtzz * SQR(gtxy) - SQR(gtxz) * gtyy - gtxx * SQR(gtyz); + + // \tilde{γ}^{ij} + gtu[0][0] = (gtyy * gtzz - SQR(gtyz)) / det; + gtu[1][1] = (gtxx * gtzz - SQR(gtxz)) / det; + gtu[2][2] = (gtxx * gtyy - SQR(gtxy)) / det; + gtu[0][1] = -(gtxy * gtzz - gtyz * gtxz) / det; + gtu[0][2] = (gtxy * gtyz - gtyy * gtxz) / det; + gtu[1][2] = -(gtxx * gtyz - gtxy * gtxz) / det; + gtu[1][0] = gtu[0][1]; + gtu[2][0] = gtu[0][2]; + gtu[2][1] = gtu[1][2]; + + // γ_{jk}/^{jk} + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) { + gu[j][k] = SQR(phi) * gtu[j][k]; + g[j][k] = gt[j][k] / SQR(phi); + } + + // β_j + for (int j = 0; j < 3; j++) { + double val = 0.0; + for (int k = 0; k < 3; k++) + val += g[j][k] * beta[k]; + betal[j] = val; + } + + // ∂_j γ_{kl} + // ∂_j K_{kl} + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) + for (int l = 0; l < 3; l++) { + dg[j][k][l] = -2.0 * dphi[j] * gt[k][l] / (phi * SQR(phi)) + dgt[j][k][l] / SQR(phi); + dK[j][k][l] = -2.0 * dphi[j] * At[k][l] / (phi * SQR(phi)) + dAt[j][k][l] / SQR(phi) + + (1.0 / 3.0) * (dtrK[j] * g[k][l] + trK * dg[j][k][l]); + } + + // ∂_{jk} g_{lm} + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) + for (int l = 0; l < 3; l++) + for (int m = 0; m < 3; m++) { + d2g[j][k][l][m] = 6.0 * gt [l][m] * dphi[j] * dphi[k] / SQR(SQR(phi)) - + 2.0 * gt [l][m] * d2phi[j][k] / (phi * SQR(phi)) - + 2.0 * dgt [j][l][m] * dphi[k] / (phi * SQR(phi)) - + 2.0 * dgt [k][l][m] * dphi[j] / (phi * SQR(phi)) + + d2gt[j][k][l][m] / SQR(phi); + } + + // ∂_j γ^{kl} + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) + for (int l = 0; l < 3; l++) { + double val = 0.0; + for (int m = 0; m < 3; m++) + for (int n = 0; n < 3; n++) + val += -gu[k][m] * gu[l][n] * dg[j][m][n]; + dgu[j][k][l] = val; + } + + // Γ^j_{kl} + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) + for (int l = 0; l < 3; l++) { + double val = 0.0; + for (int m = 0; m < 3; m++) + val += 0.5 * gu[j][m] * (dg[k][l][m] + dg[l][k][m] - dg[m][k][l]); + G[j][k][l] = val; + } + + // ∂_j Γ^k_{lm} + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) + for (int l = 0; l < 3; l++) + for (int m = 0; m < 3; m++) { + double val = 0.0; + for (int n = 0; n < 3; n++) { + val += dgu[j][k][n] * (dg [l][m][n] + dg [m][l][n] - dg [n][l][m]) + + gu [k][n] * (d2g[j][l][m][n] + d2g[j][m][l][n] - d2g[j][n][l][m]); + } + dG[j][k][l][m] = 0.5 * val; + } + + // Γ^j = γ^{kl} Γ_{kl}^j + for (int j = 0; j < 3; j++) { + double val = 0.0; + for (int k = 0; k < 3; k++) + for (int l = 0; l < 3; l++) + val += gu[k][l] * G[j][k][l]; + X[j] = val; + } + + // ∂_j β_k + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) { + double val = 0.0; + for (int l = 0; l < 3; l++) + val += g[k][l] * dbeta[j][l] + dg[j][k][l] * beta[l]; + dbetal[j][k] = val; + } + + // ∂_{jk} β_l + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) + for (int l = 0; l < 3; l++) { + double val = 0.0; + for (int m = 0; m < 3; m++) + val += d2g[j][k][l][m] * beta[m] + dg[k][l][m] * dbeta[j][m] + dg[j][l][m] * dbeta[k][m] + + g[l][m] * d2beta[j][k][m]; + d2betal[j][k][l] = val; + } + + // K_{ij} + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) + K[j][k] = At[j][k] / SQR(phi) + (1.0 / 3.0) * g[j][k] * trK; + + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) { + double val = 0.0; + for (int l = 0; l < 3; l++) + val += gu[j][l] * K[l][k]; + Km[j][k] = val; + } + + // K^{ij} + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) { + double val = 0.0; + for (int l = 0; l < 3; l++) + val += gu[j][l] * Km[k][l]; + Ku[j][k] = val; + } + + // ∂_j \dot{γ_kl} + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) + for (int l = 0; l < 3; l++) { + dgdot[j][k][l] = -2.0 * (dalpha[j] * K[k][l] + alpha * dK[j][k][l]) + + d2betal[j][k][l] + d2betal[j][l][k]; + for (int m = 0; m < 3; m++) + dgdot[j][k][l] += -2.0 * (dG[j][m][k][l] * betal[m] + G[m][k][l] * dbetal[j][m]); + } + + // \dot{γ^{jk}} + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) { + gudot[j][k] = 2.0 * alpha * Ku[j][k]; + + for (int l = 0; l < 3; l++) { + gudot[j][k] += -gu[j][l] * dbeta[l][k] - gu[k][l] * dbeta[l][j]; + + for (int m = 0; m < 3; m++) + gudot[j][k] += -gu[j][l] * G[k][l][m] * beta[m] - gu[k][l] * G[j][l][m] * beta[m]; + } + } + + + // \dot{Γ}^j_{kl} + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) + for (int l = 0; l < 3; l++) { + double val = 0.0; + for (int m = 0; m < 3; m++) + val += gudot[j][m] * (dg [k][l][m] + dg [l][k][m] - dg [m][k][l]) + + gu [j][m] * (dgdot[k][l][m] + dgdot[l][k][m] - dgdot[m][k][l]); + + Gdot[j][k][l] = 0.5 * val; + } + + Gammadot_term = 0.0; + for (int j = 0; j < 3; j++) { + double val = 0.0; + for (int k = 0; k < 3; k++) + for (int l = 0; l < 3; l++) { + val += gu[k][l] * Gdot[j][k][l]; + } + + Gammadot_term += val * dalpha[j]; + } + + Kij_Dij_alpha = 0.0; + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) { + double val = 0.0; + for (int l = 0; l < 3; l++) + val += G[l][j][k] * dalpha[l]; + + Kij_Dij_alpha += Ku[j][k] * (d2alpha[j][k] - val); + } + + k_kdot = 0.0; + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) + k_kdot += kdot[j][k] * Ku[j][k]; + + k3 = 0.0; + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) { + double val = 0.0; + for (int l = 0; l < 3; l++) + val += Km[k][l] * Km[l][j]; + k3 += val * Km[j][k]; + } + + // K_{ij} K^{ij} + k2 = 0.0; + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) + k2 += Km[j][k] * Km[k][j]; + + // Ric_{jk} + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) { + double val = 0.0; + for (int m = 0; m < 3; m++) + val += dG[m][m][j][k] - dG[k][m][j][m]; + for (int m = 0; m < 3; m++) + for (int l = 0; l < 3; l++) + val += G[l][l][m] * G[m][j][k] - G[l][k][m] * G[m][j][l]; + Ric[j][k] = val; + } + + // Ric^j_k + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) { + double val = 0.0; + for (int l = 0; l < 3; l++) + val += gu[j][l] * Ric[l][k]; + Ricm[j][k] = val; + } + + beta_term = 0.0; + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) + for (int l = 0; l < 3; l++) { + double D2beta = 0.0; + + D2beta += d2beta[j][k][l]; + + for (int m = 0; m < 3; m++) { + D2beta += dG[j][l][k][m] * beta[m] + G[l][k][m] * dbeta[j][m] + G[l][j][m] * dbeta[k][m] - G[m][j][k] * dbeta[m][l]; + + for (int n = 0; n < 3; n++) + D2beta += G[l][j][m] * G[m][k][n] * beta[n] - G[m][j][k] * G[l][m][n] * beta[n]; + } + + beta_term += -gu[j][k] * D2beta * dalpha[l]; + } + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) + beta_term += -beta[k] * Ricm[j][k] * dalpha[j]; + + solver->diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_dc] = gu[0][0] + (on_axis ? gu[1][1] : 0.0); + solver->diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_dc] = gu[2][2]; + solver->diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_dc] = 2.0 * gu[0][2]; + solver->diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_dc] = -X[0] + (on_axis ? 0.0 : gu[1][1] / x); + solver->diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_dc] = -X[2]; + solver->diff_coeffs[MG2D_DIFF_COEFF_00]->data[idx_dc] = -k2; + + solver->rhs[idx_rhs] = -2.0 * alpha * Kij_Dij_alpha + Gammadot_term + + 2.0 * alpha * (k_kdot + 2.0 * alpha * k3) + beta_term; + + if (on_axis) { + solver->diff_coeffs[MG2D_DIFF_COEFF_20]->boundaries[MG2D_BOUNDARY_0L].val[idx_z] = gu[1][1]; + solver->diff_coeffs[MG2D_DIFF_COEFF_10]->boundaries[MG2D_BOUNDARY_0L].val[idx_z] = gu[1][1]; + } + } +} + +static void solution_to_grid(CoordPatch *cp, MG2DContext *solver, double *dst) +{ +#pragma omp parallel for + for (int j = 0; j < solver->local_size[1]; j++) + for (int i = 0; i < solver->local_size[0]; i++) { + const ptrdiff_t idx_dst = CPINDEX(cp, i + cp->offset_left[0], cp->y_idx, j + cp->offset_left[1]); + const int idx_src = j * solver->u_stride + i; + dst[idx_dst] = solver->u[idx_src]; + } + + /* fill in the axial symmetry ghostpoints by mirroring */ + if (!cp->bnd_intercomp[0][0]) { +#pragma omp parallel for + for (int idx_z = cp->offset_left[1]; idx_z < cp->grid_size[2]; idx_z++) + for (int idx_x = 0; idx_x < cp->offset_left[0]; idx_x++) { + const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z); + const ptrdiff_t idx_src = CPINDEX(cp, 2 * cp->offset_left[0] - idx_x, cp->y_idx, idx_z); + dst[idx_dst] = dst[idx_src]; + } + } + if (!cp->bnd_intercomp[1][0]) { +#pragma omp parallel for + for (int idx_z = 0; idx_z < cp->offset_left[1]; idx_z++) + for (int idx_x = 0; idx_x < cp->grid_size[0]; idx_x++) { + const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z); + const ptrdiff_t idx_src = CPINDEX(cp, idx_x, cp->y_idx, 2 * cp->offset_left[1] - idx_z); + dst[idx_dst] = dst[idx_src]; + } + } +} + +void qms_mg_solve(CCTK_ARGUMENTS) +{ + QMSMGContext *ms = qms_context; + CoordPatch *cp; + + DECLARE_CCTK_ARGUMENTS; + DECLARE_CCTK_PARAMETERS; + + const int reflevel = ctz(cctkGH->cctk_levfac[0]); + const double time = ms->gh->cctk_time; + + int64_t grid_size; + int ret; + + + if (cctkGH->cctk_time >= switchoff_time + 1.0) + return; + + if (reflevel < ms->solve_level) + return; + if (reflevel > ms->solve_level && + fabs(time - W_val1_time[ms->solve_level]) > 1e-13) + return; + + cp = get_coord_patch(ms, reflevel); + + grid_size = cctkGH->cctk_lsh[0] * cctkGH->cctk_lsh[1] * cctkGH->cctk_lsh[2]; + + fill_eq_coeffs(ms, cp, cp->solver); + + if (reflevel > ms->solve_level) { + /* fill the solver boundary conditions */ + if (cp->solver->local_start[1] + cp->solver->local_size[1] == cp->solver->domain_size) { +#pragma omp parallel for + for (int j = 0; j < cp->solver->fd_stencil; j++) { + MG2DBoundary *bnd = cp->solver->boundaries[MG2D_BOUNDARY_1U]; + double *dst = bnd->val + j * bnd->val_stride; + for (ptrdiff_t i = -j; i < (ptrdiff_t)cp->solver->local_size[0] + j; i++) { + const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, ABS(i) + cp->offset_left[0], cp->y_idx, cp->offset_left[1] + cp->solver->local_size[1] - 1 + j); + dst[i] = W_val1[idx]; + } + } + } + if (cp->solver->local_start[0] + cp->solver->local_size[0] == cp->solver->domain_size) { +#pragma omp parallel for + for (int j = 0; j < cp->solver->fd_stencil; j++) { + MG2DBoundary *bnd = cp->solver->boundaries[MG2D_BOUNDARY_0U]; + double *dst = bnd->val + j * bnd->val_stride; + for (ptrdiff_t i = -j; i < (ptrdiff_t)cp->solver->local_size[1] + j; i++) { + const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, cp->offset_left[1] + cp->solver->local_size[0] - 1 + j, cp->y_idx, cp->offset_left[1] + ABS(i)); + dst[i] = W_val1[idx]; + } + } + } + } + + fprintf(stderr, "%d qms solve: time %g\n", reflevel, time); + ret = mg2d_solve(cp->solver); + if (ret < 0) + CCTK_WARN(0, "Error solving the quasi-maximal slicing equation"); + + /* add the solution to the list of past solutions */ + { + memcpy(W_val0, W_val1, sizeof(*W_val0) * grid_size); + W_val0_time[reflevel] = W_val1_time[reflevel]; + + solution_to_grid(cp, cp->solver, W_val1); + W_val1_time[reflevel] = ms->gh->cctk_time; + } + + /* linearly extrapolate the past two solution to predict the next one */ + { + double *u0 = W_val0; + double *u1 = W_val1; + double time0 = W_val0_time[reflevel]; + double time1 = W_val1_time[reflevel]; + double time = time1 + ms->gh->cctk_delta_time / (1.0 * (1 << ms->solve_level)); + + double fact0, fact1; + + if (time0 == DBL_MAX && time1 == DBL_MAX) { + fact0 = 0.0; + fact1 = 0.0; + } else if (time0 == DBL_MAX) { + fact0 = 0.0; + fact1 = 0.0; + } else { + fact0 = (time - time1) / (time0 - time1); + fact1 = (time - time0) / (time1 - time0); + } + + memcpy(W_pred0, W_pred1, sizeof(*W_pred0) * grid_size); + W_pred0_time[reflevel] = W_pred1_time[reflevel]; + + for (int i = 0; i < grid_size; i++) + W_pred1[i] = (u1[i] * fact1 + u0[i] * fact0); + + W_pred1_time[reflevel] = time; + } +} + +void qms_mg_sync(CCTK_ARGUMENTS) +{ +} + +void qms_mg_eval(CCTK_ARGUMENTS) +{ + QMSMGContext *ms; + + DECLARE_CCTK_ARGUMENTS; + DECLARE_CCTK_PARAMETERS; + + const int reflevel = ctz(cctkGH->cctk_levfac[0]); + const double time = cctkGH->cctk_time; + const int64_t grid_size = cctkGH->cctk_lsh[0] * cctkGH->cctk_lsh[1] * cctkGH->cctk_lsh[2]; + + int i, ret; + + ms = qms_context; + + if (reflevel < ms->solve_level || time >= switchoff_time + 1.0) + return; + + { + double *u0 = W_pred0; + double *u1 = W_pred1; + double time0 = W_pred0_time[reflevel]; + double time1 = W_pred1_time[reflevel]; + + double fact, fact0, fact1; + + if (time0 == DBL_MAX && time1 == DBL_MAX) { + fact0 = 0.0; + fact1 = 0.0; + } else if (time0 == DBL_MAX) { + fact0 = 0.0; + fact1 = 1.0; + } else { + fact0 = (time - time1) / (time0 - time1); + fact1 = (time - time0) / (time1 - time0); + } + + if (time < switchoff_time) + fact = 1.0; + else + fact = exp(-36.0 * pow((time - switchoff_time), 4.0)); + +#pragma omp parallel for + for (int i = 0; i < grid_size; i++) + W[i] = (u1[i] * fact1 + u0[i] * fact0) * fact; + + } +} + +static int context_init(cGH *gh, int solve_level, int fd_stencil, int maxiter, int exact_size, int nb_cycles, + int nb_relax_pre, int nb_relax_post, double tol_residual_base, + double cfl_factor, const char *loglevel_str, + QMSMGContext **ctx) +{ + QMSMGContext *qms; + int loglevel = -1; + int ret; + + qms = calloc(1, sizeof(*qms)); + if (!qms) + return -ENOMEM; + + qms->gh = gh; + + qms->solve_level = solve_level; + qms->fd_stencil = fd_stencil; + qms->maxiter = maxiter; + qms->max_exact_size = exact_size; + qms->nb_cycles = nb_cycles; + qms->nb_relax_pre = nb_relax_pre; + qms->nb_relax_post = nb_relax_post; + qms->tol_residual_base = tol_residual_base; + qms->cfl_factor = cfl_factor; + + for (int i = 0; i < ARRAY_ELEMS(log_levels); i++) { + if (!strcmp(loglevel_str, log_levels[i].str)) { + loglevel = log_levels[i].level; + break; + } + } + if (loglevel < 0) { + fprintf(stderr, "Invalid loglevel: %s\n", loglevel_str); + return -EINVAL; + } + qms->log_level = loglevel; + + *ctx = qms; + + return 0; +} + +void qms_mg_init(CCTK_ARGUMENTS) +{ + DECLARE_CCTK_ARGUMENTS; + DECLARE_CCTK_PARAMETERS; + + int use_tapered_grids = *(int*)CCTK_ParameterGet("use_tapered_grids", "Carpet", &use_tapered_grids); + int ret; + + fprintf(stderr, "%d init\n", ctz(cctk_levfac[0])); + + if (!use_tapered_grids) + CCTK_WARN(0, "MaximalSlicingAxiMG only works with use_tapered_grids=1"); + + if (!qms_context) { + ret = context_init(cctkGH, solve_level, fd_stencil, maxiter, exact_size, nb_cycles, + nb_relax_pre, nb_relax_post, tol_residual_base, + cfl_factor, loglevel, &qms_context); + if (ret < 0) + CCTK_WARN(0, "Error initializing the solver context"); + } +} + +void qms_mg_inithist(CCTK_ARGUMENTS) +{ + DECLARE_CCTK_ARGUMENTS; + DECLARE_CCTK_PARAMETERS; + + size_t grid_size = cctk_lsh[0] * cctk_lsh[1] * cctk_lsh[2]; + + fprintf(stderr, "%d inithist\n", ctz(cctk_levfac[0])); + + for (int i = 0; i < 32; i++) { + W_pred0_time[i] = DBL_MAX; + W_pred1_time[i] = DBL_MAX; + W_val0_time[i] = DBL_MAX; + W_val1_time[i] = DBL_MAX; + } + + memset(Kdot11, 0, sizeof(double) * grid_size); + memset(Kdot22, 0, sizeof(double) * grid_size); + memset(Kdot33, 0, sizeof(double) * grid_size); + memset(Kdot12, 0, sizeof(double) * grid_size); + memset(Kdot13, 0, sizeof(double) * grid_size); + memset(Kdot23, 0, sizeof(double) * grid_size); + + memset(Xtdot1, 0, sizeof(double) * grid_size); + memset(Xtdot2, 0, sizeof(double) * grid_size); + memset(Xtdot3, 0, sizeof(double) * grid_size); + + memset(W_pred0, 0, sizeof(double) * grid_size); + memset(W_pred1, 0, sizeof(double) * grid_size); + memset(W_val0, 0, sizeof(double) * grid_size); + memset(W_val1, 0, sizeof(double) * grid_size); +} -- cgit v1.2.3