#include "common.h" #include #include #include #include #include #include #include #include #include #include #include "cctk.h" #include "cctk_Arguments.h" #include "cctk_Parameters.h" #include "cctk_Timers.h" #include "util_Table.h" #include "qms.h" #include "qms_solve.h" /* precomputed values for a given refined grid */ typedef struct CoordPatch { CCTK_REAL origin[3]; CCTK_INT delta[3]; CCTK_INT size[3]; // basis values on the grid double *basis_val_r; double *basis_val_z; double *transform_z; double *transform_matrix; double *transform_matrix1; double *transform_tmp; int y_idx; } CoordPatch; struct QMSContext { QMSSolver *solver; cGH *gh; struct { double time; double *coeffs; } solutions[2], pred[2]; double *coeffs_eval; double max_radius; uint64_t grid_expand_count; uint64_t grid_expand_time; CoordPatch *patches; int nb_patches; }; double scale_factor; static int ctz(int a) { int ret = 0; if (!a) return INT_MAX; while (!(a & 1)) { a >>= 1; ret++; } return ret; } /* get an approximate "main" frequency component in a basis function */ static double calc_basis_freq(const BasisSet *b, int order) { return b->colloc_point(order, 1); } static CoordPatch *get_coord_patch(QMSContext *ms, CCTK_REAL *x, CCTK_REAL *y, CCTK_REAL *z, double scale_factor, double scale_power) { cGH *cctkGH = ms->gh; CoordPatch *cp; int64_t grid_size; int i; for (int i = 0; i < ms->nb_patches; i++) { cp = &ms->patches[i]; if (cp->origin[0] == ms->gh->cctk_origin_space[0] && cp->origin[1] == ms->gh->cctk_origin_space[1] && cp->origin[2] == ms->gh->cctk_origin_space[2] && cp->size[0] == ms->gh->cctk_lsh[0] && cp->size[1] == ms->gh->cctk_lsh[1] && cp->size[2] == ms->gh->cctk_lsh[2] && cp->delta[0] == ms->gh->cctk_levfac[0] && cp->delta[1] == ms->gh->cctk_levfac[1] && cp->delta[2] == ms->gh->cctk_levfac[2]) return cp; } grid_size = cctkGH->cctk_lsh[0] * cctkGH->cctk_lsh[1] * cctkGH->cctk_lsh[2]; /* create a new patch */ ms->patches = realloc(ms->patches, sizeof(*ms->patches) * (ms->nb_patches + 1)); cp = &ms->patches[ms->nb_patches]; memset(cp, 0, sizeof(*cp)); memcpy(cp->origin, ms->gh->cctk_origin_space, sizeof(cp->origin)); memcpy(cp->size, ms->gh->cctk_lsh, sizeof(cp->size)); memcpy(cp->delta, ms->gh->cctk_levfac, sizeof(cp->delta)); for (i = 0; i < cp->size[1]; i++) if (fabs(y[CCTK_GFINDEX3D(cctkGH, 0, i, 0)]) < 1e-8) { cp->y_idx = i; break; } if (i == cp->size[1]) CCTK_WARN(0, "The grid does not include y==0"); #if QMS_POLAR || 1 posix_memalign((void**)&cp->transform_matrix, 32, sizeof(*cp->transform_matrix) * ms->solver->nb_coeffs[0] * cp->size[0] * cp->size[2]); posix_memalign((void**)&cp->transform_matrix1, 32, sizeof(*cp->transform_matrix1) * ms->solver->nb_coeffs[1] * cp->size[0] * cp->size[2]); #pragma omp parallel for for (int j = 0; j < cp->size[2]; j++) { CCTK_REAL zz = z[CCTK_GFINDEX3D(ms->gh, 0, 0, j)]; for (int i = 0; i < cp->size[0]; i++) { const int idx_grid = j * cp->size[0] + i; double xx = x[CCTK_GFINDEX3D(ms->gh, i, 0, 0)]; double rr = sqrt(SQR(xx) + SQR(zz)); #if QMS_POLAR double coord0 = rr; double coord1 = atan2(zz, xx); #else double coord0 = xx; double coord1 = zz; #endif //for (int k = 0; k < ms->nb_coeffs_z; k++) // for (int l = 0; l < ms->nb_coeffs_x; l++) { // const int idx_coeff = k * ms->nb_coeffs_x + l; // cp->transform_matrix[idx_grid + cp->size[0] * cp->size[2] * idx_coeff] = ms->basis->eval(r, l) * ms->basis1->eval(phi, k); // } for (int k = 0; k < ms->solver->nb_coeffs[0]; k++) { double dx = calc_basis_freq(ms->solver->basis[0], k); double r0 = MIN(ms->max_radius, dx * scale_factor); double fact = exp(-36.0 * pow(rr / r0, scale_power)); cp->transform_matrix[idx_grid + cp->size[0] * cp->size[2] * k] = ms->solver->basis[0]->eval(coord0, k) * fact; } for (int k = 0; k < ms->solver->nb_coeffs[1]; k++) { double dx = calc_basis_freq(ms->solver->basis[1], k); double r0 = MIN(ms->max_radius, dx * scale_factor); double fact = exp(-36.0 * pow(rr / r0, scale_power)); cp->transform_matrix1[idx_grid * ms->solver->nb_coeffs[1] + k] = ms->solver->basis[1]->eval(coord1, k) * fact; } } } posix_memalign((void**)&cp->transform_tmp, 32, sizeof(*cp->transform_tmp) * cp->size[0] * cp->size[2] * ms->solver->nb_coeffs[1]); #else posix_memalign((void**)&cp->basis_val_r, 32, sizeof(*cp->basis_val_r) * ms->solver->nb_coeffs[0] * ms->gh->cctk_lsh[1] * ms->gh->cctk_lsh[0]); for (int j = 0; j < ms->gh->cctk_lsh[1]; j++) for (int i = 0; i < ms->gh->cctk_lsh[0]; i++) { CCTK_REAL xx = x[CCTK_GFINDEX3D(ms->gh, i, j, 0)]; CCTK_REAL yy = y[CCTK_GFINDEX3D(ms->gh, i, j, 0)]; CCTK_REAL r = sqrt(SQR(xx) + SQR(yy)); for (int k = 0; k < ms->solver->nb_coeffs[0]; k++) //cp->basis_val_r [(j * ms->gh->cctk_lsh[0] + i) * ms->nb_coeffs_x + k] = ms->basis->eval(r, k); cp->basis_val_r [(j * ms->gh->cctk_lsh[0] + i) + ms->gh->cctk_lsh[1] * ms->gh->cctk_lsh[0] * k] = ms->solver->basis[0]->eval(r, k); } posix_memalign((void**)&cp->basis_val_z, 32, sizeof(*cp->basis_val_z) * ms->solver->nb_coeffs[1] * ms->gh->cctk_lsh[2]); for (int i = 0; i < ms->gh->cctk_lsh[2]; i++) { CCTK_REAL zz = z[CCTK_GFINDEX3D(ms->gh, 0, 0, i)]; for (int j = 0; j < ms->solver->nb_coeffs[1]; j++) cp->basis_val_z [i * ms->solver->nb_coeffs[1] + j] = ms->solver->basis[0]->eval(fabs(zz), j); //cp->basis_val_z [i + ms->gh->cctk_lsh[2] * j] = ms->basis->eval(zz, j); } posix_memalign((void**)&cp->transform_z, 32, sizeof(*cp->transform_z) * cctkGH->cctk_lsh[2] * ms->solver->nb_coeffs[0]); #endif #if 0 nb_threads = getenv("OMP_NUM_THREADS"); if (nb_threads) cp->nb_threads = atoi(nb_threads); if (cp->nb_threads <= 0) cp->nb_threads = 1; qms_threadpool_init(&cp->tp, cp->nb_threads); cp->ec = calloc(cp->nb_threads, sizeof(*cp->ec)); block_size = (ms->gh->cctk_lsh[2] + cp->nb_threads - 1) / cp->nb_threads; for (int i = 0; i < cp->nb_threads; i++) { EvalContext *ec = &cp->ec[i]; ec->qms = ms; ec->nb_coeffs[0] = ms->solver->nb_coeffs[0]; ec->nb_coeffs[1] = ms->solver->nb_coeffs[1]; posix_memalign((void**)&ec->eval_tmp[0], 32, sizeof(*ec->eval_tmp[0]) * ec->nb_coeffs[0]); posix_memalign((void**)&ec->eval_tmp[1], 32, sizeof(*ec->eval_tmp[1]) * ec->nb_coeffs[1]); ec->x_idx_start = 0; ec->x_idx_end = ms->gh->cctk_lsh[0]; ec->z_idx_start = block_size * i; ec->z_idx_end = MIN(block_size * (i + 1), ms->gh->cctk_lsh[2]); } #endif ms->nb_patches++; return cp; } static QMSContext *qms_context; static int context_init(cGH *cctkGH) { QMSContext *qms; int ret; DECLARE_CCTK_ARGUMENTS; DECLARE_CCTK_PARAMETERS; qms = calloc(1, sizeof(*qms)); if (!qms) return -ENOMEM; qms->gh = cctkGH; //qms->max_radius = 60.0; qms->max_radius = outer_bound; ret = qms_solver_init(&qms->solver, cctkGH, basis_order_r, basis_order_z, outer_bound, filter_power, 0.0, ccz4); if (ret < 0) return ret; ret = posix_memalign((void**)&qms->coeffs_eval, 32, basis_order_r * basis_order_z * sizeof(*qms->coeffs_eval)); if (ret) return -ENOMEM; for (int i = 0; i < ARRAY_ELEMS(qms->solutions); i++) { qms->solutions[i].coeffs = W_coeffs + i * basis_order_r * basis_order_z; //memset(qms->solutions[i].coeffs, 0, basis_order_r * basis_order_z * sizeof(*qms->solutions[i].coeffs)); qms->solutions[i].time = cctkGH->cctk_time - (2 - i) * cctkGH->cctk_delta_time / (1.0 * (1 << solve_level)); fprintf(stderr, "init time %d %f\n", i, qms->solutions[i].time); } for (int i = 0; i < ARRAY_ELEMS(qms->pred); i++) { qms->pred[i].coeffs = W_pred + i * basis_order_r * basis_order_z; //memset(qms->pred[i].coeffs, 0, basis_order_r * basis_order_z * sizeof(*qms->pred[i].coeffs)); qms->pred[i].time = cctkGH->cctk_time - (1 - i) * cctkGH->cctk_delta_time / (1.0 * (1 << solve_level)); fprintf(stderr, "init time %d %f\n", i, qms->pred[i].time); } qms_context = qms; return 0; } void quasimaximal_slicing_axi_solve(CCTK_ARGUMENTS) { QMSContext *ms; DECLARE_CCTK_ARGUMENTS; DECLARE_CCTK_PARAMETERS; double time; if (!qms_context) context_init(cctkGH); ms = qms_context; if (cctkGH->cctk_time >= switchoff_time + 1.0) return; if (ctz(ms->gh->cctk_levfac[0]) != solve_level) return; time = cctkGH->cctk_time * ms->gh->cctk_levfac[0] / ms->gh->cctk_delta_time; if (fabs(time - ceilf(time)) > 1e-8 || (ms->solutions[ARRAY_ELEMS(ms->solutions) - 1].time >= cctkGH->cctk_time)) return; CCTK_TimerStart("QuasiMaximalSlicing_Solve"); qms_solver_solve(ms->solver); CCTK_TimerStop("QuasiMaximalSlicing_Solve"); fprintf(stderr, "%d qms solve: time %g %g %g\n", ms->gh->cctk_levfac[0], ms->gh->cctk_time, time, ms->solver->coeffs[0]); /* add the solution to the list of past solutions */ { int N = ms->solver->nb_coeffs[0] * ms->solver->nb_coeffs[1]; memcpy(ms->solutions[0].coeffs, ms->solutions[1].coeffs, sizeof(*ms->solutions[0].coeffs) * N); ms->solutions[0].time = ms->solutions[1].time; memcpy(ms->solutions[1].coeffs, ms->solver->coeffs, sizeof(*ms->solutions[1].coeffs) * N); ms->solutions[1].time = ms->gh->cctk_time; } /* linearly extrapolate the past two solution to predict the next one */ { int N = ms->solver->nb_coeffs[0] * ms->solver->nb_coeffs[1]; double time0 = ms->solutions[0].time; double time1 = ms->solutions[1].time; double time = time1 + ms->gh->cctk_delta_time / (1.0 * (1 << solve_level)); double *coeffs0 = ms->solutions[0].coeffs; double *coeffs1 = ms->solutions[1].coeffs; double *pred = ms->pred[1].coeffs; memcpy(ms->pred[0].coeffs, ms->pred[1].coeffs, sizeof(*ms->pred[0].coeffs) * N); ms->pred[0].time = ms->pred[1].time; for (int i = 0; i < N; i++) ms->pred[1].coeffs[i] = (coeffs1[i] * (time - time0) / (time1 - time0) + coeffs0[i] * (time - time1) / (time0 - time1)); ms->pred[1].time = time; } } void quasimaximal_slicing_axi_eval(CCTK_ARGUMENTS) { QMSContext *ms; CoordPatch *cp; DECLARE_CCTK_ARGUMENTS; DECLARE_CCTK_PARAMETERS; double time; int64_t expand_start; double *coeffs = NULL, *W; int i, ret; if (!qms_context) context_init(cctkGH); time = cctkGH->cctk_time; if (time >= switchoff_time + 1.0) return; ms = qms_context; cp = get_coord_patch(ms, x, y, z, scale_factor, scale_power); #if 0 //coeffs = ms->coeffs; coeffs = ms->solutions[ARRAY_ELEMS(ms->solutions) - 1].coeffs; #else coeffs = ms->coeffs_eval; { double *coeffs0 = ms->pred[0].coeffs; double *coeffs1 = ms->pred[1].coeffs; double time0 = ms->pred[0].time; double time1 = ms->pred[1].time; double fact; if (time < switchoff_time) fact = 1.0; else fact = exp(-36.0 * pow((time - switchoff_time), 4.0)); for (int i = 0; i < ms->solver->nb_coeffs[0] * ms->solver->nb_coeffs[1]; i++) coeffs[i] = (coeffs1[i] * (time - time0) / (time1 - time0) + coeffs0[i] * (time - time1) / (time0 - time1)) * fact; } #endif if (ccz4) W = CCTK_VarDataPtr(cctkGH, 0, "ML_CCZ4::W"); else W = CCTK_VarDataPtr(cctkGH, 0, "ML_BSSN::W"); CCTK_TimerStart("QuasiMaximalSlicing_Expand"); expand_start = gettime(); #if 0 #pragma omp parallel for for (int k = 0; k < cctk_lsh[2]; k++) { for (int i = 0; i < cctk_lsh[0]; i++) { int idx = CCTK_GFINDEX3D(cctkGH, i, cp->y_idx, k); double xx = x[idx]; double zz = z[idx]; double r = sqrt(SQR(xx) + SQR(zz)); double phi = atan2(zz, xx); double val = 0.0; for (int l = 0; l < ms->nb_coeffs_z; l++) { double tmp = 0.0; for (int m = 0; m < ms->nb_coeffs_x; m++) { const int idx_coeff = l * ms->nb_coeffs_x + m; tmp += coeffs[idx_coeff] * ms->basis->eval(r, m); } val += tmp * ms->basis1->eval(phi, l); } W[idx] = val; } } #elif QMS_POLAR || 1 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, cctk_lsh[0] * cctk_lsh[2], ms->solver->nb_coeffs[1], ms->solver->nb_coeffs[0], 1.0, cp->transform_matrix, cctk_lsh[0] * cctk_lsh[2], coeffs, ms->solver->nb_coeffs[0], 0.0, cp->transform_tmp, cctk_lsh[0] * cctk_lsh[2]); #pragma omp parallel for for (int j = 0; j < cctk_lsh[2]; j++) for (int i = 0; i < cctk_lsh[0]; i++) { const int idx_grid = j * cctk_lsh[0] + i; const double val = cblas_ddot(ms->solver->nb_coeffs[1], cp->transform_matrix1 + idx_grid * ms->solver->nb_coeffs[1], 1, cp->transform_tmp + idx_grid, cctk_lsh[0] * cctk_lsh[2]); W[CCTK_GFINDEX3D(cctkGH, i, cp->y_idx, j)] = val; } #else memset(W, 0, cctk_lsh[0] * cctk_lsh[1] * cctk_lsh[2] * sizeof(*W)); cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, ms->solver->nb_coeffs[0], cctk_lsh[2], ms->solver->nb_coeffs[1], 1.0, coeffs, ms->solver->nb_coeffs[0], cp->basis_val_z, ms->solver->nb_coeffs[1], 0.0, cp->transform_z, ms->solver->nb_coeffs[0]); cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, cctk_lsh[1] * cctk_lsh[0], cctk_lsh[2], ms->solver->nb_coeffs[0], 1.0, cp->basis_val_r, cctk_lsh[0] * cctk_lsh[1], cp->transform_z, ms->solver->nb_coeffs[0], 1.0, W, cctk_lsh[0] * cctk_lsh[1]); #endif ms->grid_expand_time += gettime() - expand_start; ms->grid_expand_count++; CCTK_TimerStop("QuasiMaximalSlicing_Expand"); /* print stats */ if (!(ms->grid_expand_count & 255)) { fprintf(stderr, "Quasi-maximal slicing stats:\n"); qms_solver_print_stats(ms->solver); fprintf(stderr, "%lu evals: total time %g s, avg time per call %g ms\n", ms->grid_expand_count, (double)ms->grid_expand_time / 1e6, (double)ms->grid_expand_time / ms->grid_expand_count / 1e3); } } void qms_init(CCTK_ARGUMENTS) { DECLARE_CCTK_ARGUMENTS; DECLARE_CCTK_PARAMETERS; if (!qms_context) context_init(cctkGH); double *Kdot11, *Kdot22, *Kdot33, *Kdot12, *Kdot13, *Kdot23, *Xtdot1, *Xtdot2, *Xtdot3; if (ccz4) { Kdot11 = CCTK_VarDataPtr(cctkGH, 0, "ML_CCZ4::Kdot11"); Kdot22 = CCTK_VarDataPtr(cctkGH, 0, "ML_CCZ4::Kdot22"); Kdot33 = CCTK_VarDataPtr(cctkGH, 0, "ML_CCZ4::Kdot33"); Kdot12 = CCTK_VarDataPtr(cctkGH, 0, "ML_CCZ4::Kdot12"); Kdot13 = CCTK_VarDataPtr(cctkGH, 0, "ML_CCZ4::Kdot13"); Kdot23 = CCTK_VarDataPtr(cctkGH, 0, "ML_CCZ4::Kdot23"); Xtdot1 = CCTK_VarDataPtr(cctkGH, 0, "ML_CCZ4::Xtdot1"); Xtdot2 = CCTK_VarDataPtr(cctkGH, 0, "ML_CCZ4::Xtdot2"); Xtdot3 = CCTK_VarDataPtr(cctkGH, 0, "ML_CCZ4::Xtdot3"); } else { Kdot11 = CCTK_VarDataPtr(cctkGH, 0, "ML_BSSN::Kdot11"); Kdot22 = CCTK_VarDataPtr(cctkGH, 0, "ML_BSSN::Kdot22"); Kdot33 = CCTK_VarDataPtr(cctkGH, 0, "ML_BSSN::Kdot33"); Kdot12 = CCTK_VarDataPtr(cctkGH, 0, "ML_BSSN::Kdot12"); Kdot13 = CCTK_VarDataPtr(cctkGH, 0, "ML_BSSN::Kdot13"); Kdot23 = CCTK_VarDataPtr(cctkGH, 0, "ML_BSSN::Kdot23"); Xtdot1 = CCTK_VarDataPtr(cctkGH, 0, "ML_BSSN::Xtdot1"); Xtdot2 = CCTK_VarDataPtr(cctkGH, 0, "ML_BSSN::Xtdot2"); Xtdot3 = CCTK_VarDataPtr(cctkGH, 0, "ML_BSSN::Xtdot3"); } for (int k = 0; k < cctk_lsh[2]; k++) for (int j = 0; j < cctk_lsh[1]; j++) for (int i = 0; i < cctk_lsh[0]; i++) { int idx = CCTK_GFINDEX3D(cctkGH, i, j, k); Kdot11[idx] = 0.0; Kdot22[idx] = 0.0; Kdot33[idx] = 0.0; Kdot12[idx] = 0.0; Kdot13[idx] = 0.0; Kdot23[idx] = 0.0; Xtdot1[idx] = 0.0; Xtdot2[idx] = 0.0; Xtdot3[idx] = 0.0; } }