#include #include #include #include #include #include #include #include #include #include #include //#include //#include #include "cctk.h" #include "cctk_Arguments.h" #include "cctk_Parameters.h" #include "cctk_Timers.h" #include "util_Table.h" #include "ms_solve.h" /* precomputed values for a given refined grid */ typedef struct CoordPatch { int level; // basis values on the grid double *basis_val_r; double *basis_val_z; double *transform_z; double *transform_matrix; double *transform_matrix1; double *transform_tmp; double *one; int y_idx; } CoordPatch; typedef struct MaximalSlicingContext { MSSolver *solver; cGH *gh; uint64_t grid_expand_count; uint64_t grid_expand_time; CoordPatch *patches; int nb_patches; } MaximalSlicingContext; double scale_factor; static int ctz(int a) { int ret = 0; if (!a) return INT_MAX; while (!(a & 1)) { a >>= 1; ret++; } return ret; } /* get an approximate "main" frequency component in a basis function */ static double calc_basis_freq(const BasisSet *b, int order) { return b->colloc_point(order, 1); } static void coord_patch_free(CoordPatch *cp) { free(cp->basis_val_r); free(cp->basis_val_z); free(cp->transform_z); free(cp->transform_matrix); free(cp->transform_matrix1); free(cp->transform_tmp); free(cp->one); } static CoordPatch *get_coord_patch(MaximalSlicingContext *ms, int level) { cGH *gh = ms->gh; int nb_coeffs_x = ms->solver->nb_coeffs[0]; int nb_coeffs_z = ms->solver->nb_coeffs[1]; const double *a_x = CCTK_VarDataPtr(gh, 0, "grid::x"); const double *a_y = CCTK_VarDataPtr(gh, 0, "grid::y"); const double *a_z = CCTK_VarDataPtr(gh, 0, "grid::z"); CoordPatch *cp; int64_t grid_size; int i; for (int i = 0; i < ms->nb_patches; i++) { cp = &ms->patches[i]; if (cp->level == level) return cp; } grid_size = gh->cctk_lsh[0] * gh->cctk_lsh[1] * gh->cctk_lsh[2]; /* create a new patch */ ms->patches = realloc(ms->patches, sizeof(*ms->patches) * (ms->nb_patches + 1)); cp = &ms->patches[ms->nb_patches]; memset(cp, 0, sizeof(*cp)); cp->level = level; for (i = 0; i < gh->cctk_lsh[1]; i++) if (fabs(a_y[CCTK_GFINDEX3D(gh, 0, i, 0)]) < 1e-8) { cp->y_idx = i; break; } if (i == gh->cctk_lsh[1]) CCTK_WARN(0, "The grid does not include y==0"); #if 0 posix_memalign((void**)&cp->basis_val_r, 32, sizeof(*cp->basis_val_r) * ms->nb_coeffs_x * ms->gh->cctk_lsh[1] * ms->gh->cctk_lsh[0]); for (int j = 0; j < ms->gh->cctk_lsh[1]; j++) for (int i = 0; i < ms->gh->cctk_lsh[0]; i++) { CCTK_REAL xx = x[CCTK_GFINDEX3D(ms->gh, i, j, 0)]; CCTK_REAL yy = y[CCTK_GFINDEX3D(ms->gh, i, j, 0)]; CCTK_REAL r = sqrt(SQR(xx) + SQR(yy)); for (int k = 0; k < ms->nb_coeffs_x; k++) //cp->basis_val_r [(j * ms->gh->cctk_lsh[0] + i) * ms->nb_coeffs_x + k] = ms->basis->eval(r, k); cp->basis_val_r [(j * ms->gh->cctk_lsh[0] + i) + ms->gh->cctk_lsh[1] * ms->gh->cctk_lsh[0] * k] = ms->basis->eval(r, k); } posix_memalign((void**)&cp->basis_val_z, 32, sizeof(*cp->basis_val_z) * ms->nb_coeffs_z * ms->gh->cctk_lsh[2]); for (int i = 0; i < ms->gh->cctk_lsh[2]; i++) { CCTK_REAL zz = z[CCTK_GFINDEX3D(ms->gh, 0, 0, i)]; for (int j = 0; j < ms->nb_coeffs_z; j++) cp->basis_val_z [i * ms->nb_coeffs_z + j] = ms->basis->eval(fabs(zz), j); //cp->basis_val_z [i + ms->gh->cctk_lsh[2] * j] = ms->basis->eval(zz, j); } posix_memalign((void**)&cp->transform_z, 32, sizeof(*cp->transform_z) * cctkGH->cctk_lsh[2] * ms->nb_coeffs_x); posix_memalign((void**)&cp->one, 32, sizeof(*cp->one) * grid_size); for (int i = 0; i < grid_size; i++) cp->one[i] = 1.0; #else posix_memalign((void**)&cp->transform_matrix, 32, sizeof(*cp->transform_matrix) * nb_coeffs_x * gh->cctk_lsh[0] * gh->cctk_lsh[2]); posix_memalign((void**)&cp->transform_matrix1, 32, sizeof(*cp->transform_matrix1) * nb_coeffs_z * gh->cctk_lsh[0] * gh->cctk_lsh[2]); #pragma omp parallel for for (int j = 0; j < gh->cctk_lsh[2]; j++) { CCTK_REAL zz = a_z[CCTK_GFINDEX3D(ms->gh, 0, 0, j)]; for (int i = 0; i < gh->cctk_lsh[0]; i++) { const int idx_grid = j * gh->cctk_lsh[0] + i; double xx = a_x[CCTK_GFINDEX3D(ms->gh, i, 0, 0)]; double rr = sqrt(SQR(xx) + SQR(zz)); #if MS_POLAR double coord0 = rr; double coord1 = atan2(zz, xx); #else double coord0 = xx; double coord1 = zz; #endif //for (int k = 0; k < ms->nb_coeffs_z; k++) // for (int l = 0; l < ms->nb_coeffs_x; l++) { // const int idx_coeff = k * ms->nb_coeffs_x + l; // cp->transform_matrix[idx_grid + gh->cctk_lsh[0] * gh->cctk_lsh[2] * idx_coeff] = ms->basis->eval(r, l) * ms->basis1->eval(phi, k); // } for (int k = 0; k < nb_coeffs_x; k++) { double dx = calc_basis_freq(ms->solver->basis[0], k); double r0 = dx * 64; double fact = exp(-36.0 * pow(rr / r0, 10)); fact = 1.0; cp->transform_matrix[idx_grid + gh->cctk_lsh[0] * gh->cctk_lsh[2] * k] = ms->solver->basis[0]->eval(coord0, k) * fact; } for (int k = 0; k < nb_coeffs_z; k++) { double dx = calc_basis_freq(ms->solver->basis[1], k); double r0 = dx * 64; double fact = exp(-36.0 * pow(rr / r0, 10)); fact = 1.0; cp->transform_matrix1[idx_grid * nb_coeffs_z + k] = ms->solver->basis[1]->eval(coord1, k) * fact; } } } posix_memalign((void**)&cp->transform_tmp, 32, sizeof(*cp->transform_tmp) * gh->cctk_lsh[0] * gh->cctk_lsh[2] * nb_coeffs_z); #endif ms->nb_patches++; return cp; } int msa_context_init(cGH *gh, MaximalSlicingContext **ctx, int basis_order0, int basis_order1, double outer_bound, double filter_power) { MaximalSlicingContext *ms; int ret; ms = calloc(1, sizeof(*ms)); if (!ms) return -ENOMEM; ms->gh = gh; ret = ms_solver_init(&ms->solver, gh, basis_order0, basis_order1, outer_bound, filter_power, 0.0); if (ret < 0) return ret; *ctx = ms; return 0; } void msa_context_free(MaximalSlicingContext **pms) { MaximalSlicingContext *ms = *pms; if (!ms) return; for (int i = 0; i < ms->nb_patches; i++) coord_patch_free(&ms->patches[i]); free(ms->patches); free(ms); *pms = NULL; } void msa_lapse_eval(MaximalSlicingContext *ms, double *dst) { const int reflevel = ctz(ms->gh->cctk_levfac[0]); CoordPatch *cp = get_coord_patch(ms, reflevel); double *coeffs = ms->solver->coeffs; #if 0 #pragma omp parallel for for (int k = 0; k < cctk_lsh[2]; k++) { for (int i = 0; i < cctk_lsh[0]; i++) { int idx = CCTK_GFINDEX3D(cctkGH, i, cp->y_idx, k); double xx = x[idx]; double zz = z[idx]; double r = sqrt(SQR(xx) + SQR(zz)); double phi = atan2(zz, xx); double val = 1.0; for (int l = 0; l < ms->nb_coeffs_z; l++) { double tmp = 0.0; for (int m = 0; m < ms->nb_coeffs_x; m++) { const int idx_coeff = l * ms->nb_coeffs_x + m; tmp += ms->coeffs[idx_coeff] * ms->basis->eval(r, m); } val += tmp * ms->basis1->eval(phi, l); } alp[idx] = val; } } #else cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, ms->gh->cctk_lsh[0] * ms->gh->cctk_lsh[2], ms->solver->nb_coeffs[1], ms->solver->nb_coeffs[0], 1.0, cp->transform_matrix, ms->gh->cctk_lsh[0] * ms->gh->cctk_lsh[2], coeffs, ms->solver->nb_coeffs[0], 0.0, cp->transform_tmp, ms->gh->cctk_lsh[0] * ms->gh->cctk_lsh[2]); #pragma omp parallel for for (int j = 0; j < ms->gh->cctk_lsh[2]; j++) for (int i = 0; i < ms->gh->cctk_lsh[0]; i++) { const int idx_grid = j * ms->gh->cctk_lsh[0] + i; const double val = cblas_ddot(ms->solver->nb_coeffs[1], cp->transform_matrix1 + idx_grid * ms->solver->nb_coeffs[1], 1, cp->transform_tmp + idx_grid, ms->gh->cctk_lsh[0] * ms->gh->cctk_lsh[2]); dst[CCTK_GFINDEX3D(ms->gh, i, cp->y_idx, j)] = 1.0 + val; } #endif //memcpy(alp, cp->one, cctk_lsh[0] * cctk_lsh[1] * cctk_lsh[2] * sizeof(*alp)); //memset(alp, 0, cctk_lsh[0] * cctk_lsh[1] * cctk_lsh[2] * sizeof(*alp)); //cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, // ms->nb_coeffs_x, cctk_lsh[2], ms->nb_coeffs_z, 1.0, // coeffs, ms->nb_coeffs_x, cp->basis_val_z, ms->nb_coeffs_z, // 0.0, cp->transform_z, ms->nb_coeffs_x); //cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, // cctk_lsh[1] * cctk_lsh[0], cctk_lsh[2], ms->nb_coeffs_x, 1.0, // cp->basis_val_r, cctk_lsh[0] * cctk_lsh[1], cp->transform_z, ms->nb_coeffs_x, // 1.0, alp, cctk_lsh[0] * cctk_lsh[1]); } void msa_lapse_solve(MaximalSlicingContext *ms) { ms_solver_solve(ms->solver); } void maximal_slicing_axi_initlapse(CCTK_ARGUMENTS) { MaximalSlicingContext *ms; DECLARE_CCTK_ARGUMENTS; DECLARE_CCTK_PARAMETERS; int i, ret; msa_context_init(cctkGH, &ms, basis_order_r, basis_order_z, 64.0, filter_power); ms_solver_solve(ms->solver); msa_lapse_eval(ms, alpha); memcpy(alp, alpha, sizeof(*alp) * cctk_lsh[0] * cctk_lsh[2]); msa_context_free(&ms); } void maximal_slicing_axi(CCTK_ARGUMENTS) { static MaximalSlicingContext *ms; DECLARE_CCTK_ARGUMENTS; DECLARE_CCTK_PARAMETERS; int64_t expand_start, totaltime_start; int i, ret; totaltime_start = gettime(); /* on the first run, init the solver */ if (!ms) msa_context_init(cctkGH, &ms, basis_order_r, basis_order_z, 64.0, filter_power); CCTK_TimerStart("MaximalSlicingAxi_Solve"); msa_lapse_solve(ms); CCTK_TimerStop("MaximalSlicingAxi_Solve"); if (export_coeffs) memcpy(alpha_coeffs, ms->solver->coeffs, sizeof(*alpha_coeffs) * ms->solver->nb_coeffs[0] * ms->solver->nb_coeffs[1]); CCTK_TimerStart("MaximalSlicingAxi_Expand"); expand_start = gettime(); msa_lapse_eval(ms, alpha); ms->grid_expand_time += gettime() - expand_start; ms->grid_expand_count++; CCTK_TimerStop("MaximalSlicingAxi_Expand"); /* print stats */ if (!(ms->grid_expand_count & 255)) { fprintf(stderr, "Maximal slicing stats:\n"); ms_solver_print_stats(ms->solver); fprintf(stderr, "%lu evals: total time %g s, avg time per call %g ms\n", ms->grid_expand_count, (double)ms->grid_expand_time / 1e6, (double)ms->grid_expand_time / ms->grid_expand_count / 1e3); } }