#include #include #include #include #include #include #include #include #include #include #include //#include //#include #include "cctk.h" #include "cctk_Arguments.h" #include "cctk_Parameters.h" #include "cctk_Timers.h" #include "util_Table.h" #include "ms_solve.h" /* precomputed values for a given refined grid */ typedef struct CoordPatch { CCTK_REAL origin[3]; CCTK_INT delta[3]; CCTK_INT size[3]; // basis values on the grid double *basis_val_r; double *basis_val_z; double *transform_z; double *transform_matrix; double *transform_matrix1; double *transform_tmp; double *one; int y_idx; } CoordPatch; typedef struct MaximalSlicingContext { MSSolver *solver; cGH *gh; uint64_t grid_expand_count; uint64_t grid_expand_time; CoordPatch *patches; int nb_patches; } MaximalSlicingContext; double scale_factor; /* get an approximate "main" frequency component in a basis function */ static double calc_basis_freq(const BasisSet *b, int order) { return b->colloc_point(order, 1); } static CoordPatch *get_coord_patch(MaximalSlicingContext *ms, CCTK_REAL *x, CCTK_REAL *y, CCTK_REAL *z) { cGH *cctkGH = ms->gh; int nb_coeffs_x = ms->solver->nb_coeffs[0]; int nb_coeffs_z = ms->solver->nb_coeffs[1]; CoordPatch *cp; int64_t grid_size; int i; for (int i = 0; i < ms->nb_patches; i++) { cp = &ms->patches[i]; if (cp->origin[0] == ms->gh->cctk_origin_space[0] && cp->origin[1] == ms->gh->cctk_origin_space[1] && cp->origin[2] == ms->gh->cctk_origin_space[2] && cp->size[0] == ms->gh->cctk_lsh[0] && cp->size[1] == ms->gh->cctk_lsh[1] && cp->size[2] == ms->gh->cctk_lsh[2] && cp->delta[0] == ms->gh->cctk_levfac[0] && cp->delta[1] == ms->gh->cctk_levfac[1] && cp->delta[2] == ms->gh->cctk_levfac[2]) return cp; } grid_size = cctkGH->cctk_lsh[0] * cctkGH->cctk_lsh[1] * cctkGH->cctk_lsh[2]; /* create a new patch */ ms->patches = realloc(ms->patches, sizeof(*ms->patches) * (ms->nb_patches + 1)); cp = &ms->patches[ms->nb_patches]; memset(cp, 0, sizeof(*cp)); memcpy(cp->origin, ms->gh->cctk_origin_space, sizeof(cp->origin)); memcpy(cp->size, ms->gh->cctk_lsh, sizeof(cp->size)); memcpy(cp->delta, ms->gh->cctk_levfac, sizeof(cp->delta)); for (i = 0; i < cp->size[1]; i++) if (fabs(y[CCTK_GFINDEX3D(cctkGH, 0, i, 0)]) < 1e-8) { cp->y_idx = i; break; } if (i == cp->size[1]) CCTK_WARN(0, "The grid does not include y==0"); #if 0 posix_memalign((void**)&cp->basis_val_r, 32, sizeof(*cp->basis_val_r) * ms->nb_coeffs_x * ms->gh->cctk_lsh[1] * ms->gh->cctk_lsh[0]); for (int j = 0; j < ms->gh->cctk_lsh[1]; j++) for (int i = 0; i < ms->gh->cctk_lsh[0]; i++) { CCTK_REAL xx = x[CCTK_GFINDEX3D(ms->gh, i, j, 0)]; CCTK_REAL yy = y[CCTK_GFINDEX3D(ms->gh, i, j, 0)]; CCTK_REAL r = sqrt(SQR(xx) + SQR(yy)); for (int k = 0; k < ms->nb_coeffs_x; k++) //cp->basis_val_r [(j * ms->gh->cctk_lsh[0] + i) * ms->nb_coeffs_x + k] = ms->basis->eval(r, k); cp->basis_val_r [(j * ms->gh->cctk_lsh[0] + i) + ms->gh->cctk_lsh[1] * ms->gh->cctk_lsh[0] * k] = ms->basis->eval(r, k); } posix_memalign((void**)&cp->basis_val_z, 32, sizeof(*cp->basis_val_z) * ms->nb_coeffs_z * ms->gh->cctk_lsh[2]); for (int i = 0; i < ms->gh->cctk_lsh[2]; i++) { CCTK_REAL zz = z[CCTK_GFINDEX3D(ms->gh, 0, 0, i)]; for (int j = 0; j < ms->nb_coeffs_z; j++) cp->basis_val_z [i * ms->nb_coeffs_z + j] = ms->basis->eval(fabs(zz), j); //cp->basis_val_z [i + ms->gh->cctk_lsh[2] * j] = ms->basis->eval(zz, j); } posix_memalign((void**)&cp->transform_z, 32, sizeof(*cp->transform_z) * cctkGH->cctk_lsh[2] * ms->nb_coeffs_x); posix_memalign((void**)&cp->one, 32, sizeof(*cp->one) * grid_size); for (int i = 0; i < grid_size; i++) cp->one[i] = 1.0; #else posix_memalign((void**)&cp->transform_matrix, 32, sizeof(*cp->transform_matrix) * nb_coeffs_x * cp->size[0] * cp->size[2]); posix_memalign((void**)&cp->transform_matrix1, 32, sizeof(*cp->transform_matrix1) * nb_coeffs_z * cp->size[0] * cp->size[2]); #pragma omp parallel for for (int j = 0; j < cp->size[2]; j++) { CCTK_REAL zz = z[CCTK_GFINDEX3D(ms->gh, 0, 0, j)]; for (int i = 0; i < cp->size[0]; i++) { const int idx_grid = j * cp->size[0] + i; double xx = x[CCTK_GFINDEX3D(ms->gh, i, 0, 0)]; double rr = sqrt(SQR(xx) + SQR(zz)); #if MS_POLAR double coord0 = rr; double coord1 = atan2(zz, xx); #else double coord0 = xx; double coord1 = zz; #endif //for (int k = 0; k < ms->nb_coeffs_z; k++) // for (int l = 0; l < ms->nb_coeffs_x; l++) { // const int idx_coeff = k * ms->nb_coeffs_x + l; // cp->transform_matrix[idx_grid + cp->size[0] * cp->size[2] * idx_coeff] = ms->basis->eval(r, l) * ms->basis1->eval(phi, k); // } for (int k = 0; k < nb_coeffs_x; k++) { double dx = calc_basis_freq(ms->solver->basis[0], k); double r0 = dx * 64; double fact = exp(-36.0 * pow(rr / r0, 10)); fact = 1.0; cp->transform_matrix[idx_grid + cp->size[0] * cp->size[2] * k] = ms->solver->basis[0]->eval(coord0, k) * fact; } for (int k = 0; k < nb_coeffs_z; k++) { double dx = calc_basis_freq(ms->solver->basis[1], k); double r0 = dx * 64; double fact = exp(-36.0 * pow(rr / r0, 10)); fact = 1.0; cp->transform_matrix1[idx_grid * nb_coeffs_z + k] = ms->solver->basis[1]->eval(coord1, k) * fact; } } } posix_memalign((void**)&cp->transform_tmp, 32, sizeof(*cp->transform_tmp) * cp->size[0] * cp->size[2] * nb_coeffs_z); #endif ms->nb_patches++; return cp; } static int context_init(cGH *cctkGH, MaximalSlicingContext **ctx) { MaximalSlicingContext *ms; int ret; DECLARE_CCTK_ARGUMENTS; DECLARE_CCTK_PARAMETERS; ms = calloc(1, sizeof(*ms)); if (!ms) return -ENOMEM; ms->gh = cctkGH; ret = ms_solver_init(&ms->solver, cctkGH, basis_order_r, basis_order_z, scale_factor, filter_power, 0.0); if (ret < 0) return ret; *ctx = ms; return 0; } void maximal_slicing_axi(CCTK_ARGUMENTS) { static MaximalSlicingContext *ms; CoordPatch *cp; DECLARE_CCTK_ARGUMENTS; DECLARE_CCTK_PARAMETERS; int64_t expand_start, totaltime_start; double *coeffs = NULL; int i, ret; totaltime_start = gettime(); /* on the first run, init the solver */ if (!ms) context_init(cctkGH, &ms); cp = get_coord_patch(ms, x, y, z); CCTK_TimerStart("MaximalSlicingAxi_Solve"); ms_solver_solve(ms->solver); CCTK_TimerStop("MaximalSlicingAxi_Solve"); coeffs = ms->solver->coeffs; if (export_coeffs) memcpy(alpha_coeffs, coeffs, sizeof(*alpha_coeffs) * ms->solver->nb_coeffs[0] * ms->solver->nb_coeffs[1]); CCTK_TimerStart("MaximalSlicingAxi_Expand"); expand_start = gettime(); #if 0 #pragma omp parallel for for (int k = 0; k < cctk_lsh[2]; k++) { for (int i = 0; i < cctk_lsh[0]; i++) { int idx = CCTK_GFINDEX3D(cctkGH, i, cp->y_idx, k); double xx = x[idx]; double zz = z[idx]; double r = sqrt(SQR(xx) + SQR(zz)); double phi = atan2(zz, xx); double val = 1.0; for (int l = 0; l < ms->nb_coeffs_z; l++) { double tmp = 0.0; for (int m = 0; m < ms->nb_coeffs_x; m++) { const int idx_coeff = l * ms->nb_coeffs_x + m; tmp += ms->coeffs[idx_coeff] * ms->basis->eval(r, m); } val += tmp * ms->basis1->eval(phi, l); } alp[idx] = val; } } #else cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, cctk_lsh[0] * cctk_lsh[2], ms->solver->nb_coeffs[1], ms->solver->nb_coeffs[0], 1.0, cp->transform_matrix, cctk_lsh[0] * cctk_lsh[2], coeffs, ms->solver->nb_coeffs[0], 0.0, cp->transform_tmp, cctk_lsh[0] * cctk_lsh[2]); #pragma omp parallel for for (int j = 0; j < cctk_lsh[2]; j++) for (int i = 0; i < cctk_lsh[0]; i++) { const int idx_grid = j * cctk_lsh[0] + i; const double val = cblas_ddot(ms->solver->nb_coeffs[1], cp->transform_matrix1 + idx_grid * ms->solver->nb_coeffs[1], 1, cp->transform_tmp + idx_grid, cctk_lsh[0] * cctk_lsh[2]); alpha[CCTK_GFINDEX3D(cctkGH, i, cp->y_idx, j)] = 1.0 + val; } #endif //memcpy(alp, cp->one, cctk_lsh[0] * cctk_lsh[1] * cctk_lsh[2] * sizeof(*alp)); //memset(alp, 0, cctk_lsh[0] * cctk_lsh[1] * cctk_lsh[2] * sizeof(*alp)); //cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, // ms->nb_coeffs_x, cctk_lsh[2], ms->nb_coeffs_z, 1.0, // coeffs, ms->nb_coeffs_x, cp->basis_val_z, ms->nb_coeffs_z, // 0.0, cp->transform_z, ms->nb_coeffs_x); //cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, // cctk_lsh[1] * cctk_lsh[0], cctk_lsh[2], ms->nb_coeffs_x, 1.0, // cp->basis_val_r, cctk_lsh[0] * cctk_lsh[1], cp->transform_z, ms->nb_coeffs_x, // 1.0, alp, cctk_lsh[0] * cctk_lsh[1]); ms->grid_expand_time += gettime() - expand_start; ms->grid_expand_count++; CCTK_TimerStop("MaximalSlicingAxi_Expand"); //CCTK_TimerStart("MaximalSlicingAxi_Polish"); //msa_sor(ms, cp); //CCTK_TimerStop("MaximalSlicingAxi_Polish"); /* print stats */ if (!(ms->grid_expand_count & 255)) { fprintf(stderr, "Maximal slicing stats:\n"); ms_solver_print_stats(ms->solver); fprintf(stderr, "%lu evals: total time %g s, avg time per call %g ms\n", ms->grid_expand_count, (double)ms->grid_expand_time / 1e6, (double)ms->grid_expand_time / ms->grid_expand_count / 1e3); } }