#include #include #include #include #include #include #include #include #include #include #include #include "cctk.h" #include "cctk_Arguments.h" #include "cctk_Parameters.h" #define ACC_TEST 1 #define MAX(x, y) ((x) > (y) ? (x) : (y)) #define MIN(x, y) ((x) > (y) ? (y) : (x)) #define SQR(x) ((x) * (x)) #define SGN(x) ((x) >= 0.0 ? 1.0 : -1.0) /* * small number to avoid r=0 singularities */ #define EPS 1E-08 /* a set of basis functions */ typedef struct BasisSet { /* evaluate the idx-th basis function at the specified point*/ long double (*eval) (long double coord, int idx); /* evaluate the first derivative of the idx-th basis function at the specified point*/ long double (*eval_diff1)(long double coord, int idx); /* evaluate the second derivative of the idx-th basis function at the specified point*/ long double (*eval_diff2)(long double coord, int idx); /** * Get the idx-th collocation point for the specified order. * idx runs from 0 to order - 1 (inclusive) */ long double (*colloc_point)(int order, int idx); } BasisSet; /* * The basis of even (n = 2 * idx) SB functions (Boyd 2000, Ch 17.9) * SB(x, n) = sin((n + 1) arccot(|x| / L)) * They are symmetric wrt origin and decay as 1/x in infinity. */ static CCTK_REAL scale_factor; #define SCALE_FACTOR scale_factor static long double sb_even_eval(long double coord, int idx) { long double val = (coord == 0.0) ? M_PI_2 : atanl(SCALE_FACTOR / fabsl(coord)); idx *= 2; // even only return sinl((idx + 1) * val); } static long double sb_even_eval_diff1(long double coord, int idx) { long double val = (coord == 0.0) ? M_PI_2 : atanl(SCALE_FACTOR / fabsl(coord)); idx *= 2; // even only return - SCALE_FACTOR * (idx + 1) * SGN(coord) * cosl((idx + 1) * val) / (SQR(SCALE_FACTOR) + SQR(coord)); } static long double sb_even_eval_diff2(long double coord, int idx) { long double val = (coord == 0.0) ? M_PI_2 : atanl(SCALE_FACTOR / fabsl(coord)); idx *= 2; // even only return SCALE_FACTOR * (idx + 1) * SGN(coord) * (2 * fabsl(coord) * cosl((idx + 1) * val) - SCALE_FACTOR * (idx + 1) * sinl((idx + 1) * val)) / SQR(SQR(SCALE_FACTOR) + SQR(coord)); } static long double sb_even_colloc_point(int order, int idx) { long double t; idx = order - idx - 1; order *= 2; t = (idx + 2) * M_PI / (order + 4); return SCALE_FACTOR / tanl(t); } static const BasisSet sb_even_basis = { .eval = sb_even_eval, .eval_diff1 = sb_even_eval_diff1, .eval_diff2 = sb_even_eval_diff2, .colloc_point = sb_even_colloc_point, }; typedef struct BrillDataContext { /* options */ CCTK_REAL amplitude; double (*q_func)(struct BrillDataContext *bd, double rho, double z); double (*laplace_q_func)(struct BrillDataContext *bd, double rho, double z); /* state */ const BasisSet *basis; int nb_coeffs_x; int nb_coeffs_z; int nb_coeffs; int nb_colloc_points_x; int nb_colloc_points_z; int nb_colloc_points; int colloc_grid_order_x; int colloc_grid_order_z; gsl_vector *psi_coeffs; } BrillDataContext; static int brill_construct_matrix(BrillDataContext *bd, gsl_matrix *mat, gsl_vector *rhs) { CCTK_REAL *basis_val, *basis_dval, *basis_d2val; int idx_coeff_x, idx_coeff_z, idx_grid_x, idx_grid_z; /* precompute the basis values we will need */ // FIXME: assumes same number of points in x and z directions basis_val = malloc(sizeof(*basis_val) * bd->nb_colloc_points_x * bd->nb_coeffs_x); basis_dval = malloc(sizeof(*basis_dval) * bd->nb_colloc_points_x * bd->nb_coeffs_x); basis_d2val = malloc(sizeof(*basis_d2val) * bd->nb_colloc_points_x * bd->nb_coeffs_x); for (int i = 0; i < bd->nb_colloc_points_x; i++) { CCTK_REAL coord = bd->basis->colloc_point(bd->colloc_grid_order_x, i); for (int j = 0; j < bd->nb_coeffs_x; j++) { basis_val [i * bd->nb_coeffs_x + j] = bd->basis->eval(coord, j); basis_dval [i * bd->nb_coeffs_x + j] = bd->basis->eval_diff1(coord, j); basis_d2val[i * bd->nb_coeffs_x + j] = bd->basis->eval_diff2(coord, j); } } for (idx_grid_z = 0; idx_grid_z < bd->nb_colloc_points_z; idx_grid_z++) { CCTK_REAL z_physical = bd->basis->colloc_point(bd->colloc_grid_order_z, idx_grid_z); for (idx_grid_x = 0; idx_grid_x < bd->nb_colloc_points_x; idx_grid_x++) { CCTK_REAL x_physical = bd->basis->colloc_point(bd->colloc_grid_order_x, idx_grid_x); CCTK_REAL d2q = bd->laplace_q_func(bd, x_physical, z_physical); int idx_grid = idx_grid_z * bd->nb_colloc_points_z + idx_grid_x; for (idx_coeff_z = 0; idx_coeff_z < bd->nb_coeffs_z; idx_coeff_z++) for (idx_coeff_x = 0; idx_coeff_x < bd->nb_coeffs_x; idx_coeff_x++) { int idx_coeff = idx_coeff_z * bd->nb_coeffs_z + idx_coeff_x; mat->data[idx_grid + mat->tda * idx_coeff] = basis_d2val[idx_grid_x * bd->nb_coeffs_x + idx_coeff_x] * basis_val[idx_grid_z * bd->nb_coeffs_z + idx_coeff_z] * x_physical + basis_dval [idx_grid_x * bd->nb_coeffs_x + idx_coeff_x] * basis_val[idx_grid_z * bd->nb_coeffs_z + idx_coeff_z] + basis_d2val[idx_grid_z * bd->nb_coeffs_z + idx_coeff_z] * basis_val[idx_grid_x * bd->nb_coeffs_x + idx_coeff_x] * x_physical + basis_val [idx_grid_x * bd->nb_coeffs_x + idx_coeff_x] * basis_val[idx_grid_z * bd->nb_coeffs_z + idx_coeff_z] * 0.25 * d2q * x_physical; } rhs->data[idx_grid] = -0.25 * d2q * x_physical; } } free(basis_val); free(basis_dval); free(basis_d2val); return 0; } static int solve_linear(gsl_matrix *mat, gsl_vector **px, gsl_vector **prhs) { int *ipiv; gsl_matrix *mat_f; double cond, ferr, berr, rpivot; gsl_vector *x = *px; gsl_vector *rhs = *prhs; char equed = 'N'; int ret = 0; ipiv = malloc(mat->size1 * sizeof(*ipiv)); mat_f = gsl_matrix_alloc(mat->size1, mat->size2); if (!ipiv || !mat_f) { ret = -ENOMEM; goto fail; } LAPACKE_dgesvx(LAPACK_COL_MAJOR, 'N', 'N', mat->size1, 1, mat->data, mat->tda, mat_f->data, mat_f->tda, ipiv, &equed, NULL, NULL, rhs->data, rhs->size, x->data, x->size, &cond, &ferr, &berr, &rpivot); CCTK_VInfo(CCTK_THORNSTRING, "LU factorization solution to a %zdx%zd matrix: " "condition number %16.16g; forward error %16.16g backward error %16.16g", mat->size1, mat->size2, cond, ferr, berr); fail: gsl_matrix_free(mat_f); free(ipiv); return ret; } static int solve_svd(gsl_matrix *mat, gsl_vector **px, gsl_vector **prhs) { double *sv; int rank; gsl_vector *x = *px; gsl_vector *rhs = *prhs; sv = malloc(sizeof(*sv) * x->size); if (!sv) return -ENOMEM; LAPACKE_dgelsd(LAPACK_COL_MAJOR, rhs->size, x->size, 1, mat->data, mat->tda, rhs->data, rhs->size, sv, -1, &rank); CCTK_VInfo(CCTK_THORNSTRING, "Least squares SVD solution to a %zdx%zd matrix: " "rank %d, condition number %16.16g", mat->size1, mat->size2, rank, sv[x->size - 1] / sv[0]); gsl_vector_free(x); *px = rhs; (*px)->size = mat->size1; *prhs = NULL; free(sv); return 0; } /* * Solve the equation * Δψ + ¼ ψ (∂²q/∂r² + ∂²q/∂z²) = 0 * for the coefficients of spectral approximation of ψ: * ψ(r, z) = 1 + ΣaᵢⱼTᵢ(r)Tⱼ(z) * where i = { 0, ... , bd->nb_coeffs_x }; * j = { 0, ... , bd->nb_coeffs_z }; * q(r, z) and Tᵢ(x) are defined by bd->q_func, bd->laplace_q_func and * bd->basis. * * The cofficients are exported in bd->psi_coeffs */ static int brill_solve(BrillDataContext *bd) { gsl_matrix *mat = NULL; gsl_vector *coeffs = NULL, *rhs = NULL; #if ACC_TEST gsl_vector *rhs_bkp = NULL; gsl_matrix *mat_bkp = NULL; #endif int ret = 0; /* allocate and fill the pseudospectral matrix */ mat = gsl_matrix_alloc (bd->nb_coeffs, bd->nb_colloc_points); // inverted order for lapack coeffs = gsl_vector_alloc (bd->nb_coeffs); rhs = gsl_vector_calloc(bd->nb_colloc_points); if (!mat || !coeffs || !rhs) { ret = -ENOMEM; goto fail; } /* fill the matrix */ ret = brill_construct_matrix(bd, mat, rhs); if (ret < 0) goto fail; #if ACC_TEST /* make backups of the matrix and RHS, since they might be destroyed later */ mat_bkp = gsl_matrix_alloc(mat->size2, mat->size1); rhs_bkp = gsl_vector_alloc(rhs->size); if (!mat_bkp || !rhs_bkp) { ret = -ENOMEM; goto fail; } gsl_vector_memcpy(rhs_bkp, rhs); gsl_matrix_transpose_memcpy(mat_bkp, mat); #endif /* solve for the coeffs */ if (bd->nb_colloc_points > bd->nb_coeffs) ret = solve_svd(mat, &coeffs, &rhs); else ret = solve_linear(mat, &coeffs, &rhs); /* export the result to the caller */ bd->psi_coeffs = coeffs; #if ACC_TEST { double min, max, rmin, rmax; gsl_vector_minmax(rhs_bkp, &rmin, &rmax); gsl_blas_dgemv(CblasNoTrans, 1.0, mat_bkp, coeffs, -1.0, rhs_bkp); gsl_vector_minmax(rhs_bkp, &min, &max); CCTK_VInfo(CCTK_THORNSTRING, "max(|A·x - rhs|) / max(|rhs|): %16.16g", MAX(fabs(min), fabs(max)) / MAX(fabs(rmin), fabs(rmax))); } #endif fail: #if ACC_TEST gsl_matrix_free(mat_bkp); gsl_vector_free(rhs_bkp); #endif if (ret < 0) gsl_vector_free(coeffs); gsl_vector_free(rhs); gsl_matrix_free(mat); return ret; } // q function form from PHYSICAL REVIEW D 88, 103009 (2013) // with σ and ρ_0 hardcoded to 1 for now static double q_gundlach(BrillDataContext *bd, double rho, double z) { return bd->amplitude * SQR(rho) * exp(- (SQR(rho) + SQR(z))); } static double laplace_q_gundlach(BrillDataContext *bd, double rho, double z) { double r2 = SQR(rho); double z2 = SQR(z); double e = exp(-r2 - z2); return 2 * bd->amplitude * e * (1.0 + 2 * r2 * (r2 + z2 - 3)); } static BrillDataContext *init_bd(cGH *cctkGH, CCTK_REAL amplitude, int basis_order_r, int basis_order_z, int colloc_offset_r, int colloc_offset_z, double sf, int overdet) { BrillDataContext *bd; if (basis_order_r != basis_order_z) CCTK_WARN(0, "Different r and z basis orders are not supported."); bd = malloc(sizeof(*bd)); bd->q_func = q_gundlach; bd->laplace_q_func = laplace_q_gundlach; bd->amplitude = amplitude; bd->basis = &sb_even_basis; bd->nb_coeffs_x = basis_order_r; bd->nb_coeffs_z = basis_order_z; bd->nb_coeffs = bd->nb_coeffs_x * bd->nb_coeffs_z; bd->nb_colloc_points_x = basis_order_r + overdet; bd->nb_colloc_points_z = basis_order_z + overdet; bd->nb_colloc_points = bd->nb_colloc_points_x * bd->nb_colloc_points_z; bd->colloc_grid_order_x = basis_order_r + colloc_offset_r; bd->colloc_grid_order_z = basis_order_z + colloc_offset_z; scale_factor = sf; return bd; } void brill_data(CCTK_ARGUMENTS) { static BrillDataContext *prev_bd; DECLARE_CCTK_ARGUMENTS; DECLARE_CCTK_PARAMETERS; BrillDataContext *bd; long double *basis_val_r, *basis_val_z; int64_t grid_size = CCTK_GFINDEX3D(cctkGH, cctk_lsh[0] - 1, cctk_lsh[1] - 1, cctk_lsh[2] - 1) + 1; /* on the first run, solve the equation for ψ */ if (!prev_bd) { bd = init_bd(cctkGH, amplitude, basis_order_r, basis_order_z, colloc_grid_offset_r, colloc_grid_offset_z, scale_factor, overdet); brill_solve(bd); if (export_coeffs) memcpy(brill_coeffs, bd->psi_coeffs->data, sizeof(*brill_coeffs) * bd->nb_coeffs); prev_bd = bd; } else bd = prev_bd; memset(kxx, 0, sizeof(*kxx) * grid_size); memset(kyy, 0, sizeof(*kyy) * grid_size); memset(kzz, 0, sizeof(*kzz) * grid_size); memset(kxy, 0, sizeof(*kxy) * grid_size); memset(kxz, 0, sizeof(*kxz) * grid_size); memset(kyz, 0, sizeof(*kyz) * grid_size); memset(gxz, 0, sizeof(*kxz) * grid_size); memset(gyz, 0, sizeof(*kyz) * grid_size); /* precompute the basis values on the grid points */ basis_val_r = malloc(sizeof(*basis_val_r) * bd->nb_coeffs_x * cctk_lsh[1] * cctk_lsh[0]); basis_val_z = malloc(sizeof(*basis_val_z) * bd->nb_coeffs_z * cctk_lsh[2]); for (int i = 0; i < cctk_lsh[2]; i++) { CCTK_REAL zz = z[CCTK_GFINDEX3D(cctkGH, 0, 0, i)]; for (int j = 0; j < bd->nb_coeffs_z; j++) basis_val_z[i * bd->nb_coeffs_z + j] = bd->basis->eval(zz, j); } for (int j = 0; j < cctk_lsh[1]; j++) for (int i = 0; i < cctk_lsh[0]; i++) { CCTK_REAL xx = x[CCTK_GFINDEX3D(cctkGH, i, j, 0)]; CCTK_REAL yy = y[CCTK_GFINDEX3D(cctkGH, i, j, 0)]; CCTK_REAL r = sqrt(SQR(xx) + SQR(yy)); for (int k = 0; k < bd->nb_coeffs_x; k++) basis_val_r[(j * cctk_lsh[0] + i) * bd->nb_coeffs_x + k] = bd->basis->eval(r, k); } #pragma omp parallel for for (int k = 0; k < cctk_lsh[2]; k++) for (int j = 0; j < cctk_lsh[1]; j++) for (int i = 0; i < cctk_lsh[0]; i++) { int index = CCTK_GFINDEX3D(cctkGH, i, j, k); long double xx = x[index], yy = y[index], zz = z[index]; long double r2 = SQR(xx) + SQR(yy); long double r = sqrtl(r2); long double q = bd->q_func(bd, r, zz); long double e2q = expl(2 * q); long double psi = 1.0, psi4; for (int n = 0; n < bd->nb_coeffs_z; n++) for (int m = 0; m < bd->nb_coeffs_x; m++) psi += bd->psi_coeffs->data[n * bd->nb_coeffs_z + m] * basis_val_r[(j * cctk_lsh[0] + i) * bd->nb_coeffs_x + m] * basis_val_z[k * bd->nb_coeffs_z + n]; psi4 = SQR(SQR(psi)); if (r2 < EPS) r2 = EPS; gxx[index] = psi4 * (e2q + (1 - e2q) * SQR(yy) / r2); gyy[index] = psi4 * (e2q + (1 - e2q) * SQR(xx) / r2); gzz[index] = psi4 * e2q; gxy[index] = psi4 * (-(1.0 - e2q) * xx * yy / r2); } }