From 4c52660b125a296b449efb4f30c18d498f21e17c Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Sun, 26 Oct 2014 11:30:59 +0100 Subject: Initial commit. --- brill_data.c | 557 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ brill_data.h | 128 ++++++++++++++ 2 files changed, 685 insertions(+) create mode 100644 brill_data.c create mode 100644 brill_data.h diff --git a/brill_data.c b/brill_data.c new file mode 100644 index 0000000..6eeadb6 --- /dev/null +++ b/brill_data.c @@ -0,0 +1,557 @@ +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include "brill_data.h" + +#define ACC_TEST 1 + +#define MAX(x, y) ((x) > (y) ? (x) : (y)) +#define MIN(x, y) ((x) > (y) ? (y) : (x)) +#define SQR(x) ((x) * (x)) +#define SGN(x) ((x) >= 0.0 ? 1.0 : -1.0) + +/* + * small number to avoid r=0 singularities + */ +#define EPS 1E-08 + +/* a set of basis functions */ +typedef struct BasisSet { + /* evaluate the idx-th basis function at the specified point*/ + long double (*eval) (long double coord, int idx, long double sf); + /* evaluate the first derivative of the idx-th basis function at the specified point*/ + long double (*eval_diff1)(long double coord, int idx, long double sf); + /* evaluate the second derivative of the idx-th basis function at the specified point*/ + long double (*eval_diff2)(long double coord, int idx, long double sf); + /** + * Get the idx-th collocation point for the specified order. + * idx runs from 0 to order - 1 (inclusive) + */ + long double (*colloc_point)(int order, int idx, long double sf); +} BasisSet; + +typedef struct BDPriv { + const BasisSet *basis; + + double (*q_func)(struct BDContext *bd, double rho, double z); + double (*d2q_func)(struct BDContext *bd, double rho, double z); + + int nb_colloc_points; + int nb_colloc_points_rho; + int nb_colloc_points_z; + + int colloc_grid_order_rho; + int colloc_grid_order_z; + int nb_coeffs; + + gsl_vector *coeffs; +} BDPriv; + +/* + * The basis of even (n = 2 * idx) SB functions (Boyd 2000, Ch 17.9) + * SB(x, n) = sin((n + 1) arccot(|x| / L)) + * They are symmetric wrt origin and decay as 1/x in infinity. + */ + +static long double sb_even_eval(long double coord, int idx, long double sf) +{ + long double val = (coord == 0.0) ? M_PI_2 : atanl(sf / fabsl(coord)); + + idx *= 2; // even only + + return sinl((idx + 1) * val); +} + +static long double sb_even_eval_diff1(long double coord, int idx, long double sf) +{ + long double val = (coord == 0.0) ? M_PI_2 : atanl(sf / fabsl(coord)); + + idx *= 2; // even only + + return - sf * (idx + 1) * SGN(coord) * cosl((idx + 1) * val) / (SQR(sf) + SQR(coord)); +} + +static long double sb_even_eval_diff2(long double coord, int idx, long double sf) +{ + long double val = (coord == 0.0) ? M_PI_2 : atanl(sf / fabsl(coord)); + + idx *= 2; // even only + + return sf * (idx + 1) * SGN(coord) * (2 * fabsl(coord) * cosl((idx + 1) * val) - sf * (idx + 1) * sinl((idx + 1) * val)) / SQR(SQR(sf) + SQR(coord)); +} + +static long double sb_even_colloc_point(int order, int idx, long double sf) +{ + long double t; + + idx = order - idx - 1; + order *= 2; + + t = (idx + 2) * M_PI / (order + 4); + return sf / tanl(t); +} + +static const BasisSet sb_even_basis = { + .eval = sb_even_eval, + .eval_diff1 = sb_even_eval_diff1, + .eval_diff2 = sb_even_eval_diff2, + .colloc_point = sb_even_colloc_point, +}; + +static int brill_construct_matrix(BDContext *bd, gsl_matrix *mat, + gsl_vector *rhs) +{ + BDPriv *s = bd->priv; + const long double sfr = bd->basis_scale_factor_rho, sfz = bd->basis_scale_factor_z; + + double *basis_val, *basis_dval, *basis_d2val; + int idx_coeff_rho, idx_coeff_z, idx_grid_rho, idx_grid_z; + + /* precompute the basis values we will need */ + // FIXME: assumes same number of points in ρ and z directions + basis_val = malloc(sizeof(*basis_val) * s->nb_colloc_points_rho * bd->nb_coeffs_rho); + basis_dval = malloc(sizeof(*basis_dval) * s->nb_colloc_points_rho * bd->nb_coeffs_rho); + basis_d2val = malloc(sizeof(*basis_d2val) * s->nb_colloc_points_rho * bd->nb_coeffs_rho); + for (int i = 0; i < s->nb_colloc_points_rho; i++) { + double coord = s->basis->colloc_point(s->colloc_grid_order_rho, i, sfr); + for (int j = 0; j < bd->nb_coeffs_rho; j++) { + basis_val [i * bd->nb_coeffs_rho + j] = s->basis->eval(coord, j, sfr); + basis_dval [i * bd->nb_coeffs_rho + j] = s->basis->eval_diff1(coord, j, sfr); + basis_d2val[i * bd->nb_coeffs_rho + j] = s->basis->eval_diff2(coord, j, sfr); + } + } + + for (idx_grid_z = 0; idx_grid_z < s->nb_colloc_points_z; idx_grid_z++) { + double z_physical = s->basis->colloc_point(s->colloc_grid_order_z, idx_grid_z, sfz); + + for (idx_grid_rho = 0; idx_grid_rho < s->nb_colloc_points_rho; idx_grid_rho++) { + double x_physical = s->basis->colloc_point(s->colloc_grid_order_rho, idx_grid_rho, sfr); + double d2q = s->d2q_func(bd, x_physical, z_physical); + int idx_grid = idx_grid_z * s->nb_colloc_points_z + idx_grid_rho; + + for (idx_coeff_z = 0; idx_coeff_z < bd->nb_coeffs_z; idx_coeff_z++) + for (idx_coeff_rho = 0; idx_coeff_rho < bd->nb_coeffs_rho; idx_coeff_rho++) { + int idx_coeff = idx_coeff_z * bd->nb_coeffs_z + idx_coeff_rho; + + mat->data[idx_grid + mat->tda * idx_coeff] = basis_d2val[idx_grid_rho * bd->nb_coeffs_rho + idx_coeff_rho] * basis_val[idx_grid_z * bd->nb_coeffs_z + idx_coeff_z] * x_physical + + basis_dval [idx_grid_rho * bd->nb_coeffs_rho + idx_coeff_rho] * basis_val[idx_grid_z * bd->nb_coeffs_z + idx_coeff_z] + + basis_d2val[idx_grid_z * bd->nb_coeffs_z + idx_coeff_z] * basis_val[idx_grid_rho * bd->nb_coeffs_rho + idx_coeff_rho] * x_physical + + basis_val [idx_grid_rho * bd->nb_coeffs_rho + idx_coeff_rho] * basis_val[idx_grid_z * bd->nb_coeffs_z + idx_coeff_z] * 0.25 * d2q * x_physical; + } + rhs->data[idx_grid] = -0.25 * d2q * x_physical; + } + } + + free(basis_val); + free(basis_dval); + free(basis_d2val); + + return 0; +} + +static int brill_solve_linear(gsl_matrix *mat, gsl_vector **px, gsl_vector **prhs) +{ + int *ipiv; + gsl_matrix *mat_f; + double cond, ferr, berr, rpivot; + + gsl_vector *x = *px; + gsl_vector *rhs = *prhs; + char equed = 'N'; + int ret = 0; + + ipiv = malloc(mat->size1 * sizeof(*ipiv)); + mat_f = gsl_matrix_alloc(mat->size1, mat->size2); + if (!ipiv || !mat_f) { + ret = -ENOMEM; + goto fail; + } + + LAPACKE_dgesvx(LAPACK_COL_MAJOR, 'N', 'N', mat->size1, 1, + mat->data, mat->tda, mat_f->data, mat_f->tda, ipiv, &equed, + NULL, NULL, rhs->data, rhs->size, x->data, x->size, + &cond, &ferr, &berr, &rpivot); + + fprintf(stderr, "LU factorization solution to a %zdx%zd matrix: " + "condition number %16.16g; forward error %16.16g backward error %16.16g\n", + mat->size1, mat->size2, cond, ferr, berr); +fail: + gsl_matrix_free(mat_f); + free(ipiv); + + return ret; +} + +static int brill_solve_svd(gsl_matrix *mat, gsl_vector **px, gsl_vector **prhs) +{ + double *sv; + int rank; + + gsl_vector *x = *px; + gsl_vector *rhs = *prhs; + + sv = malloc(sizeof(*sv) * x->size); + if (!sv) + return -ENOMEM; + + LAPACKE_dgelsd(LAPACK_COL_MAJOR, rhs->size, x->size, 1, mat->data, mat->tda, + rhs->data, rhs->size, sv, -1, &rank); + + fprintf(stderr, "Least squares SVD solution to a %zdx%zd matrix: " + "rank %d, condition number %16.16g\n", mat->size1, mat->size2, + rank, sv[x->size - 1] / sv[0]); + + gsl_vector_free(x); + + *px = rhs; + (*px)->size = mat->size1; + *prhs = NULL; + + free(sv); + + return 0; +} + +/* + * Solve the equation + * Δψ + ¼ ψ (∂²q/∂r² + ∂²q/∂z²) = 0 + * for the coefficients of spectral approximation of ψ: + * ψ(r, z) = 1 + ΣaᵢⱼTᵢ(r)Tⱼ(z) + * where i = { 0, ... , bd->nb_coeffs_rho }; + * j = { 0, ... , bd->nb_coeffs_z }; + * q(r, z) and Tᵢ(x) are defined by bd->q_func, bd->laplace_q_func and + * bd->basis. + * + * The cofficients are exported in bd->priv->coeffs + */ +static int brill_solve(BDContext *bd) +{ + BDPriv *s = bd->priv; + gsl_matrix *mat = NULL; + gsl_vector *coeffs = NULL, *rhs = NULL; + +#if ACC_TEST + gsl_vector *rhs_bkp = NULL; + gsl_matrix *mat_bkp = NULL; +#endif + + int ret = 0; + + /* allocate and fill the pseudospectral matrix */ + mat = gsl_matrix_alloc (s->nb_coeffs, s->nb_colloc_points); // inverted order for lapack + coeffs = gsl_vector_alloc (s->nb_coeffs); + rhs = gsl_vector_calloc(s->nb_colloc_points); + if (!mat || !coeffs || !rhs) { + ret = -ENOMEM; + goto fail; + } + + /* fill the matrix */ + ret = brill_construct_matrix(bd, mat, rhs); + if (ret < 0) + goto fail; + +#if ACC_TEST + /* make backups of the matrix and RHS, since they might be destroyed later */ + mat_bkp = gsl_matrix_alloc(mat->size2, mat->size1); + rhs_bkp = gsl_vector_alloc(rhs->size); + if (!mat_bkp || !rhs_bkp) { + ret = -ENOMEM; + goto fail; + } + + gsl_vector_memcpy(rhs_bkp, rhs); + gsl_matrix_transpose_memcpy(mat_bkp, mat); +#endif + + /* solve for the coeffs */ + if (s->nb_colloc_points > s->nb_coeffs) + ret = brill_solve_svd(mat, &coeffs, &rhs); + else + ret = brill_solve_linear(mat, &coeffs, &rhs); + + /* export the result */ + s->coeffs = coeffs; + +#if ACC_TEST + { + double min, max, rmin, rmax; + + gsl_vector_minmax(rhs_bkp, &rmin, &rmax); + gsl_blas_dgemv(CblasNoTrans, 1.0, mat_bkp, coeffs, -1.0, rhs_bkp); + gsl_vector_minmax(rhs_bkp, &min, &max); + + fprintf(stderr, "max(|A·x - rhs|) / max(|rhs|): %16.16g\n", + MAX(fabs(min), fabs(max)) / MAX(fabs(rmin), fabs(rmax))); + } +#endif + +fail: + +#if ACC_TEST + gsl_matrix_free(mat_bkp); + gsl_vector_free(rhs_bkp); +#endif + + if (ret < 0) + gsl_vector_free(coeffs); + + gsl_vector_free(rhs); + gsl_matrix_free(mat); + + return ret; +} + +// q function form from PHYSICAL REVIEW D 88, 103009 (2013) +// with σ and ρ_0 hardcoded to 1 for now +static double q_gundlach(BDContext *bd, double rho, double z) +{ + return bd->amplitude * SQR(rho) * exp(- (SQR(rho) + SQR(z))); +} + +static double d2q_gundlach(BDContext *bd, double rho, double z) +{ + double r2 = SQR(rho); + double z2 = SQR(z); + double e = exp(-r2 - z2); + + return 2 * bd->amplitude * e * (1.0 + 2 * r2 * (r2 + z2 - 3)); +} + +static double q_eppley(BDContext *bd, double rho, double z) +{ + double r2 = SQR(rho) + SQR(z); + return bd->amplitude * SQR(rho) / (1.0 + pow(r2, bd->eppley_n / 2.0)); +} + +static double d2q_eppley(BDContext *bd, double rho, double z) +{ + double rho2 = SQR(rho); + double z2 = SQR(z); + double rho4 = SQR(rho2); + double z4 = SQR(z2); + double z6 = z4 * z2; + + double r2 = rho2 + z2; + double r = sqrt(r2); + + return bd->amplitude * (2.0 + r2 * (7 * rho4 * rho4 + 23 * rho4 * rho2 * z2 + 27 * rho4 * z4 + rho2 * (13 * z6 - 41 * r) + 2 * z2 * (z6 + 2 * r))) / pow(1.0 + r2 * r2 * r, 3); +} + +static int brill_init_check_options(BDContext *bd) +{ + BDPriv *s = bd->priv; + + switch (bd->q_func_type) { + case BD_Q_FUNC_GUNDLACH: + s->q_func = q_gundlach; + s->d2q_func = d2q_gundlach; + break; + case BD_Q_FUNC_EPPLEY: + s->q_func = q_eppley; + s->d2q_func = d2q_eppley; + + if (bd->eppley_n < 4) { + fprintf(stderr, "Invalid n: %d < 4\n", bd->eppley_n); + return -EINVAL; + } + break; + default: + fprintf(stderr, "Unknown q function type: %d\n", bd->q_func_type); + return -EINVAL; + } + + if (!bd->nb_coeffs_z) + bd->nb_coeffs_z = bd->nb_coeffs_rho; + + if (bd->nb_coeffs_rho != bd->nb_coeffs_z) { + fprintf(stderr, "Different ρ and z basis orders are not supported.\n"); + return -EINVAL; + } + + s->basis = &sb_even_basis; + + s->nb_coeffs = bd->nb_coeffs_rho * bd->nb_coeffs_z; + + s->nb_colloc_points_rho = bd->nb_coeffs_rho + bd->overdet_rho; + s->nb_colloc_points_z = bd->nb_coeffs_z + bd->overdet_z; + + s->nb_colloc_points = s->nb_colloc_points_rho * s->nb_colloc_points_z; + + s->colloc_grid_order_rho = bd->nb_coeffs_rho + bd->colloc_grid_offset_rho; + s->colloc_grid_order_z = bd->nb_coeffs_z + bd->colloc_grid_offset_z; + + return 0; +} + +int bd_solve(BDContext *bd) +{ + BDPriv *s = bd->priv; + int ret; + + if (bd->psi_minus1_coeffs) { + fprintf(stderr, "Solve called multiple times on the same context.\n"); + return -EINVAL; + } + + ret = brill_init_check_options(bd); + if (ret < 0) + return ret; + + ret = brill_solve(bd); + if (ret < 0) + return ret; + + bd->psi_minus1_coeffs = s->coeffs->data; + bd->stride = bd->nb_coeffs_rho; + + return 0; +} + +BDContext *bd_context_alloc(void) +{ + BDContext *bd = calloc(1, sizeof(*bd)); + + if (!bd) + return NULL; + + bd->q_func_type = BD_Q_FUNC_GUNDLACH; + bd->amplitude = 1.0; + bd->eppley_n = 5; + + bd->nb_coeffs_rho = 80; + bd->nb_coeffs_z = 0; + + bd->colloc_grid_offset_rho = 3; + bd->colloc_grid_offset_z = 3; + + bd->basis_scale_factor_rho = 3.0; + bd->basis_scale_factor_z = 3.0; + + bd->priv = calloc(1, sizeof(BDPriv)); + if (!bd->priv) { + free(bd); + return NULL; + } + + return bd; +} + +void bd_context_free(BDContext **pbd) +{ + BDContext *bd = *pbd; + BDPriv *s; + + if (!bd) + return; + + s = bd->priv; + + gsl_vector_free(s->coeffs); + + free(bd->priv); + free(bd); + *pbd = NULL; +} + +#if 0 +void brill_data(CCTK_ARGUMENTS) +{ + static BrillDataContext *prev_bd; + + DECLARE_CCTK_ARGUMENTS; + DECLARE_CCTK_PARAMETERS; + + BrillDataContext *bd; + + long double *basis_val_r, *basis_val_z; + + int64_t grid_size = CCTK_GFINDEX3D(cctkGH, + cctk_lsh[0] - 1, + cctk_lsh[1] - 1, + cctk_lsh[2] - 1) + 1; + + /* on the first run, solve the equation for ψ */ + if (!prev_bd) { + bd = init_bd(cctkGH, amplitude, eppley_n, basis_order_r, basis_order_z, + colloc_grid_offset_r, colloc_grid_offset_z, + scale_factor, overdet); + + brill_solve(bd); + + if (export_coeffs) + memcpy(brill_coeffs, bd->psi_coeffs->data, sizeof(*brill_coeffs) * bd->nb_coeffs); + + prev_bd = bd; + } else + bd = prev_bd; + + memset(kxx, 0, sizeof(*kxx) * grid_size); + memset(kyy, 0, sizeof(*kyy) * grid_size); + memset(kzz, 0, sizeof(*kzz) * grid_size); + memset(kxy, 0, sizeof(*kxy) * grid_size); + memset(kxz, 0, sizeof(*kxz) * grid_size); + memset(kyz, 0, sizeof(*kyz) * grid_size); + memset(gxz, 0, sizeof(*kxz) * grid_size); + memset(gyz, 0, sizeof(*kyz) * grid_size); + + /* precompute the basis values on the grid points */ + basis_val_r = malloc(sizeof(*basis_val_r) * bd->nb_coeffs_rho * cctk_lsh[1] * cctk_lsh[0]); + basis_val_z = malloc(sizeof(*basis_val_z) * bd->nb_coeffs_z * cctk_lsh[2]); + + for (int i = 0; i < cctk_lsh[2]; i++) { + CCTK_REAL zz = z[CCTK_GFINDEX3D(cctkGH, 0, 0, i)]; + for (int j = 0; j < bd->nb_coeffs_z; j++) + basis_val_z[i * bd->nb_coeffs_z + j] = bd->basis->eval(zz, j, bd->basis_scale_factor_rho); + } + + for (int j = 0; j < cctk_lsh[1]; j++) + for (int i = 0; i < cctk_lsh[0]; i++) { + CCTK_REAL xx = x[CCTK_GFINDEX3D(cctkGH, i, j, 0)]; + CCTK_REAL yy = y[CCTK_GFINDEX3D(cctkGH, i, j, 0)]; + CCTK_REAL r = sqrt(SQR(xx) + SQR(yy)); + + for (int k = 0; k < bd->nb_coeffs_rho; k++) + basis_val_r[(j * cctk_lsh[0] + i) * bd->nb_coeffs_rho + k] = bd->basis->eval(r, k, bd->basis_scale_factor_rho); + } + +#pragma omp parallel for + for (int k = 0; k < cctk_lsh[2]; k++) + for (int j = 0; j < cctk_lsh[1]; j++) + for (int i = 0; i < cctk_lsh[0]; i++) { + int index = CCTK_GFINDEX3D(cctkGH, i, j, k); + long double xx = x[index], yy = y[index], zz = z[index]; + + long double r2 = SQR(xx) + SQR(yy); + long double r = sqrtl(r2); + + long double q = bd->q_func(bd, r, zz); + long double e2q = expl(2 * q); + + long double psi = 1.0, psi4; + + for (int n = 0; n < bd->nb_coeffs_z; n++) + for (int m = 0; m < bd->nb_coeffs_rho; m++) + psi += bd->psi_coeffs->data[n * bd->nb_coeffs_z + m] * basis_val_r[(j * cctk_lsh[0] + i) * bd->nb_coeffs_rho + m] * basis_val_z[k * bd->nb_coeffs_z + n]; + + psi4 = SQR(SQR(psi)); + + if (r2 < EPS) + r2 = EPS; + + gxx[index] = psi4 * (e2q + (1 - e2q) * SQR(yy) / r2); + gyy[index] = psi4 * (e2q + (1 - e2q) * SQR(xx) / r2); + gzz[index] = psi4 * e2q; + gxy[index] = psi4 * (-(1.0 - e2q) * xx * yy / r2); + } +} +#endif diff --git a/brill_data.h b/brill_data.h new file mode 100644 index 0000000..125970b --- /dev/null +++ b/brill_data.h @@ -0,0 +1,128 @@ +/* + * Copyright 2014 Anton Khirnov + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +enum BDQFuncType { + /** + * q(ρ, z) = A ρ^2 exp(-ρ^2 - z^2) + */ + BD_Q_FUNC_GUNDLACH, + /** + * q(ρ, z) = A ρ^2 / (1 + r^n); where r^2 = ρ^2 + z^2 + */ + BD_Q_FUNC_EPPLEY, +}; + +typedef struct BDContext { + /** + * private data + */ + void *priv; + + /******************************* + * options, set by the caller * + *******************************/ + + /** + * The choice of the q function in the exponent. + * Defaults to BD_Q_FUNC_GUNDLACH. + */ + enum BDQFuncType q_func_type; + /** + * The amplitude in the q function. + * Defaults to 1. + */ + double amplitude; + /** + * For BD_Q_FUNC_EPPLEY, the power in the denominator. + * Must be >= 4. + * Defaults to 5. + */ + unsigned int eppley_n; + + /* the solver parameters */ + + /** + * The number of basis functions in the ρ direction. + * Defaults to 80. + */ + unsigned int nb_coeffs_rho; + /** + * The number of basis functions in the z direction. 0 means the same as + * nb_coeffs_rho. Defaults to 0. + * Using values other than 0 or nb_coeffs_rho is unsupported for now. + */ + unsigned int nb_coeffs_z; + + /** + * The difference between the number of collocation points and the number + * of basis functions in the rho direction. I.e. the number of collocation + * points used will be nb_coeffs_rho + overdet_rho. + * Defaults to 0; + */ + int overdet_rho; + /** + * Same as overdet_rho, but for the z direction. + */ + int overdet_z; + + /** + * The difference between the index of the collocation grid used for the ρ + * direction and nb_coeffs_rho. The 'extra' collocation points furthest from + * the origin are discarded. + * Defaults to 3. + */ + unsigned int colloc_grid_offset_rho; + /** + * Same as colloc_grid_offset_rho, but for the z direction. + */ + unsigned int colloc_grid_offset_z; + + /** + * The scaling factor used in the basis functions in the ρ direction. + * Defaults to 3. + */ + double basis_scale_factor_rho; + /** + * Same as basis_scale_factor_rho, but for the z direction + */ + double basis_scale_factor_z; + + /********** + * output * + **********/ + /** + * The coefficients of the solution expanded in the basis. + * The ρ index increases along rows, z along columns. + * + * The data is owned and managed by this library and is read-only for + * the caller. + */ + double *psi_minus1_coeffs; + /** + * The number of array elements between two rows. + */ + ptrdiff_t stride; +} BDContext; + +BDContext *bd_context_alloc(void); + +void bd_context_free(BDContext **bd); + +int bd_solve(BDContext *bd); -- cgit v1.2.3