1 files changed, 557 insertions, 0 deletions
diff --git a/brill_data.c b/brill_data.c
new file mode 100644
index 0000000..6eeadb6
--- /dev/null
+++ b/brill_data.c
@@ -0,0 +1,557 @@
+#include <errno.h>
+#include <limits.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <gsl/gsl_blas.h>
+#include <gsl/gsl_linalg.h>
+
+#include <lapacke.h>
+
+#include "brill_data.h"
+
+#define ACC_TEST 1
+
+#define MAX(x, y) ((x) > (y) ? (x) : (y))
+#define MIN(x, y) ((x) > (y) ? (y) : (x))
+#define SQR(x) ((x) * (x))
+#define SGN(x) ((x) >= 0.0 ? 1.0 : -1.0)
+
+/*
+ * small number to avoid r=0 singularities
+ */
+#define EPS 1E-08
+
+/* a set of basis functions */
+typedef struct BasisSet {
+    /* evaluate the idx-th basis function at the specified point*/
+    long double (*eval)      (long double coord, int idx, long double sf);
+    /* evaluate the first derivative of the idx-th basis function at the specified point*/
+    long double (*eval_diff1)(long double coord, int idx, long double sf);
+    /* evaluate the second derivative of the idx-th basis function at the specified point*/
+    long double (*eval_diff2)(long double coord, int idx, long double sf);
+    /**
+     * Get the idx-th collocation point for the specified order.
+     * idx runs from 0 to order - 1 (inclusive)
+     */
+    long double (*colloc_point)(int order, int idx, long double sf);
+} BasisSet;
+
+typedef struct BDPriv {
+    const BasisSet *basis;
+
+    double   (*q_func)(struct BDContext *bd, double rho, double z);
+    double (*d2q_func)(struct BDContext *bd, double rho, double z);
+
+    int nb_colloc_points;
+    int nb_colloc_points_rho;
+    int nb_colloc_points_z;
+
+    int colloc_grid_order_rho;
+    int colloc_grid_order_z;
+    int nb_coeffs;
+
+    gsl_vector *coeffs;
+} BDPriv;
+
+/*
+ * The basis of even (n = 2 * idx) SB functions (Boyd 2000, Ch 17.9)
+ * SB(x, n) = sin((n + 1) arccot(|x| / L))
+ * They are symmetric wrt origin and decay as 1/x in infinity.
+ */
+
+static long double sb_even_eval(long double coord, int idx, long double sf)
+{
+    long double val = (coord == 0.0) ? M_PI_2 : atanl(sf / fabsl(coord));
+
+    idx *= 2;   // even only
+
+    return sinl((idx + 1) * val);
+}
+
+static long double sb_even_eval_diff1(long double coord, int idx, long double sf)
+{
+    long double val = (coord == 0.0) ? M_PI_2 : atanl(sf / fabsl(coord));
+
+    idx *= 2;   // even only
+
+    return - sf * (idx + 1) * SGN(coord) * cosl((idx + 1) * val) / (SQR(sf) + SQR(coord));
+}
+
+static long double sb_even_eval_diff2(long double coord, int idx, long double sf)
+{
+    long double val = (coord == 0.0) ? M_PI_2 : atanl(sf / fabsl(coord));
+
+    idx *= 2;   // even only
+
+    return sf * (idx + 1) * SGN(coord) * (2 * fabsl(coord) * cosl((idx + 1) * val) - sf * (idx + 1) * sinl((idx + 1) * val)) / SQR(SQR(sf) + SQR(coord));
+}
+
+static long double sb_even_colloc_point(int order, int idx, long double sf)
+{
+    long double t;
+
+    idx = order - idx - 1;
+    order *= 2;
+
+    t = (idx + 2) * M_PI / (order + 4);
+    return sf / tanl(t);
+}
+
+static const BasisSet sb_even_basis = {
+    .eval         = sb_even_eval,
+    .eval_diff1   = sb_even_eval_diff1,
+    .eval_diff2   = sb_even_eval_diff2,
+    .colloc_point = sb_even_colloc_point,
+};
+
+static int brill_construct_matrix(BDContext *bd, gsl_matrix *mat,
+                                  gsl_vector *rhs)
+{
+    BDPriv *s = bd->priv;
+    const long double sfr = bd->basis_scale_factor_rho, sfz = bd->basis_scale_factor_z;
+
+    double *basis_val, *basis_dval, *basis_d2val;
+    int idx_coeff_rho, idx_coeff_z, idx_grid_rho, idx_grid_z;
+
+    /* precompute the basis values we will need */
+    // FIXME: assumes same number of points in ρ and z directions
+    basis_val   = malloc(sizeof(*basis_val)   * s->nb_colloc_points_rho * bd->nb_coeffs_rho);
+    basis_dval  = malloc(sizeof(*basis_dval)  * s->nb_colloc_points_rho * bd->nb_coeffs_rho);
+    basis_d2val = malloc(sizeof(*basis_d2val) * s->nb_colloc_points_rho * bd->nb_coeffs_rho);
+    for (int i = 0; i < s->nb_colloc_points_rho; i++) {
+        double coord = s->basis->colloc_point(s->colloc_grid_order_rho, i, sfr);
+        for (int j = 0; j < bd->nb_coeffs_rho; j++) {
+            basis_val  [i * bd->nb_coeffs_rho + j] = s->basis->eval(coord, j, sfr);
+            basis_dval [i * bd->nb_coeffs_rho + j] = s->basis->eval_diff1(coord, j, sfr);
+            basis_d2val[i * bd->nb_coeffs_rho + j] = s->basis->eval_diff2(coord, j, sfr);
+        }
+    }
+
+    for (idx_grid_z = 0; idx_grid_z < s->nb_colloc_points_z; idx_grid_z++) {
+        double z_physical = s->basis->colloc_point(s->colloc_grid_order_z, idx_grid_z, sfz);
+
+        for (idx_grid_rho = 0; idx_grid_rho < s->nb_colloc_points_rho; idx_grid_rho++) {
+            double x_physical = s->basis->colloc_point(s->colloc_grid_order_rho, idx_grid_rho, sfr);
+            double d2q        = s->d2q_func(bd, x_physical, z_physical);
+            int idx_grid         = idx_grid_z * s->nb_colloc_points_z + idx_grid_rho;
+
+            for (idx_coeff_z = 0; idx_coeff_z < bd->nb_coeffs_z; idx_coeff_z++)
+                for (idx_coeff_rho = 0; idx_coeff_rho < bd->nb_coeffs_rho; idx_coeff_rho++) {
+                    int idx_coeff = idx_coeff_z * bd->nb_coeffs_z + idx_coeff_rho;
+
+                    mat->data[idx_grid + mat->tda * idx_coeff] = basis_d2val[idx_grid_rho * bd->nb_coeffs_rho + idx_coeff_rho] * basis_val[idx_grid_z   * bd->nb_coeffs_z + idx_coeff_z] * x_physical +
+                                                                 basis_dval [idx_grid_rho * bd->nb_coeffs_rho + idx_coeff_rho] * basis_val[idx_grid_z   * bd->nb_coeffs_z + idx_coeff_z] +
+                                                                 basis_d2val[idx_grid_z   * bd->nb_coeffs_z   + idx_coeff_z]   * basis_val[idx_grid_rho * bd->nb_coeffs_rho + idx_coeff_rho] * x_physical +
+                                                                 basis_val  [idx_grid_rho * bd->nb_coeffs_rho + idx_coeff_rho] * basis_val[idx_grid_z   * bd->nb_coeffs_z + idx_coeff_z] * 0.25 * d2q * x_physical;
+                }
+            rhs->data[idx_grid] = -0.25 * d2q * x_physical;
+        }
+    }
+
+    free(basis_val);
+    free(basis_dval);
+    free(basis_d2val);
+
+    return 0;
+}
+
+static int brill_solve_linear(gsl_matrix *mat, gsl_vector **px, gsl_vector **prhs)
+{
+    int *ipiv;
+    gsl_matrix *mat_f;
+    double cond, ferr, berr, rpivot;
+
+    gsl_vector   *x = *px;
+    gsl_vector *rhs = *prhs;
+    char      equed = 'N';
+    int         ret = 0;
+
+    ipiv  = malloc(mat->size1 * sizeof(*ipiv));
+    mat_f = gsl_matrix_alloc(mat->size1, mat->size2);
+    if (!ipiv || !mat_f) {
+        ret = -ENOMEM;
+        goto fail;
+    }
+
+    LAPACKE_dgesvx(LAPACK_COL_MAJOR, 'N', 'N', mat->size1, 1,
+                   mat->data, mat->tda, mat_f->data, mat_f->tda, ipiv, &equed,
+                   NULL, NULL, rhs->data, rhs->size, x->data, x->size,
+                   &cond, &ferr, &berr, &rpivot);
+
+    fprintf(stderr, "LU factorization solution to a %zdx%zd matrix: "
+            "condition number %16.16g; forward error %16.16g backward error %16.16g\n",
+            mat->size1, mat->size2, cond, ferr, berr);
+fail:
+    gsl_matrix_free(mat_f);
+    free(ipiv);
+
+    return ret;
+}
+
+static int brill_solve_svd(gsl_matrix *mat, gsl_vector **px, gsl_vector **prhs)
+{
+    double *sv;
+    int rank;
+
+    gsl_vector   *x = *px;
+    gsl_vector *rhs = *prhs;
+
+    sv = malloc(sizeof(*sv) * x->size);
+    if (!sv)
+        return -ENOMEM;
+
+    LAPACKE_dgelsd(LAPACK_COL_MAJOR, rhs->size, x->size, 1, mat->data, mat->tda,
+                   rhs->data, rhs->size, sv, -1, &rank);
+
+    fprintf(stderr, "Least squares SVD solution to a %zdx%zd matrix: "
+            "rank %d, condition number %16.16g\n", mat->size1, mat->size2,
+            rank, sv[x->size - 1] / sv[0]);
+
+    gsl_vector_free(x);
+
+    *px         = rhs;
+    (*px)->size = mat->size1;
+    *prhs       = NULL;
+
+    free(sv);
+
+    return 0;
+}
+
+/*
+ * Solve the equation
+ * Δψ + ¼ ψ (∂²q/∂r² + ∂²q/∂z²) = 0
+ * for the coefficients of spectral approximation of ψ:
+ * ψ(r, z) = 1 + ΣaᵢⱼTᵢ(r)Tⱼ(z)
+ * where i =  { 0, ... , bd->nb_coeffs_rho };
+ *       j =  { 0, ... , bd->nb_coeffs_z };
+ * q(r, z) and Tᵢ(x) are defined by bd->q_func, bd->laplace_q_func and
+ * bd->basis.
+ *
+ * The cofficients are exported in bd->priv->coeffs
+ */
+static int brill_solve(BDContext *bd)
+{
+    BDPriv *s = bd->priv;
+    gsl_matrix *mat = NULL;
+    gsl_vector *coeffs = NULL, *rhs = NULL;
+
+#if ACC_TEST
+    gsl_vector *rhs_bkp = NULL;
+    gsl_matrix *mat_bkp = NULL;
+#endif
+
+    int ret = 0;
+
+    /* allocate and fill the pseudospectral matrix */
+    mat    = gsl_matrix_alloc (s->nb_coeffs, s->nb_colloc_points); // inverted order for lapack
+    coeffs = gsl_vector_alloc (s->nb_coeffs);
+    rhs    = gsl_vector_calloc(s->nb_colloc_points);
+    if (!mat || !coeffs || !rhs) {
+        ret = -ENOMEM;
+        goto fail;
+    }
+
+    /* fill the matrix */
+    ret = brill_construct_matrix(bd, mat, rhs);
+    if (ret < 0)
+        goto fail;
+
+#if ACC_TEST
+    /* make backups of the matrix and RHS, since they might be destroyed later */
+    mat_bkp = gsl_matrix_alloc(mat->size2, mat->size1);
+    rhs_bkp = gsl_vector_alloc(rhs->size);
+    if (!mat_bkp || !rhs_bkp) {
+        ret = -ENOMEM;
+        goto fail;
+    }
+
+    gsl_vector_memcpy(rhs_bkp, rhs);
+    gsl_matrix_transpose_memcpy(mat_bkp, mat);
+#endif
+
+    /* solve for the coeffs */
+    if (s->nb_colloc_points > s->nb_coeffs)
+        ret = brill_solve_svd(mat, &coeffs, &rhs);
+    else
+        ret = brill_solve_linear(mat, &coeffs, &rhs);
+
+    /* export the result */
+    s->coeffs = coeffs;
+
+#if ACC_TEST
+    {
+        double min, max, rmin, rmax;
+
+        gsl_vector_minmax(rhs_bkp, &rmin, &rmax);
+        gsl_blas_dgemv(CblasNoTrans, 1.0, mat_bkp, coeffs, -1.0, rhs_bkp);
+        gsl_vector_minmax(rhs_bkp, &min, &max);
+
+        fprintf(stderr, "max(|A·x - rhs|) / max(|rhs|): %16.16g\n",
+                MAX(fabs(min), fabs(max)) / MAX(fabs(rmin), fabs(rmax)));
+    }
+#endif
+
+fail:
+
+#if ACC_TEST
+    gsl_matrix_free(mat_bkp);
+    gsl_vector_free(rhs_bkp);
+#endif
+
+    if (ret < 0)
+        gsl_vector_free(coeffs);
+
+    gsl_vector_free(rhs);
+    gsl_matrix_free(mat);
+
+    return ret;
+}
+
+// q function form from PHYSICAL REVIEW D 88, 103009 (2013)
+// with σ and ρ_0 hardcoded to 1 for now
+static double q_gundlach(BDContext *bd, double rho, double z)
+{
+    return bd->amplitude * SQR(rho) * exp(- (SQR(rho) + SQR(z)));
+}
+
+static double d2q_gundlach(BDContext *bd, double rho, double z)
+{
+    double r2 = SQR(rho);
+    double z2 = SQR(z);
+    double e  = exp(-r2 - z2);
+
+    return 2 * bd->amplitude * e * (1.0 + 2 * r2 * (r2 + z2 - 3));
+}
+
+static double q_eppley(BDContext *bd, double rho, double z)
+{
+    double r2 = SQR(rho) + SQR(z);
+    return bd->amplitude * SQR(rho) / (1.0 + pow(r2, bd->eppley_n / 2.0));
+}
+
+static double d2q_eppley(BDContext *bd, double rho, double z)
+{
+    double rho2 = SQR(rho);
+    double z2   = SQR(z);
+    double rho4 = SQR(rho2);
+    double z4 = SQR(z2);
+    double z6 = z4 * z2;
+
+    double r2 = rho2 + z2;
+    double r = sqrt(r2);
+
+    return bd->amplitude * (2.0 + r2 * (7 * rho4 * rho4 + 23 * rho4 * rho2 * z2 + 27 * rho4 * z4 + rho2 * (13 * z6 - 41 * r) + 2 * z2 * (z6 + 2 * r))) / pow(1.0 + r2 * r2 * r, 3);
+}
+
+static int brill_init_check_options(BDContext *bd)
+{
+    BDPriv *s = bd->priv;
+
+    switch (bd->q_func_type) {
+    case BD_Q_FUNC_GUNDLACH:
+        s->q_func   = q_gundlach;
+        s->d2q_func = d2q_gundlach;
+        break;
+    case BD_Q_FUNC_EPPLEY:
+        s->q_func   = q_eppley;
+        s->d2q_func = d2q_eppley;
+
+        if (bd->eppley_n < 4) {
+            fprintf(stderr, "Invalid n: %d < 4\n", bd->eppley_n);
+            return -EINVAL;
+        }
+        break;
+    default:
+        fprintf(stderr, "Unknown q function type: %d\n", bd->q_func_type);
+        return -EINVAL;
+    }
+
+    if (!bd->nb_coeffs_z)
+        bd->nb_coeffs_z = bd->nb_coeffs_rho;
+
+    if (bd->nb_coeffs_rho != bd->nb_coeffs_z) {
+        fprintf(stderr, "Different ρ and z basis orders are not supported.\n");
+        return -EINVAL;
+    }
+
+    s->basis = &sb_even_basis;
+
+    s->nb_coeffs = bd->nb_coeffs_rho * bd->nb_coeffs_z;
+
+    s->nb_colloc_points_rho = bd->nb_coeffs_rho + bd->overdet_rho;
+    s->nb_colloc_points_z   = bd->nb_coeffs_z   + bd->overdet_z;
+
+    s->nb_colloc_points     = s->nb_colloc_points_rho * s->nb_colloc_points_z;
+
+    s->colloc_grid_order_rho = bd->nb_coeffs_rho + bd->colloc_grid_offset_rho;
+    s->colloc_grid_order_z   = bd->nb_coeffs_z   + bd->colloc_grid_offset_z;
+
+    return 0;
+}
+
+int bd_solve(BDContext *bd)
+{
+    BDPriv *s = bd->priv;
+    int ret;
+
+    if (bd->psi_minus1_coeffs) {
+        fprintf(stderr, "Solve called multiple times on the same context.\n");
+        return -EINVAL;
+    }
+
+    ret = brill_init_check_options(bd);
+    if (ret < 0)
+        return ret;
+
+    ret = brill_solve(bd);
+    if (ret < 0)
+        return ret;
+
+    bd->psi_minus1_coeffs = s->coeffs->data;
+    bd->stride            = bd->nb_coeffs_rho;
+
+    return 0;
+}
+
+BDContext *bd_context_alloc(void)
+{
+    BDContext *bd = calloc(1, sizeof(*bd));
+
+    if (!bd)
+        return NULL;
+
+    bd->q_func_type = BD_Q_FUNC_GUNDLACH;
+    bd->amplitude   = 1.0;
+    bd->eppley_n    = 5;
+
+    bd->nb_coeffs_rho = 80;
+    bd->nb_coeffs_z   = 0;
+
+    bd->colloc_grid_offset_rho = 3;
+    bd->colloc_grid_offset_z   = 3;
+
+    bd->basis_scale_factor_rho = 3.0;
+    bd->basis_scale_factor_z   = 3.0;
+
+    bd->priv = calloc(1, sizeof(BDPriv));
+    if (!bd->priv) {
+        free(bd);
+        return NULL;
+    }
+
+    return bd;
+}
+
+void bd_context_free(BDContext **pbd)
+{
+    BDContext *bd = *pbd;
+    BDPriv *s;
+
+    if (!bd)
+        return;
+
+    s = bd->priv;
+
+    gsl_vector_free(s->coeffs);
+
+    free(bd->priv);
+    free(bd);
+    *pbd = NULL;
+}
+
+#if 0
+void brill_data(CCTK_ARGUMENTS)
+{
+    static BrillDataContext *prev_bd;
+
+    DECLARE_CCTK_ARGUMENTS;
+    DECLARE_CCTK_PARAMETERS;
+
+    BrillDataContext *bd;
+
+    long double *basis_val_r, *basis_val_z;
+
+    int64_t grid_size = CCTK_GFINDEX3D(cctkGH,
+                                       cctk_lsh[0] - 1,
+                                       cctk_lsh[1] - 1,
+                                       cctk_lsh[2] - 1) + 1;
+
+    /* on the first run, solve the equation for ψ */
+    if (!prev_bd) {
+        bd = init_bd(cctkGH, amplitude, eppley_n, basis_order_r, basis_order_z,
+                     colloc_grid_offset_r, colloc_grid_offset_z,
+                     scale_factor, overdet);
+
+        brill_solve(bd);
+
+        if (export_coeffs)
+            memcpy(brill_coeffs, bd->psi_coeffs->data, sizeof(*brill_coeffs) * bd->nb_coeffs);
+
+        prev_bd = bd;
+    } else
+        bd = prev_bd;
+
+    memset(kxx, 0, sizeof(*kxx) * grid_size);
+    memset(kyy, 0, sizeof(*kyy) * grid_size);
+    memset(kzz, 0, sizeof(*kzz) * grid_size);
+    memset(kxy, 0, sizeof(*kxy) * grid_size);
+    memset(kxz, 0, sizeof(*kxz) * grid_size);
+    memset(kyz, 0, sizeof(*kyz) * grid_size);
+    memset(gxz, 0, sizeof(*kxz) * grid_size);
+    memset(gyz, 0, sizeof(*kyz) * grid_size);
+
+    /* precompute the basis values on the grid points */
+    basis_val_r = malloc(sizeof(*basis_val_r) * bd->nb_coeffs_rho * cctk_lsh[1] * cctk_lsh[0]);
+    basis_val_z = malloc(sizeof(*basis_val_z) * bd->nb_coeffs_z * cctk_lsh[2]);
+
+    for (int i = 0; i < cctk_lsh[2]; i++) {
+        CCTK_REAL zz = z[CCTK_GFINDEX3D(cctkGH, 0, 0, i)];
+        for (int j = 0; j < bd->nb_coeffs_z; j++)
+            basis_val_z[i * bd->nb_coeffs_z + j] = bd->basis->eval(zz, j, bd->basis_scale_factor_rho);
+    }
+
+    for (int j = 0; j < cctk_lsh[1]; j++)
+        for (int i = 0; i < cctk_lsh[0]; i++) {
+            CCTK_REAL xx = x[CCTK_GFINDEX3D(cctkGH, i, j, 0)];
+            CCTK_REAL yy = y[CCTK_GFINDEX3D(cctkGH, i, j, 0)];
+            CCTK_REAL r = sqrt(SQR(xx) + SQR(yy));
+
+            for (int k = 0; k < bd->nb_coeffs_rho; k++)
+                basis_val_r[(j * cctk_lsh[0] + i) * bd->nb_coeffs_rho + k] = bd->basis->eval(r, k, bd->basis_scale_factor_rho);
+        }
+
+#pragma omp parallel for
+    for (int k = 0; k < cctk_lsh[2]; k++)
+        for (int j = 0; j < cctk_lsh[1]; j++)
+            for (int i = 0; i < cctk_lsh[0]; i++) {
+                int    index = CCTK_GFINDEX3D(cctkGH, i, j, k);
+                long double xx = x[index], yy = y[index], zz = z[index];
+
+                long double r2 = SQR(xx) + SQR(yy);
+                long double r  = sqrtl(r2);
+
+                long double q   = bd->q_func(bd, r, zz);
+                long double e2q = expl(2 * q);
+
+                long double psi = 1.0, psi4;
+
+                for (int n = 0; n < bd->nb_coeffs_z; n++)
+                    for (int m = 0; m < bd->nb_coeffs_rho; m++)
+                        psi += bd->psi_coeffs->data[n * bd->nb_coeffs_z + m] * basis_val_r[(j * cctk_lsh[0] + i) * bd->nb_coeffs_rho + m] * basis_val_z[k * bd->nb_coeffs_z + n];
+
+                psi4 = SQR(SQR(psi));
+
+                if (r2 < EPS)
+                    r2 = EPS;
+
+                gxx[index] = psi4 * (e2q + (1 - e2q) * SQR(yy) / r2);
+                gyy[index] = psi4 * (e2q + (1 - e2q) * SQR(xx) / r2);
+                gzz[index] = psi4 * e2q;
+                gxy[index] = psi4 * (-(1.0 - e2q) * xx * yy / r2);
+            }
+}
+#endif