From d451e140926584ae59b0a0552d5463b9d6114ac2 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Sun, 28 Aug 2016 11:20:16 +0200
Subject: Switch to polar coordinates in the solver.

---
 README     |   5 +-
 basis.c    |  46 +++++++++++++++++-
 basis.h    |   2 +
 eval.c     | 158 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 init.c     |   9 +++-
 internal.h |   6 +++
 solve.c    |  93 +++++++++++++++++++++++++++++++++++-
 7 files changed, 302 insertions(+), 17 deletions(-)

diff --git a/README b/README
index 9f5492f..7d04c1b 100644
--- a/README
+++ b/README
@@ -7,8 +7,9 @@ relativity simulations.
 
 The construction involves solving an elliptic equation for the conformal factor.
 This is done using a pseudo-spectral method by expanding the conformal factor in
-the basis of SB functions (see Boyd 2000, chapter 17.9). The pseudo-spectral
-matrix is inverted with LAPACK.
+the basis of SB functions (see Boyd 2000, chapter 17.9) in the radial direction
+and even cosines in the angular direction. The pseudo-spectral matrix is
+inverted with LAPACK.
 
 Building and installation
 =========================
diff --git a/basis.c b/basis.c
index bb3a8e9..cad7222 100644
--- a/basis.c
+++ b/basis.c
@@ -79,13 +79,29 @@ static double sb_even_eval_diff2(const BasisSetContext *s, double coord, int idx
     return sf * (idx + 1) * (2 * coord * cos((idx + 1) * val) - sf * (idx + 1) * sin((idx + 1) * val)) / SQR(SQR(sf) + SQR(coord));
 }
 
+static void sb_eval_multi(const BasisSetContext *s, double coord,
+                          double **out, int order_start, int order_end)
+{
+    const double sf = s->sf;
+    const double val = atan2(sf, coord);
+
+    for (int i = order_start; i < order_end; i++) {
+        const int idx = 2 * i;
+        const double sval = sin((idx + 1) * val);
+        const double cval = cos((idx + 1) * val);
+        out[0][i] = sval;
+        out[1][i] = - sf * (idx + 1) * cval / (SQR(sf) + SQR(coord));
+        out[2][i] = sf * (idx + 1) * (2 * coord * cval - sf * (idx + 1) * sval) / SQR(SQR(sf) + SQR(coord));
+    }
+}
+
 static double sb_even_colloc_point(const BasisSetContext *s, int order, int idx)
 {
     double t;
 
     idx = order - idx - 1;
 
-    t = (idx + 2) * M_PI / (2 * order + 2);
+    t = (idx + 2) * M_PI / (2 * order + 3);
     return s->sf / tan(t);
 }
 
@@ -98,6 +114,7 @@ static const BasisSet sb_even_basis = {
     .eval         = sb_even_eval,
     .eval_diff1   = sb_even_eval_diff1,
     .eval_diff2   = sb_even_eval_diff2,
+    .eval_multi   = sb_eval_multi,
     .colloc_point = sb_even_colloc_point,
 };
 
@@ -116,6 +133,18 @@ static double cos_even_eval_diff2(const BasisSetContext *s, double coord, int id
     return -4 * SQR(idx) * cos(2 * idx * coord);
 }
 
+static void cos_even_eval_multi(const BasisSetContext *s, double coord,
+                                double **out, int order_start, int order_end)
+{
+    for (int i = order_start; i < order_end; i++) {
+        const double sval = sin(2 * i * coord);
+        const double cval = cos(2 * i * coord);
+        out[0][i] = cval;
+        out[1][i] = -2 * i * sval;
+        out[2][i] = -4 * SQR(i) * cval;
+    }
+}
+
 static double cos_even_colloc_point(const BasisSetContext *s, int order, int idx)
 {
     return M_PI * idx / (2 * order);
@@ -125,6 +154,7 @@ static const BasisSet cos_even_basis = {
     .eval         = cos_even_eval,
     .eval_diff1   = cos_even_eval_diff1,
     .eval_diff2   = cos_even_eval_diff2,
+    .eval_multi   = cos_even_eval_multi,
     .colloc_point = cos_even_colloc_point,
 };
 
@@ -141,6 +171,20 @@ double bdi_basis_eval(BasisSetContext *s, enum BSEvalType type, double coord, in
     return eval(s, coord, order);
 }
 
+double bdi_basis_eval_multi(BasisSetContext *s, double coord, double **out,
+                            int order_start, int order_end)
+{
+    if (s->bs->eval_multi)
+        s->bs->eval_multi(s, coord, out, order_start, order_end);
+    else {
+        for (int i = order_start; i < order_end; i++) {
+            out[0][i] = s->bs->eval(s, coord, i);
+            out[1][i] = s->bs->eval_diff1(s, coord, i);
+            out[2][i] = s->bs->eval_diff2(s, coord, i);
+        }
+    }
+}
+
 double bdi_basis_colloc_point(BasisSetContext *s, int idx, int order)
 {
     return s->bs->colloc_point(s, order, idx);
diff --git a/basis.h b/basis.h
index 68c9597..8a3fefa 100644
--- a/basis.h
+++ b/basis.h
@@ -36,6 +36,8 @@ void bdi_basis_free(BasisSetContext **s);
 
 double bdi_basis_eval(BasisSetContext *s, enum BSEvalType type,
                       double coord, int order);
+double bdi_basis_eval_multi(BasisSetContext *s, double coord, double **out,
+                            int order_start, int order_end);
 double bdi_basis_colloc_point(BasisSetContext *s, int idx, int order);
 
 #endif /* BRILL_DATA_BASIS_H */
diff --git a/eval.c b/eval.c
index e724447..a27d0f5 100644
--- a/eval.c
+++ b/eval.c
@@ -29,29 +29,139 @@
 #include "internal.h"
 #include "qfunc.h"
 
-int bd_eval_psi(const BDContext *bd,
-                const double *rho, int nb_coords_rho,
-                const double *z,   int nb_coords_z,
-                const unsigned int diff_order[2],
-                double *psi, unsigned int psi_stride)
+static int eval_psi_radial(const BDContext *bd,
+                           const double *rho, int nb_coords_rho,
+                           const double *z,   int nb_coords_z,
+                           const unsigned int diff_order[2],
+                           double *psi, unsigned int psi_stride)
 {
     BDPriv *s = bd->priv;
     enum BSEvalType type;
 
-    double *basis_val_rho = NULL, *basis_val_z = NULL;
+    double *basis_val[2][3] = { { NULL } };
 
     double add = 0.0;
 
     int ret = 0;
 
-    if (diff_order[0] > 2 || diff_order[1] > 2) {
-        bdi_log(bd, 0, "Derivatives of higher order than 2 are not supported.\n");
-        return -ENOSYS;
-    }
-
     if (diff_order[0] == 0 && diff_order[1] == 0)
         add = 1.0;
 
+    /* precompute the basis values on the grid points */
+    for (int i = 0; i < ARRAY_ELEMS(basis_val[0]); i++) {
+        posix_memalign((void**)&basis_val[0][i], REQ_ALIGNMENT(char), sizeof(*basis_val[0][i]) * s->nb_coeffs[0]);
+        posix_memalign((void**)&basis_val[1][i], REQ_ALIGNMENT(char), sizeof(*basis_val[1][i]) * s->nb_coeffs[1]);
+        if (!basis_val[0][i] || !basis_val[1][i]) {
+            ret = -ENOMEM;
+            goto fail;
+        }
+    }
+
+#define   BASIS0 (basis_val[0][0][n])
+#define  DBASIS0 (basis_val[0][1][n])
+#define D2BASIS0 (basis_val[0][2][n])
+#define   BASIS1 (basis_val[1][0][m])
+#define  DBASIS1 (basis_val[1][1][m])
+#define D2BASIS1 (basis_val[1][2][m])
+
+    for (int i = 0; i < nb_coords_z; i++) {
+        double *dst = psi + i * psi_stride;
+        double zz = z[i];
+
+        for (int j = 0; j < nb_coords_rho; j++) {
+            double ppsi = add;
+            double rrho = rho[j];
+
+            double r    = sqrt(SQR(rrho) + SQR(zz));
+            double phi  = atan2(zz, rrho);
+
+            bdi_basis_eval_multi(s->basis[0], r,   basis_val[0], 0, s->nb_coeffs[0]);
+            bdi_basis_eval_multi(s->basis[1], phi, basis_val[1], 0, s->nb_coeffs[1]);
+
+            if (diff_order[0] == 0 && diff_order[1] == 0) {
+                for (int m = 0; m < s->nb_coeffs[1]; m++) {
+                    double tmp = 0.0;
+                    for (int n = 0; n < s->nb_coeffs[0]; n++) {
+                        tmp += s->coeffs[m * s->coeffs_stride + n] * BASIS0;
+                    }
+                    ppsi += tmp * BASIS1;
+                }
+            } else if (diff_order[0] == 2 && diff_order[1] == 0) {
+                for (int m = 0; m < s->nb_coeffs[1]; m++) {
+                    for (int n = 0; n < s->nb_coeffs[0]; n++) {
+                        double basis_val_20 =  ((r > EPS) ? (SQR(rrho / r) * D2BASIS0 * BASIS1 + SQR(zz / SQR(r)) * BASIS0 * D2BASIS1
+                                                             + (1.0 - SQR(rrho / r)) / r * DBASIS0 * BASIS1
+                                                             + 2 * rrho * zz / SQR(SQR(r)) * BASIS0 * DBASIS1
+                                                             - 2 * zz * rrho / (r * SQR(r)) * DBASIS0 * DBASIS1) : 0.0);
+                        ppsi += s->coeffs[m * s->coeffs_stride + n] * basis_val_20;
+                    }
+                }
+            } else if (diff_order[0] == 0 && diff_order[1] == 2) {
+                for (int m = 0; m < s->nb_coeffs[1]; m++) {
+                    for (int n = 0; n < s->nb_coeffs[0]; n++) {
+                        double basis_val_02 =  ((r > EPS) ? (SQR(zz / r) * D2BASIS0 * BASIS1 + SQR(rrho / SQR(r)) * BASIS0 * D2BASIS1
+                                                               + (1.0 - SQR(zz / r)) / r * DBASIS0 * BASIS1
+                                                               - 2 * rrho * zz / SQR(SQR(r)) * BASIS0 * DBASIS1
+                                                               + 2 * zz * rrho / (r * SQR(r)) * DBASIS0 * DBASIS1) : 0.0);
+                        ppsi += s->coeffs[m * s->coeffs_stride + n] * basis_val_02;
+                    }
+                }
+            } else if (diff_order[0] == 1 && diff_order[1] == 0) {
+                for (int m = 0; m < s->nb_coeffs[1]; m++) {
+                    for (int n = 0; n < s->nb_coeffs[0]; n++) {
+                        double basis_val_10 = ((r > EPS) ? (DBASIS0 * BASIS1 * rrho / r - BASIS0 * DBASIS1 * zz / SQR(r)) : 0.0);
+
+                        ppsi += s->coeffs[m * s->coeffs_stride + n] * basis_val_10;
+                    }
+                }
+            } else if (diff_order[0] == 0 && diff_order[1] == 1) {
+                for (int m = 0; m < s->nb_coeffs[1]; m++) {
+                    for (int n = 0; n < s->nb_coeffs[0]; n++) {
+                        double basis_val_01 = ((r > EPS) ? (DBASIS0 * BASIS1 * zz / r + BASIS0 * DBASIS1 * rrho / SQR(r)) : 0.0);
+
+                        ppsi += s->coeffs[m * s->coeffs_stride + n] * basis_val_01;
+                    }
+                }
+            } else if (diff_order[0] == 1 && diff_order[1] == 1) {
+                for (int m = 0; m < s->nb_coeffs[1]; m++) {
+                    for (int n = 0; n < s->nb_coeffs[0]; n++) {
+                        double basis_val_11 = ((r > EPS) ? (rrho * zz / SQR(r) * D2BASIS0 * BASIS1 - rrho * zz / SQR(SQR(r)) * BASIS0 * D2BASIS1
+                                                            - rrho * zz / (r * SQR(r)) * DBASIS0 * BASIS1
+                                                            - (1.0 - SQR(zz / r)) / SQR(r) * BASIS0 * DBASIS1
+                                                            + (SQR(rrho) - SQR(zz)) / (r * SQR(r)) * DBASIS0 * DBASIS1) : 0.0);
+
+                        ppsi += s->coeffs[m * s->coeffs_stride + n] * basis_val_11;
+                    }
+                }
+            }
+            *dst++ = ppsi;
+        }
+    }
+
+fail:
+    for (int i = 0; i < ARRAY_ELEMS(basis_val[0]); i++) {
+        free(basis_val[0][i]);
+        free(basis_val[1][i]);
+    }
+
+    return ret;
+}
+
+static int eval_psi_cylindric(const BDContext *bd,
+                              const double *rho, int nb_coords_rho,
+                              const double *z,   int nb_coords_z,
+                              const unsigned int diff_order[2],
+                              double *psi, unsigned int psi_stride)
+{
+    BDPriv *s = bd->priv;
+    enum BSEvalType type;
+
+    double *basis_val_rho = NULL, *basis_val_z = NULL;
+
+    double add = 0.0;
+
+    int ret = 0;
+
     /* precompute the basis values on the grid points */
     basis_val_rho = malloc(sizeof(*basis_val_rho) * s->nb_coeffs[0] * nb_coords_rho);
     basis_val_z   = malloc(sizeof(*basis_val_z)   * s->nb_coeffs[1] * nb_coords_z);
@@ -105,6 +215,32 @@ fail:
 
 }
 
+int bd_eval_psi(const BDContext *bd,
+                const double *rho, int nb_coords_rho,
+                const double *z,   int nb_coords_z,
+                const unsigned int diff_order[2],
+                double *psi, unsigned int psi_stride)
+{
+    BDPriv *s = bd->priv;
+    int ret = 0;
+
+    if (diff_order[0] > 2 || diff_order[1] > 2) {
+        bdi_log(bd, 0, "Derivatives of higher order than 2 are not supported.\n");
+        return -ENOSYS;
+    }
+
+    switch (s->coord_system) {
+    case COORD_SYSTEM_CYLINDRICAL: ret = eval_psi_cylindric(bd, rho, nb_coords_rho, z, nb_coords_z,
+                                                            diff_order, psi, psi_stride);
+                                   break;
+    case COORD_SYSTEM_RADIAL:      ret = eval_psi_radial(bd, rho, nb_coords_rho, z, nb_coords_z,
+                                                         diff_order, psi, psi_stride);
+                                   break;
+    }
+
+    return ret;
+
+}
 
 int bd_eval_metric(const BDContext *bd, const double *rho, int nb_coords_rho,
                    const double *z, int nb_coords_z,
diff --git a/init.c b/init.c
index fa15325..72dabed 100644
--- a/init.c
+++ b/init.c
@@ -36,8 +36,15 @@ static int brill_init_check_options(BDContext *bd)
     if (!s->qfunc)
         return ret;
 
+    s->coord_system = COORD_SYSTEM_RADIAL;
+
     s->basis[0] = bdi_basis_init(BASIS_FAMILY_SB_EVEN, bd->basis_scale_factor[0]);
-    s->basis[1] = bdi_basis_init(BASIS_FAMILY_SB_EVEN, bd->basis_scale_factor[0]);
+
+    if (s->coord_system == COORD_SYSTEM_RADIAL)
+        s->basis[1] = bdi_basis_init(BASIS_FAMILY_COS_EVEN, 0);
+    else
+        s->basis[1] = bdi_basis_init(BASIS_FAMILY_SB_EVEN, bd->basis_scale_factor[0]);
+
     if (!s->basis[0] || !s->basis[1])
         return -ENOMEM;
 
diff --git a/internal.h b/internal.h
index 501d390..372cc0d 100644
--- a/internal.h
+++ b/internal.h
@@ -38,8 +38,14 @@
  */
 #define EPS 1E-08
 
+enum CoordSystem {
+    COORD_SYSTEM_CYLINDRICAL,
+    COORD_SYSTEM_RADIAL,
+};
 
 typedef struct BDPriv {
+    int coord_system;
+
     BasisSetContext *basis[2];
     QFuncContext    *qfunc;
 
diff --git a/solve.c b/solve.c
index fe47832..fa527a9 100644
--- a/solve.c
+++ b/solve.c
@@ -34,7 +34,7 @@
 #include "internal.h"
 #include "qfunc.h"
 
-static int brill_construct_matrix(const BDContext *bd, double *mat,
+static int brill_construct_matrix_cylindrical(const BDContext *bd, double *mat,
                                   double *rhs)
 {
     BDPriv *s = bd->priv;
@@ -104,6 +104,90 @@ fail:
     return ret;
 }
 
+static int brill_construct_matrix_polar(const BDContext *bd, double *mat,
+                                        double *rhs)
+{
+    BDPriv *s = bd->priv;
+
+    double *basis_val, *basis_dval, *basis_d2val;
+    int idx_coeff_0, idx_coeff_1, idx_grid_0, idx_grid_1;
+
+    double *basis[2][3] = { { NULL } };
+    int ret = 0;
+
+    /* precompute the basis values we will need */
+    for (int i = 0; i < ARRAY_ELEMS(basis); i++) {
+        for (int j = 0; j < ARRAY_ELEMS(basis[i]); j++) {
+            basis[i][j] = malloc(sizeof(*basis[i][j]) * s->nb_colloc_points[i] * s->nb_coeffs[i]);
+            if (!basis[i][j]) {
+                ret = -ENOMEM;
+                goto fail;
+            }
+        }
+        for (int j = 0; j < s->nb_colloc_points[i]; j++) {
+            double coord = bdi_basis_colloc_point(s->basis[i], j, s->nb_coeffs[i]);
+            for (int k = 0; k < s->nb_coeffs[i]; k++) {
+                basis[i][0][j * s->nb_coeffs[i] + k] = bdi_basis_eval(s->basis[i], BS_EVAL_TYPE_VALUE, coord, k);
+                basis[i][1][j * s->nb_coeffs[i] + k] = bdi_basis_eval(s->basis[i], BS_EVAL_TYPE_DIFF1, coord, k);
+                basis[i][2][j * s->nb_coeffs[i] + k] = bdi_basis_eval(s->basis[i], BS_EVAL_TYPE_DIFF2, coord, k);
+            }
+        }
+    }
+
+#define   BASIS0 (basis[0][0][idx_grid_0 * s->nb_coeffs[0] + idx_coeff_0])
+#define  DBASIS0 (basis[0][1][idx_grid_0 * s->nb_coeffs[0] + idx_coeff_0])
+#define D2BASIS0 (basis[0][2][idx_grid_0 * s->nb_coeffs[0] + idx_coeff_0])
+#define   BASIS1 (basis[1][0][idx_grid_1 * s->nb_coeffs[1] + idx_coeff_1])
+#define  DBASIS1 (basis[1][1][idx_grid_1 * s->nb_coeffs[1] + idx_coeff_1])
+#define D2BASIS1 (basis[1][2][idx_grid_1 * s->nb_coeffs[1] + idx_coeff_1])
+
+    for (idx_grid_1 = 0; idx_grid_1 < s->nb_colloc_points[1]; idx_grid_1++) {
+        double phi = bdi_basis_colloc_point(s->basis[1], idx_grid_1, s->nb_coeffs[1]);
+
+        for (idx_grid_0 = 0; idx_grid_0 < s->nb_colloc_points[0]; idx_grid_0++) {
+            double r = bdi_basis_colloc_point(s->basis[0], idx_grid_0, s->nb_coeffs[0]);
+
+            double x = r * cos(phi);
+            double z = r * sin(phi);
+
+            double d2q = bdi_qfunc_eval(s->qfunc, x, z, 2) +
+                         bdi_qfunc_eval(s->qfunc, x, z, 6);
+            int idx_grid  = idx_grid_1 * s->nb_colloc_points[0] + idx_grid_0;
+
+            for (idx_coeff_1 = 0; idx_coeff_1 < s->nb_coeffs[1]; idx_coeff_1++)
+                for (idx_coeff_0 = 0; idx_coeff_0 < s->nb_coeffs[0]; idx_coeff_0++) {
+                    int idx_coeff = idx_coeff_1 * s->nb_coeffs[0] + idx_coeff_0;
+
+                    double basis_val_20 =  ((r > EPS) ? (SQR(x / r) * D2BASIS0 * BASIS1 + SQR(z / SQR(r)) * BASIS0 * D2BASIS1
+                                                         + (1.0 - SQR(x / r)) / r * DBASIS0 * BASIS1
+                                                         + 2 * x * z / SQR(SQR(r)) * BASIS0 * DBASIS1
+                                                         - 2 * z * x / (r * SQR(r)) * DBASIS0 * DBASIS1) : 0.0);
+                    double basis_val_02 =  ((r > EPS) ? (SQR(z / r) * D2BASIS0 * BASIS1 + SQR(x / SQR(r)) * BASIS0 * D2BASIS1
+                                                           + (1.0 - SQR(z / r)) / r * DBASIS0 * BASIS1
+                                                           - 2 * x * z / SQR(SQR(r)) * BASIS0 * DBASIS1
+                                                           + 2 * z * x / (r * SQR(r)) * DBASIS0 * DBASIS1) : 0.0);
+                    double basis_val_10 = ((r > EPS) ? (DBASIS0 * BASIS1 * x / r - BASIS0 * DBASIS1 * z / SQR(r)) : 0.0);
+
+                    double val  = basis_val_20 + basis_val_02 + BASIS0 * BASIS1 * 0.25 * d2q;
+                    if (fabs(x) > EPS)
+                        val += basis_val_10 / fabs(x);
+                    else
+                        val += basis_val_20;
+
+                    mat[idx_grid + NB_COLLOC_POINTS(s) * idx_coeff] = val;
+                }
+            rhs[idx_grid] = -0.25 * d2q;
+        }
+    }
+
+fail:
+    for (int i = 0; i < ARRAY_ELEMS(basis); i++)
+        for (int j = 0; j < ARRAY_ELEMS(basis[i]); j++)
+            free(basis[i][j]);
+
+    return ret;
+}
+
 static int brill_solve_linear(const BDContext *bd, double *mat, double **px, double **prhs)
 {
     const BDPriv *s = bd->priv;
@@ -223,7 +307,12 @@ int bdi_solve(BDContext *bd)
     }
 
     /* fill the matrix */
-    ret = brill_construct_matrix(bd, mat, rhs);
+    switch (s->coord_system) {
+    case COORD_SYSTEM_CYLINDRICAL: ret = brill_construct_matrix_cylindrical(bd, mat, rhs);
+                                   break;
+    case COORD_SYSTEM_RADIAL:      ret = brill_construct_matrix_polar(bd, mat, rhs);
+                                   break;
+    }
     if (ret < 0)
         goto fail;
 
-- 
cgit v1.2.3