1 files changed, 230 insertions, 0 deletions
diff --git a/solve.c b/solve.c
new file mode 100644
index 0000000..ab8fc39
--- /dev/null
+++ b/solve.c
@@ -0,0 +1,230 @@
+/*
+ * Copyright 2014-2015 Anton Khirnov <anton@khirnov.net>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * @file
+ * the actual pseudo-spectral solver code
+ */
+
+#include <cblas.h>
+#include <lapacke.h>
+
+#include <errno.h>
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <stdio.h>
+
+#include "brill_data.h"
+#include "internal.h"
+
+static int brill_construct_matrix(const BDContext *bd, double *mat,
+                                  double *rhs)
+{
+    BDPriv *s = bd->priv;
+    const double sfr = bd->basis_scale_factor_rho, sfz = bd->basis_scale_factor_z;
+
+    double *basis_val, *basis_dval, *basis_d2val;
+    int idx_coeff_rho, idx_coeff_z, idx_grid_rho, idx_grid_z;
+
+    double *basis_r[3];
+    double *basis_z[3];
+
+    /* precompute the basis values we will need */
+    for (int i = 0; i < ARRAY_ELEMS(basis_r); i++)
+        basis_r[i] = malloc(sizeof(*basis_r[i]) * s->nb_colloc_points_rho * bd->nb_coeffs_rho);
+    for (int i = 0; i < s->nb_colloc_points_rho; i++) {
+        double coord = s->basis->colloc_point(s->colloc_grid_order_rho, i, sfr);
+        for (int j = 0; j < bd->nb_coeffs_rho; j++) {
+            basis_r[0][i * bd->nb_coeffs_rho + j] = s->basis->eval(coord, j, sfr);
+            basis_r[1][i * bd->nb_coeffs_rho + j] = s->basis->eval_diff1(coord, j, sfr);
+            basis_r[2][i * bd->nb_coeffs_rho + j] = s->basis->eval_diff2(coord, j, sfr);
+        }
+    }
+
+    for (int i = 0; i < ARRAY_ELEMS(basis_z); i++)
+        basis_z[i] = malloc(sizeof(*basis_z[i]) * s->nb_colloc_points_z * bd->nb_coeffs_z);
+    for (int i = 0; i < s->nb_colloc_points_z; i++) {
+        double coord = s->basis->colloc_point(s->colloc_grid_order_z, i, sfr);
+        for (int j = 0; j < bd->nb_coeffs_z; j++) {
+            basis_z[0][i * bd->nb_coeffs_z + j] = s->basis->eval(coord, j, sfr);
+            basis_z[1][i * bd->nb_coeffs_z + j] = s->basis->eval_diff1(coord, j, sfr);
+            basis_z[2][i * bd->nb_coeffs_z + j] = s->basis->eval_diff2(coord, j, sfr);
+        }
+    }
+
+#define   BASIS_RHO (basis_r[0][idx_grid_rho * bd->nb_coeffs_rho + idx_coeff_rho])
+#define  DBASIS_RHO (basis_r[1][idx_grid_rho * bd->nb_coeffs_rho + idx_coeff_rho])
+#define D2BASIS_RHO (basis_r[2][idx_grid_rho * bd->nb_coeffs_rho + idx_coeff_rho])
+#define   BASIS_Z   (basis_z[0][idx_grid_z * bd->nb_coeffs_z + idx_coeff_z])
+#define  DBASIS_Z   (basis_z[1][idx_grid_z * bd->nb_coeffs_z + idx_coeff_z])
+#define D2BASIS_Z   (basis_z[2][idx_grid_z * bd->nb_coeffs_z + idx_coeff_z])
+
+    for (idx_grid_z = 0; idx_grid_z < s->nb_colloc_points_z; idx_grid_z++) {
+        double z_physical = s->basis->colloc_point(s->colloc_grid_order_z, idx_grid_z, sfz);
+
+        for (idx_grid_rho = 0; idx_grid_rho < s->nb_colloc_points_rho; idx_grid_rho++) {
+            double x_physical = s->basis->colloc_point(s->colloc_grid_order_rho, idx_grid_rho, sfr);
+            double d2q        = s->q_func->d2q_rho(bd, x_physical, z_physical) + s->q_func->d2q_z(bd, x_physical, z_physical);
+            int idx_grid         = idx_grid_z * s->nb_colloc_points_rho + idx_grid_rho;
+
+            for (idx_coeff_z = 0; idx_coeff_z < bd->nb_coeffs_z; idx_coeff_z++)
+                for (idx_coeff_rho = 0; idx_coeff_rho < bd->nb_coeffs_rho; idx_coeff_rho++) {
+                    int idx_coeff = idx_coeff_z * bd->nb_coeffs_rho + idx_coeff_rho;
+
+                    double val  = D2BASIS_RHO * BASIS_Z + D2BASIS_Z * BASIS_RHO + BASIS_RHO * BASIS_Z * 0.25 * d2q;
+                    if (fabs(x_physical) > EPS)
+                        val += DBASIS_RHO * BASIS_Z / fabs(x_physical);
+                    else
+                        val += D2BASIS_RHO * BASIS_Z;
+
+                    mat[idx_grid + s->nb_colloc_points * idx_coeff] = val;
+                }
+            rhs[idx_grid] = -0.25 * d2q;
+        }
+    }
+
+    for (int i = 0; i < ARRAY_ELEMS(basis_r); i++)
+        free(basis_r[i]);
+    for (int i = 0; i < ARRAY_ELEMS(basis_z); i++)
+        free(basis_z[i]);
+
+    return 0;
+}
+
+static int brill_solve_linear(const BDContext *bd, double *mat, double **px, double **prhs)
+{
+    const BDPriv *s = bd->priv;
+    const int stride = s->nb_coeffs;
+    int *ipiv;
+    double *mat_f;
+    double cond, ferr, berr, rpivot;
+
+    double   *x = *px;
+    double *rhs = *prhs;
+    char  equed = 'N';
+    int     ret = 0;
+
+    ipiv  = malloc(stride * sizeof(*ipiv));
+    mat_f = malloc(SQR(stride) * sizeof(*mat_f));
+    if (!ipiv || !mat_f) {
+        ret = -ENOMEM;
+        goto fail;
+    }
+
+    LAPACKE_dgesvx(LAPACK_COL_MAJOR, 'N', 'N', stride, 1,
+                   mat, stride, mat_f, stride, ipiv, &equed,
+                   NULL, NULL, rhs, stride, x, stride,
+                   &cond, &ferr, &berr, &rpivot);
+
+    bdi_log(bd, 0, "LU factorization solution to a %zdx%zd matrix: "
+            "condition number %16.16g; forward error %16.16g backward error %16.16g\n",
+            stride, stride, cond, ferr, berr);
+fail:
+    free(mat_f);
+    free(ipiv);
+
+    return ret;
+}
+
+static int brill_solve_svd(const BDContext *bd, double *mat,
+                           double **px, double **prhs)
+{
+    const BDPriv *s = bd->priv;
+    double *sv;
+    int rank;
+
+    double   *x = *px;
+    double *rhs = *prhs;
+
+    sv = malloc(sizeof(*sv) * s->nb_coeffs);
+    if (!sv)
+        return -ENOMEM;
+
+    LAPACKE_dgelsd(LAPACK_COL_MAJOR, s->nb_colloc_points, s->nb_coeffs, 1,
+                   mat, s->nb_colloc_points, rhs,
+                   MAX(s->nb_coeffs, s->nb_colloc_points),
+                   sv, -1, &rank);
+
+    bdi_log(bd, 0, "Least squares SVD solution to a %zdx%zd matrix: "
+            "rank %d, condition number %16.16g\n", s->nb_colloc_points, s->nb_coeffs,
+            rank, sv[s->nb_coeffs - 1] / sv[0]);
+
+    free(sv);
+
+    *px         = rhs;
+    *prhs       = x;
+
+    return 0;
+}
+
+/*
+ * Solve the equation
+ * Δψ + ¼ ψ (∂²q/∂r² + ∂²q/∂z²) = 0
+ * for the coefficients of spectral approximation of ψ:
+ * ψ(r, z) = 1 + ΣaᵢⱼTᵢ(r)Tⱼ(z)
+ * where i =  { 0, ... , bd->nb_coeffs_rho };
+ *       j =  { 0, ... , bd->nb_coeffs_z };
+ * q(r, z) and Tᵢ(x) are defined by bd->q_func, bd->laplace_q_func and
+ * bd->basis.
+ *
+ * The cofficients are exported in bd->priv->coeffs
+ */
+int bdi_solve(BDContext *bd)
+{
+    BDPriv *s = bd->priv;
+
+    const int vecsize = MAX(s->nb_coeffs, s->nb_colloc_points);
+
+    double *mat = NULL;
+    double *rhs = NULL, *coeffs = NULL;
+
+    int ret = 0;
+
+    /* allocate and fill the pseudospectral matrix */
+    mat    = malloc(sizeof(*mat) * s->nb_coeffs * s->nb_colloc_points);
+    coeffs = malloc(sizeof(*coeffs) * vecsize);
+    rhs    = malloc(sizeof(*rhs)    * vecsize);
+    if (!mat || !coeffs || !rhs) {
+        ret = -ENOMEM;
+        goto fail;
+    }
+
+    /* fill the matrix */
+    ret = brill_construct_matrix(bd, mat, rhs);
+    if (ret < 0)
+        goto fail;
+
+    /* solve for the coeffs */
+    if (s->nb_colloc_points > s->nb_coeffs)
+        ret = brill_solve_svd(bd, mat, &coeffs, &rhs);
+    else
+        ret = brill_solve_linear(bd, mat, &coeffs, &rhs);
+
+    /* export the result */
+    s->coeffs = coeffs;
+
+fail:
+    if (ret < 0)
+        free(coeffs);
+
+    free(rhs);
+    free(mat);
+
+    return ret;
+}