summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnton Khirnov <anton@khirnov.net>2014-09-11 12:03:45 +0200
committerAnton Khirnov <anton@khirnov.net>2014-09-11 12:04:57 +0200
commit5817b21222c84a9aaaa486216933aa068ab0f30b (patch)
treee1e19abd4d77a7ed654f50b2a2f917e208a45c49
parent04697ee8ee0c4787e2058560d754c9e139a4417c (diff)
Rewrite the matrix inversion code.
-rw-r--r--param.ccl15
-rw-r--r--src/brill.c202
2 files changed, 173 insertions, 44 deletions
diff --git a/param.ccl b/param.ccl
index 64f2063..068f4a6 100644
--- a/param.ccl
+++ b/param.ccl
@@ -13,13 +13,22 @@ CCTK_REAL amplitude "amplitude"
: :: ""
} 1
-RESTRICTED:
-CCTK_INT cheb_order "order"
+CCTK_INT basis_order_r "Number of the basis functions in the radial direction"
+{
+ 1: :: ""
+} 40
+
+CCTK_INT basis_order_z "Number of the basis functions in the z direction"
{
1: :: ""
-} 20
+} 40
CCTK_REAL scale_factor "Scaling factor L for the SB basis"
{
0: :: ""
} 3.0
+
+CCTK_INT overdet "Solve the system overdetermined by this many equations"
+{
+ 0: :: ""
+} 0
diff --git a/src/brill.c b/src/brill.c
index 48912c4..4592081 100644
--- a/src/brill.c
+++ b/src/brill.c
@@ -8,12 +8,15 @@
#include <string.h>
#include <gsl/gsl_blas.h>
#include <gsl/gsl_linalg.h>
-#include <gsl/gsl_spline.h>
+
+#include <lapacke.h>
#include "cctk.h"
#include "cctk_Arguments.h"
#include "cctk_Parameters.h"
+#define ACC_TEST 1
+
#define MAX(x, y) ((x) > (y) ? (x) : (y))
#define MIN(x, y) ((x) > (y) ? (y) : (x))
#define SQR(x) ((x) * (x))
@@ -94,8 +97,7 @@ static const BasisSet sb_even_basis = {
typedef struct BrillDataContext {
/* options */
- int cheb_order;
- float amplitude;
+ CCTK_REAL amplitude;
double (*q_func)(struct BrillDataContext *bd, double rho, double z);
double (*laplace_q_func)(struct BrillDataContext *bd, double rho, double z);
@@ -107,45 +109,121 @@ typedef struct BrillDataContext {
int nb_coeffs_z;
int nb_coeffs;
+ int nb_colloc_points_x;
+ int nb_colloc_points_z;
+ int nb_colloc_points;
+
gsl_vector *psi_coeffs;
} BrillDataContext;
static int brill_construct_matrix(BrillDataContext *bd, gsl_matrix *mat,
gsl_vector *rhs)
{
- const int nb_gridpoints_z = bd->cheb_order;
- const int nb_gridpoints_x = bd->cheb_order;
- const int nb_gridpoints = nb_gridpoints_x * nb_gridpoints_z;
-
- CCTK_REAL z_physical, x_physical;
-
+ CCTK_REAL *basis_val, *basis_dval, *basis_d2val;
int idx_coeff_x, idx_coeff_z, idx_grid_x, idx_grid_z;
- int idx_coeff, idx_grid;
- /* interior */
- for (idx_grid_z = 0; idx_grid_z < nb_gridpoints_z; idx_grid_z++) {
- CCTK_REAL z_physical = bd->basis->colloc_point(nb_gridpoints_z, idx_grid_z);
+ /* precompute the basis values we will need */
+ // FIXME: assumes same number of points in x and z directions
+ basis_val = malloc(sizeof(*basis_val) * bd->nb_colloc_points_x * bd->nb_coeffs_x);
+ basis_dval = malloc(sizeof(*basis_dval) * bd->nb_colloc_points_x * bd->nb_coeffs_x);
+ basis_d2val = malloc(sizeof(*basis_d2val) * bd->nb_colloc_points_x * bd->nb_coeffs_x);
+ for (int i = 0; i < bd->nb_colloc_points_x; i++) {
+ CCTK_REAL coord = bd->basis->colloc_point(bd->nb_colloc_points_x, i);
+ for (int j = 0; j < bd->nb_coeffs_x; j++) {
+ basis_val [i * bd->nb_coeffs_x + j] = bd->basis->eval(coord, j);
+ basis_dval [i * bd->nb_coeffs_x + j] = bd->basis->eval_diff1(coord, j);
+ basis_d2val[i * bd->nb_coeffs_x + j] = bd->basis->eval_diff2(coord, j);
+ }
+ }
- fprintf(stdout, "coord %d %g\n", idx_grid_z, z_physical);
+ for (idx_grid_z = 0; idx_grid_z < bd->nb_colloc_points_z; idx_grid_z++) {
+ CCTK_REAL z_physical = bd->basis->colloc_point(bd->nb_colloc_points_z, idx_grid_z);
- for (idx_grid_x = 0; idx_grid_x < nb_gridpoints_x; idx_grid_x++) {
- CCTK_REAL x_physical = bd->basis->colloc_point(nb_gridpoints_x, idx_grid_x);
+ for (idx_grid_x = 0; idx_grid_x < bd->nb_colloc_points_x; idx_grid_x++) {
+ CCTK_REAL x_physical = bd->basis->colloc_point(bd->nb_colloc_points_x, idx_grid_x);
CCTK_REAL d2q = bd->laplace_q_func(bd, x_physical, z_physical);
- int idx_grid = idx_grid_z * nb_gridpoints_z + idx_grid_x;
+ int idx_grid = idx_grid_z * bd->nb_colloc_points_z + idx_grid_x;
for (idx_coeff_z = 0; idx_coeff_z < bd->nb_coeffs_z; idx_coeff_z++)
for (idx_coeff_x = 0; idx_coeff_x < bd->nb_coeffs_x; idx_coeff_x++) {
int idx_coeff = idx_coeff_z * bd->nb_coeffs_z + idx_coeff_x;
- mat->data[idx_grid * mat->tda + idx_coeff] = bd->basis->eval_diff2(x_physical, idx_coeff_x) * bd->basis->eval(z_physical, idx_coeff_z) * x_physical +
- bd->basis->eval_diff1(x_physical, idx_coeff_x) * bd->basis->eval(z_physical, idx_coeff_z) +
- bd->basis->eval_diff2(z_physical, idx_coeff_z) * bd->basis->eval(x_physical, idx_coeff_x) * x_physical +
- bd->basis->eval (x_physical, idx_coeff_x) * bd->basis->eval(z_physical, idx_coeff_z) * 0.25 * d2q * x_physical;
+ mat->data[idx_grid + mat->tda * idx_coeff] = basis_d2val[idx_grid_x * bd->nb_coeffs_x + idx_coeff_x] * basis_val[idx_grid_z * bd->nb_coeffs_z + idx_coeff_z] * x_physical +
+ basis_dval [idx_grid_x * bd->nb_coeffs_x + idx_coeff_x] * basis_val[idx_grid_z * bd->nb_coeffs_z + idx_coeff_z] +
+ basis_d2val[idx_grid_z * bd->nb_coeffs_z + idx_coeff_z] * basis_val[idx_grid_x * bd->nb_coeffs_x + idx_coeff_x] * x_physical +
+ basis_val [idx_grid_x * bd->nb_coeffs_x + idx_coeff_x] * basis_val[idx_grid_z * bd->nb_coeffs_z + idx_coeff_z] * 0.25 * d2q * x_physical;
}
rhs->data[idx_grid] = -0.25 * d2q * x_physical;
}
}
+ free(basis_val);
+ free(basis_dval);
+ free(basis_d2val);
+
+ return 0;
+}
+
+static int solve_linear(gsl_matrix *mat, gsl_vector **px, gsl_vector **prhs)
+{
+ int *ipiv;
+ gsl_matrix *mat_f;
+ double cond, ferr, berr, rpivot;
+
+ gsl_vector *x = *px;
+ gsl_vector *rhs = *prhs;
+ char equed = 'N';
+ int ret = 0;
+
+ ipiv = malloc(mat->size1 * sizeof(*ipiv));
+ mat_f = gsl_matrix_alloc(mat->size1, mat->size2);
+ if (!ipiv || !mat_f) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ LAPACKE_dgesvx(LAPACK_COL_MAJOR, 'N', 'N', mat->size1, 1,
+ mat->data, mat->tda, mat_f->data, mat_f->tda, ipiv, &equed,
+ NULL, NULL, rhs->data, rhs->size, x->data, x->size,
+ &cond, &ferr, &berr, &rpivot);
+
+ CCTK_VInfo(CCTK_THORNSTRING, "LU factorization solution to a %zdx%zd matrix: "
+ "condition number %16.16g; forward error %16.16g backward error %16.16g",
+ mat->size1, mat->size2, cond, ferr, berr);
+fail:
+ gsl_matrix_free(mat_f);
+ free(ipiv);
+
+ return ret;
+}
+
+static int solve_svd(gsl_matrix *mat, gsl_vector **px, gsl_vector **prhs)
+{
+ double *sv;
+ int rank;
+
+ gsl_vector *x = *px;
+ gsl_vector *rhs = *prhs;
+
+ sv = malloc(sizeof(*sv) * x->size);
+ if (!sv)
+ return -ENOMEM;
+
+ LAPACKE_dgelsd(LAPACK_COL_MAJOR, rhs->size, x->size, 1, mat->data, mat->tda,
+ rhs->data, rhs->size, sv, -1, &rank);
+
+ CCTK_VInfo(CCTK_THORNSTRING, "Least squares SVD solution to a %zdx%zd matrix: "
+ "rank %d, condition number %16.16g", mat->size1, mat->size2,
+ rank, sv[x->size - 1] / sv[0]);
+
+ gsl_vector_free(x);
+
+ *px = rhs;
+ (*px)->size = mat->size1;
+ *prhs = NULL;
+
+ free(sv);
+
return 0;
}
@@ -157,40 +235,76 @@ static int brill_construct_matrix(BrillDataContext *bd, gsl_matrix *mat,
*/
static int brill_solve(BrillDataContext *bd)
{
- gsl_matrix *mat;
- gsl_permutation *perm;
- gsl_vector *coeffs, *rhs;
+ gsl_matrix *mat = NULL;
+ gsl_vector *coeffs = NULL, *rhs = NULL;
+
+#if ACC_TEST
+ gsl_vector *rhs_bkp = NULL;
+ gsl_matrix *mat_bkp = NULL;
+#endif
- int signum;
int ret = 0;
/* allocate and fill the pseudospectral matrix */
- mat = gsl_matrix_alloc(bd->nb_coeffs, bd->nb_coeffs);
-
- perm = gsl_permutation_alloc(bd->nb_coeffs);
- coeffs = gsl_vector_alloc (bd->nb_coeffs);
- rhs = gsl_vector_calloc (bd->nb_coeffs);
- if (!mat || !perm || !coeffs || !rhs) {
+ mat = gsl_matrix_alloc (bd->nb_coeffs, bd->nb_colloc_points); // inverted order for lapack
+ coeffs = gsl_vector_alloc (bd->nb_coeffs);
+ rhs = gsl_vector_calloc(bd->nb_colloc_points);
+ if (!mat || !coeffs || !rhs) {
ret = -ENOMEM;
goto fail;
}
/* fill the matrix */
- brill_construct_matrix(bd, mat, rhs);
+ ret = brill_construct_matrix(bd, mat, rhs);
+ if (ret < 0)
+ goto fail;
+
+#if ACC_TEST
+ /* make backups of the matrix and RHS, since they might be destroyed later */
+ mat_bkp = gsl_matrix_alloc(mat->size2, mat->size1);
+ rhs_bkp = gsl_vector_alloc(rhs->size);
+ if (!mat_bkp || !rhs_bkp) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ gsl_vector_memcpy(rhs_bkp, rhs);
+ gsl_matrix_transpose_memcpy(mat_bkp, mat);
+#endif
/* solve for the coeffs */
- gsl_linalg_LU_decomp(mat, perm, &signum);
- gsl_linalg_LU_solve(mat, perm, rhs, coeffs);
+ if (bd->nb_colloc_points > bd->nb_coeffs)
+ ret = solve_svd(mat, &coeffs, &rhs);
+ else
+ ret = solve_linear(mat, &coeffs, &rhs);
bd->psi_coeffs = coeffs;
+#if ACC_TEST
+ {
+ double min, max, rmin, rmax;
+
+ gsl_vector_minmax(rhs_bkp, &rmin, &rmax);
+ gsl_blas_dgemv(CblasNoTrans, 1.0, mat_bkp, coeffs, -1.0, rhs_bkp);
+ gsl_vector_minmax(rhs_bkp, &min, &max);
+
+ CCTK_VInfo(CCTK_THORNSTRING, "max(|A·x - rhs|) / max(|rhs|): %16.16g",
+ MAX(fabs(min), fabs(max)) / MAX(fabs(rmin), fabs(rmax)));
+ }
+#endif
+
fail:
+
+#if ACC_TEST
+ gsl_matrix_free(mat_bkp);
+ gsl_vector_free(rhs_bkp);
+#endif
+
if (ret < 0)
gsl_vector_free(coeffs);
gsl_vector_free(rhs);
gsl_matrix_free(mat);
- gsl_permutation_free(perm);
return ret;
}
@@ -211,11 +325,13 @@ static double laplace_q_gundlach(BrillDataContext *bd, double rho, double z)
return 2 * bd->amplitude * e * (1.0 + 2 * r2 * (r2 + z2 - 3));
}
-static BrillDataContext *init_bd(cGH *cctkGH, float amplitude, int cheb_order)
+static BrillDataContext *init_bd(cGH *cctkGH, CCTK_REAL amplitude,
+ int basis_order_r, int basis_order_z, double sf, int overdet)
{
BrillDataContext *bd;
- int i, j;
+ if (basis_order_r != basis_order_z)
+ CCTK_WARN(0, "Different r and z basis orders are not supported.");
bd = malloc(sizeof(*bd));
@@ -224,13 +340,17 @@ static BrillDataContext *init_bd(cGH *cctkGH, float amplitude, int cheb_order)
bd->amplitude = amplitude;
bd->basis = &sb_even_basis;
- bd->cheb_order = cheb_order;
- bd->nb_coeffs_x = cheb_order;
- bd->nb_coeffs_z = cheb_order;
+ bd->nb_coeffs_x = basis_order_r;
+ bd->nb_coeffs_z = basis_order_z;
bd->nb_coeffs = bd->nb_coeffs_x * bd->nb_coeffs_z;
+ bd->nb_colloc_points_x = basis_order_r + overdet;
+ bd->nb_colloc_points_z = basis_order_z + overdet;
+
+ bd->nb_colloc_points = bd->nb_colloc_points_x * bd->nb_colloc_points_z;
+
scale_factor = sf;
return bd;
@@ -254,7 +374,7 @@ void brill_data(CCTK_ARGUMENTS)
/* on the first run, solve the equation for ψ */
if (!prev_bd) {
- bd = init_bd(cctkGH, amplitude, cheb_order);
+ bd = init_bd(cctkGH, amplitude, basis_order_r, basis_order_z, scale_factor, overdet);
brill_solve(bd);