From 5817b21222c84a9aaaa486216933aa068ab0f30b Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Thu, 11 Sep 2014 12:03:45 +0200 Subject: Rewrite the matrix inversion code. --- param.ccl | 15 ++++- src/brill.c | 202 ++++++++++++++++++++++++++++++++++++++++++++++++------------ 2 files changed, 173 insertions(+), 44 deletions(-) diff --git a/param.ccl b/param.ccl index 64f2063..068f4a6 100644 --- a/param.ccl +++ b/param.ccl @@ -13,13 +13,22 @@ CCTK_REAL amplitude "amplitude" : :: "" } 1 -RESTRICTED: -CCTK_INT cheb_order "order" +CCTK_INT basis_order_r "Number of the basis functions in the radial direction" +{ + 1: :: "" +} 40 + +CCTK_INT basis_order_z "Number of the basis functions in the z direction" { 1: :: "" -} 20 +} 40 CCTK_REAL scale_factor "Scaling factor L for the SB basis" { 0: :: "" } 3.0 + +CCTK_INT overdet "Solve the system overdetermined by this many equations" +{ + 0: :: "" +} 0 diff --git a/src/brill.c b/src/brill.c index 48912c4..4592081 100644 --- a/src/brill.c +++ b/src/brill.c @@ -8,12 +8,15 @@ #include #include #include -#include + +#include #include "cctk.h" #include "cctk_Arguments.h" #include "cctk_Parameters.h" +#define ACC_TEST 1 + #define MAX(x, y) ((x) > (y) ? (x) : (y)) #define MIN(x, y) ((x) > (y) ? (y) : (x)) #define SQR(x) ((x) * (x)) @@ -94,8 +97,7 @@ static const BasisSet sb_even_basis = { typedef struct BrillDataContext { /* options */ - int cheb_order; - float amplitude; + CCTK_REAL amplitude; double (*q_func)(struct BrillDataContext *bd, double rho, double z); double (*laplace_q_func)(struct BrillDataContext *bd, double rho, double z); @@ -107,45 +109,121 @@ typedef struct BrillDataContext { int nb_coeffs_z; int nb_coeffs; + int nb_colloc_points_x; + int nb_colloc_points_z; + int nb_colloc_points; + gsl_vector *psi_coeffs; } BrillDataContext; static int brill_construct_matrix(BrillDataContext *bd, gsl_matrix *mat, gsl_vector *rhs) { - const int nb_gridpoints_z = bd->cheb_order; - const int nb_gridpoints_x = bd->cheb_order; - const int nb_gridpoints = nb_gridpoints_x * nb_gridpoints_z; - - CCTK_REAL z_physical, x_physical; - + CCTK_REAL *basis_val, *basis_dval, *basis_d2val; int idx_coeff_x, idx_coeff_z, idx_grid_x, idx_grid_z; - int idx_coeff, idx_grid; - /* interior */ - for (idx_grid_z = 0; idx_grid_z < nb_gridpoints_z; idx_grid_z++) { - CCTK_REAL z_physical = bd->basis->colloc_point(nb_gridpoints_z, idx_grid_z); + /* precompute the basis values we will need */ + // FIXME: assumes same number of points in x and z directions + basis_val = malloc(sizeof(*basis_val) * bd->nb_colloc_points_x * bd->nb_coeffs_x); + basis_dval = malloc(sizeof(*basis_dval) * bd->nb_colloc_points_x * bd->nb_coeffs_x); + basis_d2val = malloc(sizeof(*basis_d2val) * bd->nb_colloc_points_x * bd->nb_coeffs_x); + for (int i = 0; i < bd->nb_colloc_points_x; i++) { + CCTK_REAL coord = bd->basis->colloc_point(bd->nb_colloc_points_x, i); + for (int j = 0; j < bd->nb_coeffs_x; j++) { + basis_val [i * bd->nb_coeffs_x + j] = bd->basis->eval(coord, j); + basis_dval [i * bd->nb_coeffs_x + j] = bd->basis->eval_diff1(coord, j); + basis_d2val[i * bd->nb_coeffs_x + j] = bd->basis->eval_diff2(coord, j); + } + } - fprintf(stdout, "coord %d %g\n", idx_grid_z, z_physical); + for (idx_grid_z = 0; idx_grid_z < bd->nb_colloc_points_z; idx_grid_z++) { + CCTK_REAL z_physical = bd->basis->colloc_point(bd->nb_colloc_points_z, idx_grid_z); - for (idx_grid_x = 0; idx_grid_x < nb_gridpoints_x; idx_grid_x++) { - CCTK_REAL x_physical = bd->basis->colloc_point(nb_gridpoints_x, idx_grid_x); + for (idx_grid_x = 0; idx_grid_x < bd->nb_colloc_points_x; idx_grid_x++) { + CCTK_REAL x_physical = bd->basis->colloc_point(bd->nb_colloc_points_x, idx_grid_x); CCTK_REAL d2q = bd->laplace_q_func(bd, x_physical, z_physical); - int idx_grid = idx_grid_z * nb_gridpoints_z + idx_grid_x; + int idx_grid = idx_grid_z * bd->nb_colloc_points_z + idx_grid_x; for (idx_coeff_z = 0; idx_coeff_z < bd->nb_coeffs_z; idx_coeff_z++) for (idx_coeff_x = 0; idx_coeff_x < bd->nb_coeffs_x; idx_coeff_x++) { int idx_coeff = idx_coeff_z * bd->nb_coeffs_z + idx_coeff_x; - mat->data[idx_grid * mat->tda + idx_coeff] = bd->basis->eval_diff2(x_physical, idx_coeff_x) * bd->basis->eval(z_physical, idx_coeff_z) * x_physical + - bd->basis->eval_diff1(x_physical, idx_coeff_x) * bd->basis->eval(z_physical, idx_coeff_z) + - bd->basis->eval_diff2(z_physical, idx_coeff_z) * bd->basis->eval(x_physical, idx_coeff_x) * x_physical + - bd->basis->eval (x_physical, idx_coeff_x) * bd->basis->eval(z_physical, idx_coeff_z) * 0.25 * d2q * x_physical; + mat->data[idx_grid + mat->tda * idx_coeff] = basis_d2val[idx_grid_x * bd->nb_coeffs_x + idx_coeff_x] * basis_val[idx_grid_z * bd->nb_coeffs_z + idx_coeff_z] * x_physical + + basis_dval [idx_grid_x * bd->nb_coeffs_x + idx_coeff_x] * basis_val[idx_grid_z * bd->nb_coeffs_z + idx_coeff_z] + + basis_d2val[idx_grid_z * bd->nb_coeffs_z + idx_coeff_z] * basis_val[idx_grid_x * bd->nb_coeffs_x + idx_coeff_x] * x_physical + + basis_val [idx_grid_x * bd->nb_coeffs_x + idx_coeff_x] * basis_val[idx_grid_z * bd->nb_coeffs_z + idx_coeff_z] * 0.25 * d2q * x_physical; } rhs->data[idx_grid] = -0.25 * d2q * x_physical; } } + free(basis_val); + free(basis_dval); + free(basis_d2val); + + return 0; +} + +static int solve_linear(gsl_matrix *mat, gsl_vector **px, gsl_vector **prhs) +{ + int *ipiv; + gsl_matrix *mat_f; + double cond, ferr, berr, rpivot; + + gsl_vector *x = *px; + gsl_vector *rhs = *prhs; + char equed = 'N'; + int ret = 0; + + ipiv = malloc(mat->size1 * sizeof(*ipiv)); + mat_f = gsl_matrix_alloc(mat->size1, mat->size2); + if (!ipiv || !mat_f) { + ret = -ENOMEM; + goto fail; + } + + LAPACKE_dgesvx(LAPACK_COL_MAJOR, 'N', 'N', mat->size1, 1, + mat->data, mat->tda, mat_f->data, mat_f->tda, ipiv, &equed, + NULL, NULL, rhs->data, rhs->size, x->data, x->size, + &cond, &ferr, &berr, &rpivot); + + CCTK_VInfo(CCTK_THORNSTRING, "LU factorization solution to a %zdx%zd matrix: " + "condition number %16.16g; forward error %16.16g backward error %16.16g", + mat->size1, mat->size2, cond, ferr, berr); +fail: + gsl_matrix_free(mat_f); + free(ipiv); + + return ret; +} + +static int solve_svd(gsl_matrix *mat, gsl_vector **px, gsl_vector **prhs) +{ + double *sv; + int rank; + + gsl_vector *x = *px; + gsl_vector *rhs = *prhs; + + sv = malloc(sizeof(*sv) * x->size); + if (!sv) + return -ENOMEM; + + LAPACKE_dgelsd(LAPACK_COL_MAJOR, rhs->size, x->size, 1, mat->data, mat->tda, + rhs->data, rhs->size, sv, -1, &rank); + + CCTK_VInfo(CCTK_THORNSTRING, "Least squares SVD solution to a %zdx%zd matrix: " + "rank %d, condition number %16.16g", mat->size1, mat->size2, + rank, sv[x->size - 1] / sv[0]); + + gsl_vector_free(x); + + *px = rhs; + (*px)->size = mat->size1; + *prhs = NULL; + + free(sv); + return 0; } @@ -157,40 +235,76 @@ static int brill_construct_matrix(BrillDataContext *bd, gsl_matrix *mat, */ static int brill_solve(BrillDataContext *bd) { - gsl_matrix *mat; - gsl_permutation *perm; - gsl_vector *coeffs, *rhs; + gsl_matrix *mat = NULL; + gsl_vector *coeffs = NULL, *rhs = NULL; + +#if ACC_TEST + gsl_vector *rhs_bkp = NULL; + gsl_matrix *mat_bkp = NULL; +#endif - int signum; int ret = 0; /* allocate and fill the pseudospectral matrix */ - mat = gsl_matrix_alloc(bd->nb_coeffs, bd->nb_coeffs); - - perm = gsl_permutation_alloc(bd->nb_coeffs); - coeffs = gsl_vector_alloc (bd->nb_coeffs); - rhs = gsl_vector_calloc (bd->nb_coeffs); - if (!mat || !perm || !coeffs || !rhs) { + mat = gsl_matrix_alloc (bd->nb_coeffs, bd->nb_colloc_points); // inverted order for lapack + coeffs = gsl_vector_alloc (bd->nb_coeffs); + rhs = gsl_vector_calloc(bd->nb_colloc_points); + if (!mat || !coeffs || !rhs) { ret = -ENOMEM; goto fail; } /* fill the matrix */ - brill_construct_matrix(bd, mat, rhs); + ret = brill_construct_matrix(bd, mat, rhs); + if (ret < 0) + goto fail; + +#if ACC_TEST + /* make backups of the matrix and RHS, since they might be destroyed later */ + mat_bkp = gsl_matrix_alloc(mat->size2, mat->size1); + rhs_bkp = gsl_vector_alloc(rhs->size); + if (!mat_bkp || !rhs_bkp) { + ret = -ENOMEM; + goto fail; + } + + gsl_vector_memcpy(rhs_bkp, rhs); + gsl_matrix_transpose_memcpy(mat_bkp, mat); +#endif /* solve for the coeffs */ - gsl_linalg_LU_decomp(mat, perm, &signum); - gsl_linalg_LU_solve(mat, perm, rhs, coeffs); + if (bd->nb_colloc_points > bd->nb_coeffs) + ret = solve_svd(mat, &coeffs, &rhs); + else + ret = solve_linear(mat, &coeffs, &rhs); bd->psi_coeffs = coeffs; +#if ACC_TEST + { + double min, max, rmin, rmax; + + gsl_vector_minmax(rhs_bkp, &rmin, &rmax); + gsl_blas_dgemv(CblasNoTrans, 1.0, mat_bkp, coeffs, -1.0, rhs_bkp); + gsl_vector_minmax(rhs_bkp, &min, &max); + + CCTK_VInfo(CCTK_THORNSTRING, "max(|A·x - rhs|) / max(|rhs|): %16.16g", + MAX(fabs(min), fabs(max)) / MAX(fabs(rmin), fabs(rmax))); + } +#endif + fail: + +#if ACC_TEST + gsl_matrix_free(mat_bkp); + gsl_vector_free(rhs_bkp); +#endif + if (ret < 0) gsl_vector_free(coeffs); gsl_vector_free(rhs); gsl_matrix_free(mat); - gsl_permutation_free(perm); return ret; } @@ -211,11 +325,13 @@ static double laplace_q_gundlach(BrillDataContext *bd, double rho, double z) return 2 * bd->amplitude * e * (1.0 + 2 * r2 * (r2 + z2 - 3)); } -static BrillDataContext *init_bd(cGH *cctkGH, float amplitude, int cheb_order) +static BrillDataContext *init_bd(cGH *cctkGH, CCTK_REAL amplitude, + int basis_order_r, int basis_order_z, double sf, int overdet) { BrillDataContext *bd; - int i, j; + if (basis_order_r != basis_order_z) + CCTK_WARN(0, "Different r and z basis orders are not supported."); bd = malloc(sizeof(*bd)); @@ -224,13 +340,17 @@ static BrillDataContext *init_bd(cGH *cctkGH, float amplitude, int cheb_order) bd->amplitude = amplitude; bd->basis = &sb_even_basis; - bd->cheb_order = cheb_order; - bd->nb_coeffs_x = cheb_order; - bd->nb_coeffs_z = cheb_order; + bd->nb_coeffs_x = basis_order_r; + bd->nb_coeffs_z = basis_order_z; bd->nb_coeffs = bd->nb_coeffs_x * bd->nb_coeffs_z; + bd->nb_colloc_points_x = basis_order_r + overdet; + bd->nb_colloc_points_z = basis_order_z + overdet; + + bd->nb_colloc_points = bd->nb_colloc_points_x * bd->nb_colloc_points_z; + scale_factor = sf; return bd; @@ -254,7 +374,7 @@ void brill_data(CCTK_ARGUMENTS) /* on the first run, solve the equation for ψ */ if (!prev_bd) { - bd = init_bd(cctkGH, amplitude, cheb_order); + bd = init_bd(cctkGH, amplitude, basis_order_r, basis_order_z, scale_factor, overdet); brill_solve(bd); -- cgit v1.2.3