/* * Copyright 2014-2015 Anton Khirnov * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ /** * @file * the actual pseudo-spectral solver code */ #include #include #include #include #include #include #include #include "brill_data.h" #include "internal.h" #include "qfunc.h" static int brill_construct_matrix_cylindrical(const BDContext *bd, double *mat, double *rhs) { BDPriv *s = bd->priv; double *basis_val, *basis_dval, *basis_d2val; int idx_coeff_rho, idx_coeff_z, idx_grid_rho, idx_grid_z; double *basis[2][3] = { { NULL } }; int ret = 0; /* precompute the basis values we will need */ for (int i = 0; i < ARRAY_ELEMS(basis); i++) { for (int j = 0; j < ARRAY_ELEMS(basis[i]); j++) { basis[i][j] = malloc(sizeof(*basis[i][j]) * s->nb_colloc_points[i] * s->nb_coeffs[i]); if (!basis[i][j]) { ret = -ENOMEM; goto fail; } } for (int j = 0; j < s->nb_colloc_points[i]; j++) { double coord = bdi_basis_colloc_point(s->basis[i], j, s->nb_coeffs[i]); for (int k = 0; k < s->nb_coeffs[i]; k++) { basis[i][0][j * s->nb_coeffs[i] + k] = bdi_basis_eval(s->basis[i], BS_EVAL_TYPE_VALUE, coord, k); basis[i][1][j * s->nb_coeffs[i] + k] = bdi_basis_eval(s->basis[i], BS_EVAL_TYPE_DIFF1, coord, k); basis[i][2][j * s->nb_coeffs[i] + k] = bdi_basis_eval(s->basis[i], BS_EVAL_TYPE_DIFF2, coord, k); } } } #define BASIS_RHO (basis[0][0][idx_grid_rho * s->nb_coeffs[0] + idx_coeff_rho]) #define DBASIS_RHO (basis[0][1][idx_grid_rho * s->nb_coeffs[0] + idx_coeff_rho]) #define D2BASIS_RHO (basis[0][2][idx_grid_rho * s->nb_coeffs[0] + idx_coeff_rho]) #define BASIS_Z (basis[1][0][idx_grid_z * s->nb_coeffs[1] + idx_coeff_z]) #define DBASIS_Z (basis[1][1][idx_grid_z * s->nb_coeffs[1] + idx_coeff_z]) #define D2BASIS_Z (basis[1][2][idx_grid_z * s->nb_coeffs[1] + idx_coeff_z]) for (idx_grid_z = 0; idx_grid_z < s->nb_colloc_points[1]; idx_grid_z++) { double z_val = bdi_basis_colloc_point(s->basis[1], idx_grid_z, s->nb_coeffs[1]); for (idx_grid_rho = 0; idx_grid_rho < s->nb_colloc_points[0]; idx_grid_rho++) { double x_val = bdi_basis_colloc_point(s->basis[0], idx_grid_rho, s->nb_coeffs[0]); double d2q = bdi_qfunc_eval(s->qfunc, x_val, z_val, 2) + bdi_qfunc_eval(s->qfunc, x_val, z_val, 6); int idx_grid = idx_grid_z * s->nb_colloc_points[0] + idx_grid_rho; for (idx_coeff_z = 0; idx_coeff_z < s->nb_coeffs[1]; idx_coeff_z++) for (idx_coeff_rho = 0; idx_coeff_rho < s->nb_coeffs[0]; idx_coeff_rho++) { int idx_coeff = idx_coeff_z * s->nb_coeffs[0] + idx_coeff_rho; double val = D2BASIS_RHO * BASIS_Z + D2BASIS_Z * BASIS_RHO + BASIS_RHO * BASIS_Z * 0.25 * d2q; if (fabs(x_val) > EPS) val += DBASIS_RHO * BASIS_Z / fabs(x_val); else val += D2BASIS_RHO * BASIS_Z; mat[idx_grid + NB_COLLOC_POINTS(s) * idx_coeff] = val; } rhs[idx_grid] = -0.25 * d2q; } } fail: for (int i = 0; i < ARRAY_ELEMS(basis); i++) for (int j = 0; j < ARRAY_ELEMS(basis[i]); j++) free(basis[i][j]); return ret; } static int brill_construct_matrix_polar(const BDContext *bd, double *mat, double *rhs) { BDPriv *s = bd->priv; double *basis_val, *basis_dval, *basis_d2val; int idx_coeff_0, idx_coeff_1, idx_grid_0, idx_grid_1; double *basis[2][3] = { { NULL } }; int ret = 0; /* precompute the basis values we will need */ for (int i = 0; i < ARRAY_ELEMS(basis); i++) { for (int j = 0; j < ARRAY_ELEMS(basis[i]); j++) { basis[i][j] = malloc(sizeof(*basis[i][j]) * s->nb_colloc_points[i] * s->nb_coeffs[i]); if (!basis[i][j]) { ret = -ENOMEM; goto fail; } } for (int j = 0; j < s->nb_colloc_points[i]; j++) { double coord = bdi_basis_colloc_point(s->basis[i], j, s->nb_coeffs[i]); for (int k = 0; k < s->nb_coeffs[i]; k++) { basis[i][0][j * s->nb_coeffs[i] + k] = bdi_basis_eval(s->basis[i], BS_EVAL_TYPE_VALUE, coord, k); basis[i][1][j * s->nb_coeffs[i] + k] = bdi_basis_eval(s->basis[i], BS_EVAL_TYPE_DIFF1, coord, k); basis[i][2][j * s->nb_coeffs[i] + k] = bdi_basis_eval(s->basis[i], BS_EVAL_TYPE_DIFF2, coord, k); } } } #define BASIS0 (basis[0][0][idx_grid_0 * s->nb_coeffs[0] + idx_coeff_0]) #define DBASIS0 (basis[0][1][idx_grid_0 * s->nb_coeffs[0] + idx_coeff_0]) #define D2BASIS0 (basis[0][2][idx_grid_0 * s->nb_coeffs[0] + idx_coeff_0]) #define BASIS1 (basis[1][0][idx_grid_1 * s->nb_coeffs[1] + idx_coeff_1]) #define DBASIS1 (basis[1][1][idx_grid_1 * s->nb_coeffs[1] + idx_coeff_1]) #define D2BASIS1 (basis[1][2][idx_grid_1 * s->nb_coeffs[1] + idx_coeff_1]) for (idx_grid_1 = 0; idx_grid_1 < s->nb_colloc_points[1]; idx_grid_1++) { double phi = bdi_basis_colloc_point(s->basis[1], idx_grid_1, s->nb_coeffs[1]); for (idx_grid_0 = 0; idx_grid_0 < s->nb_colloc_points[0]; idx_grid_0++) { double r = bdi_basis_colloc_point(s->basis[0], idx_grid_0, s->nb_coeffs[0]); double x = r * cos(phi); double z = r * sin(phi); double d2q = bdi_qfunc_eval(s->qfunc, x, z, 2) + bdi_qfunc_eval(s->qfunc, x, z, 6); int idx_grid = idx_grid_1 * s->nb_colloc_points[0] + idx_grid_0; for (idx_coeff_1 = 0; idx_coeff_1 < s->nb_coeffs[1]; idx_coeff_1++) for (idx_coeff_0 = 0; idx_coeff_0 < s->nb_coeffs[0]; idx_coeff_0++) { int idx_coeff = idx_coeff_1 * s->nb_coeffs[0] + idx_coeff_0; double basis_val_20 = ((r > EPS) ? (SQR(x / r) * D2BASIS0 * BASIS1 + SQR(z / SQR(r)) * BASIS0 * D2BASIS1 + (1.0 - SQR(x / r)) / r * DBASIS0 * BASIS1 + 2 * x * z / SQR(SQR(r)) * BASIS0 * DBASIS1 - 2 * z * x / (r * SQR(r)) * DBASIS0 * DBASIS1) : 0.0); double basis_val_02 = ((r > EPS) ? (SQR(z / r) * D2BASIS0 * BASIS1 + SQR(x / SQR(r)) * BASIS0 * D2BASIS1 + (1.0 - SQR(z / r)) / r * DBASIS0 * BASIS1 - 2 * x * z / SQR(SQR(r)) * BASIS0 * DBASIS1 + 2 * z * x / (r * SQR(r)) * DBASIS0 * DBASIS1) : 0.0); double basis_val_10 = ((r > EPS) ? (DBASIS0 * BASIS1 * x / r - BASIS0 * DBASIS1 * z / SQR(r)) : 0.0); double val = basis_val_20 + basis_val_02 + BASIS0 * BASIS1 * 0.25 * d2q; if (fabs(x) > EPS) val += basis_val_10 / fabs(x); else val += basis_val_20; mat[idx_grid + NB_COLLOC_POINTS(s) * idx_coeff] = val; } rhs[idx_grid] = -0.25 * d2q; } } fail: for (int i = 0; i < ARRAY_ELEMS(basis); i++) for (int j = 0; j < ARRAY_ELEMS(basis[i]); j++) free(basis[i][j]); return ret; } static int brill_solve_linear(const BDContext *bd, double *mat, double **px, double **prhs) { const BDPriv *s = bd->priv; const int stride = NB_COEFFS(s); int *ipiv; double *mat_f; double cond, ferr, berr, rpivot; double *x = *px; double *rhs = *prhs; char equed = 'N'; int ret = 0; ipiv = malloc(stride * sizeof(*ipiv)); mat_f = malloc(SQR(stride) * sizeof(*mat_f)); if (!ipiv || !mat_f) { ret = -ENOMEM; goto fail; } LAPACKE_dgesvx(LAPACK_COL_MAJOR, 'N', 'N', stride, 1, mat, stride, mat_f, stride, ipiv, &equed, NULL, NULL, rhs, stride, x, stride, &cond, &ferr, &berr, &rpivot); bdi_log(bd, 0, "LU factorization solution to a %zdx%zd matrix: " "condition number %16.16g; forward error %16.16g backward error %16.16g\n", stride, stride, cond, ferr, berr); fail: free(mat_f); free(ipiv); return ret; } static int brill_solve_svd(const BDContext *bd, double *mat, double **px, double **prhs) { const BDPriv *s = bd->priv; double *sv; int rank; double *x = *px; double *rhs = *prhs; sv = malloc(sizeof(*sv) * NB_COEFFS(s)); if (!sv) return -ENOMEM; LAPACKE_dgelsd(LAPACK_COL_MAJOR, NB_COLLOC_POINTS(s), NB_COEFFS(s), 1, mat, NB_COLLOC_POINTS(s), rhs, MAX(NB_COEFFS(s), NB_COLLOC_POINTS(s)), sv, -1, &rank); bdi_log(bd, 0, "Least squares SVD solution to a %zdx%zd matrix: " "rank %d, condition number %16.16g\n", NB_COLLOC_POINTS(s), NB_COEFFS(s), rank, sv[NB_COEFFS(s) - 1] / sv[0]); free(sv); *px = rhs; *prhs = x; return 0; } static int brill_export_coeffs(BDPriv *s, const double *coeffs) { int alignment = REQ_ALIGNMENT(*coeffs); int nb_coeffs_aligned[2] = { ALIGN(s->nb_coeffs[0], alignment), ALIGN(s->nb_coeffs[1], alignment) }; int ret, i; ret = posix_memalign((void**)&s->coeffs, REQ_ALIGNMENT(char), nb_coeffs_aligned[0] * nb_coeffs_aligned[1] * sizeof(*s->coeffs)); if (ret) return -ret; memset(s->coeffs, 0, nb_coeffs_aligned[0] * nb_coeffs_aligned[1] * sizeof(*s->coeffs)); for (i = 0; i < s->nb_coeffs[1]; i++) memcpy(s->coeffs + i * nb_coeffs_aligned[0], coeffs + i * s->nb_coeffs[0], sizeof(*coeffs) * s->nb_coeffs[0]); s->coeffs_stride = nb_coeffs_aligned[0]; return 0; } /* * Solve the equation * Δψ + ¼ ψ (∂²q/∂r² + ∂²q/∂z²) = 0 * for the coefficients of spectral approximation of ψ: * ψ(r, z) = 1 + ΣaᵢⱼTᵢ(r)Tⱼ(z) * where i = { 0, ... , bd->nb_coeffs_rho }; * j = { 0, ... , bd->nb_coeffs_z }; * q(r, z) and Tᵢ(x) are defined by bd->q_func, bd->laplace_q_func and * bd->basis. * * The cofficients are exported in bd->priv->coeffs */ int bdi_solve(BDContext *bd) { BDPriv *s = bd->priv; const int vecsize = MAX(NB_COEFFS(s), NB_COLLOC_POINTS(s)); double *mat = NULL; double *rhs = NULL, *coeffs = NULL; int ret = 0; /* allocate and fill the pseudospectral matrix */ mat = malloc(sizeof(*mat) * NB_COEFFS(s) * NB_COLLOC_POINTS(s)); coeffs = malloc(sizeof(*coeffs) * vecsize); rhs = malloc(sizeof(*rhs) * vecsize); if (!mat || !coeffs || !rhs) { ret = -ENOMEM; goto fail; } /* fill the matrix */ switch (s->coord_system) { case COORD_SYSTEM_CYLINDRICAL: ret = brill_construct_matrix_cylindrical(bd, mat, rhs); break; case COORD_SYSTEM_RADIAL: ret = brill_construct_matrix_polar(bd, mat, rhs); break; } if (ret < 0) goto fail; /* solve for the coeffs */ if (s->nb_colloc_points > s->nb_coeffs) ret = brill_solve_svd(bd, mat, &coeffs, &rhs); else ret = brill_solve_linear(bd, mat, &coeffs, &rhs); /* export the result */ ret = brill_export_coeffs(s, coeffs); if (ret < 0) goto fail; fail: free(coeffs); free(rhs); free(mat); return ret; }