/* * Copyright 2017-2022 Anton Khirnov * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #include #include #include #include #include #include "basis.h" #include "common.h" #include "internal.h" #include "log.h" #include "pssolve.h" #include "nlsolve.h" #include "td_constraints.h" #include "teukolsky_data.h" typedef struct EvalVarData { const TDContext *td; const BasisSetContext *basis[2]; const double *coeffs; const double *theta; const double *r; double *basis_val[2]; double *out; double diff_order[2]; double init_val; } EvalVarData; typedef struct MaximalSlicingEval { double *psi; double *dpsi_r; double *dpsi_t; double *krr; double *kpp; double *krt; } MaximalSlicingEval; static double scalarproduct_metric_c(size_t len1, size_t len2, const double *mat, const double *vec1, const double *vec2) { double val = 0.0; for (int m = 0; m < len2; m++) { double tmp = 0.0; for (int n = 0; n < len1; n++) tmp += mat[m * len1 + n] * vec1[n]; val += tmp * vec2[m]; } return val; } static int eval_var_kernel(void *arg, unsigned int job_idx, unsigned int thread_idx) { EvalVarData *d = arg; const TDContext *td = d->td; const TDPriv *s = td->priv; double *basis_val[2] = { d->basis_val[0] + thread_idx * td->nb_coeffs[0], d->basis_val[1] + thread_idx * td->nb_coeffs[1], }; double theta_val = d->theta[job_idx]; double r_val = d->r[job_idx]; double val = d->init_val; for (int k = 0; k < td->nb_coeffs[0]; k++) basis_val[0][k] = tdi_basis_eval(d->basis[0], d->diff_order[0], r_val, k); for (int k = 0; k < td->nb_coeffs[1]; k++) basis_val[1][k] = tdi_basis_eval(d->basis[1], d->diff_order[1], theta_val, k); val += scalarproduct_metric_c(td->nb_coeffs[0], td->nb_coeffs[1], d->coeffs, basis_val[0], basis_val[1]); d->out[job_idx] = val; } static int maximal_slicing_eval(void *opaque, unsigned int eq_idx, const unsigned int *colloc_grid_order, const double * const *colloc_grid, const double * const (*vars)[PSSOLVE_DIFF_ORDER_NB], double *dst) { MaximalSlicingEval *max = opaque; for (int idx1 = 0; idx1 < colloc_grid_order[1]; idx1++) { const double theta = colloc_grid[1][idx1]; const double st = sin(theta); const double st2 = SQR(st); const double ct = cos(theta); for (int idx0 = 0; idx0 < colloc_grid_order[0]; idx0++) { const double r = colloc_grid[0][idx0]; const double r2 = SQR(r); const int idx = idx1 * colloc_grid_order[0] + idx0; const double alpha = vars[0][PSSOLVE_DIFF_ORDER_00][idx] + 1.0; const double dalpha_r = vars[0][PSSOLVE_DIFF_ORDER_10][idx]; const double dalpha_t = vars[0][PSSOLVE_DIFF_ORDER_01][idx]; const double d2alpha_rr = vars[0][PSSOLVE_DIFF_ORDER_20][idx]; const double d2alpha_tt = vars[0][PSSOLVE_DIFF_ORDER_02][idx]; const double d2alpha_rt = vars[0][PSSOLVE_DIFF_ORDER_11][idx]; const double d2alpha[3][3] = { { d2alpha_rr, d2alpha_rt, 0.0 }, { d2alpha_rt, d2alpha_tt, 0.0 }, { 0.0, 0.0, 0.0 }, }; const double psi = max->psi[idx]; const double psi2 = SQR(psi); const double psi3 = psi * psi2; const double psi4 = SQR(psi2); const double dpsi_r = max->dpsi_r[idx]; const double dpsi_t = max->dpsi_t[idx]; const double g[3][3] = { { psi4, 0.0, 0.0 }, { 0.0, psi4 * r2, 0.0 }, { 0.0, 0.0, psi4 * r2 * st2}, }; const double gu[3][3] = { { 1.0 / psi4, 0.0, 0.0 }, { 0.0, 1.0 / (psi4 * r2), 0.0 }, { 0.0, 0.0, 1.0 / (psi4 * r2 * st2) }, }; const double dg[3][3][3] = { { { 4.0 * dpsi_r * psi3, 0.0, 0.0 }, { 0.0, 4.0 * dpsi_r * psi3 * r2 + 2.0 * r * psi4, 0.0 }, { 0.0, 0.0, 4.0 * dpsi_r * psi3 * r2 * st2 + 2.0 * r * psi4 * st2 }, }, { { 4.0 * dpsi_t * psi3, 0.0, 0.0 }, { 0.0, 4.0 * dpsi_t * psi3 * r2, 0.0 }, { 0.0, 0.0, 4.0 * dpsi_t * psi3 * r2 * st2 + 2.0 * psi4 * r2 * st * ct }, }, { { 0.0, 0.0, 0.0 }, { 0.0, 0.0, 0.0 }, { 0.0, 0.0, 0.0 }, }, }; const double krr = max->krr[idx]; const double kpp = max->kpp[idx]; const double krt = max->krt[idx]; const double Km[3][3] = { { krr, krt, 0.0 }, { krt / r2, -(krr + kpp), 0.0 }, { 0.0, 0.0, kpp }, }; double G[3][3][3], X[3]; double laplace_alpha, k2; for (int i = 0; i < 3; i++) for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) { double val = 0.0; for (int l = 0; l < 3; l++) val += gu[i][l] * (dg[k][j][l] + dg[j][k][l] - dg[l][j][k]); G[i][j][k] = 0.5 * val; } for (int i = 0; i < 3; i++) { double val = 0.0; for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) val += gu[j][k] * G[i][j][k]; X[i] = val; } laplace_alpha = 0.0; for (int i = 0; i < 3; i++) for (int j = 0; j < 3; j++) laplace_alpha += gu[i][j] * d2alpha[i][j]; laplace_alpha -= X[0] * dalpha_r + X[1] * dalpha_t; k2 = 0.0; for (int i = 0; i < 3; i++) for (int j = 0; j < 3; j++) k2 += Km[i][j] * Km[j][i]; dst[idx] = laplace_alpha - alpha * k2; } } return 0; } static int lapse_solve_max(const TDContext *td) { MaximalSlicingEval max = { NULL }; TDPriv *priv = td->priv; NLSolveContext *nl; double *coords_r = NULL; double *coords_t = NULL; double *coeffs; int ret = 0; ret = tdi_nlsolve_context_alloc(&nl, 1); if (ret < 0) { tdi_log(&priv->logger, 0, "Error allocating the non-linear solver\n"); return ret; } nl->logger = priv->logger; nl->tp = priv->tp; nl->maxiter = td->max_iter; nl->atol = td->atol; nl->basis[0][0] = priv->basis[0][0]; nl->basis[0][1] = priv->basis[0][1]; nl->solve_order[0][0] = priv->basis_order[0][0]; nl->solve_order[0][1] = priv->basis_order[0][1]; ret = tdi_nlsolve_context_init(nl); if (ret < 0) { tdi_log(&priv->logger, 0, "Error initializing the non-linear solver\n"); goto fail; } ret = posix_memalign((void**)&max.psi, 32, sizeof(*max.psi) * td->nb_coeffs[0] * td->nb_coeffs[1]); ret |= posix_memalign((void**)&max.dpsi_r, 32, sizeof(*max.psi) * td->nb_coeffs[0] * td->nb_coeffs[1]); ret |= posix_memalign((void**)&max.dpsi_t, 32, sizeof(*max.psi) * td->nb_coeffs[0] * td->nb_coeffs[1]); ret |= posix_memalign((void**)&max.krr, 32, sizeof(*max.krr) * td->nb_coeffs[0] * td->nb_coeffs[1]); ret |= posix_memalign((void**)&max.kpp, 32, sizeof(*max.kpp) * td->nb_coeffs[0] * td->nb_coeffs[1]); ret |= posix_memalign((void**)&max.krt, 32, sizeof(*max.krt) * td->nb_coeffs[0] * td->nb_coeffs[1]); ret |= posix_memalign((void**)&coords_r, 32, sizeof(*coords_r) * td->nb_coeffs[0] * td->nb_coeffs[1]); ret |= posix_memalign((void**)&coords_t, 32, sizeof(*coords_t) * td->nb_coeffs[0] * td->nb_coeffs[1]); ret |= posix_memalign((void**)&coeffs, 32, sizeof(*coeffs) * td->nb_coeffs[0] * td->nb_coeffs[1]); if (ret) { ret = -ENOMEM; goto fail; } for (int j = 0; j < td->nb_coeffs[1]; j++) { for (int i = 0; i < td->nb_coeffs[0]; i++) { coords_r[j * td->nb_coeffs[0] + i] = nl->colloc_grid[0][0][i]; coords_t[j * td->nb_coeffs[0] + i] = nl->colloc_grid[0][1][j]; } } td_eval_psi(td, td->nb_coeffs[0] * td->nb_coeffs[1], coords_r, coords_t, (const unsigned int [2]){ 0, 0 }, max.psi); td_eval_psi(td, td->nb_coeffs[0] * td->nb_coeffs[1], coords_r, coords_t, (const unsigned int [2]){ 1, 0 }, max.dpsi_r); td_eval_psi(td, td->nb_coeffs[0] * td->nb_coeffs[1], coords_r, coords_t, (const unsigned int [2]){ 0, 1 }, max.dpsi_t); td_eval_krr(td, td->nb_coeffs[0] * td->nb_coeffs[1], coords_r, coords_t, (const unsigned int [2]){ 0, 0 }, max.krr); td_eval_kpp(td, td->nb_coeffs[0] * td->nb_coeffs[1], coords_r, coords_t, (const unsigned int [2]){ 0, 0 }, max.kpp); td_eval_krt(td, td->nb_coeffs[0] * td->nb_coeffs[1], coords_r, coords_t, (const unsigned int [2]){ 0, 0 }, max.krt); ret = tdi_nlsolve_solve(nl, maximal_slicing_eval, NULL, &max, coeffs, 0); if (ret < 0) { tdi_log(&priv->logger, 0, "Error solving the maximal slicing equation\n"); goto fail; } priv->coeffs_lapse = coeffs; coeffs = NULL; fail: free(coords_r); free(coords_t); free(coeffs); free(max.psi); free(max.dpsi_r); free(max.dpsi_t); free(max.krr); free(max.kpp); free(max.krt); tdi_nlsolve_context_free(&nl); return ret; } int td_eval_lapse(const TDContext *td, size_t nb_coords, const double *r, const double *theta, const unsigned int diff_order[2], double *out) { TDPriv *priv = td->priv; const int nb_threads = tp_get_nb_threads(priv->tp); EvalVarData thread_data = { NULL }; int ret; if (!priv->coeffs_lapse) { ret = lapse_solve_max(td); if (ret < 0) return ret; } if (diff_order[0] > 2 || diff_order[1] > 2) { tdi_log(&priv->logger, 0, "Derivatives of higher order than 2 are not supported.\n"); return -ENOSYS; } for (int i = 0; i < ARRAY_ELEMS(thread_data.basis_val); i++) { const size_t alloc_elems = td->nb_coeffs[i] * nb_threads; ret = posix_memalign((void**)&thread_data.basis_val[i], 32, sizeof(*thread_data.basis_val[i]) * alloc_elems); if (ret != 0) { ret = -ENOMEM; goto fail; } memset(thread_data.basis_val[i], 0, sizeof(*thread_data.basis_val[i]) * alloc_elems); } thread_data.td = td; thread_data.r = r; thread_data.theta = theta; thread_data.out = out; thread_data.diff_order[0] = diff_order[0]; thread_data.diff_order[1] = diff_order[1]; thread_data.basis[0] = priv->basis[0][0]; thread_data.basis[1] = priv->basis[0][1]; thread_data.coeffs = priv->coeffs_lapse; thread_data.init_val = (diff_order[0] == 0 && diff_order[1] == 0) ? 1.0 : 0.0; tp_execute(priv->tp, nb_coords, eval_var_kernel, &thread_data); fail: free(thread_data.basis_val[0]); free(thread_data.basis_val[1]); return 0; } static int eval_var(const TDContext *td, unsigned int var_idx, size_t nb_coords, const double *r, const double *theta, const unsigned int diff_order[2], double add, double *out) { TDPriv *s = td->priv; const int nb_threads = tp_get_nb_threads(s->tp); EvalVarData thread_data = { NULL }; int ret = 0; if (diff_order[0] > 2 || diff_order[1] > 2) { tdi_log(&s->logger, 0, "Derivatives of higher order than 2 are not supported.\n"); return -ENOSYS; } for (int i = 0; i < ARRAY_ELEMS(thread_data.basis_val); i++) { const size_t alloc_elems = td->nb_coeffs[i] * nb_threads; ret = posix_memalign((void**)&thread_data.basis_val[i], 32, sizeof(*thread_data.basis_val[i]) * alloc_elems); if (ret != 0) { ret = -ENOMEM; goto fail; } memset(thread_data.basis_val[i], 0, sizeof(*thread_data.basis_val[i]) * alloc_elems); } thread_data.td = td; thread_data.r = r; thread_data.theta = theta; thread_data.out = out; thread_data.diff_order[0] = diff_order[0]; thread_data.diff_order[1] = diff_order[1]; thread_data.basis[0] = s->basis[var_idx][0]; thread_data.basis[1] = s->basis[var_idx][1]; thread_data.coeffs = td->coeffs[var_idx]; thread_data.init_val = add; tp_execute(s->tp, nb_coords, eval_var_kernel, &thread_data); fail: free(thread_data.basis_val[0]); free(thread_data.basis_val[1]); return ret; } int td_eval_psi(const TDContext *td, size_t nb_coords, const double *r, const double *theta, const unsigned int diff_order[2], double *out) { const double add = (diff_order[0] == 0 && diff_order[1] == 0) ? 1.0 : 0.0; return eval_var(td, 0, nb_coords, r, theta, diff_order, add, out); } int td_eval_krr(const TDContext *td, size_t nb_coords, const double *r, const double *theta, const unsigned int diff_order[2], double *out) { return eval_var(td, 1, nb_coords, r, theta, diff_order, 0.0, out); } int td_eval_kpp(const TDContext *td, size_t nb_coords, const double *r, const double *theta, const unsigned int diff_order[2], double *out) { return eval_var(td, 2, nb_coords, r, theta, diff_order, 0.0, out); } int td_eval_krt(const TDContext *td, size_t nb_coords, const double *r, const double *theta, const unsigned int diff_order[2], double *out) { static const double dummy_coord = 0.0; static const double *dummy_coords[2] = { &dummy_coord, &dummy_coord }; static const unsigned int nb_dummy_coords[2] = { 1, 1 }; TDConstraintEvalContext *ce; double (*eval)(TDConstraintEvalContext*, double, double); int ret; if (diff_order[0] == 0 && diff_order[1] == 0) eval = tdi_constraint_eval_k_rtheta; else if (diff_order[0] == 1 && diff_order[1] == 0) eval = tdi_constraint_eval_dk_rtheta_r; else if (diff_order[0] == 0 && diff_order[1] == 1) eval = tdi_constraint_eval_dk_rtheta_t; else return -ENOSYS; ret = tdi_ce_alloc(td, nb_dummy_coords, dummy_coords, td->amplitude, &ce); if (ret < 0) return ret; for (int i = 0; i < nb_coords; i++) { double theta_val = theta[i]; double r_val = r[i]; out[i] = eval(ce, r_val, theta_val); } tdi_constraint_eval_free(&ce); return 0; } int td_eval_residual(const TDContext *td, size_t nb_coords_r, const double *r, size_t nb_coords_theta, const double *theta, double *out[3]) { const unsigned int nb_coords[2] = { nb_coords_r, nb_coords_theta }; const double *coords[2] = { r, theta}; const size_t grid_size = nb_coords_r * nb_coords_theta; TDConstraintEvalContext *ce = NULL; double *r_2d = NULL, *theta_2d = NULL; double * vars [TD_CONSTRAINT_VAR_NB][PSSOLVE_DIFF_ORDER_NB] = {{ NULL }}; const double * vars_c[TD_CONSTRAINT_VAR_NB][PSSOLVE_DIFF_ORDER_NB] = {{ NULL }}; int ret; ret = posix_memalign((void**)&r_2d, 32, grid_size * sizeof(*r_2d)); ret |= posix_memalign((void**)&theta_2d, 32, grid_size * sizeof(*theta_2d)); if (ret) { ret = -ENOMEM; goto fail; } for (size_t j = 0; j < nb_coords_theta; j++) { for (size_t i = 0; i < nb_coords_r; i++) { const size_t idx = j * nb_coords_r + i; r_2d[idx] = r[i]; theta_2d[idx] = theta[j]; } } for (int var = 0; var < TD_CONSTRAINT_VAR_NB; var++) { for (int diff_order = 0; diff_order < PSSOLVE_DIFF_ORDER_NB; diff_order++) { const unsigned int diff_orders[2] = { tdi_pssolve_diff_order(diff_order, 0), tdi_pssolve_diff_order(diff_order, 1), }; ret = posix_memalign((void**)&vars[var][diff_order], 32, grid_size * sizeof(*vars[var][diff_order])); if (ret) { ret = -ENOMEM; goto fail; } ret = eval_var(td, var, grid_size, r_2d, theta_2d, diff_orders, 0.0, vars[var][diff_order]); if (ret) goto fail; vars_c[var][diff_order] = vars[var][diff_order]; } } ret = tdi_ce_alloc(td, nb_coords, coords, td->amplitude, &ce); if (ret < 0) goto fail; ret = tdi_constraint_eval(ce, TD_CONSTRAINT_EQ_HAM, vars_c, out[0]); ret |= tdi_constraint_eval(ce, TD_CONSTRAINT_EQ_MOM_0, vars_c, out[1]); ret |= tdi_constraint_eval(ce, TD_CONSTRAINT_EQ_MOM_1, vars_c, out[2]); if (ret) goto fail; ret = 0; fail: free(r_2d); free(theta_2d); tdi_constraint_eval_free(&ce); for (int diff_order = 0; diff_order < PSSOLVE_DIFF_ORDER_NB; diff_order++) { free(vars[0][diff_order]); free(vars[1][diff_order]); free(vars[2][diff_order]); } return ret; }