aboutsummaryrefslogtreecommitdiff
path: root/eval.c
diff options
context:
space:
mode:
authorAnton Khirnov <anton@khirnov.net>2022-09-16 11:49:55 +0200
committerAnton Khirnov <anton@khirnov.net>2022-09-16 11:49:55 +0200
commitbc9a2906fc30e2d73d6b4c23783df5e15c90278c (patch)
tree0abbf54b83b618fa8606ac95c8180d14ace392b5 /eval.c
parent24f21b65e0ce76b824a57cdd3e84388d20115541 (diff)
Move eval code to its own file.
Diffstat (limited to 'eval.c')
-rw-r--r--eval.c547
1 files changed, 547 insertions, 0 deletions
diff --git a/eval.c b/eval.c
new file mode 100644
index 0000000..948468b
--- /dev/null
+++ b/eval.c
@@ -0,0 +1,547 @@
+/*
+ * Copyright 2017-2022 Anton Khirnov <anton@khirnov.net>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <threadpool.h>
+
+#include "basis.h"
+#include "common.h"
+#include "internal.h"
+#include "log.h"
+#include "pssolve.h"
+#include "nlsolve.h"
+#include "td_constraints.h"
+#include "teukolsky_data.h"
+
+typedef struct EvalVarData {
+ const TDContext *td;
+
+ const BasisSetContext *basis[2];
+
+ const double *coeffs;
+
+ const double *theta;
+ const double *r;
+
+ double *basis_val[2];
+ double *out;
+
+ double diff_order[2];
+ double init_val;
+} EvalVarData;
+
+typedef struct MaximalSlicingEval {
+ double *psi;
+ double *dpsi_r;
+ double *dpsi_t;
+ double *krr;
+ double *kpp;
+ double *krt;
+} MaximalSlicingEval;
+
+static double scalarproduct_metric_c(size_t len1, size_t len2, double *mat,
+ double *vec1, double *vec2)
+{
+ double val = 0.0;
+ for (int m = 0; m < len2; m++) {
+ double tmp = 0.0;
+ for (int n = 0; n < len1; n++)
+ tmp += mat[m * len1 + n] * vec1[n];
+ val += tmp * vec2[m];
+ }
+ return val;
+}
+
+static int eval_var_kernel(void *arg, unsigned int job_idx, unsigned int thread_idx)
+{
+ EvalVarData *d = arg;
+ const TDContext *td = d->td;
+ const TDPriv *s = td->priv;
+
+ double *basis_val[2] = {
+ d->basis_val[0] + thread_idx * td->nb_coeffs[0],
+ d->basis_val[1] + thread_idx * td->nb_coeffs[1],
+ };
+
+ double theta_val = d->theta[job_idx];
+ double r_val = d->r[job_idx];
+
+ double val = d->init_val;
+
+ for (int k = 0; k < td->nb_coeffs[0]; k++)
+ basis_val[0][k] = tdi_basis_eval(d->basis[0], d->diff_order[0], r_val, k);
+ for (int k = 0; k < td->nb_coeffs[1]; k++)
+ basis_val[1][k] = tdi_basis_eval(d->basis[1], d->diff_order[1], theta_val, k);
+
+ val += scalarproduct_metric_c(td->nb_coeffs[0], td->nb_coeffs[1], d->coeffs,
+ basis_val[0], basis_val[1]);
+
+ d->out[job_idx] = val;
+}
+
+static int maximal_slicing_eval(void *opaque, unsigned int eq_idx,
+ const unsigned int *colloc_grid_order,
+ const double * const *colloc_grid,
+ const double * const (*vars)[PSSOLVE_DIFF_ORDER_NB],
+ double *dst)
+{
+ MaximalSlicingEval *max = opaque;
+
+ for (int idx1 = 0; idx1 < colloc_grid_order[1]; idx1++) {
+ const double theta = colloc_grid[1][idx1];
+ const double st = sin(theta);
+ const double st2 = SQR(st);
+ const double ct = cos(theta);
+
+ for (int idx0 = 0; idx0 < colloc_grid_order[0]; idx0++) {
+ const double r = colloc_grid[0][idx0];
+ const double r2 = SQR(r);
+
+ const int idx = idx1 * colloc_grid_order[0] + idx0;
+
+ const double alpha = vars[0][PSSOLVE_DIFF_ORDER_00][idx] + 1.0;
+ const double dalpha_r = vars[0][PSSOLVE_DIFF_ORDER_10][idx];
+ const double dalpha_t = vars[0][PSSOLVE_DIFF_ORDER_01][idx];
+ const double d2alpha_rr = vars[0][PSSOLVE_DIFF_ORDER_20][idx];
+ const double d2alpha_tt = vars[0][PSSOLVE_DIFF_ORDER_02][idx];
+ const double d2alpha_rt = vars[0][PSSOLVE_DIFF_ORDER_11][idx];
+
+ const double d2alpha[3][3] = {
+ { d2alpha_rr, d2alpha_rt, 0.0 },
+ { d2alpha_rt, d2alpha_tt, 0.0 },
+ { 0.0, 0.0, 0.0 },
+ };
+
+ const double psi = max->psi[idx];
+ const double psi2 = SQR(psi);
+ const double psi3 = psi * psi2;
+ const double psi4 = SQR(psi2);
+
+ const double dpsi_r = max->dpsi_r[idx];
+ const double dpsi_t = max->dpsi_t[idx];
+
+ const double g[3][3] = {
+ { psi4, 0.0, 0.0 },
+ { 0.0, psi4 * r2, 0.0 },
+ { 0.0, 0.0, psi4 * r2 * st2},
+ };
+ const double gu[3][3] = {
+ { 1.0 / psi4, 0.0, 0.0 },
+ { 0.0, 1.0 / (psi4 * r2), 0.0 },
+ { 0.0, 0.0, 1.0 / (psi4 * r2 * st2) },
+ };
+
+ const double dg[3][3][3] = {
+ {
+ { 4.0 * dpsi_r * psi3, 0.0, 0.0 },
+ { 0.0, 4.0 * dpsi_r * psi3 * r2 + 2.0 * r * psi4, 0.0 },
+ { 0.0, 0.0, 4.0 * dpsi_r * psi3 * r2 * st2 + 2.0 * r * psi4 * st2 },
+ },
+ {
+ { 4.0 * dpsi_t * psi3, 0.0, 0.0 },
+ { 0.0, 4.0 * dpsi_t * psi3 * r2, 0.0 },
+ { 0.0, 0.0, 4.0 * dpsi_t * psi3 * r2 * st2 + 2.0 * psi4 * r2 * st * ct },
+
+ },
+ {
+ { 0.0, 0.0, 0.0 },
+ { 0.0, 0.0, 0.0 },
+ { 0.0, 0.0, 0.0 },
+ },
+ };
+
+ const double krr = max->krr[idx];
+ const double kpp = max->kpp[idx];
+ const double krt = max->krt[idx];
+
+ const double Km[3][3] = {
+ { krr, krt, 0.0 },
+ { krt / r2, -(krr + kpp), 0.0 },
+ { 0.0, 0.0, kpp },
+ };
+
+ double G[3][3][3], X[3];
+ double laplace_alpha, k2;
+
+ for (int i = 0; i < 3; i++)
+ for (int j = 0; j < 3; j++)
+ for (int k = 0; k < 3; k++) {
+ double val = 0.0;
+
+ for (int l = 0; l < 3; l++)
+ val += gu[i][l] * (dg[k][j][l] + dg[j][k][l] - dg[l][j][k]);
+
+ G[i][j][k] = 0.5 * val;
+ }
+
+ for (int i = 0; i < 3; i++) {
+ double val = 0.0;
+ for (int j = 0; j < 3; j++)
+ for (int k = 0; k < 3; k++)
+ val += gu[j][k] * G[i][j][k];
+ X[i] = val;
+ }
+
+ laplace_alpha = 0.0;
+ for (int i = 0; i < 3; i++)
+ for (int j = 0; j < 3; j++)
+ laplace_alpha += gu[i][j] * d2alpha[i][j];
+
+ laplace_alpha -= X[0] * dalpha_r + X[1] * dalpha_t;
+
+ k2 = 0.0;
+ for (int i = 0; i < 3; i++)
+ for (int j = 0; j < 3; j++)
+ k2 += Km[i][j] * Km[j][i];
+
+ dst[idx] = laplace_alpha - alpha * k2;
+ }
+ }
+
+ return 0;
+}
+
+static int lapse_solve_max(const TDContext *td)
+{
+ MaximalSlicingEval max = { NULL };
+ TDPriv *priv = td->priv;
+ NLSolveContext *nl;
+ double *coords_r = NULL;
+ double *coords_t = NULL;
+ double *coeffs;
+ int ret = 0;
+
+ ret = tdi_nlsolve_context_alloc(&nl, 1);
+
+ if (ret < 0) {
+ tdi_log(&priv->logger, 0, "Error allocating the non-linear solver\n");
+ return ret;
+ }
+
+ nl->logger = priv->logger;
+ nl->tp = priv->tp;
+ nl->maxiter = td->max_iter;
+ nl->atol = td->atol;
+
+ nl->basis[0][0] = priv->basis[0][0];
+ nl->basis[0][1] = priv->basis[0][1];
+ nl->solve_order[0][0] = priv->basis_order[0][0];
+ nl->solve_order[0][1] = priv->basis_order[0][1];
+
+ ret = tdi_nlsolve_context_init(nl);
+ if (ret < 0) {
+ tdi_log(&priv->logger, 0, "Error initializing the non-linear solver\n");
+ goto fail;
+ }
+
+ ret = posix_memalign((void**)&max.psi, 32, sizeof(*max.psi) * td->nb_coeffs[0] * td->nb_coeffs[1]);
+ ret |= posix_memalign((void**)&max.dpsi_r, 32, sizeof(*max.psi) * td->nb_coeffs[0] * td->nb_coeffs[1]);
+ ret |= posix_memalign((void**)&max.dpsi_t, 32, sizeof(*max.psi) * td->nb_coeffs[0] * td->nb_coeffs[1]);
+ ret |= posix_memalign((void**)&max.krr, 32, sizeof(*max.krr) * td->nb_coeffs[0] * td->nb_coeffs[1]);
+ ret |= posix_memalign((void**)&max.kpp, 32, sizeof(*max.kpp) * td->nb_coeffs[0] * td->nb_coeffs[1]);
+ ret |= posix_memalign((void**)&max.krt, 32, sizeof(*max.krt) * td->nb_coeffs[0] * td->nb_coeffs[1]);
+ ret |= posix_memalign((void**)&coords_r, 32, sizeof(*coords_r) * td->nb_coeffs[0] * td->nb_coeffs[1]);
+ ret |= posix_memalign((void**)&coords_t, 32, sizeof(*coords_t) * td->nb_coeffs[0] * td->nb_coeffs[1]);
+ ret |= posix_memalign((void**)&coeffs, 32, sizeof(*coeffs) * td->nb_coeffs[0] * td->nb_coeffs[1]);
+ if (ret) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ for (int j = 0; j < td->nb_coeffs[1]; j++) {
+ for (int i = 0; i < td->nb_coeffs[0]; i++) {
+ coords_r[j * td->nb_coeffs[0] + i] = nl->colloc_grid[0][0][i];
+ coords_t[j * td->nb_coeffs[0] + i] = nl->colloc_grid[0][1][j];
+ }
+ }
+
+ td_eval_psi(td, td->nb_coeffs[0] * td->nb_coeffs[1], coords_r, coords_t,
+ (const unsigned int [2]){ 0, 0 }, max.psi);
+ td_eval_psi(td, td->nb_coeffs[0] * td->nb_coeffs[1], coords_r, coords_t,
+ (const unsigned int [2]){ 1, 0 }, max.dpsi_r);
+ td_eval_psi(td, td->nb_coeffs[0] * td->nb_coeffs[1], coords_r, coords_t,
+ (const unsigned int [2]){ 0, 1 }, max.dpsi_t);
+ td_eval_krr(td, td->nb_coeffs[0] * td->nb_coeffs[1], coords_r, coords_t,
+ (const unsigned int [2]){ 0, 0 }, max.krr);
+ td_eval_kpp(td, td->nb_coeffs[0] * td->nb_coeffs[1], coords_r, coords_t,
+ (const unsigned int [2]){ 0, 0 }, max.kpp);
+ td_eval_krt(td, td->nb_coeffs[0] * td->nb_coeffs[1], coords_r, coords_t,
+ (const unsigned int [2]){ 0, 0 }, max.krt);
+
+ ret = tdi_nlsolve_solve(nl, maximal_slicing_eval, NULL, &max, coeffs, 0);
+ if (ret < 0) {
+ tdi_log(&priv->logger, 0, "Error solving the maximal slicing equation\n");
+ goto fail;
+ }
+
+ priv->coeffs_lapse = coeffs;
+ coeffs = NULL;
+
+fail:
+ free(coords_r);
+ free(coords_t);
+ free(coeffs);
+ free(max.psi);
+ free(max.dpsi_r);
+ free(max.dpsi_t);
+ free(max.krr);
+ free(max.kpp);
+ free(max.krt);
+
+ tdi_nlsolve_context_free(&nl);
+ return ret;
+}
+
+int td_eval_lapse(const TDContext *td,
+ size_t nb_coords, const double *r, const double *theta,
+ const unsigned int diff_order[2],
+ double *out)
+{
+ TDPriv *priv = td->priv;
+ const int nb_threads = tp_get_nb_threads(priv->tp);
+ EvalVarData thread_data = { NULL };
+ int ret;
+
+ if (!priv->coeffs_lapse) {
+ ret = lapse_solve_max(td);
+ if (ret < 0)
+ return ret;
+ }
+
+ if (diff_order[0] > 2 || diff_order[1] > 2) {
+ tdi_log(&priv->logger, 0, "Derivatives of higher order than 2 are not supported.\n");
+ return -ENOSYS;
+ }
+
+ for (int i = 0; i < ARRAY_ELEMS(thread_data.basis_val); i++) {
+ const size_t alloc_elems = td->nb_coeffs[i] * nb_threads;
+ ret = posix_memalign((void**)&thread_data.basis_val[i], 32,
+ sizeof(*thread_data.basis_val[i]) * alloc_elems);
+ if (ret != 0) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+ memset(thread_data.basis_val[i], 0,
+ sizeof(*thread_data.basis_val[i]) * alloc_elems);
+ }
+
+ thread_data.td = td;
+ thread_data.r = r;
+ thread_data.theta = theta;
+ thread_data.out = out;
+ thread_data.diff_order[0] = diff_order[0];
+ thread_data.diff_order[1] = diff_order[1];
+ thread_data.basis[0] = priv->basis[0][0];
+ thread_data.basis[1] = priv->basis[0][1];
+ thread_data.coeffs = priv->coeffs_lapse;
+ thread_data.init_val = (diff_order[0] == 0 && diff_order[1] == 0) ? 1.0 : 0.0;
+
+ tp_execute(priv->tp, nb_coords, eval_var_kernel, &thread_data);
+
+fail:
+ free(thread_data.basis_val[0]);
+ free(thread_data.basis_val[1]);
+
+ return 0;
+}
+
+static int eval_var(const TDContext *td, unsigned int var_idx,
+ size_t nb_coords, const double *r, const double *theta,
+ const unsigned int diff_order[2], double add,
+ double *out)
+{
+ TDPriv *s = td->priv;
+ const int nb_threads = tp_get_nb_threads(s->tp);
+ EvalVarData thread_data = { NULL };
+ int ret = 0;
+
+ if (diff_order[0] > 2 || diff_order[1] > 2) {
+ tdi_log(&s->logger, 0, "Derivatives of higher order than 2 are not supported.\n");
+ return -ENOSYS;
+ }
+
+ for (int i = 0; i < ARRAY_ELEMS(thread_data.basis_val); i++) {
+ const size_t alloc_elems = td->nb_coeffs[i] * nb_threads;
+ ret = posix_memalign((void**)&thread_data.basis_val[i], 32,
+ sizeof(*thread_data.basis_val[i]) * alloc_elems);
+ if (ret != 0) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+ memset(thread_data.basis_val[i], 0,
+ sizeof(*thread_data.basis_val[i]) * alloc_elems);
+ }
+
+ thread_data.td = td;
+ thread_data.r = r;
+ thread_data.theta = theta;
+ thread_data.out = out;
+ thread_data.diff_order[0] = diff_order[0];
+ thread_data.diff_order[1] = diff_order[1];
+ thread_data.basis[0] = s->basis[var_idx][0];
+ thread_data.basis[1] = s->basis[var_idx][1];
+ thread_data.coeffs = td->coeffs[var_idx];
+ thread_data.init_val = add;
+
+ tp_execute(s->tp, nb_coords, eval_var_kernel, &thread_data);
+
+fail:
+ free(thread_data.basis_val[0]);
+ free(thread_data.basis_val[1]);
+
+ return ret;
+}
+
+int td_eval_psi(const TDContext *td,
+ size_t nb_coords, const double *r, const double *theta,
+ const unsigned int diff_order[2],
+ double *out)
+{
+ const double add = (diff_order[0] == 0 && diff_order[1] == 0) ? 1.0 : 0.0;
+ return eval_var(td, 0, nb_coords, r, theta, diff_order, add, out);
+}
+
+int td_eval_krr(const TDContext *td,
+ size_t nb_coords, const double *r, const double *theta,
+ const unsigned int diff_order[2],
+ double *out)
+{
+ return eval_var(td, 1, nb_coords, r, theta, diff_order, 0.0, out);
+}
+int td_eval_kpp(const TDContext *td,
+ size_t nb_coords, const double *r, const double *theta,
+ const unsigned int diff_order[2],
+ double *out)
+{
+ return eval_var(td, 2, nb_coords, r, theta, diff_order, 0.0, out);
+}
+
+int td_eval_krt(const TDContext *td,
+ size_t nb_coords, const double *r, const double *theta,
+ const unsigned int diff_order[2],
+ double *out)
+{
+ static const double dummy_coord = 0.0;
+ static const double *dummy_coords[2] = { &dummy_coord, &dummy_coord };
+ static const unsigned int nb_dummy_coords[2] = { 1, 1 };
+
+ TDConstraintEvalContext *ce;
+ double (*eval)(TDConstraintEvalContext*, double, double);
+ int ret;
+
+ if (diff_order[0] == 0 && diff_order[1] == 0)
+ eval = tdi_constraint_eval_k_rtheta;
+ else if (diff_order[0] == 1 && diff_order[1] == 0)
+ eval = tdi_constraint_eval_dk_rtheta_r;
+ else if (diff_order[0] == 0 && diff_order[1] == 1)
+ eval = tdi_constraint_eval_dk_rtheta_t;
+ else
+ return -ENOSYS;
+
+ ret = tdi_ce_alloc(td, nb_dummy_coords, dummy_coords, td->amplitude, &ce);
+ if (ret < 0)
+ return ret;
+
+ for (int i = 0; i < nb_coords; i++) {
+ double theta_val = theta[i];
+ double r_val = r[i];
+
+ out[i] = eval(ce, r_val, theta_val);
+ }
+
+ tdi_constraint_eval_free(&ce);
+
+ return 0;
+}
+
+int td_eval_residual(const TDContext *td,
+ size_t nb_coords_r, const double *r,
+ size_t nb_coords_theta, const double *theta,
+ double *out[3])
+{
+ const unsigned int nb_coords[2] = { nb_coords_r, nb_coords_theta };
+ const double *coords[2] = { r, theta};
+ const size_t grid_size = nb_coords_r * nb_coords_theta;
+
+ TDConstraintEvalContext *ce = NULL;
+ double *r_2d = NULL, *theta_2d = NULL;
+ double *vars[TD_CONSTRAINT_VAR_NB][PSSOLVE_DIFF_ORDER_NB] = {{ NULL }};
+ int ret;
+
+ ret = posix_memalign((void**)&r_2d, 32, grid_size * sizeof(*r_2d));
+ ret |= posix_memalign((void**)&theta_2d, 32, grid_size * sizeof(*theta_2d));
+ if (ret) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ for (size_t j = 0; j < nb_coords_theta; j++) {
+ for (size_t i = 0; i < nb_coords_r; i++) {
+ const size_t idx = j * nb_coords_r + i;
+ r_2d[idx] = r[i];
+ theta_2d[idx] = theta[j];
+ }
+ }
+
+ for (int var = 0; var < TD_CONSTRAINT_VAR_NB; var++) {
+ for (int diff_order = 0; diff_order < PSSOLVE_DIFF_ORDER_NB; diff_order++) {
+ const unsigned int diff_orders[2] = {
+ tdi_pssolve_diff_order(diff_order, 0),
+ tdi_pssolve_diff_order(diff_order, 1),
+ };
+
+ ret = posix_memalign((void**)&vars[var][diff_order],
+ 32, grid_size * sizeof(*vars[var][diff_order]));
+ if (ret) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ ret = eval_var(td, var, grid_size, r_2d, theta_2d, diff_orders,
+ 0.0, vars[var][diff_order]);
+ if (ret)
+ goto fail;
+ }
+ }
+
+ ret = tdi_ce_alloc(td, nb_coords, coords, td->amplitude, &ce);
+ if (ret < 0)
+ goto fail;
+
+ ret = tdi_constraint_eval(ce, TD_CONSTRAINT_EQ_HAM, vars, out[0]);
+ ret |= tdi_constraint_eval(ce, TD_CONSTRAINT_EQ_MOM_0, vars, out[1]);
+ ret |= tdi_constraint_eval(ce, TD_CONSTRAINT_EQ_MOM_1, vars, out[2]);
+ if (ret)
+ goto fail;
+
+ ret = 0;
+
+fail:
+ free(r_2d);
+ free(theta_2d);
+ tdi_constraint_eval_free(&ce);
+ for (int diff_order = 0; diff_order < PSSOLVE_DIFF_ORDER_NB; diff_order++) {
+ free(vars[0][diff_order]);
+ free(vars[1][diff_order]);
+ free(vars[2][diff_order]);
+ }
+
+ return ret;
+}