From bc9a2906fc30e2d73d6b4c23783df5e15c90278c Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Fri, 16 Sep 2022 11:49:55 +0200 Subject: Move eval code to its own file. --- Makefile | 1 + eval.c | 547 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ init.c | 558 ++----------------------------------------------------------- internal.h | 51 ++++++ 4 files changed, 609 insertions(+), 548 deletions(-) create mode 100644 eval.c create mode 100644 internal.h diff --git a/Makefile b/Makefile index 9e07249..7903842 100644 --- a/Makefile +++ b/Makefile @@ -9,6 +9,7 @@ OBJS = basis.o \ bicgstab.o \ cpu.o \ cpuid.o \ + eval.o \ init.o \ log.o \ nlsolve.o \ diff --git a/eval.c b/eval.c new file mode 100644 index 0000000..948468b --- /dev/null +++ b/eval.c @@ -0,0 +1,547 @@ +/* + * Copyright 2017-2022 Anton Khirnov + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include + +#include + +#include "basis.h" +#include "common.h" +#include "internal.h" +#include "log.h" +#include "pssolve.h" +#include "nlsolve.h" +#include "td_constraints.h" +#include "teukolsky_data.h" + +typedef struct EvalVarData { + const TDContext *td; + + const BasisSetContext *basis[2]; + + const double *coeffs; + + const double *theta; + const double *r; + + double *basis_val[2]; + double *out; + + double diff_order[2]; + double init_val; +} EvalVarData; + +typedef struct MaximalSlicingEval { + double *psi; + double *dpsi_r; + double *dpsi_t; + double *krr; + double *kpp; + double *krt; +} MaximalSlicingEval; + +static double scalarproduct_metric_c(size_t len1, size_t len2, double *mat, + double *vec1, double *vec2) +{ + double val = 0.0; + for (int m = 0; m < len2; m++) { + double tmp = 0.0; + for (int n = 0; n < len1; n++) + tmp += mat[m * len1 + n] * vec1[n]; + val += tmp * vec2[m]; + } + return val; +} + +static int eval_var_kernel(void *arg, unsigned int job_idx, unsigned int thread_idx) +{ + EvalVarData *d = arg; + const TDContext *td = d->td; + const TDPriv *s = td->priv; + + double *basis_val[2] = { + d->basis_val[0] + thread_idx * td->nb_coeffs[0], + d->basis_val[1] + thread_idx * td->nb_coeffs[1], + }; + + double theta_val = d->theta[job_idx]; + double r_val = d->r[job_idx]; + + double val = d->init_val; + + for (int k = 0; k < td->nb_coeffs[0]; k++) + basis_val[0][k] = tdi_basis_eval(d->basis[0], d->diff_order[0], r_val, k); + for (int k = 0; k < td->nb_coeffs[1]; k++) + basis_val[1][k] = tdi_basis_eval(d->basis[1], d->diff_order[1], theta_val, k); + + val += scalarproduct_metric_c(td->nb_coeffs[0], td->nb_coeffs[1], d->coeffs, + basis_val[0], basis_val[1]); + + d->out[job_idx] = val; +} + +static int maximal_slicing_eval(void *opaque, unsigned int eq_idx, + const unsigned int *colloc_grid_order, + const double * const *colloc_grid, + const double * const (*vars)[PSSOLVE_DIFF_ORDER_NB], + double *dst) +{ + MaximalSlicingEval *max = opaque; + + for (int idx1 = 0; idx1 < colloc_grid_order[1]; idx1++) { + const double theta = colloc_grid[1][idx1]; + const double st = sin(theta); + const double st2 = SQR(st); + const double ct = cos(theta); + + for (int idx0 = 0; idx0 < colloc_grid_order[0]; idx0++) { + const double r = colloc_grid[0][idx0]; + const double r2 = SQR(r); + + const int idx = idx1 * colloc_grid_order[0] + idx0; + + const double alpha = vars[0][PSSOLVE_DIFF_ORDER_00][idx] + 1.0; + const double dalpha_r = vars[0][PSSOLVE_DIFF_ORDER_10][idx]; + const double dalpha_t = vars[0][PSSOLVE_DIFF_ORDER_01][idx]; + const double d2alpha_rr = vars[0][PSSOLVE_DIFF_ORDER_20][idx]; + const double d2alpha_tt = vars[0][PSSOLVE_DIFF_ORDER_02][idx]; + const double d2alpha_rt = vars[0][PSSOLVE_DIFF_ORDER_11][idx]; + + const double d2alpha[3][3] = { + { d2alpha_rr, d2alpha_rt, 0.0 }, + { d2alpha_rt, d2alpha_tt, 0.0 }, + { 0.0, 0.0, 0.0 }, + }; + + const double psi = max->psi[idx]; + const double psi2 = SQR(psi); + const double psi3 = psi * psi2; + const double psi4 = SQR(psi2); + + const double dpsi_r = max->dpsi_r[idx]; + const double dpsi_t = max->dpsi_t[idx]; + + const double g[3][3] = { + { psi4, 0.0, 0.0 }, + { 0.0, psi4 * r2, 0.0 }, + { 0.0, 0.0, psi4 * r2 * st2}, + }; + const double gu[3][3] = { + { 1.0 / psi4, 0.0, 0.0 }, + { 0.0, 1.0 / (psi4 * r2), 0.0 }, + { 0.0, 0.0, 1.0 / (psi4 * r2 * st2) }, + }; + + const double dg[3][3][3] = { + { + { 4.0 * dpsi_r * psi3, 0.0, 0.0 }, + { 0.0, 4.0 * dpsi_r * psi3 * r2 + 2.0 * r * psi4, 0.0 }, + { 0.0, 0.0, 4.0 * dpsi_r * psi3 * r2 * st2 + 2.0 * r * psi4 * st2 }, + }, + { + { 4.0 * dpsi_t * psi3, 0.0, 0.0 }, + { 0.0, 4.0 * dpsi_t * psi3 * r2, 0.0 }, + { 0.0, 0.0, 4.0 * dpsi_t * psi3 * r2 * st2 + 2.0 * psi4 * r2 * st * ct }, + + }, + { + { 0.0, 0.0, 0.0 }, + { 0.0, 0.0, 0.0 }, + { 0.0, 0.0, 0.0 }, + }, + }; + + const double krr = max->krr[idx]; + const double kpp = max->kpp[idx]; + const double krt = max->krt[idx]; + + const double Km[3][3] = { + { krr, krt, 0.0 }, + { krt / r2, -(krr + kpp), 0.0 }, + { 0.0, 0.0, kpp }, + }; + + double G[3][3][3], X[3]; + double laplace_alpha, k2; + + for (int i = 0; i < 3; i++) + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) { + double val = 0.0; + + for (int l = 0; l < 3; l++) + val += gu[i][l] * (dg[k][j][l] + dg[j][k][l] - dg[l][j][k]); + + G[i][j][k] = 0.5 * val; + } + + for (int i = 0; i < 3; i++) { + double val = 0.0; + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) + val += gu[j][k] * G[i][j][k]; + X[i] = val; + } + + laplace_alpha = 0.0; + for (int i = 0; i < 3; i++) + for (int j = 0; j < 3; j++) + laplace_alpha += gu[i][j] * d2alpha[i][j]; + + laplace_alpha -= X[0] * dalpha_r + X[1] * dalpha_t; + + k2 = 0.0; + for (int i = 0; i < 3; i++) + for (int j = 0; j < 3; j++) + k2 += Km[i][j] * Km[j][i]; + + dst[idx] = laplace_alpha - alpha * k2; + } + } + + return 0; +} + +static int lapse_solve_max(const TDContext *td) +{ + MaximalSlicingEval max = { NULL }; + TDPriv *priv = td->priv; + NLSolveContext *nl; + double *coords_r = NULL; + double *coords_t = NULL; + double *coeffs; + int ret = 0; + + ret = tdi_nlsolve_context_alloc(&nl, 1); + + if (ret < 0) { + tdi_log(&priv->logger, 0, "Error allocating the non-linear solver\n"); + return ret; + } + + nl->logger = priv->logger; + nl->tp = priv->tp; + nl->maxiter = td->max_iter; + nl->atol = td->atol; + + nl->basis[0][0] = priv->basis[0][0]; + nl->basis[0][1] = priv->basis[0][1]; + nl->solve_order[0][0] = priv->basis_order[0][0]; + nl->solve_order[0][1] = priv->basis_order[0][1]; + + ret = tdi_nlsolve_context_init(nl); + if (ret < 0) { + tdi_log(&priv->logger, 0, "Error initializing the non-linear solver\n"); + goto fail; + } + + ret = posix_memalign((void**)&max.psi, 32, sizeof(*max.psi) * td->nb_coeffs[0] * td->nb_coeffs[1]); + ret |= posix_memalign((void**)&max.dpsi_r, 32, sizeof(*max.psi) * td->nb_coeffs[0] * td->nb_coeffs[1]); + ret |= posix_memalign((void**)&max.dpsi_t, 32, sizeof(*max.psi) * td->nb_coeffs[0] * td->nb_coeffs[1]); + ret |= posix_memalign((void**)&max.krr, 32, sizeof(*max.krr) * td->nb_coeffs[0] * td->nb_coeffs[1]); + ret |= posix_memalign((void**)&max.kpp, 32, sizeof(*max.kpp) * td->nb_coeffs[0] * td->nb_coeffs[1]); + ret |= posix_memalign((void**)&max.krt, 32, sizeof(*max.krt) * td->nb_coeffs[0] * td->nb_coeffs[1]); + ret |= posix_memalign((void**)&coords_r, 32, sizeof(*coords_r) * td->nb_coeffs[0] * td->nb_coeffs[1]); + ret |= posix_memalign((void**)&coords_t, 32, sizeof(*coords_t) * td->nb_coeffs[0] * td->nb_coeffs[1]); + ret |= posix_memalign((void**)&coeffs, 32, sizeof(*coeffs) * td->nb_coeffs[0] * td->nb_coeffs[1]); + if (ret) { + ret = -ENOMEM; + goto fail; + } + + for (int j = 0; j < td->nb_coeffs[1]; j++) { + for (int i = 0; i < td->nb_coeffs[0]; i++) { + coords_r[j * td->nb_coeffs[0] + i] = nl->colloc_grid[0][0][i]; + coords_t[j * td->nb_coeffs[0] + i] = nl->colloc_grid[0][1][j]; + } + } + + td_eval_psi(td, td->nb_coeffs[0] * td->nb_coeffs[1], coords_r, coords_t, + (const unsigned int [2]){ 0, 0 }, max.psi); + td_eval_psi(td, td->nb_coeffs[0] * td->nb_coeffs[1], coords_r, coords_t, + (const unsigned int [2]){ 1, 0 }, max.dpsi_r); + td_eval_psi(td, td->nb_coeffs[0] * td->nb_coeffs[1], coords_r, coords_t, + (const unsigned int [2]){ 0, 1 }, max.dpsi_t); + td_eval_krr(td, td->nb_coeffs[0] * td->nb_coeffs[1], coords_r, coords_t, + (const unsigned int [2]){ 0, 0 }, max.krr); + td_eval_kpp(td, td->nb_coeffs[0] * td->nb_coeffs[1], coords_r, coords_t, + (const unsigned int [2]){ 0, 0 }, max.kpp); + td_eval_krt(td, td->nb_coeffs[0] * td->nb_coeffs[1], coords_r, coords_t, + (const unsigned int [2]){ 0, 0 }, max.krt); + + ret = tdi_nlsolve_solve(nl, maximal_slicing_eval, NULL, &max, coeffs, 0); + if (ret < 0) { + tdi_log(&priv->logger, 0, "Error solving the maximal slicing equation\n"); + goto fail; + } + + priv->coeffs_lapse = coeffs; + coeffs = NULL; + +fail: + free(coords_r); + free(coords_t); + free(coeffs); + free(max.psi); + free(max.dpsi_r); + free(max.dpsi_t); + free(max.krr); + free(max.kpp); + free(max.krt); + + tdi_nlsolve_context_free(&nl); + return ret; +} + +int td_eval_lapse(const TDContext *td, + size_t nb_coords, const double *r, const double *theta, + const unsigned int diff_order[2], + double *out) +{ + TDPriv *priv = td->priv; + const int nb_threads = tp_get_nb_threads(priv->tp); + EvalVarData thread_data = { NULL }; + int ret; + + if (!priv->coeffs_lapse) { + ret = lapse_solve_max(td); + if (ret < 0) + return ret; + } + + if (diff_order[0] > 2 || diff_order[1] > 2) { + tdi_log(&priv->logger, 0, "Derivatives of higher order than 2 are not supported.\n"); + return -ENOSYS; + } + + for (int i = 0; i < ARRAY_ELEMS(thread_data.basis_val); i++) { + const size_t alloc_elems = td->nb_coeffs[i] * nb_threads; + ret = posix_memalign((void**)&thread_data.basis_val[i], 32, + sizeof(*thread_data.basis_val[i]) * alloc_elems); + if (ret != 0) { + ret = -ENOMEM; + goto fail; + } + memset(thread_data.basis_val[i], 0, + sizeof(*thread_data.basis_val[i]) * alloc_elems); + } + + thread_data.td = td; + thread_data.r = r; + thread_data.theta = theta; + thread_data.out = out; + thread_data.diff_order[0] = diff_order[0]; + thread_data.diff_order[1] = diff_order[1]; + thread_data.basis[0] = priv->basis[0][0]; + thread_data.basis[1] = priv->basis[0][1]; + thread_data.coeffs = priv->coeffs_lapse; + thread_data.init_val = (diff_order[0] == 0 && diff_order[1] == 0) ? 1.0 : 0.0; + + tp_execute(priv->tp, nb_coords, eval_var_kernel, &thread_data); + +fail: + free(thread_data.basis_val[0]); + free(thread_data.basis_val[1]); + + return 0; +} + +static int eval_var(const TDContext *td, unsigned int var_idx, + size_t nb_coords, const double *r, const double *theta, + const unsigned int diff_order[2], double add, + double *out) +{ + TDPriv *s = td->priv; + const int nb_threads = tp_get_nb_threads(s->tp); + EvalVarData thread_data = { NULL }; + int ret = 0; + + if (diff_order[0] > 2 || diff_order[1] > 2) { + tdi_log(&s->logger, 0, "Derivatives of higher order than 2 are not supported.\n"); + return -ENOSYS; + } + + for (int i = 0; i < ARRAY_ELEMS(thread_data.basis_val); i++) { + const size_t alloc_elems = td->nb_coeffs[i] * nb_threads; + ret = posix_memalign((void**)&thread_data.basis_val[i], 32, + sizeof(*thread_data.basis_val[i]) * alloc_elems); + if (ret != 0) { + ret = -ENOMEM; + goto fail; + } + memset(thread_data.basis_val[i], 0, + sizeof(*thread_data.basis_val[i]) * alloc_elems); + } + + thread_data.td = td; + thread_data.r = r; + thread_data.theta = theta; + thread_data.out = out; + thread_data.diff_order[0] = diff_order[0]; + thread_data.diff_order[1] = diff_order[1]; + thread_data.basis[0] = s->basis[var_idx][0]; + thread_data.basis[1] = s->basis[var_idx][1]; + thread_data.coeffs = td->coeffs[var_idx]; + thread_data.init_val = add; + + tp_execute(s->tp, nb_coords, eval_var_kernel, &thread_data); + +fail: + free(thread_data.basis_val[0]); + free(thread_data.basis_val[1]); + + return ret; +} + +int td_eval_psi(const TDContext *td, + size_t nb_coords, const double *r, const double *theta, + const unsigned int diff_order[2], + double *out) +{ + const double add = (diff_order[0] == 0 && diff_order[1] == 0) ? 1.0 : 0.0; + return eval_var(td, 0, nb_coords, r, theta, diff_order, add, out); +} + +int td_eval_krr(const TDContext *td, + size_t nb_coords, const double *r, const double *theta, + const unsigned int diff_order[2], + double *out) +{ + return eval_var(td, 1, nb_coords, r, theta, diff_order, 0.0, out); +} +int td_eval_kpp(const TDContext *td, + size_t nb_coords, const double *r, const double *theta, + const unsigned int diff_order[2], + double *out) +{ + return eval_var(td, 2, nb_coords, r, theta, diff_order, 0.0, out); +} + +int td_eval_krt(const TDContext *td, + size_t nb_coords, const double *r, const double *theta, + const unsigned int diff_order[2], + double *out) +{ + static const double dummy_coord = 0.0; + static const double *dummy_coords[2] = { &dummy_coord, &dummy_coord }; + static const unsigned int nb_dummy_coords[2] = { 1, 1 }; + + TDConstraintEvalContext *ce; + double (*eval)(TDConstraintEvalContext*, double, double); + int ret; + + if (diff_order[0] == 0 && diff_order[1] == 0) + eval = tdi_constraint_eval_k_rtheta; + else if (diff_order[0] == 1 && diff_order[1] == 0) + eval = tdi_constraint_eval_dk_rtheta_r; + else if (diff_order[0] == 0 && diff_order[1] == 1) + eval = tdi_constraint_eval_dk_rtheta_t; + else + return -ENOSYS; + + ret = tdi_ce_alloc(td, nb_dummy_coords, dummy_coords, td->amplitude, &ce); + if (ret < 0) + return ret; + + for (int i = 0; i < nb_coords; i++) { + double theta_val = theta[i]; + double r_val = r[i]; + + out[i] = eval(ce, r_val, theta_val); + } + + tdi_constraint_eval_free(&ce); + + return 0; +} + +int td_eval_residual(const TDContext *td, + size_t nb_coords_r, const double *r, + size_t nb_coords_theta, const double *theta, + double *out[3]) +{ + const unsigned int nb_coords[2] = { nb_coords_r, nb_coords_theta }; + const double *coords[2] = { r, theta}; + const size_t grid_size = nb_coords_r * nb_coords_theta; + + TDConstraintEvalContext *ce = NULL; + double *r_2d = NULL, *theta_2d = NULL; + double *vars[TD_CONSTRAINT_VAR_NB][PSSOLVE_DIFF_ORDER_NB] = {{ NULL }}; + int ret; + + ret = posix_memalign((void**)&r_2d, 32, grid_size * sizeof(*r_2d)); + ret |= posix_memalign((void**)&theta_2d, 32, grid_size * sizeof(*theta_2d)); + if (ret) { + ret = -ENOMEM; + goto fail; + } + + for (size_t j = 0; j < nb_coords_theta; j++) { + for (size_t i = 0; i < nb_coords_r; i++) { + const size_t idx = j * nb_coords_r + i; + r_2d[idx] = r[i]; + theta_2d[idx] = theta[j]; + } + } + + for (int var = 0; var < TD_CONSTRAINT_VAR_NB; var++) { + for (int diff_order = 0; diff_order < PSSOLVE_DIFF_ORDER_NB; diff_order++) { + const unsigned int diff_orders[2] = { + tdi_pssolve_diff_order(diff_order, 0), + tdi_pssolve_diff_order(diff_order, 1), + }; + + ret = posix_memalign((void**)&vars[var][diff_order], + 32, grid_size * sizeof(*vars[var][diff_order])); + if (ret) { + ret = -ENOMEM; + goto fail; + } + + ret = eval_var(td, var, grid_size, r_2d, theta_2d, diff_orders, + 0.0, vars[var][diff_order]); + if (ret) + goto fail; + } + } + + ret = tdi_ce_alloc(td, nb_coords, coords, td->amplitude, &ce); + if (ret < 0) + goto fail; + + ret = tdi_constraint_eval(ce, TD_CONSTRAINT_EQ_HAM, vars, out[0]); + ret |= tdi_constraint_eval(ce, TD_CONSTRAINT_EQ_MOM_0, vars, out[1]); + ret |= tdi_constraint_eval(ce, TD_CONSTRAINT_EQ_MOM_1, vars, out[2]); + if (ret) + goto fail; + + ret = 0; + +fail: + free(r_2d); + free(theta_2d); + tdi_constraint_eval_free(&ce); + for (int diff_order = 0; diff_order < PSSOLVE_DIFF_ORDER_NB; diff_order++) { + free(vars[0][diff_order]); + free(vars[1][diff_order]); + free(vars[2][diff_order]); + } + + return ret; +} diff --git a/init.c b/init.c index 0370d46..82f738b 100644 --- a/init.c +++ b/init.c @@ -31,32 +31,12 @@ #include "basis.h" #include "common.h" #include "cpu.h" +#include "internal.h" #include "log.h" #include "nlsolve.h" #include "td_constraints.h" #include "teukolsky_data.h" -#define NB_EQUATIONS 3 - -typedef struct TDPriv { - unsigned int basis_order[NB_EQUATIONS][2]; - BasisSetContext *basis[NB_EQUATIONS][2]; - - TPContext *tp; - TDLogger logger; - - double *coeffs; - double *coeffs_tmp; - ptrdiff_t coeffs_stride; - - double *coeffs_lapse; - - int cpu_flags; - - double (*scalarproduct_metric)(size_t len1, size_t len2, double *mat, - double *vec1, double *vec2); -} TDPriv; - double tdi_scalarproduct_metric_fma3(size_t len1, size_t len2, double *mat, double *vec1, double *vec2); double tdi_scalarproduct_metric_avx(size_t len1, size_t len2, double *mat, @@ -160,9 +140,9 @@ static int teukolsky_init_check_options(TDContext *td) return 0; } -static int constraint_eval_alloc(const TDContext *td, const unsigned int *nb_coords, - const double * const *coords, - double amplitude, TDConstraintEvalContext **pce) +int tdi_ce_alloc(const TDContext *td, const unsigned int *nb_coords, + const double * const *coords, + double amplitude, TDConstraintEvalContext **pce) { TDPriv *priv = td->priv; TDConstraintEvalContext *ce; @@ -253,7 +233,7 @@ int td_solve(TDContext *td, double *coeffs_init[3]) if (ret < 0) goto fail; - ret = constraint_eval_alloc(td, td->nb_coeffs, nl->colloc_grid[0], 0.0, &ce); + ret = tdi_ce_alloc(td, td->nb_coeffs, nl->colloc_grid[0], 0.0, &ce); if (ret < 0) goto fail; if (fabs(td->amplitude) >= ce->a_diverge) { @@ -275,7 +255,7 @@ int td_solve(TDContext *td, double *coeffs_init[3]) if (td->solution_branch == 0 || coeffs_init) { // direct solve with default (flat space) or user-provided initial guess - ret = constraint_eval_alloc(td, td->nb_coeffs, nl->colloc_grid[0], td->amplitude, &ce); + ret = tdi_ce_alloc(td, td->nb_coeffs, nl->colloc_grid[0], td->amplitude, &ce); if (ret < 0) goto fail; @@ -295,7 +275,7 @@ int td_solve(TDContext *td, double *coeffs_init[3]) double cur_amplitude, new_amplitude, step; - ret = constraint_eval_alloc(td, td->nb_coeffs, nl->colloc_grid[0], a0, &ce); + ret = tdi_ce_alloc(td, td->nb_coeffs, nl->colloc_grid[0], a0, &ce); if (ret < 0) goto fail; @@ -305,7 +285,7 @@ int td_solve(TDContext *td, double *coeffs_init[3]) if (ret < 0) goto fail; - ret = constraint_eval_alloc(td, td->nb_coeffs, nl->colloc_grid[0], a1, &ce); + ret = tdi_ce_alloc(td, td->nb_coeffs, nl->colloc_grid[0], a1, &ce); if (ret < 0) goto fail; @@ -319,7 +299,7 @@ int td_solve(TDContext *td, double *coeffs_init[3]) cblas_daxpy(N, -1.0, s->coeffs_tmp, 1, s->coeffs, 1); // obtain solution for a1 in the upper branch - ret = constraint_eval_alloc(td, td->nb_coeffs, nl->colloc_grid[0], a1, &ce); + ret = tdi_ce_alloc(td, td->nb_coeffs, nl->colloc_grid[0], a1, &ce); if (ret < 0) goto fail; @@ -340,7 +320,7 @@ int td_solve(TDContext *td, double *coeffs_init[3]) new_amplitude = td->amplitude; tdi_log(&s->logger, 2, "Trying amplitude %g\n", new_amplitude); - ret = constraint_eval_alloc(td, td->nb_coeffs, nl->colloc_grid[0], new_amplitude, &ce); + ret = tdi_ce_alloc(td, td->nb_coeffs, nl->colloc_grid[0], new_amplitude, &ce); if (ret < 0) goto fail; @@ -434,521 +414,3 @@ void td_context_free(TDContext **ptd) free(td); *ptd = NULL; } - -typedef struct MaximalSlicingEval { - double *psi; - double *dpsi_r; - double *dpsi_t; - double *krr; - double *kpp; - double *krt; -} MaximalSlicingEval; - -static int maximal_slicing_eval(void *opaque, unsigned int eq_idx, - const unsigned int *colloc_grid_order, - const double * const *colloc_grid, - const double * const (*vars)[PSSOLVE_DIFF_ORDER_NB], - double *dst) -{ - MaximalSlicingEval *max = opaque; - - for (int idx1 = 0; idx1 < colloc_grid_order[1]; idx1++) { - const double theta = colloc_grid[1][idx1]; - const double st = sin(theta); - const double st2 = SQR(st); - const double ct = cos(theta); - - for (int idx0 = 0; idx0 < colloc_grid_order[0]; idx0++) { - const double r = colloc_grid[0][idx0]; - const double r2 = SQR(r); - - const int idx = idx1 * colloc_grid_order[0] + idx0; - - const double alpha = vars[0][PSSOLVE_DIFF_ORDER_00][idx] + 1.0; - const double dalpha_r = vars[0][PSSOLVE_DIFF_ORDER_10][idx]; - const double dalpha_t = vars[0][PSSOLVE_DIFF_ORDER_01][idx]; - const double d2alpha_rr = vars[0][PSSOLVE_DIFF_ORDER_20][idx]; - const double d2alpha_tt = vars[0][PSSOLVE_DIFF_ORDER_02][idx]; - const double d2alpha_rt = vars[0][PSSOLVE_DIFF_ORDER_11][idx]; - - const double d2alpha[3][3] = { - { d2alpha_rr, d2alpha_rt, 0.0 }, - { d2alpha_rt, d2alpha_tt, 0.0 }, - { 0.0, 0.0, 0.0 }, - }; - - const double psi = max->psi[idx]; - const double psi2 = SQR(psi); - const double psi3 = psi * psi2; - const double psi4 = SQR(psi2); - - const double dpsi_r = max->dpsi_r[idx]; - const double dpsi_t = max->dpsi_t[idx]; - - const double g[3][3] = { - { psi4, 0.0, 0.0 }, - { 0.0, psi4 * r2, 0.0 }, - { 0.0, 0.0, psi4 * r2 * st2}, - }; - const double gu[3][3] = { - { 1.0 / psi4, 0.0, 0.0 }, - { 0.0, 1.0 / (psi4 * r2), 0.0 }, - { 0.0, 0.0, 1.0 / (psi4 * r2 * st2) }, - }; - - const double dg[3][3][3] = { - { - { 4.0 * dpsi_r * psi3, 0.0, 0.0 }, - { 0.0, 4.0 * dpsi_r * psi3 * r2 + 2.0 * r * psi4, 0.0 }, - { 0.0, 0.0, 4.0 * dpsi_r * psi3 * r2 * st2 + 2.0 * r * psi4 * st2 }, - }, - { - { 4.0 * dpsi_t * psi3, 0.0, 0.0 }, - { 0.0, 4.0 * dpsi_t * psi3 * r2, 0.0 }, - { 0.0, 0.0, 4.0 * dpsi_t * psi3 * r2 * st2 + 2.0 * psi4 * r2 * st * ct }, - - }, - { - { 0.0, 0.0, 0.0 }, - { 0.0, 0.0, 0.0 }, - { 0.0, 0.0, 0.0 }, - }, - }; - - const double krr = max->krr[idx]; - const double kpp = max->kpp[idx]; - const double krt = max->krt[idx]; - - const double Km[3][3] = { - { krr, krt, 0.0 }, - { krt / r2, -(krr + kpp), 0.0 }, - { 0.0, 0.0, kpp }, - }; - - double G[3][3][3], X[3]; - double laplace_alpha, k2; - - for (int i = 0; i < 3; i++) - for (int j = 0; j < 3; j++) - for (int k = 0; k < 3; k++) { - double val = 0.0; - - for (int l = 0; l < 3; l++) - val += gu[i][l] * (dg[k][j][l] + dg[j][k][l] - dg[l][j][k]); - - G[i][j][k] = 0.5 * val; - } - - for (int i = 0; i < 3; i++) { - double val = 0.0; - for (int j = 0; j < 3; j++) - for (int k = 0; k < 3; k++) - val += gu[j][k] * G[i][j][k]; - X[i] = val; - } - - laplace_alpha = 0.0; - for (int i = 0; i < 3; i++) - for (int j = 0; j < 3; j++) - laplace_alpha += gu[i][j] * d2alpha[i][j]; - - laplace_alpha -= X[0] * dalpha_r + X[1] * dalpha_t; - - k2 = 0.0; - for (int i = 0; i < 3; i++) - for (int j = 0; j < 3; j++) - k2 += Km[i][j] * Km[j][i]; - - dst[idx] = laplace_alpha - alpha * k2; - } - } - - return 0; -} - -static int lapse_solve_max(const TDContext *td) -{ - MaximalSlicingEval max = { NULL }; - TDPriv *priv = td->priv; - NLSolveContext *nl; - double *coords_r = NULL; - double *coords_t = NULL; - double *coeffs; - int ret = 0; - - ret = tdi_nlsolve_context_alloc(&nl, 1); - - if (ret < 0) { - tdi_log(&priv->logger, 0, "Error allocating the non-linear solver\n"); - return ret; - } - - nl->logger = priv->logger; - nl->tp = priv->tp; - nl->maxiter = td->max_iter; - nl->atol = td->atol; - - nl->basis[0][0] = priv->basis[0][0]; - nl->basis[0][1] = priv->basis[0][1]; - nl->solve_order[0][0] = priv->basis_order[0][0]; - nl->solve_order[0][1] = priv->basis_order[0][1]; - - ret = tdi_nlsolve_context_init(nl); - if (ret < 0) { - tdi_log(&priv->logger, 0, "Error initializing the non-linear solver\n"); - goto fail; - } - - ret = posix_memalign((void**)&max.psi, 32, sizeof(*max.psi) * td->nb_coeffs[0] * td->nb_coeffs[1]); - ret |= posix_memalign((void**)&max.dpsi_r, 32, sizeof(*max.psi) * td->nb_coeffs[0] * td->nb_coeffs[1]); - ret |= posix_memalign((void**)&max.dpsi_t, 32, sizeof(*max.psi) * td->nb_coeffs[0] * td->nb_coeffs[1]); - ret |= posix_memalign((void**)&max.krr, 32, sizeof(*max.krr) * td->nb_coeffs[0] * td->nb_coeffs[1]); - ret |= posix_memalign((void**)&max.kpp, 32, sizeof(*max.kpp) * td->nb_coeffs[0] * td->nb_coeffs[1]); - ret |= posix_memalign((void**)&max.krt, 32, sizeof(*max.krt) * td->nb_coeffs[0] * td->nb_coeffs[1]); - ret |= posix_memalign((void**)&coords_r, 32, sizeof(*coords_r) * td->nb_coeffs[0] * td->nb_coeffs[1]); - ret |= posix_memalign((void**)&coords_t, 32, sizeof(*coords_t) * td->nb_coeffs[0] * td->nb_coeffs[1]); - ret |= posix_memalign((void**)&coeffs, 32, sizeof(*coeffs) * td->nb_coeffs[0] * td->nb_coeffs[1]); - if (ret) { - ret = -ENOMEM; - goto fail; - } - - for (int j = 0; j < td->nb_coeffs[1]; j++) { - for (int i = 0; i < td->nb_coeffs[0]; i++) { - coords_r[j * td->nb_coeffs[0] + i] = nl->colloc_grid[0][0][i]; - coords_t[j * td->nb_coeffs[0] + i] = nl->colloc_grid[0][1][j]; - } - } - - td_eval_psi(td, td->nb_coeffs[0] * td->nb_coeffs[1], coords_r, coords_t, - (const unsigned int [2]){ 0, 0 }, max.psi); - td_eval_psi(td, td->nb_coeffs[0] * td->nb_coeffs[1], coords_r, coords_t, - (const unsigned int [2]){ 1, 0 }, max.dpsi_r); - td_eval_psi(td, td->nb_coeffs[0] * td->nb_coeffs[1], coords_r, coords_t, - (const unsigned int [2]){ 0, 1 }, max.dpsi_t); - td_eval_krr(td, td->nb_coeffs[0] * td->nb_coeffs[1], coords_r, coords_t, - (const unsigned int [2]){ 0, 0 }, max.krr); - td_eval_kpp(td, td->nb_coeffs[0] * td->nb_coeffs[1], coords_r, coords_t, - (const unsigned int [2]){ 0, 0 }, max.kpp); - td_eval_krt(td, td->nb_coeffs[0] * td->nb_coeffs[1], coords_r, coords_t, - (const unsigned int [2]){ 0, 0 }, max.krt); - - ret = tdi_nlsolve_solve(nl, maximal_slicing_eval, NULL, &max, coeffs, 0); - if (ret < 0) { - tdi_log(&priv->logger, 0, "Error solving the maximal slicing equation\n"); - goto fail; - } - - priv->coeffs_lapse = coeffs; - coeffs = NULL; - -fail: - free(coords_r); - free(coords_t); - free(coeffs); - free(max.psi); - free(max.dpsi_r); - free(max.dpsi_t); - free(max.krr); - free(max.kpp); - free(max.krt); - - tdi_nlsolve_context_free(&nl); - return ret; -} - -static double scalarproduct_metric_c(size_t len1, size_t len2, double *mat, - double *vec1, double *vec2) -{ - double val = 0.0; - for (int m = 0; m < len2; m++) { - double tmp = 0.0; - for (int n = 0; n < len1; n++) - tmp += mat[m * len1 + n] * vec1[n]; - val += tmp * vec2[m]; - } - return val; -} - -typedef struct EvalVarData { - const TDContext *td; - - const BasisSetContext *basis[2]; - - const double *coeffs; - - const double *theta; - const double *r; - - double *basis_val[2]; - double *out; - - double diff_order[2]; - double init_val; -} EvalVarData; - -static int eval_var_kernel(void *arg, unsigned int job_idx, unsigned int thread_idx) -{ - EvalVarData *d = arg; - const TDContext *td = d->td; - const TDPriv *s = td->priv; - - double *basis_val[2] = { - d->basis_val[0] + thread_idx * td->nb_coeffs[0], - d->basis_val[1] + thread_idx * td->nb_coeffs[1], - }; - - double theta_val = d->theta[job_idx]; - double r_val = d->r[job_idx]; - - double val = d->init_val; - - for (int k = 0; k < td->nb_coeffs[0]; k++) - basis_val[0][k] = tdi_basis_eval(d->basis[0], d->diff_order[0], r_val, k); - for (int k = 0; k < td->nb_coeffs[1]; k++) - basis_val[1][k] = tdi_basis_eval(d->basis[1], d->diff_order[1], theta_val, k); - - val += scalarproduct_metric_c(td->nb_coeffs[0], td->nb_coeffs[1], d->coeffs, - basis_val[0], basis_val[1]); - - d->out[job_idx] = val; -} - -static int eval_var(const TDContext *td, unsigned int var_idx, - size_t nb_coords, const double *r, const double *theta, - const unsigned int diff_order[2], double add, - double *out) -{ - TDPriv *s = td->priv; - const int nb_threads = tp_get_nb_threads(s->tp); - EvalVarData thread_data = { NULL }; - int ret = 0; - - if (diff_order[0] > 2 || diff_order[1] > 2) { - tdi_log(&s->logger, 0, "Derivatives of higher order than 2 are not supported.\n"); - return -ENOSYS; - } - - for (int i = 0; i < ARRAY_ELEMS(thread_data.basis_val); i++) { - const size_t alloc_elems = td->nb_coeffs[i] * nb_threads; - ret = posix_memalign((void**)&thread_data.basis_val[i], 32, - sizeof(*thread_data.basis_val[i]) * alloc_elems); - if (ret != 0) { - ret = -ENOMEM; - goto fail; - } - memset(thread_data.basis_val[i], 0, - sizeof(*thread_data.basis_val[i]) * alloc_elems); - } - - thread_data.td = td; - thread_data.r = r; - thread_data.theta = theta; - thread_data.out = out; - thread_data.diff_order[0] = diff_order[0]; - thread_data.diff_order[1] = diff_order[1]; - thread_data.basis[0] = s->basis[var_idx][0]; - thread_data.basis[1] = s->basis[var_idx][1]; - thread_data.coeffs = td->coeffs[var_idx]; - thread_data.init_val = add; - - tp_execute(s->tp, nb_coords, eval_var_kernel, &thread_data); - -fail: - free(thread_data.basis_val[0]); - free(thread_data.basis_val[1]); - - return ret; -} - -int td_eval_psi(const TDContext *td, - size_t nb_coords, const double *r, const double *theta, - const unsigned int diff_order[2], - double *out) -{ - const double add = (diff_order[0] == 0 && diff_order[1] == 0) ? 1.0 : 0.0; - return eval_var(td, 0, nb_coords, r, theta, diff_order, add, out); -} - -int td_eval_krr(const TDContext *td, - size_t nb_coords, const double *r, const double *theta, - const unsigned int diff_order[2], - double *out) -{ - return eval_var(td, 1, nb_coords, r, theta, diff_order, 0.0, out); -} -int td_eval_kpp(const TDContext *td, - size_t nb_coords, const double *r, const double *theta, - const unsigned int diff_order[2], - double *out) -{ - return eval_var(td, 2, nb_coords, r, theta, diff_order, 0.0, out); -} - -int td_eval_krt(const TDContext *td, - size_t nb_coords, const double *r, const double *theta, - const unsigned int diff_order[2], - double *out) -{ - static const double dummy_coord = 0.0; - static const double *dummy_coords[2] = { &dummy_coord, &dummy_coord }; - static const unsigned int nb_dummy_coords[2] = { 1, 1 }; - - TDConstraintEvalContext *ce; - double (*eval)(TDConstraintEvalContext*, double, double); - int ret; - - if (diff_order[0] == 0 && diff_order[1] == 0) - eval = tdi_constraint_eval_k_rtheta; - else if (diff_order[0] == 1 && diff_order[1] == 0) - eval = tdi_constraint_eval_dk_rtheta_r; - else if (diff_order[0] == 0 && diff_order[1] == 1) - eval = tdi_constraint_eval_dk_rtheta_t; - else - return -ENOSYS; - - ret = constraint_eval_alloc(td, nb_dummy_coords, dummy_coords, - td->amplitude, &ce); - if (ret < 0) - return ret; - - for (int i = 0; i < nb_coords; i++) { - double theta_val = theta[i]; - double r_val = r[i]; - - out[i] = eval(ce, r_val, theta_val); - } - - tdi_constraint_eval_free(&ce); - - return 0; -} - -int td_eval_lapse(const TDContext *td, - size_t nb_coords, const double *r, const double *theta, - const unsigned int diff_order[2], - double *out) -{ - TDPriv *priv = td->priv; - const int nb_threads = tp_get_nb_threads(priv->tp); - EvalVarData thread_data = { NULL }; - int ret; - - if (!priv->coeffs_lapse) { - ret = lapse_solve_max(td); - if (ret < 0) - return ret; - } - - if (diff_order[0] > 2 || diff_order[1] > 2) { - tdi_log(&priv->logger, 0, "Derivatives of higher order than 2 are not supported.\n"); - return -ENOSYS; - } - - for (int i = 0; i < ARRAY_ELEMS(thread_data.basis_val); i++) { - const size_t alloc_elems = td->nb_coeffs[i] * nb_threads; - ret = posix_memalign((void**)&thread_data.basis_val[i], 32, - sizeof(*thread_data.basis_val[i]) * alloc_elems); - if (ret != 0) { - ret = -ENOMEM; - goto fail; - } - memset(thread_data.basis_val[i], 0, - sizeof(*thread_data.basis_val[i]) * alloc_elems); - } - - thread_data.td = td; - thread_data.r = r; - thread_data.theta = theta; - thread_data.out = out; - thread_data.diff_order[0] = diff_order[0]; - thread_data.diff_order[1] = diff_order[1]; - thread_data.basis[0] = priv->basis[0][0]; - thread_data.basis[1] = priv->basis[0][1]; - thread_data.coeffs = priv->coeffs_lapse; - thread_data.init_val = (diff_order[0] == 0 && diff_order[1] == 0) ? 1.0 : 0.0; - - tp_execute(priv->tp, nb_coords, eval_var_kernel, &thread_data); - -fail: - free(thread_data.basis_val[0]); - free(thread_data.basis_val[1]); - - return 0; -} - -int td_eval_residual(const TDContext *td, - size_t nb_coords_r, const double *r, - size_t nb_coords_theta, const double *theta, - double *out[3]) -{ - const unsigned int nb_coords[2] = { nb_coords_r, nb_coords_theta }; - const double *coords[2] = { r, theta}; - const size_t grid_size = nb_coords_r * nb_coords_theta; - - TDConstraintEvalContext *ce = NULL; - double *r_2d = NULL, *theta_2d = NULL; - double *vars[TD_CONSTRAINT_VAR_NB][PSSOLVE_DIFF_ORDER_NB] = {{ NULL }}; - int ret; - - ret = posix_memalign((void**)&r_2d, 32, grid_size * sizeof(*r_2d)); - ret |= posix_memalign((void**)&theta_2d, 32, grid_size * sizeof(*theta_2d)); - if (ret) { - ret = -ENOMEM; - goto fail; - } - - for (size_t j = 0; j < nb_coords_theta; j++) { - for (size_t i = 0; i < nb_coords_r; i++) { - const size_t idx = j * nb_coords_r + i; - r_2d[idx] = r[i]; - theta_2d[idx] = theta[j]; - } - } - - for (int var = 0; var < TD_CONSTRAINT_VAR_NB; var++) { - for (int diff_order = 0; diff_order < PSSOLVE_DIFF_ORDER_NB; diff_order++) { - const unsigned int diff_orders[2] = { - tdi_pssolve_diff_order(diff_order, 0), - tdi_pssolve_diff_order(diff_order, 1), - }; - - ret = posix_memalign((void**)&vars[var][diff_order], - 32, grid_size * sizeof(*vars[var][diff_order])); - if (ret) { - ret = -ENOMEM; - goto fail; - } - - ret = eval_var(td, var, grid_size, r_2d, theta_2d, diff_orders, - 0.0, vars[var][diff_order]); - if (ret) - goto fail; - } - } - - ret = constraint_eval_alloc(td, nb_coords, coords, - td->amplitude, &ce); - if (ret < 0) - goto fail; - - ret = tdi_constraint_eval(ce, TD_CONSTRAINT_EQ_HAM, vars, out[0]); - ret |= tdi_constraint_eval(ce, TD_CONSTRAINT_EQ_MOM_0, vars, out[1]); - ret |= tdi_constraint_eval(ce, TD_CONSTRAINT_EQ_MOM_1, vars, out[2]); - if (ret) - goto fail; - - ret = 0; - -fail: - free(r_2d); - free(theta_2d); - tdi_constraint_eval_free(&ce); - for (int diff_order = 0; diff_order < PSSOLVE_DIFF_ORDER_NB; diff_order++) { - free(vars[0][diff_order]); - free(vars[1][diff_order]); - free(vars[2][diff_order]); - } - - return ret; -} diff --git a/internal.h b/internal.h new file mode 100644 index 0000000..155bd77 --- /dev/null +++ b/internal.h @@ -0,0 +1,51 @@ +/* + * Copyright 2017-2022 Anton Khirnov + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef TEUKOLSKY_DATA_INTERNAL_H +#define TEUKOLSKY_DATA_INTERNAL_H + +#include "basis.h" +#include "log.h" +#include "td_constraints.h" +#include "teukolsky_data.h" + +#define NB_EQUATIONS 3 + +typedef struct TDPriv { + unsigned int basis_order[NB_EQUATIONS][2]; + BasisSetContext *basis[NB_EQUATIONS][2]; + + TPContext *tp; + TDLogger logger; + + double *coeffs; + double *coeffs_tmp; + ptrdiff_t coeffs_stride; + + double *coeffs_lapse; + + int cpu_flags; + + double (*scalarproduct_metric)(size_t len1, size_t len2, double *mat, + double *vec1, double *vec2); +} TDPriv; + +int tdi_ce_alloc(const TDContext *td, const unsigned int *nb_coords, + const double * const *coords, + double amplitude, TDConstraintEvalContext **pce); + +#endif // TEUKOLSKY_DATA_INTERNAL_H -- cgit v1.2.3