/* * Copyright 2017 Anton Khirnov * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #include "config.h" #include #include #include #include #include #include #include #if HAVE_OPENCL #include #include #endif #include "basis.h" #include "common.h" #include "cpu.h" #include "log.h" #include "nlsolve.h" #include "td_constraints.h" #include "teukolsky_data.h" #include "threadpool.h" #define NB_EQUATIONS 3 typedef struct TDPriv { unsigned int basis_order[NB_EQUATIONS][2]; BasisSetContext *basis[NB_EQUATIONS][2]; ThreadPoolContext *tp; TDLogger logger; double *coeffs; double *coeffs_tmp; ptrdiff_t coeffs_stride; double *coeffs_lapse; int cpu_flags; double (*scalarproduct_metric)(size_t len1, size_t len2, double *mat, double *vec1, double *vec2); } TDPriv; double tdi_scalarproduct_metric_fma3(size_t len1, size_t len2, double *mat, double *vec1, double *vec2); double tdi_scalarproduct_metric_avx(size_t len1, size_t len2, double *mat, double *vec1, double *vec2); double tdi_scalarproduct_metric_sse3(size_t len1, size_t len2, double *mat, double *vec1, double *vec2); double tdi_scalarproduct_metric_c(size_t len1, size_t len2, double *mat, double *vec1, double *vec2); static void init_opencl(TDPriv *s) #if HAVE_OPENCL { int err, count; cl_platform_id platform; cl_context_properties props[3]; cl_device_id ocl_device; err = clGetPlatformIDs(1, &platform, &count); if (err != CL_SUCCESS || count < 1) { tdi_log(&s->logger, 0, "Could not get an OpenCL platform ID\n"); return; } err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &ocl_device, &count); if (err != CL_SUCCESS || count < 1) { tdi_log(&s->logger, 0, "Could not get an OpenCL device ID\n"); return; } props[0] = CL_CONTEXT_PLATFORM; props[1] = (cl_context_properties)platform; props[2] = 0; s->ocl_ctx = clCreateContext(props, 1, &ocl_device, NULL, NULL, &err); if (err != CL_SUCCESS || !s->ocl_ctx) { tdi_log(&s->logger, 0, "Could not create an OpenCL context\n"); return; } s->ocl_queue = clCreateCommandQueue(s->ocl_ctx, ocl_device, 0, &err); if (err != CL_SUCCESS || !s->ocl_queue) { tdi_log(&s->logger, 0, "Could not create an OpenCL command queue: %d\n", err); goto fail; } err = clblasSetup(); if (err != CL_SUCCESS) { tdi_log(&s->logger, 0, "Error setting up clBLAS\n"); goto fail; } return; fail: if (s->ocl_queue) clReleaseCommandQueue(s->ocl_queue); s->ocl_queue = 0; if (s->ocl_ctx) clReleaseContext(s->ocl_ctx); s->ocl_ctx = 0; } #else { } #endif static const enum BasisFamily basis_sets[NB_EQUATIONS][2] = { { BASIS_FAMILY_SB_EVEN, BASIS_FAMILY_COS_EVEN }, { BASIS_FAMILY_SB_EVEN, BASIS_FAMILY_COS_EVEN }, { BASIS_FAMILY_SB_EVEN, BASIS_FAMILY_COS_EVEN }, }; static void log_callback(TDLogger *log, int level, const char *fmt, va_list vl) { TDContext *td = log->opaque; td->log_callback(td, level, fmt, vl); } static int teukolsky_init_check_options(TDContext *td) { TDPriv *s = td->priv; int ret; s->cpu_flags = tdi_init_cpu_flags(); if (!td->nb_threads) { td->nb_threads = tdi_cpu_count(); if (!td->nb_threads) td->nb_threads = 1; } ret = tdi_threadpool_init(&s->tp, td->nb_threads); if (ret < 0) return ret; init_opencl(s); s->logger.log = log_callback; s->logger.opaque = td; //s->scalarproduct_metric = tdi_scalarproduct_metric_c; //if (EXTERNAL_SSE3(s->cpu_flags)) // s->scalarproduct_metric = tdi_scalarproduct_metric_sse3; //if (EXTERNAL_AVX(s->cpu_flags)) // s->scalarproduct_metric = tdi_scalarproduct_metric_avx; //if (EXTERNAL_FMA3(s->cpu_flags)) // s->scalarproduct_metric = tdi_scalarproduct_metric_fma3; // for (int i = 0; i < ctx->nb_equations; i++) // for (int j = 0; j < 2; j++) { // s->ps_ctx->basis[i][j] = ctx->basis[i][j]; // s->ps_ctx->solve_order[i][j] = basis_order[i][j]; // max_order = MAX(max_order, basis_order[i][j]); // } // //for (int i = 0; i < max_order; i++) { // tdi_log(&s->logger, 2, "%d ", i); // for (int j = 0; j < ctx->nb_equations; j++) // for (int k = 0; k < 2; k++) { // if (i < s->ps_ctx->solve_order[j][k]) // tdi_log(&s->logger, 2, "%8.8g\t", s->ps_ctx->colloc_grid[j][k][i]); // else // tdi_log(&s->logger, 2, " "); // } // tdi_log(&s->logger, 2, "\n"); //} s->basis_order[0][0] = td->nb_coeffs[0]; s->basis_order[0][1] = td->nb_coeffs[1]; s->basis_order[1][0] = td->nb_coeffs[0]; s->basis_order[1][1] = td->nb_coeffs[1]; s->basis_order[2][0] = td->nb_coeffs[0]; s->basis_order[2][1] = td->nb_coeffs[1]; ret = posix_memalign((void**)&s->coeffs, 32, sizeof(*s->coeffs) * NB_EQUATIONS * td->nb_coeffs[0] * td->nb_coeffs[1]); if (ret) return -ENOMEM; memset(s->coeffs, 0, sizeof(*s->coeffs) * NB_EQUATIONS * td->nb_coeffs[0] * td->nb_coeffs[1]); if (td->solution_branch > 0) { ret = posix_memalign((void**)&s->coeffs_tmp, 32, sizeof(*s->coeffs_tmp) * NB_EQUATIONS * td->nb_coeffs[0] * td->nb_coeffs[1]); if (ret) return -ENOMEM; memset(s->coeffs_tmp, 0, sizeof(*s->coeffs_tmp) * NB_EQUATIONS * td->nb_coeffs[0] * td->nb_coeffs[1]); } for (int i = 0; i < NB_EQUATIONS; i++) td->coeffs[i] = s->coeffs + i * td->nb_coeffs[0] * td->nb_coeffs[1]; for (int i = 0; i < ARRAY_ELEMS(basis_sets); i++) for (int j = 0; j < ARRAY_ELEMS(basis_sets[i]); j++) { double sf; ret = tdi_basis_init(&s->basis[i][j], basis_sets[i][j], td->basis_scale_factor[j]); if (ret < 0) return ret; } return 0; } static int constraint_eval_alloc(const TDContext *td, const unsigned int *nb_coords, const double * const *coords, double amplitude, TDConstraintEvalContext **pce) { TDPriv *priv = td->priv; TDConstraintEvalContext *ce; int ret; ret = tdi_constraint_eval_alloc(&ce, td->family); if (ret < 0) { tdi_log(&priv->logger, 0, "Error allocating the constraints evaluator\n"); return ret; } ce->logger = priv->logger; ce->amplitude = amplitude; ce->nb_coords[0] = nb_coords[0]; ce->nb_coords[1] = nb_coords[1]; ce->coords[0] = coords[0]; ce->coords[1] = coords[1]; ret = tdi_constraint_eval_init(ce); if (ret < 0) { tdi_constraint_eval_free(&ce); tdi_log(&priv->logger, 0, "Error initializing the constraints evaluator\n"); return ret; } *pce = ce; return 0; } static int nlsolve_alloc(const TDContext *td, NLSolveContext **pnl) { TDPriv *s = td->priv; NLSolveContext *nl; int ret; ret = tdi_nlsolve_context_alloc(&nl, ARRAY_ELEMS(basis_sets)); if (ret < 0) { tdi_log(&s->logger, 0, "Error allocating the non-linear solver\n"); return ret; } nl->logger = s->logger; nl->tp = s->tp; nl->maxiter = td->max_iter; nl->atol = td->atol; memcpy(nl->basis, s->basis, sizeof(s->basis)); memcpy(nl->solve_order, s->basis_order, sizeof(s->basis_order)); #if HAVE_OPENCL nl->ocl_ctx = s->ocl_ctx; nl->ocl_queue = s->ocl_queue; #endif ret = tdi_nlsolve_context_init(nl); if (ret < 0) { tdi_log(&s->logger, 0, "Error initializing the non-linear solver\n"); goto fail; } *pnl = nl; return 0; fail: tdi_nlsolve_context_free(&nl); return ret; } static int teukolsky_constraint_eval(void *opaque, unsigned int eq_idx, const unsigned int *colloc_grid_order, const double * const *colloc_grid, const double * const (*vars)[PSSOLVE_DIFF_ORDER_NB], double *dst) { TDConstraintEvalContext *ce = opaque; const double *amplitude = opaque; return tdi_constraint_eval(ce, eq_idx, vars, dst); } int td_solve(TDContext *td, double *coeffs_init[3]) { TDPriv *s = td->priv; NLSolveContext *nl; TDConstraintEvalContext *ce; double a0; int ret; ret = teukolsky_init_check_options(td); if (ret < 0) return ret; ret = nlsolve_alloc(td, &nl); if (ret < 0) goto fail; ret = constraint_eval_alloc(td, td->nb_coeffs, nl->colloc_grid[0], 0.0, &ce); if (ret < 0) goto fail; if (fabs(td->amplitude) >= ce->a_diverge) { tdi_log(&s->logger, 0, "Amplitude A=%16.16g is above the point A_{max}=%g, no solutions " "are known to exist there. Set solution_branch=1 to get to the " "second branch where mass increases with decreasing amplitude\n", td->amplitude, ce->a_converge); } a0 = ce->a_converge; tdi_constraint_eval_free(&ce); if (coeffs_init) { for (int i = 0; i < 3; i++) { memcpy(td->coeffs[i], coeffs_init[i], sizeof(*td->coeffs[i]) * td->nb_coeffs[0] * td->nb_coeffs[1]); } } if (td->solution_branch == 0 || coeffs_init) { // direct solve with default (flat space) or user-provided initial guess ret = constraint_eval_alloc(td, td->nb_coeffs, nl->colloc_grid[0], td->amplitude, &ce); if (ret < 0) goto fail; ret = tdi_nlsolve_solve(nl, teukolsky_constraint_eval, NULL, ce, s->coeffs, 0); tdi_constraint_eval_free(&ce); if (ret < 0) { tdi_log(&s->logger, 0, "tdi_nlsolve_solve() failed: %d", ret); goto fail; } } else { // second branch requested and no user-provided initial guess // execute two lower-branch solutions and extrapolate to get to the upper branch const int N = NB_EQUATIONS * td->nb_coeffs[0] * td->nb_coeffs[1]; const double delta = 1e-5; const double a1 = a0 - delta; double cur_amplitude, new_amplitude, step; ret = constraint_eval_alloc(td, td->nb_coeffs, nl->colloc_grid[0], a0, &ce); if (ret < 0) goto fail; ret = tdi_nlsolve_solve(nl, teukolsky_constraint_eval, NULL, ce, s->coeffs, 0); tdi_constraint_eval_free(&ce); if (ret < 0) goto fail; ret = constraint_eval_alloc(td, td->nb_coeffs, nl->colloc_grid[0], a1, &ce); if (ret < 0) goto fail; ret = tdi_nlsolve_solve(nl, teukolsky_constraint_eval, NULL, ce, s->coeffs_tmp, 0); tdi_constraint_eval_free(&ce); if (ret < 0) goto fail; cblas_daxpy(N, -1.0, s->coeffs, 1, s->coeffs_tmp, 1); cblas_daxpy(N, -1.0, s->coeffs_tmp, 1, s->coeffs, 1); // obtain solution for a1 in the upper branch ret = constraint_eval_alloc(td, td->nb_coeffs, nl->colloc_grid[0], a1, &ce); if (ret < 0) goto fail; ret = tdi_nlsolve_solve(nl, teukolsky_constraint_eval, NULL, ce, s->coeffs, 0); tdi_constraint_eval_free(&ce); if (ret < 0) { tdi_log(&s->logger, 0, "Failed to get into the upper branch\n"); goto fail; } cur_amplitude = a1; step = (td->amplitude - cur_amplitude) / 128.0; while (fabs(cur_amplitude - td->amplitude) > DBL_EPSILON) { new_amplitude = cur_amplitude + step; if (SGN(step) != SGN(td->amplitude - new_amplitude)) new_amplitude = td->amplitude; tdi_log(&s->logger, 2, "Trying amplitude %g\n", new_amplitude); ret = constraint_eval_alloc(td, td->nb_coeffs, nl->colloc_grid[0], new_amplitude, &ce); if (ret < 0) goto fail; memcpy(s->coeffs_tmp, s->coeffs, sizeof(*s->coeffs) * N); ret = tdi_nlsolve_solve(nl, teukolsky_constraint_eval, NULL, ce, s->coeffs_tmp, 1); tdi_constraint_eval_free(&ce); if (ret == -EDOM) { step *= 0.5; if (fabs(step) < 1e-5) goto fail; continue; } else if (ret < 0) goto fail; if (ret <= nl->maxiter / 2) step *= 1.75; cur_amplitude = new_amplitude; memcpy(s->coeffs, s->coeffs_tmp, sizeof(*s->coeffs) * N); } } finish: ret = 0; tdi_nlsolve_print_stats(nl); fail: tdi_nlsolve_context_free(&nl); return ret; } static void log_default_callback(const TDContext *td, int level, const char *fmt, va_list vl) { vfprintf(stderr, fmt, vl); } TDContext *td_context_alloc(void) { TDContext *td = calloc(1, sizeof(*td)); if (!td) return NULL; td->nb_threads = 1; td->family = TD_FAMILY_TIME_ANTISYM_CUBIC; td->solution_branch = 0; td->amplitude = 1.0; td->nb_coeffs[0] = 32; td->nb_coeffs[1] = 16; td->basis_scale_factor[0] = 3.0; td->basis_scale_factor[1] = 3.0; td->max_iter = 16; td->atol = 1e-12; td->log_callback = log_default_callback; td->priv = calloc(1, sizeof(TDPriv)); if (!td->priv) { free(td); return NULL; } return td; } void td_context_free(TDContext **ptd) { TDContext *td = *ptd; TDPriv *s; if (!td) return; s = td->priv; tdi_threadpool_free(&s->tp); #if HAVE_OPENCL if (s->ocl_queue) clReleaseCommandQueue(s->ocl_queue); if (s->ocl_ctx) clReleaseContext(s->ocl_ctx); #endif for (int i = 0; i < ARRAY_ELEMS(s->basis); i++) for (int j = 0; j < ARRAY_ELEMS(s->basis[i]); j++) tdi_basis_free(&s->basis[i][j]); free(s->coeffs); free(s->coeffs_tmp); free(s->coeffs_lapse); free(td->priv); free(td); *ptd = NULL; } typedef struct MaximalSlicingEval { double *psi; double *dpsi_r; double *dpsi_t; double *krr; double *kpp; double *krt; } MaximalSlicingEval; static int maximal_slicing_eval(void *opaque, unsigned int eq_idx, const unsigned int *colloc_grid_order, const double * const *colloc_grid, const double * const (*vars)[PSSOLVE_DIFF_ORDER_NB], double *dst) { MaximalSlicingEval *max = opaque; for (int idx1 = 0; idx1 < colloc_grid_order[1]; idx1++) { const double theta = colloc_grid[1][idx1]; const double st = sin(theta); const double st2 = SQR(st); const double ct = cos(theta); for (int idx0 = 0; idx0 < colloc_grid_order[0]; idx0++) { const double r = colloc_grid[0][idx0]; const double r2 = SQR(r); const int idx = idx1 * colloc_grid_order[0] + idx0; const double alpha = vars[0][PSSOLVE_DIFF_ORDER_00][idx] + 1.0; const double dalpha_r = vars[0][PSSOLVE_DIFF_ORDER_10][idx]; const double dalpha_t = vars[0][PSSOLVE_DIFF_ORDER_01][idx]; const double d2alpha_rr = vars[0][PSSOLVE_DIFF_ORDER_20][idx]; const double d2alpha_tt = vars[0][PSSOLVE_DIFF_ORDER_02][idx]; const double d2alpha_rt = vars[0][PSSOLVE_DIFF_ORDER_11][idx]; const double d2alpha[3][3] = { { d2alpha_rr, d2alpha_rt, 0.0 }, { d2alpha_rt, d2alpha_tt, 0.0 }, { 0.0, 0.0, 0.0 }, }; const double psi = max->psi[idx]; const double psi2 = SQR(psi); const double psi3 = psi * psi2; const double psi4 = SQR(psi2); const double dpsi_r = max->dpsi_r[idx]; const double dpsi_t = max->dpsi_t[idx]; const double g[3][3] = { { psi4, 0.0, 0.0 }, { 0.0, psi4 * r2, 0.0 }, { 0.0, 0.0, psi4 * r2 * st2}, }; const double gu[3][3] = { { 1.0 / psi4, 0.0, 0.0 }, { 0.0, 1.0 / (psi4 * r2), 0.0 }, { 0.0, 0.0, 1.0 / (psi4 * r2 * st2) }, }; const double dg[3][3][3] = { { { 4.0 * dpsi_r * psi3, 0.0, 0.0 }, { 0.0, 4.0 * dpsi_r * psi3 * r2 + 2.0 * r * psi4, 0.0 }, { 0.0, 0.0, 4.0 * dpsi_r * psi3 * r2 * st2 + 2.0 * r * psi4 * st2 }, }, { { 4.0 * dpsi_t * psi3, 0.0, 0.0 }, { 0.0, 4.0 * dpsi_t * psi3 * r2, 0.0 }, { 0.0, 0.0, 4.0 * dpsi_t * psi3 * r2 * st2 + 2.0 * psi4 * r2 * st * ct }, }, { { 0.0, 0.0, 0.0 }, { 0.0, 0.0, 0.0 }, { 0.0, 0.0, 0.0 }, }, }; const double krr = max->krr[idx]; const double kpp = max->kpp[idx]; const double krt = max->krt[idx]; const double Km[3][3] = { { krr, krt, 0.0 }, { krt / r2, -(krr + kpp), 0.0 }, { 0.0, 0.0, kpp }, }; double G[3][3][3], X[3]; double laplace_alpha, k2; for (int i = 0; i < 3; i++) for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) { double val = 0.0; for (int l = 0; l < 3; l++) val += gu[i][l] * (dg[k][j][l] + dg[j][k][l] - dg[l][j][k]); G[i][j][k] = 0.5 * val; } for (int i = 0; i < 3; i++) { double val = 0.0; for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) val += gu[j][k] * G[i][j][k]; X[i] = val; } laplace_alpha = 0.0; for (int i = 0; i < 3; i++) for (int j = 0; j < 3; j++) laplace_alpha += gu[i][j] * d2alpha[i][j]; laplace_alpha -= X[0] * dalpha_r + X[1] * dalpha_t; k2 = 0.0; for (int i = 0; i < 3; i++) for (int j = 0; j < 3; j++) k2 += Km[i][j] * Km[j][i]; dst[idx] = laplace_alpha - alpha * k2; } } return 0; } static int lapse_solve_max(const TDContext *td) { MaximalSlicingEval max = { NULL }; TDPriv *priv = td->priv; NLSolveContext *nl; double *coords_r = NULL; double *coords_t = NULL; double *coeffs; int ret = 0; ret = tdi_nlsolve_context_alloc(&nl, 1); if (ret < 0) { tdi_log(&priv->logger, 0, "Error allocating the non-linear solver\n"); return ret; } nl->logger = priv->logger; nl->tp = priv->tp; nl->maxiter = td->max_iter; nl->atol = td->atol; nl->basis[0][0] = priv->basis[0][0]; nl->basis[0][1] = priv->basis[0][1]; nl->solve_order[0][0] = priv->basis_order[0][0]; nl->solve_order[0][1] = priv->basis_order[0][1]; #if HAVE_OPENCL nl->ocl_ctx = priv->ocl_ctx; nl->ocl_queue = priv->ocl_queue; #endif ret = tdi_nlsolve_context_init(nl); if (ret < 0) { tdi_log(&priv->logger, 0, "Error initializing the non-linear solver\n"); goto fail; } ret = posix_memalign((void**)&max.psi, 32, sizeof(*max.psi) * td->nb_coeffs[0] * td->nb_coeffs[1]); ret |= posix_memalign((void**)&max.dpsi_r, 32, sizeof(*max.psi) * td->nb_coeffs[0] * td->nb_coeffs[1]); ret |= posix_memalign((void**)&max.dpsi_t, 32, sizeof(*max.psi) * td->nb_coeffs[0] * td->nb_coeffs[1]); ret |= posix_memalign((void**)&max.krr, 32, sizeof(*max.krr) * td->nb_coeffs[0] * td->nb_coeffs[1]); ret |= posix_memalign((void**)&max.kpp, 32, sizeof(*max.kpp) * td->nb_coeffs[0] * td->nb_coeffs[1]); ret |= posix_memalign((void**)&max.krt, 32, sizeof(*max.krt) * td->nb_coeffs[0] * td->nb_coeffs[1]); ret |= posix_memalign((void**)&coords_r, 32, sizeof(*coords_r) * td->nb_coeffs[0] * td->nb_coeffs[1]); ret |= posix_memalign((void**)&coords_t, 32, sizeof(*coords_t) * td->nb_coeffs[0] * td->nb_coeffs[1]); ret |= posix_memalign((void**)&coeffs, 32, sizeof(*coeffs) * td->nb_coeffs[0] * td->nb_coeffs[1]); if (ret) { ret = -ENOMEM; goto fail; } for (int j = 0; j < td->nb_coeffs[1]; j++) { for (int i = 0; i < td->nb_coeffs[0]; i++) { coords_r[j * td->nb_coeffs[0] + i] = nl->colloc_grid[0][0][i]; coords_t[j * td->nb_coeffs[0] + i] = nl->colloc_grid[0][1][j]; } } td_eval_psi(td, td->nb_coeffs[0] * td->nb_coeffs[1], coords_r, coords_t, (const unsigned int [2]){ 0, 0 }, max.psi); td_eval_psi(td, td->nb_coeffs[0] * td->nb_coeffs[1], coords_r, coords_t, (const unsigned int [2]){ 1, 0 }, max.dpsi_r); td_eval_psi(td, td->nb_coeffs[0] * td->nb_coeffs[1], coords_r, coords_t, (const unsigned int [2]){ 0, 1 }, max.dpsi_t); td_eval_krr(td, td->nb_coeffs[0] * td->nb_coeffs[1], coords_r, coords_t, (const unsigned int [2]){ 0, 0 }, max.krr); td_eval_kpp(td, td->nb_coeffs[0] * td->nb_coeffs[1], coords_r, coords_t, (const unsigned int [2]){ 0, 0 }, max.kpp); td_eval_krt(td, td->nb_coeffs[0] * td->nb_coeffs[1], coords_r, coords_t, (const unsigned int [2]){ 0, 0 }, max.krt); ret = tdi_nlsolve_solve(nl, maximal_slicing_eval, NULL, &max, coeffs, 0); if (ret < 0) { tdi_log(&priv->logger, 0, "Error solving the maximal slicing equation\n"); goto fail; } priv->coeffs_lapse = coeffs; coeffs = NULL; fail: free(coords_r); free(coords_t); free(coeffs); free(max.psi); free(max.dpsi_r); free(max.dpsi_t); free(max.krr); free(max.kpp); free(max.krt); tdi_nlsolve_context_free(&nl); return ret; } static double scalarproduct_metric_c(size_t len1, size_t len2, double *mat, double *vec1, double *vec2) { double val = 0.0; for (int m = 0; m < len2; m++) { double tmp = 0.0; for (int n = 0; n < len1; n++) tmp += mat[m * len1 + n] * vec1[n]; val += tmp * vec2[m]; } return val; } static int eval_var(const TDContext *td, unsigned int var_idx, size_t nb_coords, const double *r, const double *theta, const unsigned int diff_order[2], double *out) { TDPriv *s = td->priv; double *basis_val[2] = { NULL }; int ret = 0; if (diff_order[0] > 2 || diff_order[1] > 2) { tdi_log(&s->logger, 0, "Derivatives of higher order than 2 are not supported.\n"); return -ENOSYS; } posix_memalign((void**)&basis_val[0], 32, sizeof(*basis_val[0]) * td->nb_coeffs[0]); posix_memalign((void**)&basis_val[1], 32, sizeof(*basis_val[1]) * td->nb_coeffs[1]); if (!basis_val[0] || !basis_val[1]) { ret = -ENOMEM; goto fail; } memset(basis_val[0], 0, sizeof(*basis_val[0]) * td->nb_coeffs[0]); memset(basis_val[1], 0, sizeof(*basis_val[1]) * td->nb_coeffs[1]); for (int i = 0; i < nb_coords; i++) { double theta_val = theta[i]; double r_val = r[i]; double val = (var_idx == 0 && diff_order[0] == 0 && diff_order[1] == 0) ? 1.0 : 0.0; for (int k = 0; k < td->nb_coeffs[0]; k++) basis_val[0][k] = tdi_basis_eval(s->basis[var_idx][0], diff_order[0], r_val, k); for (int k = 0; k < td->nb_coeffs[1]; k++) basis_val[1][k] = tdi_basis_eval(s->basis[var_idx][1], diff_order[1], theta_val, k); val += scalarproduct_metric_c(td->nb_coeffs[0], td->nb_coeffs[1], td->coeffs[var_idx], basis_val[0], basis_val[1]); out[i] = val; } fail: free(basis_val[0]); free(basis_val[1]); return ret; } int td_eval_psi(const TDContext *td, size_t nb_coords, const double *r, const double *theta, const unsigned int diff_order[2], double *out) { return eval_var(td, 0, nb_coords, r, theta, diff_order, out); } int td_eval_krr(const TDContext *td, size_t nb_coords, const double *r, const double *theta, const unsigned int diff_order[2], double *out) { return eval_var(td, 1, nb_coords, r, theta, diff_order, out); } int td_eval_kpp(const TDContext *td, size_t nb_coords, const double *r, const double *theta, const unsigned int diff_order[2], double *out) { return eval_var(td, 2, nb_coords, r, theta, diff_order, out); } int td_eval_krt(const TDContext *td, size_t nb_coords, const double *r, const double *theta, const unsigned int diff_order[2], double *out) { static const double dummy_coord = 0.0; static const double *dummy_coords[2] = { &dummy_coord, &dummy_coord }; static const unsigned int nb_dummy_coords[2] = { 1, 1 }; TDConstraintEvalContext *ce; double (*eval)(TDConstraintEvalContext*, double, double); int ret; if (diff_order[0] == 0 && diff_order[1] == 0) eval = tdi_constraint_eval_k_rtheta; else if (diff_order[0] == 1 && diff_order[1] == 0) eval = tdi_constraint_eval_dk_rtheta_r; else if (diff_order[0] == 0 && diff_order[1] == 1) eval = tdi_constraint_eval_dk_rtheta_t; else return -ENOSYS; ret = constraint_eval_alloc(td, nb_dummy_coords, dummy_coords, td->amplitude, &ce); if (ret < 0) return ret; for (int i = 0; i < nb_coords; i++) { double theta_val = theta[i]; double r_val = r[i]; out[i] = eval(ce, r_val, theta_val); } tdi_constraint_eval_free(&ce); return 0; } int td_eval_lapse(const TDContext *td, size_t nb_coords, const double *r, const double *theta, const unsigned int diff_order[2], double *out) { TDPriv *priv = td->priv; double *basis_val[2] = { NULL }; int ret; if (!priv->coeffs_lapse) { ret = lapse_solve_max(td); if (ret < 0) return ret; } if (diff_order[0] > 2 || diff_order[1] > 2) { tdi_log(&priv->logger, 0, "Derivatives of higher order than 2 are not supported.\n"); return -ENOSYS; } posix_memalign((void**)&basis_val[0], 32, sizeof(*basis_val[0]) * td->nb_coeffs[0]); posix_memalign((void**)&basis_val[1], 32, sizeof(*basis_val[1]) * td->nb_coeffs[1]); if (!basis_val[0] || !basis_val[1]) { ret = -ENOMEM; goto fail; } memset(basis_val[0], 0, sizeof(*basis_val[0]) * td->nb_coeffs[0]); memset(basis_val[1], 0, sizeof(*basis_val[1]) * td->nb_coeffs[1]); for (int i = 0; i < nb_coords; i++) { double theta_val = theta[i]; double r_val = r[i]; double val = (diff_order[0] == 0 && diff_order[1] == 0) ? 1.0 : 0.0; for (int k = 0; k < td->nb_coeffs[0]; k++) basis_val[0][k] = tdi_basis_eval(priv->basis[0][0], diff_order[0], r_val, k); for (int k = 0; k < td->nb_coeffs[1]; k++) basis_val[1][k] = tdi_basis_eval(priv->basis[0][1], diff_order[1], theta_val, k); val += scalarproduct_metric_c(td->nb_coeffs[0], td->nb_coeffs[1], priv->coeffs_lapse, basis_val[0], basis_val[1]); out[i] = val; } fail: free(basis_val[0]); free(basis_val[1]); return 0; }