From 24103bc5ae3d568ca97bdb70175b975fa680546c Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Fri, 30 Jun 2017 09:44:25 +0200 Subject: Refactor conformal factor evaluation. --- eval_psi_radial_template.c | 155 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 155 insertions(+) create mode 100644 eval_psi_radial_template.c (limited to 'eval_psi_radial_template.c') diff --git a/eval_psi_radial_template.c b/eval_psi_radial_template.c new file mode 100644 index 0000000..3825511 --- /dev/null +++ b/eval_psi_radial_template.c @@ -0,0 +1,155 @@ +/* + * Copyright 2014-2016 Anton Khirnov + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +/** + * @file + * the template for the loop over the grid points +*/ + +#define FUNC3(a, b) a ## _ ## b +#define FUNC2(a, b) FUNC3(a, b) +#define FUNC(x) FUNC2(x, DIFF) + +#define DIFF_RHO (DIFF % 3) +#define DIFF_Z (DIFF / 3) + +static void FUNC(eval_psi_radial)(void *args, + unsigned int job_idx, unsigned int nb_jobs, + unsigned int thread_idx, unsigned int nb_threads) +{ + EvalPsiArgs *a = ((EvalPsiArgs*)args) + job_idx; + const BDContext *bd = a->bd; + const BDPriv *s = bd->priv; + double *psi = a->psi; + ptrdiff_t psi_stride = a->psi_stride; + const double *rho = a->rho; + const double *z = a->z; + ptrdiff_t nb_coords_rho = a->nb_coords_rho; + ptrdiff_t nb_coords_z = a->nb_coords_z; + enum BSEvalType type; + + int nb_coeffs_aligned[2] = { s->coeffs_stride, ALIGN(s->nb_coeffs[1], REQ_ALIGNMENT(double)) }; + + double *basis_val[2][3] = { { NULL } }; + + double add = DIFF ? 0.0 : 1.0; + + int ret = 0; + + for (int i = 0; i < ARRAY_ELEMS(basis_val[0]); i++) { + posix_memalign((void**)&basis_val[0][i], REQ_ALIGNMENT(char), sizeof(*basis_val[0][i]) * nb_coeffs_aligned[0]); + posix_memalign((void**)&basis_val[1][i], REQ_ALIGNMENT(char), sizeof(*basis_val[1][i]) * nb_coeffs_aligned[1]); + if (!basis_val[0][i] || !basis_val[1][i]) { + ret = -ENOMEM; + goto fail; + } + memset(basis_val[0][i], 0, sizeof(*basis_val[0][i]) * nb_coeffs_aligned[0]); + memset(basis_val[1][i], 0, sizeof(*basis_val[1][i]) * nb_coeffs_aligned[1]); + } + +#define BASIS0 (basis_val[0][0][n]) +#define DBASIS0 (basis_val[0][1][n]) +#define D2BASIS0 (basis_val[0][2][n]) +#define BASIS1 (basis_val[1][0][m]) +#define DBASIS1 (basis_val[1][1][m]) +#define D2BASIS1 (basis_val[1][2][m]) + + for (int i = 0; i < nb_coords_z; i++) { + double *dst = psi + i * psi_stride; + double zz = z[i]; + + for (int j = 0; j < nb_coords_rho; j++) { + double ppsi = add; + double rrho = rho[j]; + + double r = sqrt(SQR(rrho) + SQR(zz)); + double phi = atan2(zz, rrho); + + bdi_basis_eval_multi(s->basis[0], r, basis_val[0], 0, s->nb_coeffs[0]); + bdi_basis_eval_multi(s->basis[1], phi, basis_val[1], 0, s->nb_coeffs[1]); + + switch (DIFF) { + case 0: + ppsi += s->scalarproduct_metric(nb_coeffs_aligned[0], nb_coeffs_aligned[1], s->coeffs, + basis_val[0][0], basis_val[1][0]); + break; + case 1: + for (int m = 0; m < s->nb_coeffs[1]; m++) { + for (int n = 0; n < s->nb_coeffs[0]; n++) { + double basis_val_10 = ((r > EPS) ? (DBASIS0 * BASIS1 * rrho / r - BASIS0 * DBASIS1 * zz / SQR(r)) : 0.0); + + ppsi += s->coeffs[m * s->coeffs_stride + n] * basis_val_10; + } + } + break; + case 2: + for (int m = 0; m < s->nb_coeffs[1]; m++) { + for (int n = 0; n < s->nb_coeffs[0]; n++) { + double basis_val_20 = ((r > EPS) ? (SQR(rrho / r) * D2BASIS0 * BASIS1 + SQR(zz / SQR(r)) * BASIS0 * D2BASIS1 + + (1.0 - SQR(rrho / r)) / r * DBASIS0 * BASIS1 + + 2 * rrho * zz / SQR(SQR(r)) * BASIS0 * DBASIS1 + - 2 * zz * rrho / (r * SQR(r)) * DBASIS0 * DBASIS1) : 0.0); + ppsi += s->coeffs[m * s->coeffs_stride + n] * basis_val_20; + } + } + break; + case 3: + for (int m = 0; m < s->nb_coeffs[1]; m++) { + for (int n = 0; n < s->nb_coeffs[0]; n++) { + double basis_val_01 = ((r > EPS) ? (DBASIS0 * BASIS1 * zz / r + BASIS0 * DBASIS1 * rrho / SQR(r)) : 0.0); + + ppsi += s->coeffs[m * s->coeffs_stride + n] * basis_val_01; + } + } + break; + case 4: + for (int m = 0; m < s->nb_coeffs[1]; m++) { + for (int n = 0; n < s->nb_coeffs[0]; n++) { + double basis_val_11 = ((r > EPS) ? (rrho * zz / SQR(r) * D2BASIS0 * BASIS1 - rrho * zz / SQR(SQR(r)) * BASIS0 * D2BASIS1 + - rrho * zz / (r * SQR(r)) * DBASIS0 * BASIS1 + - (1.0 - SQR(zz / r)) / SQR(r) * BASIS0 * DBASIS1 + + (SQR(rrho) - SQR(zz)) / (r * SQR(r)) * DBASIS0 * DBASIS1) : 0.0); + + ppsi += s->coeffs[m * s->coeffs_stride + n] * basis_val_11; + } + } + break; + case 6: + for (int m = 0; m < s->nb_coeffs[1]; m++) { + for (int n = 0; n < s->nb_coeffs[0]; n++) { + double basis_val_02 = ((r > EPS) ? (SQR(zz / r) * D2BASIS0 * BASIS1 + SQR(rrho / SQR(r)) * BASIS0 * D2BASIS1 + + (1.0 - SQR(zz / r)) / r * DBASIS0 * BASIS1 + - 2 * rrho * zz / SQR(SQR(r)) * BASIS0 * DBASIS1 + + 2 * zz * rrho / (r * SQR(r)) * DBASIS0 * DBASIS1) : 0.0); + ppsi += s->coeffs[m * s->coeffs_stride + n] * basis_val_02; + } + } + break; + } + + *dst++ = ppsi; + } + } + +fail: + for (int i = 0; i < ARRAY_ELEMS(basis_val[0]); i++) { + free(basis_val[0][i]); + free(basis_val[1][i]); + } + + a->ret = ret; +} -- cgit v1.2.3