/* * Copyright 2014-2016 Anton Khirnov * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ /** * @file * the template for the loop over the grid points */ #define FUNC3(a, b) a ## _ ## b #define FUNC2(a, b) FUNC3(a, b) #define FUNC(x) FUNC2(x, DIFF) #define DIFF_RHO (DIFF % 3) #define DIFF_Z (DIFF / 3) static void FUNC(eval_psi_radial)(void *args, unsigned int job_idx, unsigned int nb_jobs, unsigned int thread_idx, unsigned int nb_threads) { EvalPsiArgs *a = ((EvalPsiArgs*)args) + job_idx; const BDContext *bd = a->bd; const BDPriv *s = bd->priv; double *psi = a->psi; ptrdiff_t psi_stride = a->psi_stride; const double *rho = a->rho; const double *z = a->z; ptrdiff_t nb_coords_rho = a->nb_coords_rho; ptrdiff_t nb_coords_z = a->nb_coords_z; enum BSEvalType type; int nb_coeffs_aligned[2] = { s->coeffs_stride, ALIGN(s->nb_coeffs[1], REQ_ALIGNMENT(double)) }; double *basis_val[2][3] = { { NULL } }; double add = DIFF ? 0.0 : 1.0; int ret = 0; for (int i = 0; i < ARRAY_ELEMS(basis_val[0]); i++) { posix_memalign((void**)&basis_val[0][i], REQ_ALIGNMENT(char), sizeof(*basis_val[0][i]) * nb_coeffs_aligned[0]); posix_memalign((void**)&basis_val[1][i], REQ_ALIGNMENT(char), sizeof(*basis_val[1][i]) * nb_coeffs_aligned[1]); if (!basis_val[0][i] || !basis_val[1][i]) { ret = -ENOMEM; goto fail; } memset(basis_val[0][i], 0, sizeof(*basis_val[0][i]) * nb_coeffs_aligned[0]); memset(basis_val[1][i], 0, sizeof(*basis_val[1][i]) * nb_coeffs_aligned[1]); } #define BASIS0 (basis_val[0][0][n]) #define DBASIS0 (basis_val[0][1][n]) #define D2BASIS0 (basis_val[0][2][n]) #define BASIS1 (basis_val[1][0][m]) #define DBASIS1 (basis_val[1][1][m]) #define D2BASIS1 (basis_val[1][2][m]) for (int i = 0; i < nb_coords_z; i++) { double *dst = psi + i * psi_stride; double zz = z[i]; for (int j = 0; j < nb_coords_rho; j++) { double ppsi = add; double rrho = rho[j]; double r = sqrt(SQR(rrho) + SQR(zz)); double phi = atan2(zz, rrho); bdi_basis_eval_multi(s->basis[0], r, basis_val[0], 0, s->nb_coeffs[0]); bdi_basis_eval_multi(s->basis[1], phi, basis_val[1], 0, s->nb_coeffs[1]); switch (DIFF) { case 0: ppsi += s->scalarproduct_metric(nb_coeffs_aligned[0], nb_coeffs_aligned[1], s->coeffs, basis_val[0][0], basis_val[1][0]); break; case 1: for (int m = 0; m < s->nb_coeffs[1]; m++) { for (int n = 0; n < s->nb_coeffs[0]; n++) { double basis_val_10 = ((r > EPS) ? (DBASIS0 * BASIS1 * rrho / r - BASIS0 * DBASIS1 * zz / SQR(r)) : 0.0); ppsi += s->coeffs[m * s->coeffs_stride + n] * basis_val_10; } } break; case 2: for (int m = 0; m < s->nb_coeffs[1]; m++) { for (int n = 0; n < s->nb_coeffs[0]; n++) { double basis_val_20 = ((r > EPS) ? (SQR(rrho / r) * D2BASIS0 * BASIS1 + SQR(zz / SQR(r)) * BASIS0 * D2BASIS1 + (1.0 - SQR(rrho / r)) / r * DBASIS0 * BASIS1 + 2 * rrho * zz / SQR(SQR(r)) * BASIS0 * DBASIS1 - 2 * zz * rrho / (r * SQR(r)) * DBASIS0 * DBASIS1) : 0.0); ppsi += s->coeffs[m * s->coeffs_stride + n] * basis_val_20; } } break; case 3: for (int m = 0; m < s->nb_coeffs[1]; m++) { for (int n = 0; n < s->nb_coeffs[0]; n++) { double basis_val_01 = ((r > EPS) ? (DBASIS0 * BASIS1 * zz / r + BASIS0 * DBASIS1 * rrho / SQR(r)) : 0.0); ppsi += s->coeffs[m * s->coeffs_stride + n] * basis_val_01; } } break; case 4: for (int m = 0; m < s->nb_coeffs[1]; m++) { for (int n = 0; n < s->nb_coeffs[0]; n++) { double basis_val_11 = ((r > EPS) ? (rrho * zz / SQR(r) * D2BASIS0 * BASIS1 - rrho * zz / SQR(SQR(r)) * BASIS0 * D2BASIS1 - rrho * zz / (r * SQR(r)) * DBASIS0 * BASIS1 - (1.0 - SQR(zz / r)) / SQR(r) * BASIS0 * DBASIS1 + (SQR(rrho) - SQR(zz)) / (r * SQR(r)) * DBASIS0 * DBASIS1) : 0.0); ppsi += s->coeffs[m * s->coeffs_stride + n] * basis_val_11; } } break; case 6: for (int m = 0; m < s->nb_coeffs[1]; m++) { for (int n = 0; n < s->nb_coeffs[0]; n++) { double basis_val_02 = ((r > EPS) ? (SQR(zz / r) * D2BASIS0 * BASIS1 + SQR(rrho / SQR(r)) * BASIS0 * D2BASIS1 + (1.0 - SQR(zz / r)) / r * DBASIS0 * BASIS1 - 2 * rrho * zz / SQR(SQR(r)) * BASIS0 * DBASIS1 + 2 * zz * rrho / (r * SQR(r)) * DBASIS0 * DBASIS1) : 0.0); ppsi += s->coeffs[m * s->coeffs_stride + n] * basis_val_02; } } break; } *dst++ = ppsi; } } fail: for (int i = 0; i < ARRAY_ELEMS(basis_val[0]); i++) { free(basis_val[0][i]); free(basis_val[1][i]); } a->ret = ret; }