aboutsummaryrefslogtreecommitdiff
path: root/eval_psi_radial_template.c
diff options
context:
space:
mode:
Diffstat (limited to 'eval_psi_radial_template.c')
-rw-r--r--eval_psi_radial_template.c155
1 files changed, 155 insertions, 0 deletions
diff --git a/eval_psi_radial_template.c b/eval_psi_radial_template.c
new file mode 100644
index 0000000..3825511
--- /dev/null
+++ b/eval_psi_radial_template.c
@@ -0,0 +1,155 @@
+/*
+ * Copyright 2014-2016 Anton Khirnov <anton@khirnov.net>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * @file
+ * the template for the loop over the grid points
+*/
+
+#define FUNC3(a, b) a ## _ ## b
+#define FUNC2(a, b) FUNC3(a, b)
+#define FUNC(x) FUNC2(x, DIFF)
+
+#define DIFF_RHO (DIFF % 3)
+#define DIFF_Z (DIFF / 3)
+
+static void FUNC(eval_psi_radial)(void *args,
+ unsigned int job_idx, unsigned int nb_jobs,
+ unsigned int thread_idx, unsigned int nb_threads)
+{
+ EvalPsiArgs *a = ((EvalPsiArgs*)args) + job_idx;
+ const BDContext *bd = a->bd;
+ const BDPriv *s = bd->priv;
+ double *psi = a->psi;
+ ptrdiff_t psi_stride = a->psi_stride;
+ const double *rho = a->rho;
+ const double *z = a->z;
+ ptrdiff_t nb_coords_rho = a->nb_coords_rho;
+ ptrdiff_t nb_coords_z = a->nb_coords_z;
+ enum BSEvalType type;
+
+ int nb_coeffs_aligned[2] = { s->coeffs_stride, ALIGN(s->nb_coeffs[1], REQ_ALIGNMENT(double)) };
+
+ double *basis_val[2][3] = { { NULL } };
+
+ double add = DIFF ? 0.0 : 1.0;
+
+ int ret = 0;
+
+ for (int i = 0; i < ARRAY_ELEMS(basis_val[0]); i++) {
+ posix_memalign((void**)&basis_val[0][i], REQ_ALIGNMENT(char), sizeof(*basis_val[0][i]) * nb_coeffs_aligned[0]);
+ posix_memalign((void**)&basis_val[1][i], REQ_ALIGNMENT(char), sizeof(*basis_val[1][i]) * nb_coeffs_aligned[1]);
+ if (!basis_val[0][i] || !basis_val[1][i]) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+ memset(basis_val[0][i], 0, sizeof(*basis_val[0][i]) * nb_coeffs_aligned[0]);
+ memset(basis_val[1][i], 0, sizeof(*basis_val[1][i]) * nb_coeffs_aligned[1]);
+ }
+
+#define BASIS0 (basis_val[0][0][n])
+#define DBASIS0 (basis_val[0][1][n])
+#define D2BASIS0 (basis_val[0][2][n])
+#define BASIS1 (basis_val[1][0][m])
+#define DBASIS1 (basis_val[1][1][m])
+#define D2BASIS1 (basis_val[1][2][m])
+
+ for (int i = 0; i < nb_coords_z; i++) {
+ double *dst = psi + i * psi_stride;
+ double zz = z[i];
+
+ for (int j = 0; j < nb_coords_rho; j++) {
+ double ppsi = add;
+ double rrho = rho[j];
+
+ double r = sqrt(SQR(rrho) + SQR(zz));
+ double phi = atan2(zz, rrho);
+
+ bdi_basis_eval_multi(s->basis[0], r, basis_val[0], 0, s->nb_coeffs[0]);
+ bdi_basis_eval_multi(s->basis[1], phi, basis_val[1], 0, s->nb_coeffs[1]);
+
+ switch (DIFF) {
+ case 0:
+ ppsi += s->scalarproduct_metric(nb_coeffs_aligned[0], nb_coeffs_aligned[1], s->coeffs,
+ basis_val[0][0], basis_val[1][0]);
+ break;
+ case 1:
+ for (int m = 0; m < s->nb_coeffs[1]; m++) {
+ for (int n = 0; n < s->nb_coeffs[0]; n++) {
+ double basis_val_10 = ((r > EPS) ? (DBASIS0 * BASIS1 * rrho / r - BASIS0 * DBASIS1 * zz / SQR(r)) : 0.0);
+
+ ppsi += s->coeffs[m * s->coeffs_stride + n] * basis_val_10;
+ }
+ }
+ break;
+ case 2:
+ for (int m = 0; m < s->nb_coeffs[1]; m++) {
+ for (int n = 0; n < s->nb_coeffs[0]; n++) {
+ double basis_val_20 = ((r > EPS) ? (SQR(rrho / r) * D2BASIS0 * BASIS1 + SQR(zz / SQR(r)) * BASIS0 * D2BASIS1
+ + (1.0 - SQR(rrho / r)) / r * DBASIS0 * BASIS1
+ + 2 * rrho * zz / SQR(SQR(r)) * BASIS0 * DBASIS1
+ - 2 * zz * rrho / (r * SQR(r)) * DBASIS0 * DBASIS1) : 0.0);
+ ppsi += s->coeffs[m * s->coeffs_stride + n] * basis_val_20;
+ }
+ }
+ break;
+ case 3:
+ for (int m = 0; m < s->nb_coeffs[1]; m++) {
+ for (int n = 0; n < s->nb_coeffs[0]; n++) {
+ double basis_val_01 = ((r > EPS) ? (DBASIS0 * BASIS1 * zz / r + BASIS0 * DBASIS1 * rrho / SQR(r)) : 0.0);
+
+ ppsi += s->coeffs[m * s->coeffs_stride + n] * basis_val_01;
+ }
+ }
+ break;
+ case 4:
+ for (int m = 0; m < s->nb_coeffs[1]; m++) {
+ for (int n = 0; n < s->nb_coeffs[0]; n++) {
+ double basis_val_11 = ((r > EPS) ? (rrho * zz / SQR(r) * D2BASIS0 * BASIS1 - rrho * zz / SQR(SQR(r)) * BASIS0 * D2BASIS1
+ - rrho * zz / (r * SQR(r)) * DBASIS0 * BASIS1
+ - (1.0 - SQR(zz / r)) / SQR(r) * BASIS0 * DBASIS1
+ + (SQR(rrho) - SQR(zz)) / (r * SQR(r)) * DBASIS0 * DBASIS1) : 0.0);
+
+ ppsi += s->coeffs[m * s->coeffs_stride + n] * basis_val_11;
+ }
+ }
+ break;
+ case 6:
+ for (int m = 0; m < s->nb_coeffs[1]; m++) {
+ for (int n = 0; n < s->nb_coeffs[0]; n++) {
+ double basis_val_02 = ((r > EPS) ? (SQR(zz / r) * D2BASIS0 * BASIS1 + SQR(rrho / SQR(r)) * BASIS0 * D2BASIS1
+ + (1.0 - SQR(zz / r)) / r * DBASIS0 * BASIS1
+ - 2 * rrho * zz / SQR(SQR(r)) * BASIS0 * DBASIS1
+ + 2 * zz * rrho / (r * SQR(r)) * DBASIS0 * DBASIS1) : 0.0);
+ ppsi += s->coeffs[m * s->coeffs_stride + n] * basis_val_02;
+ }
+ }
+ break;
+ }
+
+ *dst++ = ppsi;
+ }
+ }
+
+fail:
+ for (int i = 0; i < ARRAY_ELEMS(basis_val[0]); i++) {
+ free(basis_val[0][i]);
+ free(basis_val[1][i]);
+ }
+
+ a->ret = ret;
+}