diff options
Diffstat (limited to 'eval_psi_radial.c')
-rw-r--r-- | eval_psi_radial.c | 140 |
1 files changed, 140 insertions, 0 deletions
diff --git a/eval_psi_radial.c b/eval_psi_radial.c new file mode 100644 index 0000000..ee913d9 --- /dev/null +++ b/eval_psi_radial.c @@ -0,0 +1,140 @@ +/* + * Copyright 2014-2016 Anton Khirnov <anton@khirnov.net> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <errno.h> +#include <math.h> +#include <stdlib.h> +#include <string.h> + +#include "config.h" +#include "internal.h" +#include "threadpool.h" + +typedef struct EvalPsiArgs { + const BDContext *bd; + double *psi; + ptrdiff_t psi_stride; + const double *rho; + const double *z; + int nb_coords_rho; + int nb_coords_z; + + int ret; +} EvalPsiArgs; + +#define DIFF 0 +#include "eval_psi_radial_template.c" +#undef DIFF + +#define DIFF 1 +#include "eval_psi_radial_template.c" +#undef DIFF + +#define DIFF 2 +#include "eval_psi_radial_template.c" +#undef DIFF + +#define DIFF 3 +#include "eval_psi_radial_template.c" +#undef DIFF + +#define DIFF 4 +#include "eval_psi_radial_template.c" +#undef DIFF + +#define DIFF 6 +#include "eval_psi_radial_template.c" +#undef DIFF + +typedef void (*EvalPsiRadialFunc)(void *args, + unsigned int job_idx, unsigned int nb_jobs, + unsigned int thread_idx, unsigned int nb_threads); + +static const EvalPsiRadialFunc eval_psi_radial_tab[9] = { + [0] = eval_psi_radial_0, + [1] = eval_psi_radial_1, + [2] = eval_psi_radial_2, + [3] = eval_psi_radial_3, + [4] = eval_psi_radial_4, + [6] = eval_psi_radial_6, +}; + +double bdi_scalarproduct_metric_c(size_t len1, size_t len2, double *mat, + double *vec1, double *vec2) +{ + double val = 0.0; + for (int m = 0; m < len2; m++) { + double tmp = 0.0; + for (int n = 0; n < len1; n++) + tmp += mat[m * len1 + n] * vec1[n]; + val += tmp * vec2[m]; + } + return val; +} + +int bdi_eval_psi_radial(const BDContext *bd, + const double *rho, int nb_coords_rho, + const double *z, int nb_coords_z, + const unsigned int diff_order[2], + double *psi, ptrdiff_t psi_stride) +{ + ThreadPoolContext *tctx; + BDPriv *s = bd->priv; + int diff_idx = diff_order[1] * 3 + diff_order[0]; + int block_size = (nb_coords_z + bd->nb_threads - 1) / bd->nb_threads; + + EvalPsiArgs *args; + int ret; + + args = calloc(bd->nb_threads, sizeof(*args)); + if (!args) + return -ENOMEM; + + ret = bdi_threadpool_init(&tctx, bd->nb_threads); + if (ret < 0) { + free(args); + return ret; + } + + for (int i = 0; i < bd->nb_threads; i++) { + EvalPsiArgs *a = &args[i]; + + a->bd = bd; + a->rho = rho; + a->nb_coords_rho = nb_coords_rho; + + a->psi_stride = psi_stride; + a->psi = psi + i * block_size * psi_stride; + a->z = z + i * block_size; + a->nb_coords_z = MIN(block_size, nb_coords_z - i * block_size); + } + + bdi_threadpool_execute(tctx, bd->nb_threads, + eval_psi_radial_tab[diff_idx], args); + + bdi_threadpool_free(&tctx); + + for (int i = 0; i < bd->nb_threads; i++) + if (args[i].ret < 0) { + ret = args[i].ret; + break; + } + + free(args); + + return ret; +} |