aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnton Khirnov <anton@khirnov.net>2020-07-26 19:08:23 +0200
committerAnton Khirnov <anton@khirnov.net>2020-07-26 19:08:23 +0200
commit57adceb6497e50355c2d6011ec4bb03ea10cb23b (patch)
treef226b5768c2b211b2af9f1e20fddc134a1788988
parent29f5b628317bcf83ddc5bfeaec92108b6bb1e89d (diff)
init: multi-thread solution evaluation
-rw-r--r--init.c145
1 files changed, 94 insertions, 51 deletions
diff --git a/init.c b/init.c
index f4d130f..6467988 100644
--- a/init.c
+++ b/init.c
@@ -750,13 +750,58 @@ static double scalarproduct_metric_c(size_t len1, size_t len2, double *mat,
return val;
}
+typedef struct EvalVarData {
+ const TDContext *td;
+
+ const BasisSetContext *basis[2];
+
+ const double *coeffs;
+
+ const double *theta;
+ const double *r;
+
+ double *basis_val[2];
+ double *out;
+
+ double diff_order[2];
+ double init_val;
+} EvalVarData;
+
+static int eval_var_kernel(void *arg, unsigned int job_idx, unsigned int thread_idx)
+{
+ EvalVarData *d = arg;
+ const TDContext *td = d->td;
+ const TDPriv *s = td->priv;
+
+ double *basis_val[2] = {
+ d->basis_val[0] + thread_idx * td->nb_coeffs[0],
+ d->basis_val[1] + thread_idx * td->nb_coeffs[1],
+ };
+
+ double theta_val = d->theta[job_idx];
+ double r_val = d->r[job_idx];
+
+ double val = d->init_val;
+
+ for (int k = 0; k < td->nb_coeffs[0]; k++)
+ basis_val[0][k] = tdi_basis_eval(d->basis[0], d->diff_order[0], r_val, k);
+ for (int k = 0; k < td->nb_coeffs[1]; k++)
+ basis_val[1][k] = tdi_basis_eval(d->basis[1], d->diff_order[1], theta_val, k);
+
+ val += scalarproduct_metric_c(td->nb_coeffs[0], td->nb_coeffs[1], d->coeffs,
+ basis_val[0], basis_val[1]);
+
+ d->out[job_idx] = val;
+}
+
static int eval_var(const TDContext *td, unsigned int var_idx,
size_t nb_coords, const double *r, const double *theta,
const unsigned int diff_order[2],
double *out)
{
TDPriv *s = td->priv;
- double *basis_val[2] = { NULL };
+ const int nb_threads = tp_get_nb_threads(s->tp);
+ EvalVarData thread_data = { NULL };
int ret = 0;
if (diff_order[0] > 2 || diff_order[1] > 2) {
@@ -764,36 +809,34 @@ static int eval_var(const TDContext *td, unsigned int var_idx,
return -ENOSYS;
}
- posix_memalign((void**)&basis_val[0], 32, sizeof(*basis_val[0]) * td->nb_coeffs[0]);
- posix_memalign((void**)&basis_val[1], 32, sizeof(*basis_val[1]) * td->nb_coeffs[1]);
- if (!basis_val[0] || !basis_val[1]) {
- ret = -ENOMEM;
- goto fail;
+ for (int i = 0; i < ARRAY_ELEMS(thread_data.basis_val); i++) {
+ const size_t alloc_elems = td->nb_coeffs[i] * nb_threads;
+ ret = posix_memalign((void**)&thread_data.basis_val[i], 32,
+ sizeof(*thread_data.basis_val[i]) * alloc_elems);
+ if (ret != 0) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+ memset(thread_data.basis_val[i], 0,
+ sizeof(*thread_data.basis_val[i]) * alloc_elems);
}
- memset(basis_val[0], 0, sizeof(*basis_val[0]) * td->nb_coeffs[0]);
- memset(basis_val[1], 0, sizeof(*basis_val[1]) * td->nb_coeffs[1]);
- for (int i = 0; i < nb_coords; i++) {
- double theta_val = theta[i];
- double r_val = r[i];
-
- double val = (var_idx == 0 && diff_order[0] == 0 && diff_order[1] == 0) ? 1.0 : 0.0;
-
- for (int k = 0; k < td->nb_coeffs[0]; k++)
- basis_val[0][k] = tdi_basis_eval(s->basis[var_idx][0], diff_order[0], r_val, k);
- for (int k = 0; k < td->nb_coeffs[1]; k++)
- basis_val[1][k] = tdi_basis_eval(s->basis[var_idx][1], diff_order[1], theta_val, k);
-
- val += scalarproduct_metric_c(td->nb_coeffs[0], td->nb_coeffs[1], td->coeffs[var_idx],
- basis_val[0], basis_val[1]);
-
- out[i] = val;
- }
+ thread_data.td = td;
+ thread_data.r = r;
+ thread_data.theta = theta;
+ thread_data.out = out;
+ thread_data.diff_order[0] = diff_order[0];
+ thread_data.diff_order[1] = diff_order[1];
+ thread_data.basis[0] = s->basis[var_idx][0];
+ thread_data.basis[1] = s->basis[var_idx][1];
+ thread_data.coeffs = td->coeffs[var_idx];
+ thread_data.init_val = (var_idx == 0 && diff_order[0] == 0 && diff_order[1] == 0) ? 1.0 : 0.0;
+ tp_execute(s->tp, nb_coords, eval_var_kernel, &thread_data);
fail:
- free(basis_val[0]);
- free(basis_val[1]);
+ free(thread_data.basis_val[0]);
+ free(thread_data.basis_val[1]);
return ret;
}
@@ -866,7 +909,8 @@ int td_eval_lapse(const TDContext *td,
double *out)
{
TDPriv *priv = td->priv;
- double *basis_val[2] = { NULL };
+ const int nb_threads = tp_get_nb_threads(priv->tp);
+ EvalVarData thread_data = { NULL };
int ret;
if (!priv->coeffs_lapse) {
@@ -880,35 +924,34 @@ int td_eval_lapse(const TDContext *td,
return -ENOSYS;
}
- posix_memalign((void**)&basis_val[0], 32, sizeof(*basis_val[0]) * td->nb_coeffs[0]);
- posix_memalign((void**)&basis_val[1], 32, sizeof(*basis_val[1]) * td->nb_coeffs[1]);
- if (!basis_val[0] || !basis_val[1]) {
- ret = -ENOMEM;
- goto fail;
+ for (int i = 0; i < ARRAY_ELEMS(thread_data.basis_val); i++) {
+ const size_t alloc_elems = td->nb_coeffs[i] * nb_threads;
+ ret = posix_memalign((void**)&thread_data.basis_val[i], 32,
+ sizeof(*thread_data.basis_val[i]) * alloc_elems);
+ if (ret != 0) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+ memset(thread_data.basis_val[i], 0,
+ sizeof(*thread_data.basis_val[i]) * alloc_elems);
}
- memset(basis_val[0], 0, sizeof(*basis_val[0]) * td->nb_coeffs[0]);
- memset(basis_val[1], 0, sizeof(*basis_val[1]) * td->nb_coeffs[1]);
- for (int i = 0; i < nb_coords; i++) {
- double theta_val = theta[i];
- double r_val = r[i];
+ thread_data.td = td;
+ thread_data.r = r;
+ thread_data.theta = theta;
+ thread_data.out = out;
+ thread_data.diff_order[0] = diff_order[0];
+ thread_data.diff_order[1] = diff_order[1];
+ thread_data.basis[0] = priv->basis[0][0];
+ thread_data.basis[1] = priv->basis[0][1];
+ thread_data.coeffs = priv->coeffs_lapse;
+ thread_data.init_val = (diff_order[0] == 0 && diff_order[1] == 0) ? 1.0 : 0.0;
- double val = (diff_order[0] == 0 && diff_order[1] == 0) ? 1.0 : 0.0;
-
- for (int k = 0; k < td->nb_coeffs[0]; k++)
- basis_val[0][k] = tdi_basis_eval(priv->basis[0][0], diff_order[0], r_val, k);
- for (int k = 0; k < td->nb_coeffs[1]; k++)
- basis_val[1][k] = tdi_basis_eval(priv->basis[0][1], diff_order[1], theta_val, k);
-
- val += scalarproduct_metric_c(td->nb_coeffs[0], td->nb_coeffs[1], priv->coeffs_lapse,
- basis_val[0], basis_val[1]);
-
- out[i] = val;
- }
+ tp_execute(priv->tp, nb_coords, eval_var_kernel, &thread_data);
fail:
- free(basis_val[0]);
- free(basis_val[1]);
+ free(thread_data.basis_val[0]);
+ free(thread_data.basis_val[1]);
return 0;
}