summaryrefslogtreecommitdiff
path: root/ell_relax.c
diff options
context:
space:
mode:
Diffstat (limited to 'ell_relax.c')
-rw-r--r--ell_relax.c60
1 files changed, 44 insertions, 16 deletions
diff --git a/ell_relax.c b/ell_relax.c
index a3f8968..042c4a5 100644
--- a/ell_relax.c
+++ b/ell_relax.c
@@ -22,6 +22,7 @@
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
+#include <threadpool.h>
#include "common.h"
#include "cpu.h"
@@ -48,6 +49,8 @@ struct EllRelaxInternal {
const double *fd_factors);
double fd_factors[ELL_RELAX_DIFF_COEFF_NB];
double relax_factor;
+
+ TPContext *tp_internal;
};
static const struct {
@@ -198,23 +201,29 @@ static void residual_calc_line_s2_c(size_t linesize, double *dst,
}
}
+static void residual_calc_task(void *arg, unsigned int job_idx, unsigned int thread_idx)
+{
+ EllRelaxContext *ctx = arg;
+ EllRelaxInternal *priv = ctx->priv;
+ const ptrdiff_t offset = job_idx * priv->stride;
+ const double *diff_coeffs[ELL_RELAX_DIFF_COEFF_NB];
+
+ for (int i = 0; i < ARRAY_ELEMS(diff_coeffs); i++)
+ diff_coeffs[i] = ctx->diff_coeffs[i] + offset;
+
+ priv->residual_calc_line(ctx->domain_size[0], ctx->residual + offset,
+ priv->stride, ctx->u + offset, ctx->rhs + offset,
+ diff_coeffs, priv->fd_factors);
+}
+
static void residual_calc(EllRelaxContext *ctx)
{
EllRelaxInternal *priv = ctx->priv;
int64_t start;
start = gettime();
- for (int idx1 = 0; idx1 < ctx->domain_size[1]; idx1++) {
- const ptrdiff_t offset = idx1 * priv->stride;
- const double *diff_coeffs[ELL_RELAX_DIFF_COEFF_NB];
- for (int i = 0; i < ARRAY_ELEMS(diff_coeffs); i++)
- diff_coeffs[i] = ctx->diff_coeffs[i] + offset;
-
- priv->residual_calc_line(ctx->domain_size[0], ctx->residual + offset,
- priv->stride, ctx->u + offset, ctx->rhs + offset,
- diff_coeffs, priv->fd_factors);
- }
+ tp_execute(ctx->tp, ctx->domain_size[1], residual_calc_task, ctx);
for (int i = 0; i < ARRAY_ELEMS(ctx->boundaries); i++) {
const ptrdiff_t strides[2] = { 1, priv->stride };
@@ -329,19 +338,29 @@ static void boundaries_apply(EllRelaxContext *ctx)
ctx->count_boundaries++;
}
+static void residual_add_task(void *arg, unsigned int job_idx, unsigned int thread_idx)
+{
+ EllRelaxContext *ctx = arg;
+ EllRelaxInternal *priv = ctx->priv;
+ ptrdiff_t offset = job_idx * priv->stride;
+
+ for (int idx0 = 0; idx0 < ctx->domain_size[0]; idx0++) {
+ ptrdiff_t idx = job_idx * ctx->u_stride + idx0;
+
+ ctx->u[idx] += priv->relax_factor * ctx->residual[idx];
+ }
+}
+
int mg2di_ell_relax_step(EllRelaxContext *ctx)
{
EllRelaxInternal *priv = ctx->priv;
- const double cfl_fac = priv->relax_factor * ctx->step[0] * ctx->step[1];
+ const double cfl_fac = priv->relax_factor;
int64_t start;
start = gettime();
- for (int idx1 = 0; idx1 < ctx->domain_size[1]; idx1++)
- for (int idx0 = 0; idx0 < ctx->domain_size[0]; idx0++) {
- ptrdiff_t idx = idx1 * priv->stride + idx0;
- ctx->u[idx] += cfl_fac * ctx->residual[idx];
- }
+ tp_execute(ctx->tp, ctx->domain_size[1], residual_add_task, ctx);
+
ctx->time_correct += gettime() - start;
ctx->count_correct++;
@@ -386,6 +405,7 @@ int mg2di_ell_relax_init(EllRelaxContext *ctx)
priv->relax_factor = relax_factors[ctx->fd_stencil - 1];
else
priv->relax_factor = ctx->relax_factor;
+ priv->relax_factor *= ctx->step[0] * ctx->step[0];
priv->fd_factors[ELL_RELAX_DIFF_COEFF_00] = 1.0 / fd_denoms[ctx->fd_stencil - 1][ELL_RELAX_DIFF_COEFF_00];
priv->fd_factors[ELL_RELAX_DIFF_COEFF_10] = 1.0 / (fd_denoms[ctx->fd_stencil - 1][ELL_RELAX_DIFF_COEFF_10] * ctx->step[0]);
@@ -394,6 +414,13 @@ int mg2di_ell_relax_init(EllRelaxContext *ctx)
priv->fd_factors[ELL_RELAX_DIFF_COEFF_02] = 1.0 / (fd_denoms[ctx->fd_stencil - 1][ELL_RELAX_DIFF_COEFF_02] * SQR(ctx->step[1]));
priv->fd_factors[ELL_RELAX_DIFF_COEFF_11] = 1.0 / (fd_denoms[ctx->fd_stencil - 1][ELL_RELAX_DIFF_COEFF_11] * ctx->step[0] * ctx->step[1]);
+ if (!ctx->tp) {
+ ret = tp_init(&priv->tp_internal, 1);
+ if (ret < 0)
+ return ret;
+ ctx->tp = priv->tp_internal;
+ }
+
for (int i = 0; i < ARRAY_ELEMS(ctx->boundaries); i++) {
if (ctx->boundaries[i].type == ELL_RELAX_BC_TYPE_FIXDIFF) {
for (int k = 0; k < ctx->domain_size[boundary_def[i].stride_idx]; k++)
@@ -517,6 +544,7 @@ void mg2di_ell_relax_free(EllRelaxContext **pctx)
for (int i = 0; i < ARRAY_ELEMS(ctx->priv->boundary_val_base); i++)
free(ctx->priv->boundary_val_base[i]);
+ tp_free(&ctx->priv->tp_internal);
free(ctx->priv);
free(ctx);