diff options
author | Anton Khirnov <anton@khirnov.net> | 2018-12-27 16:26:29 +0100 |
---|---|---|
committer | Anton Khirnov <anton@khirnov.net> | 2018-12-27 16:26:29 +0100 |
commit | 70186149b931a49f1eb599d2f207fdc3d3048388 (patch) | |
tree | e8f825af87c92d30d20f1ca4fb44c1e57a5fe25a /ell_relax.c | |
parent | 7606e74f1a0310c1031aad48f77a7c701791025a (diff) |
Add threading support through libthreadpool.
Diffstat (limited to 'ell_relax.c')
-rw-r--r-- | ell_relax.c | 60 |
1 files changed, 44 insertions, 16 deletions
diff --git a/ell_relax.c b/ell_relax.c index a3f8968..042c4a5 100644 --- a/ell_relax.c +++ b/ell_relax.c @@ -22,6 +22,7 @@ #include <stdint.h> #include <stdlib.h> #include <string.h> +#include <threadpool.h> #include "common.h" #include "cpu.h" @@ -48,6 +49,8 @@ struct EllRelaxInternal { const double *fd_factors); double fd_factors[ELL_RELAX_DIFF_COEFF_NB]; double relax_factor; + + TPContext *tp_internal; }; static const struct { @@ -198,23 +201,29 @@ static void residual_calc_line_s2_c(size_t linesize, double *dst, } } +static void residual_calc_task(void *arg, unsigned int job_idx, unsigned int thread_idx) +{ + EllRelaxContext *ctx = arg; + EllRelaxInternal *priv = ctx->priv; + const ptrdiff_t offset = job_idx * priv->stride; + const double *diff_coeffs[ELL_RELAX_DIFF_COEFF_NB]; + + for (int i = 0; i < ARRAY_ELEMS(diff_coeffs); i++) + diff_coeffs[i] = ctx->diff_coeffs[i] + offset; + + priv->residual_calc_line(ctx->domain_size[0], ctx->residual + offset, + priv->stride, ctx->u + offset, ctx->rhs + offset, + diff_coeffs, priv->fd_factors); +} + static void residual_calc(EllRelaxContext *ctx) { EllRelaxInternal *priv = ctx->priv; int64_t start; start = gettime(); - for (int idx1 = 0; idx1 < ctx->domain_size[1]; idx1++) { - const ptrdiff_t offset = idx1 * priv->stride; - const double *diff_coeffs[ELL_RELAX_DIFF_COEFF_NB]; - for (int i = 0; i < ARRAY_ELEMS(diff_coeffs); i++) - diff_coeffs[i] = ctx->diff_coeffs[i] + offset; - - priv->residual_calc_line(ctx->domain_size[0], ctx->residual + offset, - priv->stride, ctx->u + offset, ctx->rhs + offset, - diff_coeffs, priv->fd_factors); - } + tp_execute(ctx->tp, ctx->domain_size[1], residual_calc_task, ctx); for (int i = 0; i < ARRAY_ELEMS(ctx->boundaries); i++) { const ptrdiff_t strides[2] = { 1, priv->stride }; @@ -329,19 +338,29 @@ static void boundaries_apply(EllRelaxContext *ctx) ctx->count_boundaries++; } +static void residual_add_task(void *arg, unsigned int job_idx, unsigned int thread_idx) +{ + EllRelaxContext *ctx = arg; + EllRelaxInternal *priv = ctx->priv; + ptrdiff_t offset = job_idx * priv->stride; + + for (int idx0 = 0; idx0 < ctx->domain_size[0]; idx0++) { + ptrdiff_t idx = job_idx * ctx->u_stride + idx0; + + ctx->u[idx] += priv->relax_factor * ctx->residual[idx]; + } +} + int mg2di_ell_relax_step(EllRelaxContext *ctx) { EllRelaxInternal *priv = ctx->priv; - const double cfl_fac = priv->relax_factor * ctx->step[0] * ctx->step[1]; + const double cfl_fac = priv->relax_factor; int64_t start; start = gettime(); - for (int idx1 = 0; idx1 < ctx->domain_size[1]; idx1++) - for (int idx0 = 0; idx0 < ctx->domain_size[0]; idx0++) { - ptrdiff_t idx = idx1 * priv->stride + idx0; - ctx->u[idx] += cfl_fac * ctx->residual[idx]; - } + tp_execute(ctx->tp, ctx->domain_size[1], residual_add_task, ctx); + ctx->time_correct += gettime() - start; ctx->count_correct++; @@ -386,6 +405,7 @@ int mg2di_ell_relax_init(EllRelaxContext *ctx) priv->relax_factor = relax_factors[ctx->fd_stencil - 1]; else priv->relax_factor = ctx->relax_factor; + priv->relax_factor *= ctx->step[0] * ctx->step[0]; priv->fd_factors[ELL_RELAX_DIFF_COEFF_00] = 1.0 / fd_denoms[ctx->fd_stencil - 1][ELL_RELAX_DIFF_COEFF_00]; priv->fd_factors[ELL_RELAX_DIFF_COEFF_10] = 1.0 / (fd_denoms[ctx->fd_stencil - 1][ELL_RELAX_DIFF_COEFF_10] * ctx->step[0]); @@ -394,6 +414,13 @@ int mg2di_ell_relax_init(EllRelaxContext *ctx) priv->fd_factors[ELL_RELAX_DIFF_COEFF_02] = 1.0 / (fd_denoms[ctx->fd_stencil - 1][ELL_RELAX_DIFF_COEFF_02] * SQR(ctx->step[1])); priv->fd_factors[ELL_RELAX_DIFF_COEFF_11] = 1.0 / (fd_denoms[ctx->fd_stencil - 1][ELL_RELAX_DIFF_COEFF_11] * ctx->step[0] * ctx->step[1]); + if (!ctx->tp) { + ret = tp_init(&priv->tp_internal, 1); + if (ret < 0) + return ret; + ctx->tp = priv->tp_internal; + } + for (int i = 0; i < ARRAY_ELEMS(ctx->boundaries); i++) { if (ctx->boundaries[i].type == ELL_RELAX_BC_TYPE_FIXDIFF) { for (int k = 0; k < ctx->domain_size[boundary_def[i].stride_idx]; k++) @@ -517,6 +544,7 @@ void mg2di_ell_relax_free(EllRelaxContext **pctx) for (int i = 0; i < ARRAY_ELEMS(ctx->priv->boundary_val_base); i++) free(ctx->priv->boundary_val_base[i]); + tp_free(&ctx->priv->tp_internal); free(ctx->priv); free(ctx); |