summary refs log tree commit diff
diff options
context:
space:
mode:
author Anton Khirnov <anton@khirnov.net> 2019-05-16 14:37:08 +0200
committer Anton Khirnov <anton@khirnov.net> 2019-05-16 14:37:08 +0200
commit fb3db930b9ba323ee6b5978f723b7241e3e2797e (patch)
tree 11e8d1ea89b006a433628132521be6875a827ce7
parent e064b250449ea94ee34ed8f92252a6a846c20d15 (diff)
tmptp
-rw-r--r-- residual_calc.c 73
1 files changed, 43 insertions, 30 deletions
diff --git a/residual_calc.c b/residual_calc.c
index 3b83c63..8e6f66d 100644
--- a/residual_calc.c
+++ b/residual_calc.c
@@ -30,7 +30,11 @@
#include "mg2d_constants.h"
#include "residual_calc.h"
+#define MIN_ELEM_PER_TASK 1
+
typedef struct ResidualCalcTask {
+ size_t lines_total;
+ size_t lines_per_task;
size_t line_size;
double *dst;
@@ -248,35 +252,41 @@ static int residual_calc_task(void *arg, unsigned int job_idx, unsigned int thre
ResidualCalcInternal *priv = arg;
ResidualCalcTask *task = &priv->task;
- const double *diff_coeffs[MG2D_DIFF_COEFF_NB];
- double *dst = task->dst + job_idx * task->dst_stride;
-
- for (int i = 0; i < ARRAY_ELEMS(diff_coeffs); i++)
- diff_coeffs[i] = task->diff_coeffs[i] + job_idx * task->diff_coeffs_stride;
-
- if (task->u_mult == 0.0) {
- priv->residual_calc_line(task->line_size, dst,
- priv->residual_max + thread_idx * priv->calc_blocksize,
- task->u_stride, task->u + job_idx * task->u_stride,
- task->rhs + job_idx * task->rhs_stride,
- diff_coeffs, task->res_mult);
- } else {
- priv->residual_add_line(task->line_size, dst,
- priv->residual_max + thread_idx * priv->calc_blocksize,
- task->u_stride, task->u + job_idx * task->u_stride,
- task->rhs + job_idx * task->rhs_stride,
- diff_coeffs, task->res_mult, task->u_mult);
- }
- if (task->mirror_line & (1 << 0)) {
- for (int i = 1; i <= FD_STENCIL_MAX; i++)
- dst[-i] = dst[i];
- }
- if (task->mirror_line & (1 << 1)) {
- for (int i = 1; i <= FD_STENCIL_MAX; i++)
- dst[task->line_size - 1 + i] = dst[task->line_size - 1 - i];
- }
- if ((task->mirror_line & (1 << 2)) && job_idx > 0 && job_idx <= FD_STENCIL_MAX) {
- memcpy(task->dst - job_idx * task->dst_stride, dst, sizeof(*dst) * task->line_size);
+ const size_t line_min = job_idx * task->lines_per_task;
+ const size_t line_max = MIN((job_idx + 1) * task->lines_per_task, task->lines_total);
+
+ for (size_t line = line_min; line < line_max; line++) {
+ double *dst = task->dst + line * task->dst_stride;
+ const double *diff_coeffs[MG2D_DIFF_COEFF_NB];
+
+ for (int i = 0; i < ARRAY_ELEMS(diff_coeffs); i++)
+ diff_coeffs[i] = task->diff_coeffs[i] + line * task->diff_coeffs_stride;
+
+ if (task->u_mult == 0.0) {
+ priv->residual_calc_line(task->line_size, dst,
+ priv->residual_max + thread_idx * priv->calc_blocksize,
+ task->u_stride, task->u + line * task->u_stride,
+ task->rhs + line * task->rhs_stride,
+ diff_coeffs, task->res_mult);
+ } else {
+ priv->residual_add_line(task->line_size, dst,
+ priv->residual_max + thread_idx * priv->calc_blocksize,
+ task->u_stride, task->u + line * task->u_stride,
+ task->rhs + line * task->rhs_stride,
+ diff_coeffs, task->res_mult, task->u_mult);
+ }
+
+ if (task->mirror_line & (1 << 0)) {
+ for (int i = 1; i <= FD_STENCIL_MAX; i++)
+ dst[-i] = dst[i];
+ }
+ if (task->mirror_line & (1 << 1)) {
+ for (int i = 1; i <= FD_STENCIL_MAX; i++)
+ dst[task->line_size - 1 + i] = dst[task->line_size - 1 - i];
+ }
+ if ((task->mirror_line & (1 << 2)) && line > 0 && line <= FD_STENCIL_MAX) {
+ memcpy(task->dst - line * task->dst_stride, dst, sizeof(*dst) * task->line_size);
+ }
}
return 0;
@@ -309,8 +319,11 @@ int mg2di_residual_calc(ResidualCalcContext *ctx, size_t size[2],
task->u_mult = u_mult;
task->res_mult = res_mult;
task->mirror_line = mirror_line;
+ task->lines_total = size[1];
+ task->lines_per_task = MAX(1, MIN_ELEM_PER_TASK / size[0]);
- tp_execute(ctx->tp, size[1], residual_calc_task, priv);
+ tp_execute(ctx->tp, (size[1] + task->lines_per_task - 1) / task->lines_per_task,
+ residual_calc_task, priv);
for (size_t i = 0; i < priv->residual_max_size; i++)
res_max = MAX(res_max, priv->residual_max[i]);