From fb3db930b9ba323ee6b5978f723b7241e3e2797e Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Thu, 16 May 2019 14:37:08 +0200 Subject: tmp --- residual_calc.c | 73 +++++++++++++++++++++++++++++++++------------------------ 1 file changed, 43 insertions(+), 30 deletions(-) diff --git a/residual_calc.c b/residual_calc.c index 3b83c63..8e6f66d 100644 --- a/residual_calc.c +++ b/residual_calc.c @@ -30,7 +30,11 @@ #include "mg2d_constants.h" #include "residual_calc.h" +#define MIN_ELEM_PER_TASK 1 + typedef struct ResidualCalcTask { + size_t lines_total; + size_t lines_per_task; size_t line_size; double *dst; @@ -248,35 +252,41 @@ static int residual_calc_task(void *arg, unsigned int job_idx, unsigned int thre ResidualCalcInternal *priv = arg; ResidualCalcTask *task = &priv->task; - const double *diff_coeffs[MG2D_DIFF_COEFF_NB]; - double *dst = task->dst + job_idx * task->dst_stride; - - for (int i = 0; i < ARRAY_ELEMS(diff_coeffs); i++) - diff_coeffs[i] = task->diff_coeffs[i] + job_idx * task->diff_coeffs_stride; - - if (task->u_mult == 0.0) { - priv->residual_calc_line(task->line_size, dst, - priv->residual_max + thread_idx * priv->calc_blocksize, - task->u_stride, task->u + job_idx * task->u_stride, - task->rhs + job_idx * task->rhs_stride, - diff_coeffs, task->res_mult); - } else { - priv->residual_add_line(task->line_size, dst, - priv->residual_max + thread_idx * priv->calc_blocksize, - task->u_stride, task->u + job_idx * task->u_stride, - task->rhs + job_idx * task->rhs_stride, - diff_coeffs, task->res_mult, task->u_mult); - } - if (task->mirror_line & (1 << 0)) { - for (int i = 1; i <= FD_STENCIL_MAX; i++) - dst[-i] = dst[i]; - } - if (task->mirror_line & (1 << 1)) { - for (int i = 1; i <= FD_STENCIL_MAX; i++) - dst[task->line_size - 1 + i] = dst[task->line_size - 1 - i]; - } - if ((task->mirror_line & (1 << 2)) && job_idx > 0 && job_idx <= FD_STENCIL_MAX) { - memcpy(task->dst - job_idx * task->dst_stride, dst, sizeof(*dst) * task->line_size); + const size_t line_min = job_idx * task->lines_per_task; + const size_t line_max = MIN((job_idx + 1) * task->lines_per_task, task->lines_total); + + for (size_t line = line_min; line < line_max; line++) { + double *dst = task->dst + line * task->dst_stride; + const double *diff_coeffs[MG2D_DIFF_COEFF_NB]; + + for (int i = 0; i < ARRAY_ELEMS(diff_coeffs); i++) + diff_coeffs[i] = task->diff_coeffs[i] + line * task->diff_coeffs_stride; + + if (task->u_mult == 0.0) { + priv->residual_calc_line(task->line_size, dst, + priv->residual_max + thread_idx * priv->calc_blocksize, + task->u_stride, task->u + line * task->u_stride, + task->rhs + line * task->rhs_stride, + diff_coeffs, task->res_mult); + } else { + priv->residual_add_line(task->line_size, dst, + priv->residual_max + thread_idx * priv->calc_blocksize, + task->u_stride, task->u + line * task->u_stride, + task->rhs + line * task->rhs_stride, + diff_coeffs, task->res_mult, task->u_mult); + } + + if (task->mirror_line & (1 << 0)) { + for (int i = 1; i <= FD_STENCIL_MAX; i++) + dst[-i] = dst[i]; + } + if (task->mirror_line & (1 << 1)) { + for (int i = 1; i <= FD_STENCIL_MAX; i++) + dst[task->line_size - 1 + i] = dst[task->line_size - 1 - i]; + } + if ((task->mirror_line & (1 << 2)) && line > 0 && line <= FD_STENCIL_MAX) { + memcpy(task->dst - line * task->dst_stride, dst, sizeof(*dst) * task->line_size); + } } return 0; @@ -309,8 +319,11 @@ int mg2di_residual_calc(ResidualCalcContext *ctx, size_t size[2], task->u_mult = u_mult; task->res_mult = res_mult; task->mirror_line = mirror_line; + task->lines_total = size[1]; + task->lines_per_task = MAX(1, MIN_ELEM_PER_TASK / size[0]); - tp_execute(ctx->tp, size[1], residual_calc_task, priv); + tp_execute(ctx->tp, (size[1] + task->lines_per_task - 1) / task->lines_per_task, + residual_calc_task, priv); for (size_t i = 0; i < priv->residual_max_size; i++) res_max = MAX(res_max, priv->residual_max[i]); -- cgit v1.2.3