From a1f10a1338e77dd147e7c94f3f401d2703181bca Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Fri, 14 Jun 2019 15:29:02 +0200 Subject: rescalc: improve reflection boundary conditions Make parameter names more clear/consistent, document them, implement missing 1U boundary. --- residual_calc.c | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) (limited to 'residual_calc.c') diff --git a/residual_calc.c b/residual_calc.c index 3b83c63..6d58c10 100644 --- a/residual_calc.c +++ b/residual_calc.c @@ -27,11 +27,12 @@ #include "common.h" #include "cpu.h" +#include "mg2d_boundary.h" #include "mg2d_constants.h" #include "residual_calc.h" typedef struct ResidualCalcTask { - size_t line_size; + size_t size[2]; double *dst; ptrdiff_t dst_stride; @@ -48,7 +49,8 @@ typedef struct ResidualCalcTask { double u_mult; double res_mult; - int mirror_line; + int reflect; + size_t reflect_dist; } ResidualCalcTask; struct ResidualCalcInternal { @@ -255,28 +257,34 @@ static int residual_calc_task(void *arg, unsigned int job_idx, unsigned int thre diff_coeffs[i] = task->diff_coeffs[i] + job_idx * task->diff_coeffs_stride; if (task->u_mult == 0.0) { - priv->residual_calc_line(task->line_size, dst, + priv->residual_calc_line(task->size[0], dst, priv->residual_max + thread_idx * priv->calc_blocksize, task->u_stride, task->u + job_idx * task->u_stride, task->rhs + job_idx * task->rhs_stride, diff_coeffs, task->res_mult); } else { - priv->residual_add_line(task->line_size, dst, + priv->residual_add_line(task->size[0], dst, priv->residual_max + thread_idx * priv->calc_blocksize, task->u_stride, task->u + job_idx * task->u_stride, task->rhs + job_idx * task->rhs_stride, diff_coeffs, task->res_mult, task->u_mult); } - if (task->mirror_line & (1 << 0)) { - for (int i = 1; i <= FD_STENCIL_MAX; i++) + + if (task->reflect & (1 << MG2D_BOUNDARY_0L)) { + for (int i = 1; i <= task->reflect_dist; i++) dst[-i] = dst[i]; } - if (task->mirror_line & (1 << 1)) { - for (int i = 1; i <= FD_STENCIL_MAX; i++) - dst[task->line_size - 1 + i] = dst[task->line_size - 1 - i]; + if (task->reflect & (1 << MG2D_BOUNDARY_0U)) { + for (int i = 1; i <= task->reflect_dist; i++) + dst[task->size[0] - 1 + i] = dst[task->size[0] - 1 - i]; + } + if ((task->reflect & (1 << MG2D_BOUNDARY_1L)) && + job_idx > 0 && job_idx <= task->reflect_dist) { + memcpy(task->dst - job_idx * task->dst_stride, dst, sizeof(*dst) * task->size[0]); } - if ((task->mirror_line & (1 << 2)) && job_idx > 0 && job_idx <= FD_STENCIL_MAX) { - memcpy(task->dst - job_idx * task->dst_stride, dst, sizeof(*dst) * task->line_size); + if ((task->reflect & (1 << MG2D_BOUNDARY_1U)) && + job_idx >= task->size[1] - 1 - task->reflect_dist && job_idx < task->size[1] - 1) { + memcpy(task->dst + (2 * (task->size[1] - 1) - job_idx) * task->dst_stride, dst, sizeof(*dst) * task->size[0]); } return 0; @@ -289,7 +297,8 @@ int mg2di_residual_calc(ResidualCalcContext *ctx, size_t size[2], const double *rhs, ptrdiff_t rhs_stride, const double * const diff_coeffs[MG2D_DIFF_COEFF_NB], ptrdiff_t diff_coeffs_stride, - double u_mult, double res_mult, int mirror_line) + double u_mult, double res_mult, + int reflect, size_t reflect_dist) { ResidualCalcInternal *priv = ctx->priv; ResidualCalcTask *task = &priv->task; @@ -297,7 +306,8 @@ int mg2di_residual_calc(ResidualCalcContext *ctx, size_t size[2], memset(priv->residual_max, 0, sizeof(*priv->residual_max) * priv->residual_max_size); - task->line_size = size[0]; + task->size[0] = size[0]; + task->size[1] = size[1]; task->dst = dst; task->dst_stride = dst_stride; task->u = u; @@ -308,7 +318,8 @@ int mg2di_residual_calc(ResidualCalcContext *ctx, size_t size[2], task->diff_coeffs_stride = diff_coeffs_stride; task->u_mult = u_mult; task->res_mult = res_mult; - task->mirror_line = mirror_line; + task->reflect = reflect; + task->reflect_dist = reflect_dist; tp_execute(ctx->tp, size[1], residual_calc_task, priv); -- cgit v1.2.3