From 1980fccbc872d3c203034f236951eb9834fe916e Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Sun, 12 May 2019 09:06:03 +0200 Subject: transfer: unify the code for full-weighted transfer Also, rename the operator names to make more sense. --- mg2d.c | 4 +- transfer.c | 154 ++++++++++++++++++++++--------------------------------------- transfer.h | 2 +- 3 files changed, 57 insertions(+), 103 deletions(-) diff --git a/mg2d.c b/mg2d.c index 5f2ce22..84985d7 100644 --- a/mg2d.c +++ b/mg2d.c @@ -405,9 +405,9 @@ static int mg_levels_init(MG2DContext *ctx) if (cur->solver->domain_size[0] == 2 * (cur->child->solver->domain_size[0] - 1) + 1) { if (ctx->fd_stencil == 1) - op_restrict = GRID_TRANSFER_FW_3; + op_restrict = GRID_TRANSFER_FW_2; else - op_restrict = GRID_TRANSFER_FW_5; + op_restrict = GRID_TRANSFER_FW_3; } else op_restrict = op_interp; diff --git a/transfer.c b/transfer.c index db3cecb..14b9367 100644 --- a/transfer.c +++ b/transfer.c @@ -261,31 +261,37 @@ static const GridTransfer transfer_lagrange = { .transfer = transfer_lagrange_transfer, }; -typedef struct FWThreadData { +static const double fw_factors_1[] = { 0.5, 1.0, 0.5 }; +static const double fw_factors_2[] = { -1.0 / 16, 9.0 / 16, 1.0, 9.0 / 16, -1.0 / 16 }; +static const double fw_factors_3[] = { 3.0 / 256, -25.0 / 256, 75.0 / 128, 1.0, 75.0 / 128, -25.0 / 256, 3.0 / 256 }; + +typedef struct GridTransferFW { + ptrdiff_t order; + const double *factors; + NDArray *dst; const NDArray *src; -} FWThreadData; +} GridTransferFW; -static int fw_1_transfer_task(void *arg, unsigned int job_idx, unsigned int thread_idx) +static int fw_transfer_task(void *arg, unsigned int job_idx, unsigned int thread_idx) { - static const double factors[] = { 0.5, 1.0, 0.5 }; - static const size_t order = ARRAY_ELEMS(factors) / 2; - - FWThreadData *td = arg; + GridTransferFW *fw = arg; + NDArray *dst = fw->dst; + const NDArray *src = fw->src; - const ptrdiff_t ss0 = td->src->stride[0]; - const ptrdiff_t ss1 = td->src->stride[1]; - const double *sd = td->src->data + (job_idx * 2 - order) * ss0 - order * ss1; + const ptrdiff_t ss0 = src->stride[0]; + const ptrdiff_t ss1 = src->stride[1]; + const double *sd = src->data + (job_idx * 2 - fw->order) * ss0 - fw->order * ss1; - double *dd = td->dst->data + job_idx * td->dst->stride[0]; - const ptrdiff_t ds1 = td->dst->stride[1]; + double *dd = dst->data + job_idx * dst->stride[0]; + const ptrdiff_t ds1 = dst->stride[1]; - for (size_t x = 0; x < td->dst->shape[1]; x++) { + for (size_t x = 0; x < dst->shape[1]; x++) { double val = 0.0; - for (int i = 0; i < ARRAY_ELEMS(factors); i++) - for (int j = 0; j < ARRAY_ELEMS(factors); j++) - val += sd[i * ss0 + j * ss1] * factors[i] * factors[j]; + for (int i = 0; i < fw->order * 2 + 1; i++) + for (int j = 0; j < fw->order * 2 + 1; j++) + val += sd[i * ss0 + j * ss1] * fw->factors[i] * fw->factors[j]; *dd = 0.25 * val; dd += ds1; @@ -294,103 +300,51 @@ static int fw_1_transfer_task(void *arg, unsigned int job_idx, unsigned int thre return 0; } -static int transfer_fw_1_transfer(GridTransferContext *ctx, - NDArray *dst, const NDArray *src) -{ - FWThreadData td = { dst, src }; - - tp_execute(ctx->tp, ctx->dst.size[0], fw_1_transfer_task, &td); - - return 0; -} - -static const GridTransfer transfer_fw_1 = { - .priv_data_size = sizeof(GridTransferLagrange), - .transfer = transfer_fw_1_transfer, -}; -static int fw_3_transfer_task(void *arg, unsigned int job_idx, unsigned int thread_idx) +static int transfer_fw_transfer(GridTransferContext *ctx, + NDArray *dst, const NDArray *src) { - static const double factors[] = { -1.0 / 16, 9.0 / 16, 1.0, 9.0 / 16, -1.0 / 16 }; - static const size_t order = ARRAY_ELEMS(factors) / 2; - FWThreadData *td = arg; - - const ptrdiff_t ss0 = td->src->stride[0]; - const ptrdiff_t ss1 = td->src->stride[1]; - const double *sd = td->src->data + (job_idx * 2 - order) * ss0 - order * ss1; + GridTransferFW *fw = ctx->priv; - double *dd = td->dst->data + job_idx * td->dst->stride[0]; - const ptrdiff_t ds1 = td->dst->stride[1]; + fw->src = src; + fw->dst = dst; - for (size_t x = 0; x < td->dst->shape[1]; x++) { - double val = 0.0; - - for (int i = 0; i < ARRAY_ELEMS(factors); i++) - for (int j = 0; j < ARRAY_ELEMS(factors); j++) - val += sd[i * ss0 + j * ss1] * factors[i] * factors[j]; - - *dd = 0.25 * val; - dd += ds1; - sd += ss1 * 2; - } - - return 0; -} -static int transfer_fw_3_transfer(GridTransferContext *ctx, - NDArray *dst, const NDArray *src) -{ - FWThreadData td = { dst, src }; + tp_execute(ctx->tp, ctx->dst.size[0], fw_transfer_task, fw); - tp_execute(ctx->tp, ctx->dst.size[0], fw_3_transfer_task, &td); + fw->src = NULL; + fw->dst = NULL; return 0; } -static const GridTransfer transfer_fw_3 = { - .priv_data_size = sizeof(GridTransferLagrange), - .transfer = transfer_fw_3_transfer, -}; - -static int fw_5_transfer_task(void *arg, unsigned int job_idx, unsigned int thread_idx) +static int transfer_fw_init(GridTransferContext *ctx) { - static const double factors[] = { 3.0 / 256, -25.0 / 256, 75.0 / 128, 1.0, 75.0 / 128, -25.0 / 256, 3.0 / 256 }; - static const size_t order = ARRAY_ELEMS(factors) / 2; - FWThreadData *td = arg; - - const ptrdiff_t ss0 = td->src->stride[0]; - const ptrdiff_t ss1 = td->src->stride[1]; - const double *sd = td->src->data + (job_idx * 2 - order) * ss0 - order * ss1; - - double *dd = td->dst->data + job_idx * td->dst->stride[0]; - const ptrdiff_t ds1 = td->dst->stride[1]; - - for (size_t x = 0; x < td->dst->shape[1]; x++) { - double val = 0.0; - - for (int i = 0; i < ARRAY_ELEMS(factors); i++) - for (int j = 0; j < ARRAY_ELEMS(factors); j++) - val += sd[i * ss0 + j * ss1] * factors[i] * factors[j]; + GridTransferFW *fw = ctx->priv; - *dd = 0.25 * val; - dd += ds1; - sd += ss1 * 2; + switch (ctx->op) { + case GRID_TRANSFER_FW_1: + fw->order = 1; + fw->factors = fw_factors_1; + break; + case GRID_TRANSFER_FW_2: + fw->order = 2; + fw->factors = fw_factors_2; + break; + case GRID_TRANSFER_FW_3: + fw->order = 3; + fw->factors = fw_factors_3; + break; + default: + return -EINVAL; } return 0; } -static int transfer_fw_5_transfer(GridTransferContext *ctx, - NDArray *dst, const NDArray *src) -{ - FWThreadData td = { dst, src }; - tp_execute(ctx->tp, ctx->dst.size[0], fw_5_transfer_task, &td); - - return 0; -} - -static const GridTransfer transfer_fw_5 = { - .priv_data_size = sizeof(GridTransferLagrange), - .transfer = transfer_fw_5_transfer, +static const GridTransfer transfer_fw = { + .priv_data_size = sizeof(GridTransferFW), + .init = transfer_fw_init, + .transfer = transfer_fw_transfer, }; static const GridTransfer *transfer_ops[] = { @@ -398,9 +352,9 @@ static const GridTransfer *transfer_ops[] = { [GRID_TRANSFER_LAGRANGE_3] = &transfer_lagrange, [GRID_TRANSFER_LAGRANGE_5] = &transfer_lagrange, [GRID_TRANSFER_LAGRANGE_7] = &transfer_lagrange, - [GRID_TRANSFER_FW_1] = &transfer_fw_1, - [GRID_TRANSFER_FW_3] = &transfer_fw_3, - [GRID_TRANSFER_FW_5] = &transfer_fw_5, + [GRID_TRANSFER_FW_1] = &transfer_fw, + [GRID_TRANSFER_FW_2] = &transfer_fw, + [GRID_TRANSFER_FW_3] = &transfer_fw, }; int mg2di_gt_transfer(GridTransferContext *ctx, diff --git a/transfer.h b/transfer.h index d5c61a4..bb9a035 100644 --- a/transfer.h +++ b/transfer.h @@ -30,8 +30,8 @@ enum GridTransferOperator { GRID_TRANSFER_LAGRANGE_5, GRID_TRANSFER_LAGRANGE_7, GRID_TRANSFER_FW_1, + GRID_TRANSFER_FW_2, GRID_TRANSFER_FW_3, - GRID_TRANSFER_FW_5, }; typedef struct RegularGrid2D { -- cgit v1.2.3