From e1d1bdc0d20cc379f2cf31fca84f6179eba0cb36 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Sat, 23 Mar 2019 19:34:48 +0100 Subject: mg2d: use appropriate full-weighted restriction for 3rd order FDs --- transfer.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 3 deletions(-) (limited to 'transfer.c') diff --git a/transfer.c b/transfer.c index 046d96d..232d6e1 100644 --- a/transfer.c +++ b/transfer.c @@ -229,18 +229,26 @@ typedef struct FWThreadData { static int fw_1_transfer_task(void *arg, unsigned int job_idx, unsigned int thread_idx) { + static const double factors[] = { 0.5, 1.0, 0.5 }; + static const size_t order = ARRAY_ELEMS(factors) / 2; + FWThreadData *td = arg; - const double *sd = td->src->data + job_idx * 2 * td->src->stride[0]; const ptrdiff_t ss0 = td->src->stride[0]; const ptrdiff_t ss1 = td->src->stride[1]; + const double *sd = td->src->data + (job_idx * 2 - order) * ss0 - order * ss1; double *dd = td->dst->data + job_idx * td->dst->stride[0]; const ptrdiff_t ds1 = td->dst->stride[1]; for (size_t x = 0; x < td->dst->shape[1]; x++) { - *dd = 0.25 * sd[0] + 0.125 * (sd[ss1] + sd[-ss1] + sd[ss0] + sd[-ss0]) - + 0.0625 * (sd[ss0 + ss1] + sd[ss0 - ss1] + sd[-ss0 + ss1] + sd[-ss0 - ss1]); + double val = 0.0; + + for (int i = 0; i < ARRAY_ELEMS(factors); i++) + for (int j = 0; j < ARRAY_ELEMS(factors); j++) + val += sd[i * ss0 + j * ss1] * factors[i] * factors[j]; + + *dd = 0.25 * val; dd += ds1; sd += ss1 * 2; } @@ -262,10 +270,53 @@ static const GridTransfer transfer_fw_1 = { .transfer = transfer_fw_1_transfer, }; +static int fw_3_transfer_task(void *arg, unsigned int job_idx, unsigned int thread_idx) +{ + static const double factors[] = { -1.0 / 16, 9.0 / 16, 1.0, 9.0 / 16, -1.0 / 16 }; + static const size_t order = ARRAY_ELEMS(factors) / 2; + FWThreadData *td = arg; + + const ptrdiff_t ss0 = td->src->stride[0]; + const ptrdiff_t ss1 = td->src->stride[1]; + const double *sd = td->src->data + (job_idx * 2 - order) * ss0 - order * ss1; + + double *dd = td->dst->data + job_idx * td->dst->stride[0]; + const ptrdiff_t ds1 = td->dst->stride[1]; + + for (size_t x = 0; x < td->dst->shape[1]; x++) { + double val = 0.0; + + for (int i = 0; i < ARRAY_ELEMS(factors); i++) + for (int j = 0; j < ARRAY_ELEMS(factors); j++) + val += sd[i * ss0 + j * ss1] * factors[i] * factors[j]; + + *dd = 0.25 * val; + dd += ds1; + sd += ss1 * 2; + } + + return 0; +} +static int transfer_fw_3_transfer(GridTransferContext *ctx, + NDArray *dst, const NDArray *src) +{ + FWThreadData td = { dst, src }; + + tp_execute(ctx->tp, ctx->dst.size[0], fw_3_transfer_task, &td); + + return 0; +} + +static const GridTransfer transfer_fw_3 = { + .priv_data_size = sizeof(GridTransferLagrange), + .transfer = transfer_fw_3_transfer, +}; + static const GridTransfer *transfer_ops[] = { [GRID_TRANSFER_LAGRANGE_1] = &transfer_lagrange, [GRID_TRANSFER_LAGRANGE_3] = &transfer_lagrange, [GRID_TRANSFER_FW_1] = &transfer_fw_1, + [GRID_TRANSFER_FW_3] = &transfer_fw_3, }; int mg2di_gt_transfer(GridTransferContext *ctx, -- cgit v1.2.3