diff options
-rw-r--r-- | mg2d.c | 9 | ||||
-rw-r--r-- | transfer.c | 57 | ||||
-rw-r--r-- | transfer.h | 1 |
3 files changed, 61 insertions, 6 deletions
@@ -376,9 +376,12 @@ static int mg_levels_init(MG2DContext *ctx) else op_interp = GRID_TRANSFER_LAGRANGE_3; - if (cur->solver->domain_size[0] == 2 * (cur->child->solver->domain_size[0] - 1) + 1) - op_restrict = GRID_TRANSFER_FW_1; - else + if (cur->solver->domain_size[0] == 2 * (cur->child->solver->domain_size[0] - 1) + 1) { + if (ctx->fd_stencil == 1) + op_restrict = GRID_TRANSFER_FW_1; + else + op_restrict = GRID_TRANSFER_FW_3; + } else op_restrict = op_interp; cur->transfer_restrict = mg2di_gt_alloc(op_restrict); @@ -229,18 +229,26 @@ typedef struct FWThreadData { static int fw_1_transfer_task(void *arg, unsigned int job_idx, unsigned int thread_idx) { + static const double factors[] = { 0.5, 1.0, 0.5 }; + static const size_t order = ARRAY_ELEMS(factors) / 2; + FWThreadData *td = arg; - const double *sd = td->src->data + job_idx * 2 * td->src->stride[0]; const ptrdiff_t ss0 = td->src->stride[0]; const ptrdiff_t ss1 = td->src->stride[1]; + const double *sd = td->src->data + (job_idx * 2 - order) * ss0 - order * ss1; double *dd = td->dst->data + job_idx * td->dst->stride[0]; const ptrdiff_t ds1 = td->dst->stride[1]; for (size_t x = 0; x < td->dst->shape[1]; x++) { - *dd = 0.25 * sd[0] + 0.125 * (sd[ss1] + sd[-ss1] + sd[ss0] + sd[-ss0]) - + 0.0625 * (sd[ss0 + ss1] + sd[ss0 - ss1] + sd[-ss0 + ss1] + sd[-ss0 - ss1]); + double val = 0.0; + + for (int i = 0; i < ARRAY_ELEMS(factors); i++) + for (int j = 0; j < ARRAY_ELEMS(factors); j++) + val += sd[i * ss0 + j * ss1] * factors[i] * factors[j]; + + *dd = 0.25 * val; dd += ds1; sd += ss1 * 2; } @@ -262,10 +270,53 @@ static const GridTransfer transfer_fw_1 = { .transfer = transfer_fw_1_transfer, }; +static int fw_3_transfer_task(void *arg, unsigned int job_idx, unsigned int thread_idx) +{ + static const double factors[] = { -1.0 / 16, 9.0 / 16, 1.0, 9.0 / 16, -1.0 / 16 }; + static const size_t order = ARRAY_ELEMS(factors) / 2; + FWThreadData *td = arg; + + const ptrdiff_t ss0 = td->src->stride[0]; + const ptrdiff_t ss1 = td->src->stride[1]; + const double *sd = td->src->data + (job_idx * 2 - order) * ss0 - order * ss1; + + double *dd = td->dst->data + job_idx * td->dst->stride[0]; + const ptrdiff_t ds1 = td->dst->stride[1]; + + for (size_t x = 0; x < td->dst->shape[1]; x++) { + double val = 0.0; + + for (int i = 0; i < ARRAY_ELEMS(factors); i++) + for (int j = 0; j < ARRAY_ELEMS(factors); j++) + val += sd[i * ss0 + j * ss1] * factors[i] * factors[j]; + + *dd = 0.25 * val; + dd += ds1; + sd += ss1 * 2; + } + + return 0; +} +static int transfer_fw_3_transfer(GridTransferContext *ctx, + NDArray *dst, const NDArray *src) +{ + FWThreadData td = { dst, src }; + + tp_execute(ctx->tp, ctx->dst.size[0], fw_3_transfer_task, &td); + + return 0; +} + +static const GridTransfer transfer_fw_3 = { + .priv_data_size = sizeof(GridTransferLagrange), + .transfer = transfer_fw_3_transfer, +}; + static const GridTransfer *transfer_ops[] = { [GRID_TRANSFER_LAGRANGE_1] = &transfer_lagrange, [GRID_TRANSFER_LAGRANGE_3] = &transfer_lagrange, [GRID_TRANSFER_FW_1] = &transfer_fw_1, + [GRID_TRANSFER_FW_3] = &transfer_fw_3, }; int mg2di_gt_transfer(GridTransferContext *ctx, @@ -28,6 +28,7 @@ enum GridTransferOperator { GRID_TRANSFER_LAGRANGE_1, GRID_TRANSFER_LAGRANGE_3, GRID_TRANSFER_FW_1, + GRID_TRANSFER_FW_3, }; typedef struct RegularGrid2D { |