From d215c872bbdf8ba733f9a9fbd586374b841fdfb5 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Sat, 23 Mar 2019 17:55:57 +0100 Subject: Add a new separate module for grid transfers/interpolation. --- transfer.c | 346 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 346 insertions(+) create mode 100644 transfer.c (limited to 'transfer.c') diff --git a/transfer.c b/transfer.c new file mode 100644 index 0000000..6c8f614 --- /dev/null +++ b/transfer.c @@ -0,0 +1,346 @@ +/* + * Grid transfer operators + * Copyright 2019 Anton Khirnov + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include "config.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "common.h" +#include "cpu.h" +#include "ndarray.h" +#include "transfer.h" + +typedef struct GridTransfer { + size_t priv_data_size; + int (*init)(GridTransferContext *ctx); + void (*free)(GridTransferContext *ctx); + int (*transfer)(GridTransferContext *ctx, NDArray *dst, const NDArray *src); +} GridTransfer; + +typedef struct GridTransferLagrange { + unsigned int stencil; + + void (*transfer_cont) (double *dst, ptrdiff_t dst_len, + const double *src, ptrdiff_t src_stride, + const ptrdiff_t *idx_x, const double *fact_x, const double *fact_y); + void (*transfer_generic)(double *dst, ptrdiff_t dst_len, + const double *src, ptrdiff_t src_stride, + const ptrdiff_t *idx_x, const double *fact_x, const double *fact_y, + ptrdiff_t dst_stride0, ptrdiff_t src_stride0); + + const NDArray *src; + NDArray *dst; + + ptrdiff_t *idx[2]; + double *fact[2]; +} GridTransferLagrange; + +#if HAVE_EXTERNAL_ASM +void mg2di_transfer_interp_line_cont_4_fma3(double *dst, ptrdiff_t dst_len, + const double *src, ptrdiff_t src_stride, + const ptrdiff_t *idx_x, + const double *fact_x, const double *fact_y); +#endif + +#define STENCIL 2 +#define CONTIGUOUS 0 +#include "transfer_interp_template.c" +#undef CONTIGUOUS + +#define CONTIGUOUS 1 +#include "transfer_interp_template.c" +#undef CONTIGUOUS +#undef STENCIL + +#define STENCIL 4 +#define CONTIGUOUS 0 +#include "transfer_interp_template.c" +#undef CONTIGUOUS + +#define CONTIGUOUS 1 +#include "transfer_interp_template.c" +#undef CONTIGUOUS +#undef STENCIL + +// generate the interpolation source indices and weights +static int transfer_factors_lagrange(GridTransferContext *ctx, unsigned int dim) +{ + GridTransferLagrange *priv = ctx->priv; + const double step_dst = ctx->dst.step[dim]; + const double step_src = ctx->src.step[dim]; + const double scale = step_dst / step_src; + + double *coord_src; + + coord_src = calloc(priv->stencil, sizeof(*coord_src)); + if (!coord_src) + return -ENOMEM; + + for (ptrdiff_t idx_dst = 0; idx_dst < ctx->dst.size[dim]; idx_dst++) { + const ptrdiff_t idx_src = scale * idx_dst - (priv->stencil / 2 - 1); + const double coord_dst = idx_dst * step_dst; + + double *fact = priv->fact[dim] + priv->stencil * idx_dst; + + for (int i = 0; i < priv->stencil; i++) + coord_src[i] = (idx_src + i) * step_src; + + for (int i = 0; i < priv->stencil; i++) { + fact[i] = 1.0; + for (int j = 0; j < priv->stencil; j++) { + if (i == j) + continue; + + fact[i] *= (coord_dst - coord_src[j]) / (coord_src[i] - coord_src[j]); + } + } + + priv->idx[dim][idx_dst] = idx_src; + } + + free(coord_src); + + return 0; +} + +static int transfer_lagrange_init(GridTransferContext *ctx) +{ + GridTransferLagrange *priv = ctx->priv; + int ret; + + switch (ctx->op) { + case GRID_TRANSFER_LAGRANGE_1: + priv->stencil = 2; + priv->transfer_cont = interp_transfer_line_cont_2; + priv->transfer_generic = interp_transfer_line_generic_2; + break; + case GRID_TRANSFER_LAGRANGE_3: + priv->transfer_cont = interp_transfer_line_cont_4; + priv->transfer_generic = interp_transfer_line_generic_4; + priv->stencil = 4; + +#if HAVE_EXTERNAL_ASM + if (ctx->cpuflags & MG2DI_CPU_FLAG_FMA3) { + priv->transfer_cont = mg2di_transfer_interp_line_cont_4_fma3; + } +#endif + break; + default: + return -EINVAL; + } + + for (int dim = 0; dim < 2; dim++) { + priv->idx[dim] = calloc(ctx->dst.size[dim], sizeof(*priv->idx[dim])); + if (!priv->idx[dim]) + return -ENOMEM; + + priv->fact[dim] = calloc(ctx->dst.size[dim], sizeof(*priv->fact[dim]) * priv->stencil); + if (!priv->fact[dim]) + return -ENOMEM; + + ret = transfer_factors_lagrange(ctx, dim); + if (ret < 0) + return ret; + } + + return 0; +} + +static void transfer_lagrange_free(GridTransferContext *ctx) +{ + GridTransferLagrange *priv = ctx->priv; + + for (int dim = 0; dim < 2; dim++) { + free(priv->idx[dim]); + free(priv->fact[dim]); + } +} + +static int transfer_interp_task(void *arg, unsigned int job_idx, unsigned int thread_idx) +{ + GridTransferLagrange *priv = arg; + const NDArray *src = priv->src; + NDArray *dst = priv->dst; + + const ptrdiff_t idx_y = priv->idx[0][job_idx]; + const double *fact_y = priv->fact[0] + priv->stencil * job_idx; + + if (dst->stride[1] == 1 && src->stride[1] == 1) { + priv->transfer_cont(dst->data + job_idx * dst->stride[0], dst->shape[1], + src->data + idx_y * src->stride[0], src->stride[0], + priv->idx[1], priv->fact[1], fact_y); + } else { + priv->transfer_generic(dst->data + job_idx * dst->stride[0], dst->shape[1], + src->data + idx_y * src->stride[0], src->stride[0], + priv->idx[1], priv->fact[1], fact_y, dst->stride[1], src->stride[1]); + } + + return 0; +} + +static int transfer_lagrange_transfer(GridTransferContext *ctx, + NDArray *dst, const NDArray *src) +{ + GridTransferLagrange *priv = ctx->priv; + + priv->src = src; + priv->dst = dst; + + tp_execute(ctx->tp, ctx->dst.size[0], transfer_interp_task, priv); + + priv->src = NULL; + priv->dst = NULL; + + return 0; +} + +static const GridTransfer transfer_lagrange = { + .priv_data_size = sizeof(GridTransferLagrange), + .init = transfer_lagrange_init, + .free = transfer_lagrange_free, + .transfer = transfer_lagrange_transfer, +}; + +typedef struct FWThreadData { + NDArray *dst; + const NDArray *src; +} FWThreadData; + +static int fw_1_transfer_task(void *arg, unsigned int job_idx, unsigned int thread_idx) +{ + FWThreadData *td = arg; + + const double *sd = td->src->data + job_idx * 2 * td->src->stride[0]; + const ptrdiff_t ss0 = td->src->stride[0]; + const ptrdiff_t ss1 = td->src->stride[1]; + + double *dd = td->dst->data + job_idx * td->dst->stride[0]; + const ptrdiff_t ds1 = td->dst->stride[1]; + + for (size_t x = 0; x < td->dst->shape[1]; x++) { + *dd = 0.25 * sd[0] + 0.125 * (sd[ss1] + sd[-ss1] + sd[ss0] + sd[-ss0]) + + 0.0625 * (sd[ss0 + ss1] + sd[ss0 - ss1] + sd[-ss0 + ss1] + sd[-ss0 - ss1]); + dd += ds1; + sd += ss1 * 2; + } + + return 0; +} +static int transfer_fw_1_transfer(GridTransferContext *ctx, + NDArray *dst, const NDArray *src) +{ + FWThreadData td = { dst, src }; + + tp_execute(ctx->tp, ctx->dst.size[0], fw_1_transfer_task, &td); + + return 0; +} + +static const GridTransfer transfer_fw_1 = { + .priv_data_size = sizeof(GridTransferLagrange), + .transfer = transfer_fw_1_transfer, +}; + +static const GridTransfer *transfer_ops[] = { + [GRID_TRANSFER_LAGRANGE_1] = &transfer_lagrange, + [GRID_TRANSFER_LAGRANGE_3] = &transfer_lagrange, + [GRID_TRANSFER_FW_1] = &transfer_fw_1, +}; + +int mg2di_gt_transfer(GridTransferContext *ctx, + NDArray *dst, const NDArray *src) +{ + const GridTransfer *t = transfer_ops[ctx->op]; + + if (dst->dims != 2 || src->dims != 2 || + dst->shape[0] != ctx->dst.size[0] || dst->shape[1] != ctx->dst.size[1] || + src->shape[0] != ctx->src.size[0] || src->shape[1] != ctx->src.size[1]) + return -EINVAL; + + return t->transfer(ctx, dst, src); +} + +GridTransferContext *mg2di_gt_alloc(enum GridTransferOperator op) +{ + GridTransferContext *ctx; + const GridTransfer *t; + + if (op < 0 || op > ARRAY_ELEMS(transfer_ops)) + return NULL; + t = transfer_ops[op]; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) + return NULL; + + if (t->priv_data_size) { + ctx->priv = calloc(1, t->priv_data_size); + if (!ctx->priv) { + free(ctx); + return NULL; + } + } + ctx->op = op; + + return ctx; +} + +int mg2di_gt_init(GridTransferContext *ctx) +{ + const GridTransfer *t = transfer_ops[ctx->op]; + int ret; + + if (!ctx->tp) + return -EINVAL; + + /* TODO check that the destination grid is not outside the source one */ + + if (t->init) { + ret = t->init(ctx); + if (ret < 0) + return ret; + } + + return 0; +} + +void mg2di_gt_free(GridTransferContext **pctx) +{ + GridTransferContext *ctx = *pctx; + + if (!ctx) + return; + + if (ctx->priv) { + const GridTransfer *t = transfer_ops[ctx->op]; + + if (t->free) + t->free(ctx); + } + + free(ctx->priv); + + free(ctx); + *pctx = NULL; +} -- cgit v1.2.3