aboutsummaryrefslogtreecommitdiff
path: root/transfer.c
diff options
context:
space:
mode:
authorAnton Khirnov <anton@khirnov.net>2019-03-23 17:55:57 +0100
committerAnton Khirnov <anton@khirnov.net>2019-03-23 17:55:57 +0100
commitd215c872bbdf8ba733f9a9fbd586374b841fdfb5 (patch)
treefb0072a8dc35179ef35d86c6fbb057b6c504445c /transfer.c
parentbd178d67da6a8c30b3ccbd020be1b00f42eceb53 (diff)
Add a new separate module for grid transfers/interpolation.
Diffstat (limited to 'transfer.c')
-rw-r--r--transfer.c346
1 files changed, 346 insertions, 0 deletions
diff --git a/transfer.c b/transfer.c
new file mode 100644
index 0000000..6c8f614
--- /dev/null
+++ b/transfer.c
@@ -0,0 +1,346 @@
+/*
+ * Grid transfer operators
+ * Copyright 2019 Anton Khirnov <anton@khirnov.net>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "config.h"
+
+#include <errno.h>
+#include <limits.h>
+#include <math.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <threadpool.h>
+
+#include "common.h"
+#include "cpu.h"
+#include "ndarray.h"
+#include "transfer.h"
+
+typedef struct GridTransfer {
+ size_t priv_data_size;
+ int (*init)(GridTransferContext *ctx);
+ void (*free)(GridTransferContext *ctx);
+ int (*transfer)(GridTransferContext *ctx, NDArray *dst, const NDArray *src);
+} GridTransfer;
+
+typedef struct GridTransferLagrange {
+ unsigned int stencil;
+
+ void (*transfer_cont) (double *dst, ptrdiff_t dst_len,
+ const double *src, ptrdiff_t src_stride,
+ const ptrdiff_t *idx_x, const double *fact_x, const double *fact_y);
+ void (*transfer_generic)(double *dst, ptrdiff_t dst_len,
+ const double *src, ptrdiff_t src_stride,
+ const ptrdiff_t *idx_x, const double *fact_x, const double *fact_y,
+ ptrdiff_t dst_stride0, ptrdiff_t src_stride0);
+
+ const NDArray *src;
+ NDArray *dst;
+
+ ptrdiff_t *idx[2];
+ double *fact[2];
+} GridTransferLagrange;
+
+#if HAVE_EXTERNAL_ASM
+void mg2di_transfer_interp_line_cont_4_fma3(double *dst, ptrdiff_t dst_len,
+ const double *src, ptrdiff_t src_stride,
+ const ptrdiff_t *idx_x,
+ const double *fact_x, const double *fact_y);
+#endif
+
+#define STENCIL 2
+#define CONTIGUOUS 0
+#include "transfer_interp_template.c"
+#undef CONTIGUOUS
+
+#define CONTIGUOUS 1
+#include "transfer_interp_template.c"
+#undef CONTIGUOUS
+#undef STENCIL
+
+#define STENCIL 4
+#define CONTIGUOUS 0
+#include "transfer_interp_template.c"
+#undef CONTIGUOUS
+
+#define CONTIGUOUS 1
+#include "transfer_interp_template.c"
+#undef CONTIGUOUS
+#undef STENCIL
+
+// generate the interpolation source indices and weights
+static int transfer_factors_lagrange(GridTransferContext *ctx, unsigned int dim)
+{
+ GridTransferLagrange *priv = ctx->priv;
+ const double step_dst = ctx->dst.step[dim];
+ const double step_src = ctx->src.step[dim];
+ const double scale = step_dst / step_src;
+
+ double *coord_src;
+
+ coord_src = calloc(priv->stencil, sizeof(*coord_src));
+ if (!coord_src)
+ return -ENOMEM;
+
+ for (ptrdiff_t idx_dst = 0; idx_dst < ctx->dst.size[dim]; idx_dst++) {
+ const ptrdiff_t idx_src = scale * idx_dst - (priv->stencil / 2 - 1);
+ const double coord_dst = idx_dst * step_dst;
+
+ double *fact = priv->fact[dim] + priv->stencil * idx_dst;
+
+ for (int i = 0; i < priv->stencil; i++)
+ coord_src[i] = (idx_src + i) * step_src;
+
+ for (int i = 0; i < priv->stencil; i++) {
+ fact[i] = 1.0;
+ for (int j = 0; j < priv->stencil; j++) {
+ if (i == j)
+ continue;
+
+ fact[i] *= (coord_dst - coord_src[j]) / (coord_src[i] - coord_src[j]);
+ }
+ }
+
+ priv->idx[dim][idx_dst] = idx_src;
+ }
+
+ free(coord_src);
+
+ return 0;
+}
+
+static int transfer_lagrange_init(GridTransferContext *ctx)
+{
+ GridTransferLagrange *priv = ctx->priv;
+ int ret;
+
+ switch (ctx->op) {
+ case GRID_TRANSFER_LAGRANGE_1:
+ priv->stencil = 2;
+ priv->transfer_cont = interp_transfer_line_cont_2;
+ priv->transfer_generic = interp_transfer_line_generic_2;
+ break;
+ case GRID_TRANSFER_LAGRANGE_3:
+ priv->transfer_cont = interp_transfer_line_cont_4;
+ priv->transfer_generic = interp_transfer_line_generic_4;
+ priv->stencil = 4;
+
+#if HAVE_EXTERNAL_ASM
+ if (ctx->cpuflags & MG2DI_CPU_FLAG_FMA3) {
+ priv->transfer_cont = mg2di_transfer_interp_line_cont_4_fma3;
+ }
+#endif
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ for (int dim = 0; dim < 2; dim++) {
+ priv->idx[dim] = calloc(ctx->dst.size[dim], sizeof(*priv->idx[dim]));
+ if (!priv->idx[dim])
+ return -ENOMEM;
+
+ priv->fact[dim] = calloc(ctx->dst.size[dim], sizeof(*priv->fact[dim]) * priv->stencil);
+ if (!priv->fact[dim])
+ return -ENOMEM;
+
+ ret = transfer_factors_lagrange(ctx, dim);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static void transfer_lagrange_free(GridTransferContext *ctx)
+{
+ GridTransferLagrange *priv = ctx->priv;
+
+ for (int dim = 0; dim < 2; dim++) {
+ free(priv->idx[dim]);
+ free(priv->fact[dim]);
+ }
+}
+
+static int transfer_interp_task(void *arg, unsigned int job_idx, unsigned int thread_idx)
+{
+ GridTransferLagrange *priv = arg;
+ const NDArray *src = priv->src;
+ NDArray *dst = priv->dst;
+
+ const ptrdiff_t idx_y = priv->idx[0][job_idx];
+ const double *fact_y = priv->fact[0] + priv->stencil * job_idx;
+
+ if (dst->stride[1] == 1 && src->stride[1] == 1) {
+ priv->transfer_cont(dst->data + job_idx * dst->stride[0], dst->shape[1],
+ src->data + idx_y * src->stride[0], src->stride[0],
+ priv->idx[1], priv->fact[1], fact_y);
+ } else {
+ priv->transfer_generic(dst->data + job_idx * dst->stride[0], dst->shape[1],
+ src->data + idx_y * src->stride[0], src->stride[0],
+ priv->idx[1], priv->fact[1], fact_y, dst->stride[1], src->stride[1]);
+ }
+
+ return 0;
+}
+
+static int transfer_lagrange_transfer(GridTransferContext *ctx,
+ NDArray *dst, const NDArray *src)
+{
+ GridTransferLagrange *priv = ctx->priv;
+
+ priv->src = src;
+ priv->dst = dst;
+
+ tp_execute(ctx->tp, ctx->dst.size[0], transfer_interp_task, priv);
+
+ priv->src = NULL;
+ priv->dst = NULL;
+
+ return 0;
+}
+
+static const GridTransfer transfer_lagrange = {
+ .priv_data_size = sizeof(GridTransferLagrange),
+ .init = transfer_lagrange_init,
+ .free = transfer_lagrange_free,
+ .transfer = transfer_lagrange_transfer,
+};
+
+typedef struct FWThreadData {
+ NDArray *dst;
+ const NDArray *src;
+} FWThreadData;
+
+static int fw_1_transfer_task(void *arg, unsigned int job_idx, unsigned int thread_idx)
+{
+ FWThreadData *td = arg;
+
+ const double *sd = td->src->data + job_idx * 2 * td->src->stride[0];
+ const ptrdiff_t ss0 = td->src->stride[0];
+ const ptrdiff_t ss1 = td->src->stride[1];
+
+ double *dd = td->dst->data + job_idx * td->dst->stride[0];
+ const ptrdiff_t ds1 = td->dst->stride[1];
+
+ for (size_t x = 0; x < td->dst->shape[1]; x++) {
+ *dd = 0.25 * sd[0] + 0.125 * (sd[ss1] + sd[-ss1] + sd[ss0] + sd[-ss0])
+ + 0.0625 * (sd[ss0 + ss1] + sd[ss0 - ss1] + sd[-ss0 + ss1] + sd[-ss0 - ss1]);
+ dd += ds1;
+ sd += ss1 * 2;
+ }
+
+ return 0;
+}
+static int transfer_fw_1_transfer(GridTransferContext *ctx,
+ NDArray *dst, const NDArray *src)
+{
+ FWThreadData td = { dst, src };
+
+ tp_execute(ctx->tp, ctx->dst.size[0], fw_1_transfer_task, &td);
+
+ return 0;
+}
+
+static const GridTransfer transfer_fw_1 = {
+ .priv_data_size = sizeof(GridTransferLagrange),
+ .transfer = transfer_fw_1_transfer,
+};
+
+static const GridTransfer *transfer_ops[] = {
+ [GRID_TRANSFER_LAGRANGE_1] = &transfer_lagrange,
+ [GRID_TRANSFER_LAGRANGE_3] = &transfer_lagrange,
+ [GRID_TRANSFER_FW_1] = &transfer_fw_1,
+};
+
+int mg2di_gt_transfer(GridTransferContext *ctx,
+ NDArray *dst, const NDArray *src)
+{
+ const GridTransfer *t = transfer_ops[ctx->op];
+
+ if (dst->dims != 2 || src->dims != 2 ||
+ dst->shape[0] != ctx->dst.size[0] || dst->shape[1] != ctx->dst.size[1] ||
+ src->shape[0] != ctx->src.size[0] || src->shape[1] != ctx->src.size[1])
+ return -EINVAL;
+
+ return t->transfer(ctx, dst, src);
+}
+
+GridTransferContext *mg2di_gt_alloc(enum GridTransferOperator op)
+{
+ GridTransferContext *ctx;
+ const GridTransfer *t;
+
+ if (op < 0 || op > ARRAY_ELEMS(transfer_ops))
+ return NULL;
+ t = transfer_ops[op];
+
+ ctx = calloc(1, sizeof(*ctx));
+ if (!ctx)
+ return NULL;
+
+ if (t->priv_data_size) {
+ ctx->priv = calloc(1, t->priv_data_size);
+ if (!ctx->priv) {
+ free(ctx);
+ return NULL;
+ }
+ }
+ ctx->op = op;
+
+ return ctx;
+}
+
+int mg2di_gt_init(GridTransferContext *ctx)
+{
+ const GridTransfer *t = transfer_ops[ctx->op];
+ int ret;
+
+ if (!ctx->tp)
+ return -EINVAL;
+
+ /* TODO check that the destination grid is not outside the source one */
+
+ if (t->init) {
+ ret = t->init(ctx);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+void mg2di_gt_free(GridTransferContext **pctx)
+{
+ GridTransferContext *ctx = *pctx;
+
+ if (!ctx)
+ return;
+
+ if (ctx->priv) {
+ const GridTransfer *t = transfer_ops[ctx->op];
+
+ if (t->free)
+ t->free(ctx);
+ }
+
+ free(ctx->priv);
+
+ free(ctx);
+ *pctx = NULL;
+}