/* * Grid transfer operators * Copyright 2019 Anton Khirnov * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #include "config.h" #include #include #include #include #include #include #include #include #include "common.h" #include "cpu.h" #include "transfer.h" typedef struct GridTransfer { size_t priv_data_size; int (*init)(GridTransferContext *ctx); void (*free)(GridTransferContext *ctx); int (*transfer)(GridTransferContext *ctx, NDArray *dst, const NDArray *src); } GridTransfer; typedef struct GridTransferLagrange { unsigned int stencil; void (*transfer2d_cont) (double *dst, ptrdiff_t dst_len, const double *src, ptrdiff_t src_stride, const ptrdiff_t *idx_x, const double *fact_x, const double *fact_y); void (*transfer2d_generic)(double *dst, ptrdiff_t dst_len, const double *src, ptrdiff_t src_stride, const ptrdiff_t *idx_x, const double *fact_x, const double *fact_y, ptrdiff_t dst_stride0, ptrdiff_t src_stride0); void (*transfer1d_cont) (double *dst, ptrdiff_t dst_len, const double *src, const ptrdiff_t *idx_x, const double *fact_x); void (*transfer1d_generic)(double *dst, ptrdiff_t dst_len, const double *src, const ptrdiff_t *idx_x, const double *fact_x, ptrdiff_t dst_stride0, ptrdiff_t src_stride0); const NDArray *src; NDArray *dst; ptrdiff_t **idx; double **fact; } GridTransferLagrange; #if HAVE_NASM void mg2di_transfer_interp2d_line_cont_4_avx2(double *dst, ptrdiff_t dst_len, const double *src, ptrdiff_t src_stride, const ptrdiff_t *idx_x, const double *fact_x, const double *fact_y); void mg2di_transfer_interp2d_line_cont_6_avx2(double *dst, ptrdiff_t dst_len, const double *src, ptrdiff_t src_stride, const ptrdiff_t *idx_x, const double *fact_x, const double *fact_y); #endif #define STENCIL 2 #define CONTIGUOUS 0 #include "transfer_interp_template.c" #undef CONTIGUOUS #define CONTIGUOUS 1 #include "transfer_interp_template.c" #undef CONTIGUOUS #undef STENCIL #define STENCIL 4 #define CONTIGUOUS 0 #include "transfer_interp_template.c" #undef CONTIGUOUS #define CONTIGUOUS 1 #include "transfer_interp_template.c" #undef CONTIGUOUS #undef STENCIL #define STENCIL 6 #define CONTIGUOUS 0 #include "transfer_interp_template.c" #undef CONTIGUOUS #define CONTIGUOUS 1 #include "transfer_interp_template.c" #undef CONTIGUOUS #undef STENCIL #define STENCIL 8 #define CONTIGUOUS 0 #include "transfer_interp_template.c" #undef CONTIGUOUS #define CONTIGUOUS 1 #include "transfer_interp_template.c" #undef CONTIGUOUS #undef STENCIL // generate the interpolation source indices and weights static int transfer_factors_lagrange(GridTransferContext *ctx, unsigned int dim) { GridTransferLagrange *priv = ctx->priv; const double step_dst = ctx->dst.step[dim]; const double step_src = ctx->src.step[dim]; const double scale = step_dst / step_src; double *coord_src; coord_src = calloc(priv->stencil, sizeof(*coord_src)); if (!coord_src) return -ENOMEM; for (ptrdiff_t idx_dst = 0; idx_dst < ctx->dst.size[dim]; idx_dst++) { const ptrdiff_t idx_src = (ptrdiff_t)(scale * (idx_dst + ctx->dst.start[dim])) - ctx->src.start[dim] - (priv->stencil / 2 - 1); const double coord_dst = (idx_dst + ctx->dst.start[dim]) * step_dst; double *fact = priv->fact[dim] + priv->stencil * idx_dst; for (int i = 0; i < priv->stencil; i++) coord_src[i] = (idx_src + ctx->src.start[dim] + i) * step_src; for (int i = 0; i < priv->stencil; i++) { fact[i] = 1.0; for (int j = 0; j < priv->stencil; j++) { if (i == j) continue; fact[i] *= (coord_dst - coord_src[j]) / (coord_src[i] - coord_src[j]); } } priv->idx[dim][idx_dst] = idx_src; } free(coord_src); return 0; } static int transfer_lagrange_init(GridTransferContext *ctx) { GridTransferLagrange *priv = ctx->priv; int ret; if (ctx->nb_dims > 2) return -ENOSYS; switch (ctx->op) { case GRID_TRANSFER_LAGRANGE_1: priv->stencil = 2; priv->transfer1d_cont = interp1d_transfer_line_cont_2; priv->transfer1d_generic = interp1d_transfer_line_generic_2; priv->transfer2d_cont = interp2d_transfer_line_cont_2; priv->transfer2d_generic = interp2d_transfer_line_generic_2; break; case GRID_TRANSFER_LAGRANGE_3: priv->transfer1d_cont = interp1d_transfer_line_cont_4; priv->transfer1d_generic = interp1d_transfer_line_generic_4; priv->transfer2d_cont = interp2d_transfer_line_cont_4; priv->transfer2d_generic = interp2d_transfer_line_generic_4; priv->stencil = 4; #if HAVE_NASM if (ctx->cpuflags & MG2DI_CPU_FLAG_AVX2) { priv->transfer2d_cont = mg2di_transfer_interp2d_line_cont_4_avx2; } #endif break; case GRID_TRANSFER_LAGRANGE_5: priv->transfer1d_cont = interp1d_transfer_line_cont_6; priv->transfer1d_generic = interp1d_transfer_line_generic_6; priv->transfer2d_cont = interp2d_transfer_line_cont_6; priv->transfer2d_generic = interp2d_transfer_line_generic_6; priv->stencil = 6; #if HAVE_NASM if (ctx->cpuflags & MG2DI_CPU_FLAG_AVX2) { priv->transfer2d_cont = mg2di_transfer_interp2d_line_cont_6_avx2; } #endif break; case GRID_TRANSFER_LAGRANGE_7: priv->transfer1d_cont = interp1d_transfer_line_cont_8; priv->transfer1d_generic = interp1d_transfer_line_generic_8; priv->transfer2d_cont = interp2d_transfer_line_cont_8; priv->transfer2d_generic = interp2d_transfer_line_generic_8; priv->stencil = 8; break; default: return -EINVAL; } priv->idx = calloc(ctx->nb_dims, sizeof(*priv->idx)); priv->fact = calloc(ctx->nb_dims, sizeof(*priv->fact)); if (!priv->idx || !priv->fact) return -ENOMEM; for (int dim = 0; dim < ctx->nb_dims; dim++) { priv->idx[dim] = calloc(ctx->dst.size[dim], sizeof(*priv->idx[dim])); if (!priv->idx[dim]) return -ENOMEM; priv->fact[dim] = calloc(ctx->dst.size[dim], sizeof(*priv->fact[dim]) * priv->stencil); if (!priv->fact[dim]) return -ENOMEM; ret = transfer_factors_lagrange(ctx, dim); if (ret < 0) return ret; } return 0; } static void transfer_lagrange_free(GridTransferContext *ctx) { GridTransferLagrange *priv = ctx->priv; for (int dim = 0; dim < ctx->nb_dims; dim++) { if (priv->idx) free(priv->idx[dim]); if (priv->fact) free(priv->fact[dim]); } free(priv->idx); free(priv->fact); } static int transfer_interp2d_task(void *arg, unsigned int job_idx, unsigned int thread_idx) { GridTransferLagrange *priv = arg; const NDArray *src = priv->src; NDArray *dst = priv->dst; const ptrdiff_t idx_y = priv->idx[0][job_idx]; const double *fact_y = priv->fact[0] + priv->stencil * job_idx; if (dst->stride[1] == 1 && src->stride[1] == 1) { priv->transfer2d_cont(dst->data + job_idx * dst->stride[0], dst->shape[1], src->data + idx_y * src->stride[0], src->stride[0], priv->idx[1], priv->fact[1], fact_y); } else { priv->transfer2d_generic(dst->data + job_idx * dst->stride[0], dst->shape[1], src->data + idx_y * src->stride[0], src->stride[0], priv->idx[1], priv->fact[1], fact_y, dst->stride[1], src->stride[1]); } return 0; } static int transfer_lagrange_transfer(GridTransferContext *ctx, NDArray *dst, const NDArray *src) { GridTransferLagrange *priv = ctx->priv; priv->src = src; priv->dst = dst; if (ctx->nb_dims == 1) { if (dst->stride[0] == 1 && src->stride[0] == 1) { priv->transfer1d_cont(dst->data, dst->shape[0], src->data, priv->idx[0], priv->fact[0]); } else { priv->transfer1d_generic(dst->data, dst->shape[0], src->data, priv->idx[0], priv->fact[0], dst->stride[0], src->stride[0]); } } else tp_execute(ctx->tp, ctx->dst.size[0], transfer_interp2d_task, priv); priv->src = NULL; priv->dst = NULL; return 0; } static const GridTransfer transfer_lagrange = { .priv_data_size = sizeof(GridTransferLagrange), .init = transfer_lagrange_init, .free = transfer_lagrange_free, .transfer = transfer_lagrange_transfer, }; static const double fw_factors_1[] = { 0.5, 1.0, 0.5 }; static const double fw_factors_2[] = { -1.0 / 16, 9.0 / 16, 1.0, 9.0 / 16, -1.0 / 16 }; static const double fw_factors_3[] = { 3.0 / 256, -25.0 / 256, 75.0 / 128, 1.0, 75.0 / 128, -25.0 / 256, 3.0 / 256 }; static const double fw_factors_4[] = { -5.0 / 2048, 49.0 / 2048, -245.0 / 2048, 1225.0 / 2048, 1.0, 1225.0 / 2048, -245.0 / 2048, 49.0 / 2048, -5.0 / 2048 }; typedef struct GridTransferFW { ptrdiff_t order; const double *factors; NDArray *dst; const NDArray *src; } GridTransferFW; static int fw_transfer_task(void *arg, unsigned int job_idx, unsigned int thread_idx) { GridTransferContext *ctx = arg; GridTransferFW *fw = ctx->priv; NDArray *dst = fw->dst; const NDArray *src = fw->src; const ptrdiff_t ss0 = src->stride[0]; const ptrdiff_t ss1 = src->stride[1]; const double *sd = src->data + ss0 * ((job_idx + ctx->dst.start[0]) * 2 - ctx->src.start[0] - fw->order) + ss1 * ( ctx->dst.start[1] * 2 - ctx->src.start[1] - fw->order); double *dd = dst->data + job_idx * dst->stride[0]; const ptrdiff_t ds1 = dst->stride[1]; for (size_t x = 0; x < dst->shape[1]; x++) { double val = 0.0; for (int i = 0; i < fw->order * 2 + 1; i++) for (int j = 0; j < fw->order * 2 + 1; j++) val += sd[i * ss0 + j * ss1] * fw->factors[i] * fw->factors[j]; *dd = 0.25 * val; dd += ds1; sd += ss1 * 2; } return 0; } static int transfer_fw_transfer(GridTransferContext *ctx, NDArray *dst, const NDArray *src) { GridTransferFW *fw = ctx->priv; fw->src = src; fw->dst = dst; tp_execute(ctx->tp, ctx->dst.size[0], fw_transfer_task, ctx); fw->src = NULL; fw->dst = NULL; return 0; } static int transfer_fw_init(GridTransferContext *ctx) { GridTransferFW *fw = ctx->priv; if (ctx->nb_dims != 2) return -ENOSYS; for (int i = 0; i < ctx->nb_dims; i++) { /* the destination must have twice the stepsize of the source */ if (fabs(ctx->dst.step[i] / ctx->src.step[i] - 2.0) > 1e-14) return -EINVAL; } switch (ctx->op) { case GRID_TRANSFER_FW_1: fw->order = 1; fw->factors = fw_factors_1; break; case GRID_TRANSFER_FW_2: fw->order = 2; fw->factors = fw_factors_2; break; case GRID_TRANSFER_FW_3: fw->order = 3; fw->factors = fw_factors_3; break; case GRID_TRANSFER_FW_4: fw->order = 4; fw->factors = fw_factors_4; break; default: return -EINVAL; } return 0; } static const GridTransfer transfer_fw = { .priv_data_size = sizeof(GridTransferFW), .init = transfer_fw_init, .transfer = transfer_fw_transfer, }; static const GridTransfer *transfer_ops[] = { [GRID_TRANSFER_LAGRANGE_1] = &transfer_lagrange, [GRID_TRANSFER_LAGRANGE_3] = &transfer_lagrange, [GRID_TRANSFER_LAGRANGE_5] = &transfer_lagrange, [GRID_TRANSFER_LAGRANGE_7] = &transfer_lagrange, [GRID_TRANSFER_FW_1] = &transfer_fw, [GRID_TRANSFER_FW_2] = &transfer_fw, [GRID_TRANSFER_FW_3] = &transfer_fw, }; int mg2di_gt_transfer(GridTransferContext *ctx, NDArray *dst, const NDArray *src) { const GridTransfer *t = transfer_ops[ctx->op]; if (dst->dims != ctx->nb_dims || src->dims != ctx->nb_dims) return -EINVAL; for (int dir = 0; dir < ctx->nb_dims; dir++) { if (dst->shape[dir] != ctx->dst.size[dir] || src->shape[dir] != ctx->src.size[dir]) return -EINVAL; } return t->transfer(ctx, dst, src); } GridTransferContext *mg2di_gt_alloc(unsigned int nb_dims, enum GridTransferOperator op) { GridTransferContext *ctx; const GridTransfer *t; if (op < 0 || op > ARRAY_ELEMS(transfer_ops)) return NULL; t = transfer_ops[op]; ctx = calloc(1, sizeof(*ctx)); if (!ctx) return NULL; ctx->src.start = calloc(nb_dims, sizeof(*ctx->src.start)); ctx->src.size = calloc(nb_dims, sizeof(*ctx->src.size)); ctx->src.step = calloc(nb_dims, sizeof(*ctx->src.step)); ctx->dst.start = calloc(nb_dims, sizeof(*ctx->dst.start)); ctx->dst.size = calloc(nb_dims, sizeof(*ctx->dst.size)); ctx->dst.step = calloc(nb_dims, sizeof(*ctx->dst.step)); if (!ctx->src.start || !ctx->src.size || !ctx->src.step || !ctx->dst.start || !ctx->dst.size || !ctx->dst.step) goto fail; ctx->nb_dims = nb_dims; if (t->priv_data_size) { ctx->priv = calloc(1, t->priv_data_size); if (!ctx->priv) goto fail; } ctx->op = op; return ctx; fail: mg2di_gt_free(&ctx); return NULL; } int mg2di_gt_init(GridTransferContext *ctx) { const GridTransfer *t = transfer_ops[ctx->op]; int ret; if (!ctx->tp) return -EINVAL; /* check that the destination grid is inside the source one */ for (int i = 0; i < ctx->nb_dims; i++) { double src_start, src_end, dst_start, dst_end; if (ctx->src.step[i] <= 0.0 || ctx->dst.step[i] <= 0.0 || ctx->src.size[i] < 2 || ctx->dst.size[i] < 1) return -EINVAL; src_start = ctx->src.start[i] * ctx->src.step[i] - ctx->extrapolate_distance; src_end = (ctx->src.start[i] + ctx->src.size[i] - 1 + ctx->extrapolate_distance) * ctx->src.step[i]; dst_start = ctx->dst.start[i] * ctx->dst.step[i]; dst_end = (ctx->dst.start[i] + ctx->dst.size[i] - 1) * ctx->dst.step[i]; if (src_start >= src_end || dst_start - src_start < -1e-12 || src_end - dst_end < -1e-12) return -EINVAL; } if (t->init) { ret = t->init(ctx); if (ret < 0) return ret; } return 0; } void mg2di_gt_free(GridTransferContext **pctx) { GridTransferContext *ctx = *pctx; if (!ctx) return; if (ctx->priv) { const GridTransfer *t = transfer_ops[ctx->op]; if (t->free) t->free(ctx); } free(ctx->priv); free(ctx->src.start); free(ctx->src.size); free(ctx->src.step); free(ctx->dst.start); free(ctx->dst.size); free(ctx->dst.step); free(ctx); *pctx = NULL; }