From 60d1383db4d9f646ce5504edc9b2c437836696db Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Sat, 29 Jun 2019 10:23:40 +0200 Subject: transfer: implement and use 1D interpolation Stop abusing "2D of y size 1" for this. --- mg2d.c | 38 +++++------ transfer.c | 156 +++++++++++++++++++++++++++++++-------------- transfer.h | 26 +++++--- transfer_interp.asm | 8 +-- transfer_interp_template.c | 26 +++++++- 5 files changed, 166 insertions(+), 88 deletions(-) diff --git a/mg2d.c b/mg2d.c index 113830b..853e9ab 100644 --- a/mg2d.c +++ b/mg2d.c @@ -640,18 +640,18 @@ static int diff_coeffs_fixup(MG2DContext *ctx, MG2DLevel *l) double *dst = a_dst->data + mg2d_bnd_is_upper(bnd_idx) * ((a_dst->shape[!ci] - 1) * a_dst->stride[!ci]); const ptrdiff_t stride_dst = a_dst->stride[ci]; - const size_t size_dst[] = { 1, l->solver->domain_size[!ci] }; + const size_t size_dst = l->solver->domain_size[!ci]; ret = mg2di_ndarray_slice(&dc_src, priv->dc_bnd_val[i][bnd_idx], - (Slice [2]){ SLICE_NULL, SLICE(FD_STENCIL_MAX, -FD_STENCIL_MAX, 1) }); + &SLICE(FD_STENCIL_MAX, -FD_STENCIL_MAX, 1)); if (ret < 0) goto fail; - ret = mg2di_ndarray_alloc(&dc_dst, 2, size_dst, 0); + ret = mg2di_ndarray_alloc(&dc_dst, 1, &size_dst, 0); if (ret < 0) goto fail; - gt_bnd = mg2di_gt_alloc(GRID_TRANSFER_LAGRANGE_1); + gt_bnd = mg2di_gt_alloc(1, GRID_TRANSFER_LAGRANGE_1); if (!gt_bnd) { ret = -ENOMEM; goto fail; @@ -661,18 +661,12 @@ static int diff_coeffs_fixup(MG2DContext *ctx, MG2DLevel *l) gt_bnd->cpuflags = priv->cpuflags; gt_bnd->src.start[0] = 0; - gt_bnd->src.start[1] = 0; - gt_bnd->src.size[0] = 1; - gt_bnd->src.size[1] = priv->dg->domain_size[1]; - gt_bnd->src.step[0] = ctx->step[0]; - gt_bnd->src.step[1] = ctx->step[1]; - - gt_bnd->dst.start[0] = 0; - gt_bnd->dst.start[1] = l->dg->components[priv->local_component].interior.start[!ci]; - gt_bnd->dst.size[0] = 1; - gt_bnd->dst.size[1] = l->dg->components[priv->local_component].interior.size[!ci]; - gt_bnd->dst.step[0] = l->solver->step[0]; - gt_bnd->dst.step[1] = l->solver->step[1]; + gt_bnd->src.size[0] = priv->dg->domain_size[1]; + gt_bnd->src.step[0] = ctx->step[1]; + + gt_bnd->dst.start[0] = l->dg->components[priv->local_component].interior.start[!ci]; + gt_bnd->dst.size[0] = l->dg->components[priv->local_component].interior.size[!ci]; + gt_bnd->dst.step[0] = l->solver->step[1]; ret = mg2di_gt_init(gt_bnd); if (ret < 0) @@ -682,7 +676,7 @@ static int diff_coeffs_fixup(MG2DContext *ctx, MG2DLevel *l) if (ret < 0) goto fail; - for (int idx = 0; idx < dc_dst->shape[1]; idx++) + for (int idx = 0; idx < dc_dst->shape[0]; idx++) dst[stride_dst * idx] += dc_dst->data[idx]; fail: @@ -730,7 +724,7 @@ static int mg_setup_restrict(MG2DContext *ctx, MG2DLevel *l, enum GridTransferOp GridTransferContext *gt; int ret; - gt = mg2di_gt_alloc(op); + gt = mg2di_gt_alloc(2, op); if (!gt) return -ENOMEM; l->transfer_restrict = gt; @@ -764,7 +758,7 @@ static int mg_setup_prolong(MG2DContext *ctx, MG2DLevel *l, enum GridTransferOpe GridTransferContext *gt; int ret; - gt = mg2di_gt_alloc(op); + gt = mg2di_gt_alloc(2, op); if (!gt) return -ENOMEM; l->transfer_prolong = gt; @@ -1642,8 +1636,8 @@ static MG2DContext *solver_alloc(DomainGeometry *dg, unsigned int local_componen for (int bnd_idx = 0; bnd_idx < ARRAY_ELEMS(dc->boundaries); bnd_idx++) { const int ci = mg2d_bnd_coord_idx(bnd_idx); - ret = mg2di_ndarray_alloc(&priv->dc_bnd_val[i][bnd_idx], 2, - (size_t [2]){ 1, dg->domain_size[!ci] + 2 * FD_STENCIL_MAX }, + ret = mg2di_ndarray_alloc(&priv->dc_bnd_val[i][bnd_idx], 1, + (size_t [1]){ dg->domain_size[!ci] + 2 * FD_STENCIL_MAX }, NDARRAY_ALLOC_ZERO); if (ret < 0) goto fail; @@ -1994,7 +1988,7 @@ int mg2d_init_guess(MG2DContext *ctx, const double *src, } if (!priv->transfer_init) { - priv->transfer_init = mg2di_gt_alloc(GRID_TRANSFER_LAGRANGE_3); + priv->transfer_init = mg2di_gt_alloc(2, GRID_TRANSFER_LAGRANGE_3); if (!priv->transfer_init) return -ENOMEM; diff --git a/transfer.c b/transfer.c index 9c5e125..ee635b9 100644 --- a/transfer.c +++ b/transfer.c @@ -41,30 +41,35 @@ typedef struct GridTransfer { typedef struct GridTransferLagrange { unsigned int stencil; - void (*transfer_cont) (double *dst, ptrdiff_t dst_len, - const double *src, ptrdiff_t src_stride, - const ptrdiff_t *idx_x, const double *fact_x, const double *fact_y); - void (*transfer_generic)(double *dst, ptrdiff_t dst_len, - const double *src, ptrdiff_t src_stride, - const ptrdiff_t *idx_x, const double *fact_x, const double *fact_y, - ptrdiff_t dst_stride0, ptrdiff_t src_stride0); + void (*transfer2d_cont) (double *dst, ptrdiff_t dst_len, + const double *src, ptrdiff_t src_stride, + const ptrdiff_t *idx_x, const double *fact_x, const double *fact_y); + void (*transfer2d_generic)(double *dst, ptrdiff_t dst_len, + const double *src, ptrdiff_t src_stride, + const ptrdiff_t *idx_x, const double *fact_x, const double *fact_y, + ptrdiff_t dst_stride0, ptrdiff_t src_stride0); + void (*transfer1d_cont) (double *dst, ptrdiff_t dst_len, + const double *src, const ptrdiff_t *idx_x, const double *fact_x); + void (*transfer1d_generic)(double *dst, ptrdiff_t dst_len, + const double *src, const ptrdiff_t *idx_x, const double *fact_x, + ptrdiff_t dst_stride0, ptrdiff_t src_stride0); const NDArray *src; NDArray *dst; - ptrdiff_t *idx[2]; - double *fact[2]; + ptrdiff_t **idx; + double **fact; } GridTransferLagrange; #if HAVE_EXTERNAL_ASM -void mg2di_transfer_interp_line_cont_4_fma3(double *dst, ptrdiff_t dst_len, - const double *src, ptrdiff_t src_stride, - const ptrdiff_t *idx_x, - const double *fact_x, const double *fact_y); -void mg2di_transfer_interp_line_cont_6_fma3(double *dst, ptrdiff_t dst_len, - const double *src, ptrdiff_t src_stride, - const ptrdiff_t *idx_x, - const double *fact_x, const double *fact_y); +void mg2di_transfer_interp2d_line_cont_4_fma3(double *dst, ptrdiff_t dst_len, + const double *src, ptrdiff_t src_stride, + const ptrdiff_t *idx_x, + const double *fact_x, const double *fact_y); +void mg2di_transfer_interp2d_line_cont_6_fma3(double *dst, ptrdiff_t dst_len, + const double *src, ptrdiff_t src_stride, + const ptrdiff_t *idx_x, + const double *fact_x, const double *fact_y); #endif #define STENCIL 2 @@ -153,43 +158,59 @@ static int transfer_lagrange_init(GridTransferContext *ctx) GridTransferLagrange *priv = ctx->priv; int ret; + if (ctx->nb_dims > 2) + return -ENOSYS; + switch (ctx->op) { case GRID_TRANSFER_LAGRANGE_1: priv->stencil = 2; - priv->transfer_cont = interp_transfer_line_cont_2; - priv->transfer_generic = interp_transfer_line_generic_2; + priv->transfer1d_cont = interp1d_transfer_line_cont_2; + priv->transfer1d_generic = interp1d_transfer_line_generic_2; + priv->transfer2d_cont = interp2d_transfer_line_cont_2; + priv->transfer2d_generic = interp2d_transfer_line_generic_2; break; case GRID_TRANSFER_LAGRANGE_3: - priv->transfer_cont = interp_transfer_line_cont_4; - priv->transfer_generic = interp_transfer_line_generic_4; + priv->transfer1d_cont = interp1d_transfer_line_cont_4; + priv->transfer1d_generic = interp1d_transfer_line_generic_4; + priv->transfer2d_cont = interp2d_transfer_line_cont_4; + priv->transfer2d_generic = interp2d_transfer_line_generic_4; priv->stencil = 4; #if HAVE_EXTERNAL_ASM if (ctx->cpuflags & MG2DI_CPU_FLAG_FMA3) { - priv->transfer_cont = mg2di_transfer_interp_line_cont_4_fma3; + priv->transfer2d_cont = mg2di_transfer_interp2d_line_cont_4_fma3; } #endif break; case GRID_TRANSFER_LAGRANGE_5: - priv->transfer_cont = interp_transfer_line_cont_6; - priv->transfer_generic = interp_transfer_line_generic_6; + priv->transfer1d_cont = interp1d_transfer_line_cont_6; + priv->transfer1d_generic = interp1d_transfer_line_generic_6; + priv->transfer2d_cont = interp2d_transfer_line_cont_6; + priv->transfer2d_generic = interp2d_transfer_line_generic_6; priv->stencil = 6; #if HAVE_EXTERNAL_ASM if (ctx->cpuflags & MG2DI_CPU_FLAG_FMA3) { - priv->transfer_cont = mg2di_transfer_interp_line_cont_6_fma3; + priv->transfer2d_cont = mg2di_transfer_interp2d_line_cont_6_fma3; } #endif break; case GRID_TRANSFER_LAGRANGE_7: - priv->transfer_cont = interp_transfer_line_cont_8; - priv->transfer_generic = interp_transfer_line_generic_8; + priv->transfer1d_cont = interp1d_transfer_line_cont_8; + priv->transfer1d_generic = interp1d_transfer_line_generic_8; + priv->transfer2d_cont = interp2d_transfer_line_cont_8; + priv->transfer2d_generic = interp2d_transfer_line_generic_8; priv->stencil = 8; break; default: return -EINVAL; } - for (int dim = 0; dim < 2; dim++) { + priv->idx = calloc(ctx->nb_dims, sizeof(*priv->idx)); + priv->fact = calloc(ctx->nb_dims, sizeof(*priv->fact)); + if (!priv->idx || !priv->fact) + return -ENOMEM; + + for (int dim = 0; dim < ctx->nb_dims; dim++) { priv->idx[dim] = calloc(ctx->dst.size[dim], sizeof(*priv->idx[dim])); if (!priv->idx[dim]) return -ENOMEM; @@ -210,13 +231,17 @@ static void transfer_lagrange_free(GridTransferContext *ctx) { GridTransferLagrange *priv = ctx->priv; - for (int dim = 0; dim < 2; dim++) { - free(priv->idx[dim]); - free(priv->fact[dim]); + for (int dim = 0; dim < ctx->nb_dims; dim++) { + if (priv->idx) + free(priv->idx[dim]); + if (priv->fact) + free(priv->fact[dim]); } + free(priv->idx); + free(priv->fact); } -static int transfer_interp_task(void *arg, unsigned int job_idx, unsigned int thread_idx) +static int transfer_interp2d_task(void *arg, unsigned int job_idx, unsigned int thread_idx) { GridTransferLagrange *priv = arg; const NDArray *src = priv->src; @@ -226,13 +251,13 @@ static int transfer_interp_task(void *arg, unsigned int job_idx, unsigned int th const double *fact_y = priv->fact[0] + priv->stencil * job_idx; if (dst->stride[1] == 1 && src->stride[1] == 1) { - priv->transfer_cont(dst->data + job_idx * dst->stride[0], dst->shape[1], - src->data + idx_y * src->stride[0], src->stride[0], - priv->idx[1], priv->fact[1], fact_y); + priv->transfer2d_cont(dst->data + job_idx * dst->stride[0], dst->shape[1], + src->data + idx_y * src->stride[0], src->stride[0], + priv->idx[1], priv->fact[1], fact_y); } else { - priv->transfer_generic(dst->data + job_idx * dst->stride[0], dst->shape[1], - src->data + idx_y * src->stride[0], src->stride[0], - priv->idx[1], priv->fact[1], fact_y, dst->stride[1], src->stride[1]); + priv->transfer2d_generic(dst->data + job_idx * dst->stride[0], dst->shape[1], + src->data + idx_y * src->stride[0], src->stride[0], + priv->idx[1], priv->fact[1], fact_y, dst->stride[1], src->stride[1]); } return 0; @@ -246,7 +271,15 @@ static int transfer_lagrange_transfer(GridTransferContext *ctx, priv->src = src; priv->dst = dst; - tp_execute(ctx->tp, ctx->dst.size[0], transfer_interp_task, priv); + if (ctx->nb_dims == 1) { + if (dst->stride[0] == 1 && src->stride[0] == 1) { + priv->transfer1d_cont(dst->data, dst->shape[0], src->data, priv->idx[0], priv->fact[0]); + } else { + priv->transfer1d_generic(dst->data, dst->shape[0], src->data, priv->idx[0], priv->fact[0], + dst->stride[0], src->stride[0]); + } + } else + tp_execute(ctx->tp, ctx->dst.size[0], transfer_interp2d_task, priv); priv->src = NULL; priv->dst = NULL; @@ -325,7 +358,10 @@ static int transfer_fw_init(GridTransferContext *ctx) { GridTransferFW *fw = ctx->priv; - for (int i = 0; i < 2; i++) { + if (ctx->nb_dims != 2) + return -ENOSYS; + + for (int i = 0; i < ctx->nb_dims; i++) { /* the destination must have twice the stepsize of the source */ if (fabs(ctx->dst.step[i] / ctx->src.step[i] - 2.0) > 1e-14) return -EINVAL; @@ -376,15 +412,18 @@ int mg2di_gt_transfer(GridTransferContext *ctx, { const GridTransfer *t = transfer_ops[ctx->op]; - if (dst->dims != 2 || src->dims != 2 || - dst->shape[0] != ctx->dst.size[0] || dst->shape[1] != ctx->dst.size[1] || - src->shape[0] != ctx->src.size[0] || src->shape[1] != ctx->src.size[1]) + if (dst->dims != ctx->nb_dims || src->dims != ctx->nb_dims) return -EINVAL; + for (int dir = 0; dir < ctx->nb_dims; dir++) { + if (dst->shape[dir] != ctx->dst.size[dir] || src->shape[dir] != ctx->src.size[dir]) + return -EINVAL; + } + return t->transfer(ctx, dst, src); } -GridTransferContext *mg2di_gt_alloc(enum GridTransferOperator op) +GridTransferContext *mg2di_gt_alloc(unsigned int nb_dims, enum GridTransferOperator op) { GridTransferContext *ctx; const GridTransfer *t; @@ -397,16 +436,28 @@ GridTransferContext *mg2di_gt_alloc(enum GridTransferOperator op) if (!ctx) return NULL; + ctx->src.start = calloc(nb_dims, sizeof(*ctx->src.start)); + ctx->src.size = calloc(nb_dims, sizeof(*ctx->src.size)); + ctx->src.step = calloc(nb_dims, sizeof(*ctx->src.step)); + ctx->dst.start = calloc(nb_dims, sizeof(*ctx->dst.start)); + ctx->dst.size = calloc(nb_dims, sizeof(*ctx->dst.size)); + ctx->dst.step = calloc(nb_dims, sizeof(*ctx->dst.step)); + if (!ctx->src.start || !ctx->src.size || !ctx->src.step || + !ctx->dst.start || !ctx->dst.size || !ctx->dst.step) + goto fail; + ctx->nb_dims = nb_dims; + if (t->priv_data_size) { ctx->priv = calloc(1, t->priv_data_size); - if (!ctx->priv) { - free(ctx); - return NULL; - } + if (!ctx->priv) + goto fail; } ctx->op = op; return ctx; +fail: + mg2di_gt_free(&ctx); + return NULL; } int mg2di_gt_init(GridTransferContext *ctx) @@ -418,7 +469,7 @@ int mg2di_gt_init(GridTransferContext *ctx) return -EINVAL; /* check that the destination grid is inside the source one */ - for (int i = 0; i < 2; i++) { + for (int i = 0; i < ctx->nb_dims; i++) { double src_start, src_end, dst_start, dst_end; if (ctx->src.step[i] <= 0.0 || ctx->dst.step[i] <= 0.0 || @@ -459,6 +510,13 @@ void mg2di_gt_free(GridTransferContext **pctx) free(ctx->priv); + free(ctx->src.start); + free(ctx->src.size); + free(ctx->src.step); + free(ctx->dst.start); + free(ctx->dst.size); + free(ctx->dst.step); + free(ctx); *pctx = NULL; } diff --git a/transfer.h b/transfer.h index 2ed4aea..5c434e3 100644 --- a/transfer.h +++ b/transfer.h @@ -36,25 +36,26 @@ enum GridTransferOperator { }; /** - * A 2-dimensional grid with regular spacing. + * An N-dimensional grid with regular spacing. */ -typedef struct RegularGrid2D { +typedef struct RegularGrid { /** * Indices of the grid origin, relative to coordinate origin. - * (o0, o1) = (start[0] * step[0], start[1] * step[1]) + * For each dimension i=0..N-1: + * o_i = start[i] * step[i] */ - ptrdiff_t start[2]; + ptrdiff_t *start; /** * Number of points in the domain. */ - size_t size[2]; + size_t *size; /** * Size of the step between neighbouring grid points. */ - double step[2]; -} RegularGrid2D; + double *step; +} RegularGrid; typedef struct GridTransferContext { /** @@ -76,15 +77,20 @@ typedef struct GridTransferContext { */ enum GridTransferOperator op; + /** + * Number of dimensions. + */ + unsigned int nb_dims; + /** * Source grid geometry, must be filled by the caller. */ - RegularGrid2D src; + RegularGrid src; /** * Destination grid geometry, must be filled by the caller. */ - RegularGrid2D dst; + RegularGrid dst; /** * Allow extrapolating this many points beyond the source rectangle. @@ -98,7 +104,7 @@ typedef struct GridTransferContext { * * @return newly allocated transfer context on success, NULL on failure */ -GridTransferContext *mg2di_gt_alloc(enum GridTransferOperator op); +GridTransferContext *mg2di_gt_alloc(unsigned int dims, enum GridTransferOperator op); /** * Initialize the tranfer context after all the parameters have been set. diff --git a/transfer_interp.asm b/transfer_interp.asm index 1b1fe7d..e711b8f 100644 --- a/transfer_interp.asm +++ b/transfer_interp.asm @@ -22,8 +22,8 @@ SECTION .text INIT_YMM fma3 -cglobal transfer_interp_line_cont_4, 7, 8, 6, dst, dst_len, src, src_stride, idx_x, fact_x, fact_y,\ - idx_x_val +cglobal transfer_interp2d_line_cont_4, 7, 8, 6, dst, dst_len, src, src_stride, idx_x, fact_x, fact_y,\ + idx_x_val shl src_strideq, 3 shl dst_lenq, 3 @@ -74,8 +74,8 @@ cglobal transfer_interp_line_cont_4, 7, 8, 6, dst, dst_len, src, src_stride, idx RET INIT_YMM fma3 -cglobal transfer_interp_line_cont_6, 7, 9, 11, dst, dst_len, src, src_stride, idx_x, fact_x, fact_y,\ - idx_x_val, offset6 +cglobal transfer_interp2d_line_cont_6, 7, 9, 11, dst, dst_len, src, src_stride, idx_x, fact_x, fact_y,\ + idx_x_val, offset6 shl src_strideq, 3 shl dst_lenq, 3 diff --git a/transfer_interp_template.c b/transfer_interp_template.c index e7ff14a..7bfc89e 100644 --- a/transfer_interp_template.c +++ b/transfer_interp_template.c @@ -27,9 +27,9 @@ #define FUNC(name) FUNC2(name, CONT, STENCIL) static void -FUNC(interp_transfer_line)(double *dst, ptrdiff_t dst_len, - const double *src, ptrdiff_t src_stride, - const ptrdiff_t *idx_x, const double *fact_x, const double *fact_y +FUNC(interp2d_transfer_line)(double *dst, ptrdiff_t dst_len, + const double *src, ptrdiff_t src_stride, + const ptrdiff_t *idx_x, const double *fact_x, const double *fact_y #if !CONTIGUOUS , ptrdiff_t dst_stride0, ptrdiff_t src_stride0 # define SSTRIDE1 src_stride0 @@ -56,6 +56,26 @@ FUNC(interp_transfer_line)(double *dst, ptrdiff_t dst_len, } } +static void +FUNC(interp1d_transfer_line)(double *dst, ptrdiff_t dst_len, + const double *src, const ptrdiff_t *idx_x, const double *fact_x +#if !CONTIGUOUS + , ptrdiff_t dst_stride0, ptrdiff_t src_stride0 +#endif + ) +{ + for (size_t x = 0; x < dst_len; x++) { + const double *src_data = src + SSTRIDE1 * idx_x[x]; + + double val = 0.0; + + for (ptrdiff_t idx = 0; idx < STENCIL; idx++) + val += src_data[idx * SSTRIDE1] * fact_x[STENCIL * x + idx]; + + dst[x * DSTRIDE1] = val; + } +} + #undef SSTRIDE1 #undef DSTRIDE1 #undef CONT -- cgit v1.2.3