aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnton Khirnov <anton@khirnov.net>2019-06-29 10:23:40 +0200
committerAnton Khirnov <anton@khirnov.net>2019-06-29 11:21:56 +0200
commit60d1383db4d9f646ce5504edc9b2c437836696db (patch)
tree12fc2d690c6add4fa849e97e614bde60ca60372b
parent6d9c352ff599201dacff330004a7c6cd54705aaa (diff)
transfer: implement and use 1D interpolation
Stop abusing "2D of y size 1" for this.
-rw-r--r--mg2d.c38
-rw-r--r--transfer.c156
-rw-r--r--transfer.h26
-rw-r--r--transfer_interp.asm8
-rw-r--r--transfer_interp_template.c26
5 files changed, 166 insertions, 88 deletions
diff --git a/mg2d.c b/mg2d.c
index 113830b..853e9ab 100644
--- a/mg2d.c
+++ b/mg2d.c
@@ -640,18 +640,18 @@ static int diff_coeffs_fixup(MG2DContext *ctx, MG2DLevel *l)
double *dst = a_dst->data + mg2d_bnd_is_upper(bnd_idx) *
((a_dst->shape[!ci] - 1) * a_dst->stride[!ci]);
const ptrdiff_t stride_dst = a_dst->stride[ci];
- const size_t size_dst[] = { 1, l->solver->domain_size[!ci] };
+ const size_t size_dst = l->solver->domain_size[!ci];
ret = mg2di_ndarray_slice(&dc_src, priv->dc_bnd_val[i][bnd_idx],
- (Slice [2]){ SLICE_NULL, SLICE(FD_STENCIL_MAX, -FD_STENCIL_MAX, 1) });
+ &SLICE(FD_STENCIL_MAX, -FD_STENCIL_MAX, 1));
if (ret < 0)
goto fail;
- ret = mg2di_ndarray_alloc(&dc_dst, 2, size_dst, 0);
+ ret = mg2di_ndarray_alloc(&dc_dst, 1, &size_dst, 0);
if (ret < 0)
goto fail;
- gt_bnd = mg2di_gt_alloc(GRID_TRANSFER_LAGRANGE_1);
+ gt_bnd = mg2di_gt_alloc(1, GRID_TRANSFER_LAGRANGE_1);
if (!gt_bnd) {
ret = -ENOMEM;
goto fail;
@@ -661,18 +661,12 @@ static int diff_coeffs_fixup(MG2DContext *ctx, MG2DLevel *l)
gt_bnd->cpuflags = priv->cpuflags;
gt_bnd->src.start[0] = 0;
- gt_bnd->src.start[1] = 0;
- gt_bnd->src.size[0] = 1;
- gt_bnd->src.size[1] = priv->dg->domain_size[1];
- gt_bnd->src.step[0] = ctx->step[0];
- gt_bnd->src.step[1] = ctx->step[1];
-
- gt_bnd->dst.start[0] = 0;
- gt_bnd->dst.start[1] = l->dg->components[priv->local_component].interior.start[!ci];
- gt_bnd->dst.size[0] = 1;
- gt_bnd->dst.size[1] = l->dg->components[priv->local_component].interior.size[!ci];
- gt_bnd->dst.step[0] = l->solver->step[0];
- gt_bnd->dst.step[1] = l->solver->step[1];
+ gt_bnd->src.size[0] = priv->dg->domain_size[1];
+ gt_bnd->src.step[0] = ctx->step[1];
+
+ gt_bnd->dst.start[0] = l->dg->components[priv->local_component].interior.start[!ci];
+ gt_bnd->dst.size[0] = l->dg->components[priv->local_component].interior.size[!ci];
+ gt_bnd->dst.step[0] = l->solver->step[1];
ret = mg2di_gt_init(gt_bnd);
if (ret < 0)
@@ -682,7 +676,7 @@ static int diff_coeffs_fixup(MG2DContext *ctx, MG2DLevel *l)
if (ret < 0)
goto fail;
- for (int idx = 0; idx < dc_dst->shape[1]; idx++)
+ for (int idx = 0; idx < dc_dst->shape[0]; idx++)
dst[stride_dst * idx] += dc_dst->data[idx];
fail:
@@ -730,7 +724,7 @@ static int mg_setup_restrict(MG2DContext *ctx, MG2DLevel *l, enum GridTransferOp
GridTransferContext *gt;
int ret;
- gt = mg2di_gt_alloc(op);
+ gt = mg2di_gt_alloc(2, op);
if (!gt)
return -ENOMEM;
l->transfer_restrict = gt;
@@ -764,7 +758,7 @@ static int mg_setup_prolong(MG2DContext *ctx, MG2DLevel *l, enum GridTransferOpe
GridTransferContext *gt;
int ret;
- gt = mg2di_gt_alloc(op);
+ gt = mg2di_gt_alloc(2, op);
if (!gt)
return -ENOMEM;
l->transfer_prolong = gt;
@@ -1642,8 +1636,8 @@ static MG2DContext *solver_alloc(DomainGeometry *dg, unsigned int local_componen
for (int bnd_idx = 0; bnd_idx < ARRAY_ELEMS(dc->boundaries); bnd_idx++) {
const int ci = mg2d_bnd_coord_idx(bnd_idx);
- ret = mg2di_ndarray_alloc(&priv->dc_bnd_val[i][bnd_idx], 2,
- (size_t [2]){ 1, dg->domain_size[!ci] + 2 * FD_STENCIL_MAX },
+ ret = mg2di_ndarray_alloc(&priv->dc_bnd_val[i][bnd_idx], 1,
+ (size_t [1]){ dg->domain_size[!ci] + 2 * FD_STENCIL_MAX },
NDARRAY_ALLOC_ZERO);
if (ret < 0)
goto fail;
@@ -1994,7 +1988,7 @@ int mg2d_init_guess(MG2DContext *ctx, const double *src,
}
if (!priv->transfer_init) {
- priv->transfer_init = mg2di_gt_alloc(GRID_TRANSFER_LAGRANGE_3);
+ priv->transfer_init = mg2di_gt_alloc(2, GRID_TRANSFER_LAGRANGE_3);
if (!priv->transfer_init)
return -ENOMEM;
diff --git a/transfer.c b/transfer.c
index 9c5e125..ee635b9 100644
--- a/transfer.c
+++ b/transfer.c
@@ -41,30 +41,35 @@ typedef struct GridTransfer {
typedef struct GridTransferLagrange {
unsigned int stencil;
- void (*transfer_cont) (double *dst, ptrdiff_t dst_len,
- const double *src, ptrdiff_t src_stride,
- const ptrdiff_t *idx_x, const double *fact_x, const double *fact_y);
- void (*transfer_generic)(double *dst, ptrdiff_t dst_len,
- const double *src, ptrdiff_t src_stride,
- const ptrdiff_t *idx_x, const double *fact_x, const double *fact_y,
- ptrdiff_t dst_stride0, ptrdiff_t src_stride0);
+ void (*transfer2d_cont) (double *dst, ptrdiff_t dst_len,
+ const double *src, ptrdiff_t src_stride,
+ const ptrdiff_t *idx_x, const double *fact_x, const double *fact_y);
+ void (*transfer2d_generic)(double *dst, ptrdiff_t dst_len,
+ const double *src, ptrdiff_t src_stride,
+ const ptrdiff_t *idx_x, const double *fact_x, const double *fact_y,
+ ptrdiff_t dst_stride0, ptrdiff_t src_stride0);
+ void (*transfer1d_cont) (double *dst, ptrdiff_t dst_len,
+ const double *src, const ptrdiff_t *idx_x, const double *fact_x);
+ void (*transfer1d_generic)(double *dst, ptrdiff_t dst_len,
+ const double *src, const ptrdiff_t *idx_x, const double *fact_x,
+ ptrdiff_t dst_stride0, ptrdiff_t src_stride0);
const NDArray *src;
NDArray *dst;
- ptrdiff_t *idx[2];
- double *fact[2];
+ ptrdiff_t **idx;
+ double **fact;
} GridTransferLagrange;
#if HAVE_EXTERNAL_ASM
-void mg2di_transfer_interp_line_cont_4_fma3(double *dst, ptrdiff_t dst_len,
- const double *src, ptrdiff_t src_stride,
- const ptrdiff_t *idx_x,
- const double *fact_x, const double *fact_y);
-void mg2di_transfer_interp_line_cont_6_fma3(double *dst, ptrdiff_t dst_len,
- const double *src, ptrdiff_t src_stride,
- const ptrdiff_t *idx_x,
- const double *fact_x, const double *fact_y);
+void mg2di_transfer_interp2d_line_cont_4_fma3(double *dst, ptrdiff_t dst_len,
+ const double *src, ptrdiff_t src_stride,
+ const ptrdiff_t *idx_x,
+ const double *fact_x, const double *fact_y);
+void mg2di_transfer_interp2d_line_cont_6_fma3(double *dst, ptrdiff_t dst_len,
+ const double *src, ptrdiff_t src_stride,
+ const ptrdiff_t *idx_x,
+ const double *fact_x, const double *fact_y);
#endif
#define STENCIL 2
@@ -153,43 +158,59 @@ static int transfer_lagrange_init(GridTransferContext *ctx)
GridTransferLagrange *priv = ctx->priv;
int ret;
+ if (ctx->nb_dims > 2)
+ return -ENOSYS;
+
switch (ctx->op) {
case GRID_TRANSFER_LAGRANGE_1:
priv->stencil = 2;
- priv->transfer_cont = interp_transfer_line_cont_2;
- priv->transfer_generic = interp_transfer_line_generic_2;
+ priv->transfer1d_cont = interp1d_transfer_line_cont_2;
+ priv->transfer1d_generic = interp1d_transfer_line_generic_2;
+ priv->transfer2d_cont = interp2d_transfer_line_cont_2;
+ priv->transfer2d_generic = interp2d_transfer_line_generic_2;
break;
case GRID_TRANSFER_LAGRANGE_3:
- priv->transfer_cont = interp_transfer_line_cont_4;
- priv->transfer_generic = interp_transfer_line_generic_4;
+ priv->transfer1d_cont = interp1d_transfer_line_cont_4;
+ priv->transfer1d_generic = interp1d_transfer_line_generic_4;
+ priv->transfer2d_cont = interp2d_transfer_line_cont_4;
+ priv->transfer2d_generic = interp2d_transfer_line_generic_4;
priv->stencil = 4;
#if HAVE_EXTERNAL_ASM
if (ctx->cpuflags & MG2DI_CPU_FLAG_FMA3) {
- priv->transfer_cont = mg2di_transfer_interp_line_cont_4_fma3;
+ priv->transfer2d_cont = mg2di_transfer_interp2d_line_cont_4_fma3;
}
#endif
break;
case GRID_TRANSFER_LAGRANGE_5:
- priv->transfer_cont = interp_transfer_line_cont_6;
- priv->transfer_generic = interp_transfer_line_generic_6;
+ priv->transfer1d_cont = interp1d_transfer_line_cont_6;
+ priv->transfer1d_generic = interp1d_transfer_line_generic_6;
+ priv->transfer2d_cont = interp2d_transfer_line_cont_6;
+ priv->transfer2d_generic = interp2d_transfer_line_generic_6;
priv->stencil = 6;
#if HAVE_EXTERNAL_ASM
if (ctx->cpuflags & MG2DI_CPU_FLAG_FMA3) {
- priv->transfer_cont = mg2di_transfer_interp_line_cont_6_fma3;
+ priv->transfer2d_cont = mg2di_transfer_interp2d_line_cont_6_fma3;
}
#endif
break;
case GRID_TRANSFER_LAGRANGE_7:
- priv->transfer_cont = interp_transfer_line_cont_8;
- priv->transfer_generic = interp_transfer_line_generic_8;
+ priv->transfer1d_cont = interp1d_transfer_line_cont_8;
+ priv->transfer1d_generic = interp1d_transfer_line_generic_8;
+ priv->transfer2d_cont = interp2d_transfer_line_cont_8;
+ priv->transfer2d_generic = interp2d_transfer_line_generic_8;
priv->stencil = 8;
break;
default:
return -EINVAL;
}
- for (int dim = 0; dim < 2; dim++) {
+ priv->idx = calloc(ctx->nb_dims, sizeof(*priv->idx));
+ priv->fact = calloc(ctx->nb_dims, sizeof(*priv->fact));
+ if (!priv->idx || !priv->fact)
+ return -ENOMEM;
+
+ for (int dim = 0; dim < ctx->nb_dims; dim++) {
priv->idx[dim] = calloc(ctx->dst.size[dim], sizeof(*priv->idx[dim]));
if (!priv->idx[dim])
return -ENOMEM;
@@ -210,13 +231,17 @@ static void transfer_lagrange_free(GridTransferContext *ctx)
{
GridTransferLagrange *priv = ctx->priv;
- for (int dim = 0; dim < 2; dim++) {
- free(priv->idx[dim]);
- free(priv->fact[dim]);
+ for (int dim = 0; dim < ctx->nb_dims; dim++) {
+ if (priv->idx)
+ free(priv->idx[dim]);
+ if (priv->fact)
+ free(priv->fact[dim]);
}
+ free(priv->idx);
+ free(priv->fact);
}
-static int transfer_interp_task(void *arg, unsigned int job_idx, unsigned int thread_idx)
+static int transfer_interp2d_task(void *arg, unsigned int job_idx, unsigned int thread_idx)
{
GridTransferLagrange *priv = arg;
const NDArray *src = priv->src;
@@ -226,13 +251,13 @@ static int transfer_interp_task(void *arg, unsigned int job_idx, unsigned int th
const double *fact_y = priv->fact[0] + priv->stencil * job_idx;
if (dst->stride[1] == 1 && src->stride[1] == 1) {
- priv->transfer_cont(dst->data + job_idx * dst->stride[0], dst->shape[1],
- src->data + idx_y * src->stride[0], src->stride[0],
- priv->idx[1], priv->fact[1], fact_y);
+ priv->transfer2d_cont(dst->data + job_idx * dst->stride[0], dst->shape[1],
+ src->data + idx_y * src->stride[0], src->stride[0],
+ priv->idx[1], priv->fact[1], fact_y);
} else {
- priv->transfer_generic(dst->data + job_idx * dst->stride[0], dst->shape[1],
- src->data + idx_y * src->stride[0], src->stride[0],
- priv->idx[1], priv->fact[1], fact_y, dst->stride[1], src->stride[1]);
+ priv->transfer2d_generic(dst->data + job_idx * dst->stride[0], dst->shape[1],
+ src->data + idx_y * src->stride[0], src->stride[0],
+ priv->idx[1], priv->fact[1], fact_y, dst->stride[1], src->stride[1]);
}
return 0;
@@ -246,7 +271,15 @@ static int transfer_lagrange_transfer(GridTransferContext *ctx,
priv->src = src;
priv->dst = dst;
- tp_execute(ctx->tp, ctx->dst.size[0], transfer_interp_task, priv);
+ if (ctx->nb_dims == 1) {
+ if (dst->stride[0] == 1 && src->stride[0] == 1) {
+ priv->transfer1d_cont(dst->data, dst->shape[0], src->data, priv->idx[0], priv->fact[0]);
+ } else {
+ priv->transfer1d_generic(dst->data, dst->shape[0], src->data, priv->idx[0], priv->fact[0],
+ dst->stride[0], src->stride[0]);
+ }
+ } else
+ tp_execute(ctx->tp, ctx->dst.size[0], transfer_interp2d_task, priv);
priv->src = NULL;
priv->dst = NULL;
@@ -325,7 +358,10 @@ static int transfer_fw_init(GridTransferContext *ctx)
{
GridTransferFW *fw = ctx->priv;
- for (int i = 0; i < 2; i++) {
+ if (ctx->nb_dims != 2)
+ return -ENOSYS;
+
+ for (int i = 0; i < ctx->nb_dims; i++) {
/* the destination must have twice the stepsize of the source */
if (fabs(ctx->dst.step[i] / ctx->src.step[i] - 2.0) > 1e-14)
return -EINVAL;
@@ -376,15 +412,18 @@ int mg2di_gt_transfer(GridTransferContext *ctx,
{
const GridTransfer *t = transfer_ops[ctx->op];
- if (dst->dims != 2 || src->dims != 2 ||
- dst->shape[0] != ctx->dst.size[0] || dst->shape[1] != ctx->dst.size[1] ||
- src->shape[0] != ctx->src.size[0] || src->shape[1] != ctx->src.size[1])
+ if (dst->dims != ctx->nb_dims || src->dims != ctx->nb_dims)
return -EINVAL;
+ for (int dir = 0; dir < ctx->nb_dims; dir++) {
+ if (dst->shape[dir] != ctx->dst.size[dir] || src->shape[dir] != ctx->src.size[dir])
+ return -EINVAL;
+ }
+
return t->transfer(ctx, dst, src);
}
-GridTransferContext *mg2di_gt_alloc(enum GridTransferOperator op)
+GridTransferContext *mg2di_gt_alloc(unsigned int nb_dims, enum GridTransferOperator op)
{
GridTransferContext *ctx;
const GridTransfer *t;
@@ -397,16 +436,28 @@ GridTransferContext *mg2di_gt_alloc(enum GridTransferOperator op)
if (!ctx)
return NULL;
+ ctx->src.start = calloc(nb_dims, sizeof(*ctx->src.start));
+ ctx->src.size = calloc(nb_dims, sizeof(*ctx->src.size));
+ ctx->src.step = calloc(nb_dims, sizeof(*ctx->src.step));
+ ctx->dst.start = calloc(nb_dims, sizeof(*ctx->dst.start));
+ ctx->dst.size = calloc(nb_dims, sizeof(*ctx->dst.size));
+ ctx->dst.step = calloc(nb_dims, sizeof(*ctx->dst.step));
+ if (!ctx->src.start || !ctx->src.size || !ctx->src.step ||
+ !ctx->dst.start || !ctx->dst.size || !ctx->dst.step)
+ goto fail;
+ ctx->nb_dims = nb_dims;
+
if (t->priv_data_size) {
ctx->priv = calloc(1, t->priv_data_size);
- if (!ctx->priv) {
- free(ctx);
- return NULL;
- }
+ if (!ctx->priv)
+ goto fail;
}
ctx->op = op;
return ctx;
+fail:
+ mg2di_gt_free(&ctx);
+ return NULL;
}
int mg2di_gt_init(GridTransferContext *ctx)
@@ -418,7 +469,7 @@ int mg2di_gt_init(GridTransferContext *ctx)
return -EINVAL;
/* check that the destination grid is inside the source one */
- for (int i = 0; i < 2; i++) {
+ for (int i = 0; i < ctx->nb_dims; i++) {
double src_start, src_end, dst_start, dst_end;
if (ctx->src.step[i] <= 0.0 || ctx->dst.step[i] <= 0.0 ||
@@ -459,6 +510,13 @@ void mg2di_gt_free(GridTransferContext **pctx)
free(ctx->priv);
+ free(ctx->src.start);
+ free(ctx->src.size);
+ free(ctx->src.step);
+ free(ctx->dst.start);
+ free(ctx->dst.size);
+ free(ctx->dst.step);
+
free(ctx);
*pctx = NULL;
}
diff --git a/transfer.h b/transfer.h
index 2ed4aea..5c434e3 100644
--- a/transfer.h
+++ b/transfer.h
@@ -36,25 +36,26 @@ enum GridTransferOperator {
};
/**
- * A 2-dimensional grid with regular spacing.
+ * An N-dimensional grid with regular spacing.
*/
-typedef struct RegularGrid2D {
+typedef struct RegularGrid {
/**
* Indices of the grid origin, relative to coordinate origin.
- * (o0, o1) = (start[0] * step[0], start[1] * step[1])
+ * For each dimension i=0..N-1:
+ * o_i = start[i] * step[i]
*/
- ptrdiff_t start[2];
+ ptrdiff_t *start;
/**
* Number of points in the domain.
*/
- size_t size[2];
+ size_t *size;
/**
* Size of the step between neighbouring grid points.
*/
- double step[2];
-} RegularGrid2D;
+ double *step;
+} RegularGrid;
typedef struct GridTransferContext {
/**
@@ -77,14 +78,19 @@ typedef struct GridTransferContext {
enum GridTransferOperator op;
/**
+ * Number of dimensions.
+ */
+ unsigned int nb_dims;
+
+ /**
* Source grid geometry, must be filled by the caller.
*/
- RegularGrid2D src;
+ RegularGrid src;
/**
* Destination grid geometry, must be filled by the caller.
*/
- RegularGrid2D dst;
+ RegularGrid dst;
/**
* Allow extrapolating this many points beyond the source rectangle.
@@ -98,7 +104,7 @@ typedef struct GridTransferContext {
*
* @return newly allocated transfer context on success, NULL on failure
*/
-GridTransferContext *mg2di_gt_alloc(enum GridTransferOperator op);
+GridTransferContext *mg2di_gt_alloc(unsigned int dims, enum GridTransferOperator op);
/**
* Initialize the tranfer context after all the parameters have been set.
diff --git a/transfer_interp.asm b/transfer_interp.asm
index 1b1fe7d..e711b8f 100644
--- a/transfer_interp.asm
+++ b/transfer_interp.asm
@@ -22,8 +22,8 @@
SECTION .text
INIT_YMM fma3
-cglobal transfer_interp_line_cont_4, 7, 8, 6, dst, dst_len, src, src_stride, idx_x, fact_x, fact_y,\
- idx_x_val
+cglobal transfer_interp2d_line_cont_4, 7, 8, 6, dst, dst_len, src, src_stride, idx_x, fact_x, fact_y,\
+ idx_x_val
shl src_strideq, 3
shl dst_lenq, 3
@@ -74,8 +74,8 @@ cglobal transfer_interp_line_cont_4, 7, 8, 6, dst, dst_len, src, src_stride, idx
RET
INIT_YMM fma3
-cglobal transfer_interp_line_cont_6, 7, 9, 11, dst, dst_len, src, src_stride, idx_x, fact_x, fact_y,\
- idx_x_val, offset6
+cglobal transfer_interp2d_line_cont_6, 7, 9, 11, dst, dst_len, src, src_stride, idx_x, fact_x, fact_y,\
+ idx_x_val, offset6
shl src_strideq, 3
shl dst_lenq, 3
diff --git a/transfer_interp_template.c b/transfer_interp_template.c
index e7ff14a..7bfc89e 100644
--- a/transfer_interp_template.c
+++ b/transfer_interp_template.c
@@ -27,9 +27,9 @@
#define FUNC(name) FUNC2(name, CONT, STENCIL)
static void
-FUNC(interp_transfer_line)(double *dst, ptrdiff_t dst_len,
- const double *src, ptrdiff_t src_stride,
- const ptrdiff_t *idx_x, const double *fact_x, const double *fact_y
+FUNC(interp2d_transfer_line)(double *dst, ptrdiff_t dst_len,
+ const double *src, ptrdiff_t src_stride,
+ const ptrdiff_t *idx_x, const double *fact_x, const double *fact_y
#if !CONTIGUOUS
, ptrdiff_t dst_stride0, ptrdiff_t src_stride0
# define SSTRIDE1 src_stride0
@@ -56,6 +56,26 @@ FUNC(interp_transfer_line)(double *dst, ptrdiff_t dst_len,
}
}
+static void
+FUNC(interp1d_transfer_line)(double *dst, ptrdiff_t dst_len,
+ const double *src, const ptrdiff_t *idx_x, const double *fact_x
+#if !CONTIGUOUS
+ , ptrdiff_t dst_stride0, ptrdiff_t src_stride0
+#endif
+ )
+{
+ for (size_t x = 0; x < dst_len; x++) {
+ const double *src_data = src + SSTRIDE1 * idx_x[x];
+
+ double val = 0.0;
+
+ for (ptrdiff_t idx = 0; idx < STENCIL; idx++)
+ val += src_data[idx * SSTRIDE1] * fact_x[STENCIL * x + idx];
+
+ dst[x * DSTRIDE1] = val;
+ }
+}
+
#undef SSTRIDE1
#undef DSTRIDE1
#undef CONT