aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnton Khirnov <anton@khirnov.net>2019-03-23 18:20:32 +0100
committerAnton Khirnov <anton@khirnov.net>2019-03-25 08:27:08 +0100
commit71ad1d672f3724426184e799252b87c1d54cb115 (patch)
treea2653bef72ee670300d0a7dc5c6b99edb4164296
parentd215c872bbdf8ba733f9a9fbd586374b841fdfb5 (diff)
mg2d: use the new transfer API for inter-grid transfers
-rw-r--r--mg2d.c452
-rw-r--r--transfer.c2
2 files changed, 130 insertions, 324 deletions
diff --git a/mg2d.c b/mg2d.c
index 531b061..ce92ba2 100644
--- a/mg2d.c
+++ b/mg2d.c
@@ -34,14 +34,18 @@
#include "mg2d_boundary_internal.h"
#include "mg2d_constants.h"
#include "ndarray.h"
+#include "transfer.h"
typedef struct MG2DLevel {
unsigned int depth;
EGSContext *solver;
- double *prolong_tmp;
- ptrdiff_t prolong_tmp_stride;
+ GridTransferContext *transfer_restrict;
+ GridTransferContext *transfer_prolong;
+
+ NDArray *prolong_tmp_base;
+ NDArray *prolong_tmp;
struct MG2DLevel *child;
@@ -89,313 +93,18 @@ static void log_relax_step(MG2DContext *ctx, MG2DLevel *level, const char *step_
prefix, level->depth, step_desc, res_old, res_new, res_old / res_new);
}
-static void mg_restrict_inject(EGSContext *coarse, double *dst, ptrdiff_t dst_stride,
- EGSContext *fine, const double *src, ptrdiff_t src_stride)
-{
- for (size_t j = 0; j < coarse->domain_size[1]; j++)
- for (size_t i = 0; i < coarse->domain_size[0]; i++) {
- const ptrdiff_t idx_dst = j * dst_stride + i;
-
- const size_t fine_i = i * 2;
- const size_t fine_j = j * 2;
- const ptrdiff_t idx_src = fine_j * src_stride + fine_i;
-
- dst[idx_dst] = src[idx_src];
- }
-}
-
-static void mg_restrict_fw(EGSContext *coarse, double *dst, ptrdiff_t dst_stride,
- EGSContext *fine, const double *src, ptrdiff_t src_stride)
-{
- for (size_t j = 0; j < coarse->domain_size[1]; j++)
- for (size_t i = 0; i < coarse->domain_size[0]; i++) {
- const ptrdiff_t idx_dst = j * dst_stride + i;
-
- const size_t fine_i = i * 2;
- const size_t fine_j = j * 2;
- const ptrdiff_t idx_src = fine_j * src_stride + fine_i;
-
- dst[idx_dst] = 0.25 * src[idx_src] + 0.125 * (src[idx_src + 1] + src[idx_src - 1] + src[idx_src + src_stride] + src[idx_src - src_stride])
- + 0.0625 * (src[idx_src + 1 + src_stride] + src[idx_src - 1 + src_stride] + src[idx_src + 1 - src_stride] + src[idx_src - 1 - src_stride]);
- }
-}
-
-static void mg_prolongate(EGSContext *fine, double *dst, ptrdiff_t dst_stride,
- EGSContext *coarse, const double *src, ptrdiff_t src_stride)
-{
- // for simplicity, this code writes one point over the domain size;
- // this allows us to avoid treating the boundary layers specially
- // dst is allocated with extra ghost zones for this purpose
- for (size_t j = 0; j < coarse->domain_size[1]; j++)
- for (size_t i = 0; i < coarse->domain_size[0]; i++) {
- const ptrdiff_t idx_src = j * src_stride + i;
- const ptrdiff_t idx_dst = 2 * (j * dst_stride + i);
-
- const double src00 = src[idx_src];
- const double src10 = src[idx_src + 1];
- const double src01 = src[idx_src + src_stride];
- const double src11 = src[idx_src + src_stride + 1];
-
- dst[idx_dst] = src00;
- dst[idx_dst + 1] = 0.5 * (src00 + src10);
- dst[idx_dst + dst_stride] = 0.5 * (src00 + src01);
- dst[idx_dst + dst_stride + 1] = 0.25 * (src00 + src10 + src01 + src11);
- }
-}
-
-static void mg_prolongate_bicubic(EGSContext *fine, double *dst, ptrdiff_t dst_stride,
- EGSContext *coarse, const double *src, ptrdiff_t src_stride)
-{
- // for simplicity, this code writes one point over the domain size;
- // this allows us to avoid treating the boundary layers specially
- // dst is allocated with extra ghost zones for this purpose
- for (size_t j = 0; j < coarse->domain_size[1]; j++)
- for (size_t i = 0; i < coarse->domain_size[0]; i++) {
- const ptrdiff_t idx_src = j * src_stride + i;
- const ptrdiff_t idx_dst = 2 * (j * dst_stride + i);
-
- const double src00 = src[idx_src];
- const double srcm10 = src[idx_src - 1];
- const double srcp10 = src[idx_src + 1];
- const double srcp20 = src[idx_src + 2];
-
- const double srcm1m1 = src[idx_src - src_stride - 1];
- const double src0m1 = src[idx_src - src_stride];
- const double srcp1m1 = src[idx_src - src_stride + 1];
- const double srcp2m1 = src[idx_src - src_stride + 2];
-
- const double srcm1p1 = src[idx_src + src_stride - 1];
- const double src0p1 = src[idx_src + src_stride];
- const double srcp1p1 = src[idx_src + src_stride + 1];
- const double srcp2p1 = src[idx_src + src_stride + 2];
-
- const double srcm1p2 = src[idx_src + 2 * src_stride - 1];
- const double src0p2 = src[idx_src + 2 * src_stride];
- const double srcp1p2 = src[idx_src + 2 * src_stride + 1];
- const double srcp2p2 = src[idx_src + 2 * src_stride + 2];
-
- dst[idx_dst] = src00;
- dst[idx_dst + 1] = (-1.0 * srcm10 + 9.0 * src00 + 9.0 * srcp10 - 1.0 * srcp20) / 16.0;
- dst[idx_dst + dst_stride] = (-1.0 * src0m1 + 9.0 * src00 + 9.0 * src0p1 - 1.0 * src0p2) / 16.0;
- dst[idx_dst + dst_stride + 1] = (- 1.0 * (-1.0 * srcm1m1 + 9.0 * src0m1 + 9.0 * srcp1m1 - 1.0 * srcp2m1)
- + 9.0 * (-1.0 * srcm10 + 9.0 * src00 + 9.0 * srcp10 - 1.0 * srcp20)
- + 9.0 * (-1.0 * srcm1p1 + 9.0 * src0p1 + 9.0 * srcp1p1 - 1.0 * srcp2p1)
- - 1.0 * (-1.0 * srcm1p2 + 9.0 * src0p2 + 9.0 * srcp1p2 - 1.0 * srcp2p2)) / (16.0 * 16.0);
- }
-}
-
-
-typedef struct InterpParamsBilin {
- size_t dst_domain_size[2];
- ptrdiff_t src_stride;
- ptrdiff_t dst_stride;
- const double *src;
- double *dst;
- double dx_dst;
- double dy_dst;
- double dx_src;
- double dy_src;
- double scalex;
- double scaley;
-} InterpParamsBilin;
-
-static int mg_interp_bilinear_kernel(void *arg, unsigned int job_idx, unsigned int thread_idx)
-{
- InterpParamsBilin *ip = arg;
- unsigned int idx1_dst = job_idx;
-
- double *dst = ip->dst;
- const double *src = ip->src;
- const double dx_dst = ip->dx_dst;
- const double dy_dst = ip->dy_dst;
- const double dx_src = ip->dx_src;
- const double dy_src = ip->dy_src;
- const double scalex = ip->scalex;
- const double scaley = ip->scaley;
- const ptrdiff_t src_stride = ip->src_stride;
- const ptrdiff_t dst_stride = ip->dst_stride;
-
- const double y = idx1_dst * dy_dst;
- const size_t idx1_src = idx1_dst * scaley;
- const double y0 = idx1_src * dy_src;
- const double y1 = y0 + dy_src;
- const double fact_y0 = (y1 - y) / dy_src;
- const double fact_y1 = (y - y0) / dy_src;
-
- for (size_t idx0_dst = 0; idx0_dst < ip->dst_domain_size[0]; idx0_dst++) {
- const double x = idx0_dst * dx_dst;
- const size_t idx0_src = idx0_dst * scalex;
- const size_t idx_src = idx1_src * src_stride + idx0_src;
- const double x0 = idx0_src * dx_src;
- const double x1 = x0 + dx_src;
- const double fact_x0 = (x1 - x) / dx_src;
- const double fact_x1 = (x - x0) / dx_src;
-
- const double val = fact_x0 * (fact_y0 * src[idx_src] + fact_y1 * src[idx_src + src_stride]) +
- fact_x1 * (fact_y0 * src[idx_src + 1] + fact_y1 * src[idx_src + 1 + src_stride]);
-
- dst[idx1_dst * dst_stride + idx0_dst] = val;
- }
-
- return 0;
-}
-
-static void mg_interp_bilinear(TPContext *tp,
- EGSContext *ctx_dst, double *dst, ptrdiff_t dst_stride,
- EGSContext *ctx_src, const double *src, ptrdiff_t src_stride)
-{
- InterpParamsBilin ip;
- ip.dst_domain_size[0] = ctx_dst->domain_size[0];
- ip.dst_domain_size[1] = ctx_dst->domain_size[1];
- ip.src_stride = src_stride;
- ip.dst_stride = dst_stride;
- ip.dx_dst = ctx_dst->step[0];
- ip.dy_dst = ctx_dst->step[1];
- ip.dx_src = ctx_src->step[0];
- ip.dy_src = ctx_src->step[1];
- ip.scalex = ip.dx_dst / ip.dx_src;
- ip.scaley = ip.dy_dst / ip.dy_src;
- ip.src = src;
- ip.dst = dst;
- tp_execute(tp, ctx_dst->domain_size[1], mg_interp_bilinear_kernel, &ip);
-}
-
-static int mg_interp_bicubic_kernel(void *arg, unsigned int job_idx, unsigned int thread_idx)
-{
- InterpParamsBilin *ip = arg;
- unsigned int idx1_dst = job_idx;
-
- double *dst = ip->dst;
- const double *src = ip->src;
- const double dx_dst = ip->dx_dst;
- const double dy_dst = ip->dy_dst;
- const double dx_src = ip->dx_src;
- const double dy_src = ip->dy_src;
- const double scalex = ip->scalex;
- const double scaley = ip->scaley;
- const ptrdiff_t src_stride = ip->src_stride;
- const ptrdiff_t dst_stride = ip->dst_stride;
-
- const double y = idx1_dst * dy_dst;
- const size_t idx1_src = idx1_dst * scaley;
-
- const double ym1 = idx1_src * dy_src;
- const double ym2 = ym1 - dy_src;
- const double yp1 = ym1 + dy_src;
- const double yp2 = yp1 + dy_src;
-
- const double fact_ym2 = (y - ym1) * (y - yp1) * (y - yp2) / ( (ym2 - ym1) * (ym2 - yp1) * (ym2 - yp2));
- const double fact_ym1 = (y - ym2) * (y - yp1) * (y - yp2) / ((ym1 - ym2) * (ym1 - yp1) * (ym1 - yp2));
- const double fact_yp1 = (y - ym2) * (y - ym1) * (y - yp2) / ((yp1 - ym2) * (yp1 - ym1) * (yp1 - yp2));
- const double fact_yp2 = (y - ym2) * (y - ym1) * (y - yp1) / ((yp2 - ym2) * (yp2 - ym1) * (yp2 - yp1) );
- const double fact_y[4] = { fact_ym2, fact_ym1, fact_yp1, fact_yp2 };
-
- for (size_t idx0_dst = 0; idx0_dst < ip->dst_domain_size[0]; idx0_dst++) {
- const double x = idx0_dst * dx_dst;
- const size_t idx0_src = idx0_dst * scalex;
- const size_t idx_src = (idx1_src - 1) * src_stride + (idx0_src - 1);
-
- const double xm1 = idx0_src * dx_src;
- const double xm2 = xm1 - dx_src;
- const double xp1 = xm1 + dx_src;
- const double xp2 = xp1 + dx_src;
-
- const double fact_xm2 = (x - xm1) * (x - xp1) * (x - xp2) / ( (xm2 - xm1) * (xm2 - xp1) * (xm2 - xp2));
- const double fact_xm1 = (x - xm2) * (x - xp1) * (x - xp2) / ((xm1 - xm2) * (xm1 - xp1) * (xm1 - xp2));
- const double fact_xp1 = (x - xm2) * (x - xm1) * (x - xp2) / ((xp1 - xm2) * (xp1 - xm1) * (xp1 - xp2));
- const double fact_xp2 = (x - xm2) * (x - xm1) * (x - xp1) / ((xp2 - xm2) * (xp2 - xm1) * (xp2 - xp1) );
-
- const double fact_x[4] = { fact_xm2, fact_xm1, fact_xp1, fact_xp2 };
-
- double val = 0.0;
-
- for (int idx_y = 0; idx_y < 4; idx_y++) {
- double tmp = 0.0;
- for (int idx_x = 0; idx_x < 4; idx_x++)
- tmp += fact_x[idx_x] * src[idx_src + idx_y * src_stride + idx_x];
- val += fact_y[idx_y] * tmp;
- }
-
- dst[idx1_dst * dst_stride + idx0_dst] = val;
- }
-
- return 0;
-}
-
-static void mg_interp_bicubic(TPContext *tp,
- EGSContext *ctx_dst, double *dst, ptrdiff_t dst_stride,
- EGSContext *ctx_src, const double *src, ptrdiff_t src_stride)
-{
- InterpParamsBilin ip;
- ip.dst_domain_size[0] = ctx_dst->domain_size[0];
- ip.dst_domain_size[1] = ctx_dst->domain_size[1];
- ip.src_stride = src_stride;
- ip.dst_stride = dst_stride;
- ip.dx_dst = ctx_dst->step[0];
- ip.dy_dst = ctx_dst->step[1];
- ip.dx_src = ctx_src->step[0];
- ip.dy_src = ctx_src->step[1];
- ip.scalex = ip.dx_dst / ip.dx_src;
- ip.scaley = ip.dy_dst / ip.dy_src;
- ip.src = src;
- ip.dst = dst;
- tp_execute(tp, ctx_dst->domain_size[1], mg_interp_bicubic_kernel, &ip);
-}
-
static int coarse_correct_task(void *arg, unsigned int job_idx, unsigned int thread_idx)
{
MG2DLevel *level = arg;
for (size_t idx0 = 0; idx0 < level->solver->domain_size[0]; idx0++) {
const ptrdiff_t idx_dst = job_idx * level->solver->u->stride[0] + idx0;
- const ptrdiff_t idx_src = job_idx * level->prolong_tmp_stride + idx0;
- level->solver->u->data[idx_dst] -= level->prolong_tmp[idx_src];
+ const ptrdiff_t idx_src = job_idx * level->prolong_tmp->stride[0] + idx0;
+ level->solver->u->data[idx_dst] -= level->prolong_tmp->data[idx_src];
}
return 0;
}
-static void restrict_residual(MG2DContext *ctx, MG2DLevel *dst, MG2DLevel *src)
-{
- EGSContext *s_src = src->solver;
- EGSContext *s_dst = dst->solver;
- if (s_src->domain_size[0] == 2 * (s_dst->domain_size[0] - 1) + 1) {
- mg_restrict_fw(s_dst, s_dst->rhs->data, s_dst->rhs->stride[0],
- s_src, s_src->residual->data, s_src->residual->stride[0]);
- } else {
- if (ctx->fd_stencil == 1)
- mg_interp_bilinear(ctx->priv->tp,
- s_dst, s_dst->rhs->data, s_dst->rhs->stride[0],
- s_src, s_src->residual->data, s_src->residual->stride[0]);
- else
- mg_interp_bicubic(ctx->priv->tp,
- s_dst, s_dst->rhs->data, s_dst->rhs->stride[0],
- s_src, s_src->residual->data, s_src->residual->stride[0]);
- }
-}
-
-static void prolong_solution(MG2DContext *ctx, MG2DLevel *dst, MG2DLevel *src)
-{
- EGSContext *s_src = src->solver;
- EGSContext *s_dst = dst->solver;
- if (s_dst->domain_size[0] == 2 * (s_src->domain_size[0] - 1) + 1) {
- if (ctx->fd_stencil == 1)
- mg_prolongate(s_dst, dst->prolong_tmp, dst->prolong_tmp_stride,
- s_src, s_src->u->data, s_src->u->stride[0]);
- else
- mg_prolongate_bicubic(s_dst, dst->prolong_tmp, dst->prolong_tmp_stride,
- s_src, s_src->u->data, s_src->u->stride[0]);
- } else {
- if (ctx->fd_stencil == 1)
- mg_interp_bilinear(ctx->priv->tp,
- s_dst, dst->prolong_tmp, dst->prolong_tmp_stride,
- s_src, s_src->u->data, s_src->u->stride[0]);
- else
- mg_interp_bicubic(ctx->priv->tp,
- s_dst, dst->prolong_tmp, dst->prolong_tmp_stride,
- s_src, s_src->u->data, s_src->u->stride[0]);
- }
-}
-
static int mg_relax_step(MG2DContext *ctx, MG2DLevel *level, const char *step_desc)
{
double res_old;
@@ -460,7 +169,10 @@ static int mg_solve_subgrid(MG2DContext *ctx, MG2DLevel *level)
/* restrict the residual as to the coarser-level rhs */
start = gettime();
- restrict_residual(ctx, level->child, level);
+ ret = mg2di_gt_transfer(level->transfer_restrict, level->child->solver->rhs,
+ level->solver->residual);
+ if (ret < 0)
+ return ret;
level->time_restrict += gettime() - start;
/* solve on the coarser level */
@@ -470,7 +182,10 @@ static int mg_solve_subgrid(MG2DContext *ctx, MG2DLevel *level)
/* prolongate the coarser-level correction */
start = gettime();
- prolong_solution(ctx, level, level->child);
+ ret = mg2di_gt_transfer(level->transfer_prolong, level->prolong_tmp,
+ level->child->solver->u);
+ if (ret < 0)
+ return ret;
level->time_prolong += gettime() - start;
/* apply the correction */
@@ -530,22 +245,41 @@ static void bnd_copy(MG2DBoundary *bdst, const double *src, ptrdiff_t src_stride
}
}
-static void restrict_diff_coeffs(MG2DContext *ctx, MG2DLevel *dst, MG2DLevel *src)
+static int restrict_diff_coeffs(MG2DContext *ctx, enum GridTransferOperator op,
+ EGSContext *dst, EGSContext *src)
{
- EGSContext *s_src = src->solver;
- EGSContext *s_dst = dst->solver;
- if (s_src->domain_size[0] == 2 * (s_dst->domain_size[0] - 1) + 1) {
- for (int i = 0; i < ARRAY_ELEMS(s_src->diff_coeffs); i++) {
- mg_restrict_inject(s_dst, s_dst->diff_coeffs[i]->data, s_dst->diff_coeffs[i]->stride[0],
- s_src, s_src->diff_coeffs[i]->data, s_src->diff_coeffs[i]->stride[0]);
- }
- } else {
- for (int i = 0; i < ARRAY_ELEMS(s_src->diff_coeffs); i++) {
- mg_interp_bilinear(ctx->priv->tp,
- s_dst, s_dst->diff_coeffs[i]->data, s_dst->diff_coeffs[i]->stride[0],
- s_src, s_src->diff_coeffs[i]->data, s_src->diff_coeffs[i]->stride[0]);
- }
+ MG2DInternal *priv = ctx->priv;
+ GridTransferContext *tc;
+ int ret;
+
+ tc = mg2di_gt_alloc(op);
+ if (!tc)
+ return -ENOMEM;
+
+ tc->tp = priv->tp;
+ tc->cpuflags = priv->cpuflags;
+
+ tc->src.size[0] = src->domain_size[0];
+ tc->src.size[1] = src->domain_size[1];
+ tc->src.step[0] = src->step[0];
+ tc->src.step[1] = src->step[1];
+
+ tc->dst.size[0] = dst->domain_size[0];
+ tc->dst.size[1] = dst->domain_size[1];
+ tc->dst.step[0] = dst->step[0];
+ tc->dst.step[1] = dst->step[1];
+
+ ret = mg2di_gt_init(tc);
+ if (ret < 0)
+ return ret;
+
+ for (int i = 0; i < MG2D_DIFF_COEFF_NB; i++) {
+ ret = mg2di_gt_transfer(tc, dst->diff_coeffs[i], src->diff_coeffs[i]);
+ if (ret < 0)
+ return ret;
}
+
+ return 0;
}
static double findmax(double *arr, size_t size[2], ptrdiff_t stride)
@@ -565,6 +299,7 @@ static int mg_levels_init(MG2DContext *ctx)
MG2DInternal *priv = ctx->priv;
MG2DLevel *cur, *prev;
double diff0_max, diff2_max, tmp;
+ int ret;
diff0_max = findmax(ctx->diff_coeffs[MG2D_DIFF_COEFF_00], priv->root->solver->domain_size,
ctx->diff_coeffs_stride);
@@ -591,10 +326,6 @@ static int mg_levels_init(MG2DContext *ctx)
cur->solver->step[1] = prev->solver->step[1] * ((double)(prev->solver->domain_size[1] - 1) / (cur->solver->domain_size[1] - 1));
}
- /* Set the equation coefficients. */
- if (prev)
- restrict_diff_coeffs(ctx, cur, prev);
-
for (int i = 0; i < ARRAY_ELEMS(cur->solver->boundaries); i++) {
MG2DBoundary *bsrc = ctx->boundaries[i];
MG2DBoundary *bdst = cur->solver->boundaries[i];
@@ -632,6 +363,74 @@ static int mg_levels_init(MG2DContext *ctx)
cur = cur->child;
}
+ cur = priv->root;
+ while (cur) {
+ mg2di_gt_free(&cur->transfer_restrict);
+ mg2di_gt_free(&cur->transfer_prolong);
+ if (cur->child) {
+ enum GridTransferOperator op_interp;
+ enum GridTransferOperator op_restrict;
+
+ if (ctx->fd_stencil == 1)
+ op_interp = GRID_TRANSFER_LAGRANGE_1;
+ else
+ op_interp = GRID_TRANSFER_LAGRANGE_3;
+
+ if (cur->solver->domain_size[0] == 2 * (cur->child->solver->domain_size[0] - 1) + 1)
+ op_restrict = GRID_TRANSFER_FW_1;
+ else
+ op_restrict = op_interp;
+
+ cur->transfer_restrict = mg2di_gt_alloc(op_restrict);
+ if (!cur->transfer_restrict)
+ return -ENOMEM;
+
+ cur->transfer_restrict->tp = priv->tp;
+ cur->transfer_restrict->cpuflags = priv->cpuflags;
+
+ cur->transfer_restrict->src.size[0] = cur->solver->domain_size[0];
+ cur->transfer_restrict->src.size[1] = cur->solver->domain_size[1];
+ cur->transfer_restrict->src.step[0] = cur->solver->step[0];
+ cur->transfer_restrict->src.step[1] = cur->solver->step[1];
+
+ cur->transfer_restrict->dst.size[0] = cur->child->solver->domain_size[0];
+ cur->transfer_restrict->dst.size[1] = cur->child->solver->domain_size[1];
+ cur->transfer_restrict->dst.step[0] = cur->child->solver->step[0];
+ cur->transfer_restrict->dst.step[1] = cur->child->solver->step[1];
+
+ ret = mg2di_gt_init(cur->transfer_restrict);
+ if (ret < 0)
+ return ret;
+
+ cur->transfer_prolong = mg2di_gt_alloc(op_interp);
+ if (!cur->transfer_prolong)
+ return -ENOMEM;
+
+ cur->transfer_prolong->tp = priv->tp;
+ cur->transfer_prolong->cpuflags = priv->cpuflags;
+
+ cur->transfer_prolong->dst.size[0] = cur->solver->domain_size[0];
+ cur->transfer_prolong->dst.size[1] = cur->solver->domain_size[1];
+ cur->transfer_prolong->dst.step[0] = cur->solver->step[0];
+ cur->transfer_prolong->dst.step[1] = cur->solver->step[1];
+
+ cur->transfer_prolong->src.size[0] = cur->child->solver->domain_size[0];
+ cur->transfer_prolong->src.size[1] = cur->child->solver->domain_size[1];
+ cur->transfer_prolong->src.step[0] = cur->child->solver->step[0];
+ cur->transfer_prolong->src.step[1] = cur->child->solver->step[1];
+
+ ret = mg2di_gt_init(cur->transfer_prolong);
+ if (ret < 0)
+ return ret;
+
+ ret = restrict_diff_coeffs(ctx, op_interp, cur->child->solver, cur->solver);
+ if (ret < 0)
+ return ret;
+ }
+
+ cur = cur->child;
+ }
+
return 0;
}
@@ -737,9 +536,13 @@ static void mg_level_free(MG2DLevel **plevel)
if (!level)
return;
- free(level->prolong_tmp);
+ mg2di_ndarray_free(&level->prolong_tmp);
+ mg2di_ndarray_free(&level->prolong_tmp_base);
mg2di_egs_free(&level->solver);
+ mg2di_gt_free(&level->transfer_restrict);
+ mg2di_gt_free(&level->transfer_prolong);
+
free(level);
*plevel = NULL;
}
@@ -753,10 +556,13 @@ static MG2DLevel *mg_level_alloc(enum EGSType type, const size_t domain_size)
if (!level)
return NULL;
- ret = posix_memalign((void**)&level->prolong_tmp, 32, sizeof(*level->prolong_tmp) * SQR(domain_size + 1));
- if (ret != 0)
+ ret = mg2di_ndarray_alloc(&level->prolong_tmp_base, 2, (size_t [2]){domain_size + 1, domain_size + 1}, 0);
+ if (ret < 0)
+ goto fail;
+
+ ret = mg2di_ndarray_slice(&level->prolong_tmp, level->prolong_tmp_base, (Slice [2]){ SLICE(0, -1, 1), SLICE(0, -1, 1) });
+ if (ret < 0)
goto fail;
- level->prolong_tmp_stride = domain_size + 1;
level->solver = mg2di_egs_alloc(type,
(size_t [2]){domain_size, domain_size});
diff --git a/transfer.c b/transfer.c
index 6c8f614..046d96d 100644
--- a/transfer.c
+++ b/transfer.c
@@ -98,7 +98,7 @@ static int transfer_factors_lagrange(GridTransferContext *ctx, unsigned int dim)
return -ENOMEM;
for (ptrdiff_t idx_dst = 0; idx_dst < ctx->dst.size[dim]; idx_dst++) {
- const ptrdiff_t idx_src = scale * idx_dst - (priv->stencil / 2 - 1);
+ const ptrdiff_t idx_src = (ptrdiff_t)(scale * idx_dst) - (priv->stencil / 2 - 1);
const double coord_dst = idx_dst * step_dst;
double *fact = priv->fact[dim] + priv->stencil * idx_dst;