aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnton Khirnov <anton@khirnov.net>2019-03-18 09:59:25 +0100
committerAnton Khirnov <anton@khirnov.net>2019-03-18 09:59:25 +0100
commit53e7613a1111702bb62708d1e5aff8b18fa9c9cb (patch)
treee16be777cd32554fbb08172086f6cfa6222de1b3
parent880dc969a222b7f99005a3cbb84a2aab4a8f76a7 (diff)
mg2d: add bicubic prolongation and generic interpolation
-rw-r--r--mg2d.c154
1 files changed, 146 insertions, 8 deletions
diff --git a/mg2d.c b/mg2d.c
index a6c6f5d..1370aea 100644
--- a/mg2d.c
+++ b/mg2d.c
@@ -138,6 +138,48 @@ static void mg_prolongate(EGSContext *fine, double *dst, ptrdiff_t dst_s
}
}
+static void mg_prolongate_bicubic(EGSContext *fine, double *dst, ptrdiff_t dst_stride,
+ EGSContext *coarse, const double *src, ptrdiff_t src_stride)
+{
+ // for simplicity, this code writes one point over the domain size;
+ // this allows us to avoid treating the boundary layers specially
+ // dst is allocated with extra ghost zones for this purpose
+ for (size_t j = 0; j < coarse->domain_size[1]; j++)
+ for (size_t i = 0; i < coarse->domain_size[0]; i++) {
+ const ptrdiff_t idx_src = j * src_stride + i;
+ const ptrdiff_t idx_dst = 2 * (j * dst_stride + i);
+
+ const double src00 = src[idx_src];
+ const double srcm10 = src[idx_src - 1];
+ const double srcp10 = src[idx_src + 1];
+ const double srcp20 = src[idx_src + 2];
+
+ const double srcm1m1 = src[idx_src - src_stride - 1];
+ const double src0m1 = src[idx_src - src_stride];
+ const double srcp1m1 = src[idx_src - src_stride + 1];
+ const double srcp2m1 = src[idx_src - src_stride + 2];
+
+ const double srcm1p1 = src[idx_src + src_stride - 1];
+ const double src0p1 = src[idx_src + src_stride];
+ const double srcp1p1 = src[idx_src + src_stride + 1];
+ const double srcp2p1 = src[idx_src + src_stride + 2];
+
+ const double srcm1p2 = src[idx_src + 2 * src_stride - 1];
+ const double src0p2 = src[idx_src + 2 * src_stride];
+ const double srcp1p2 = src[idx_src + 2 * src_stride + 1];
+ const double srcp2p2 = src[idx_src + 2 * src_stride + 2];
+
+ dst[idx_dst] = src00;
+ dst[idx_dst + 1] = (-1.0 * srcm10 + 9.0 * src00 + 9.0 * srcp10 - 1.0 * srcp20) / 16.0;
+ dst[idx_dst + dst_stride] = (-1.0 * src0m1 + 9.0 * src00 + 9.0 * src0p1 - 1.0 * src0p2) / 16.0;
+ dst[idx_dst + dst_stride + 1] = (- 1.0 * (-1.0 * srcm1m1 + 9.0 * src0m1 + 9.0 * srcp1m1 - 1.0 * srcp2m1)
+ + 9.0 * (-1.0 * srcm10 + 9.0 * src00 + 9.0 * srcp10 - 1.0 * srcp20)
+ + 9.0 * (-1.0 * srcm1p1 + 9.0 * src0p1 + 9.0 * srcp1p1 - 1.0 * srcp2p1)
+ - 1.0 * (-1.0 * srcm1p2 + 9.0 * src0p2 + 9.0 * srcp1p2 - 1.0 * srcp2p2)) / (16.0 * 16.0);
+ }
+}
+
+
typedef struct InterpParamsBilin {
size_t dst_domain_size[2];
ptrdiff_t src_stride;
@@ -213,6 +255,88 @@ static void mg_interp_bilinear(TPContext *tp,
tp_execute(tp, ctx_dst->domain_size[1], mg_interp_bilinear_kernel, &ip);
}
+static int mg_interp_bicubic_kernel(void *arg, unsigned int job_idx, unsigned int thread_idx)
+{
+ InterpParamsBilin *ip = arg;
+ unsigned int idx1_dst = job_idx;
+
+ double *dst = ip->dst;
+ const double *src = ip->src;
+ const double dx_dst = ip->dx_dst;
+ const double dy_dst = ip->dy_dst;
+ const double dx_src = ip->dx_src;
+ const double dy_src = ip->dy_src;
+ const double scalex = ip->scalex;
+ const double scaley = ip->scaley;
+ const ptrdiff_t src_stride = ip->src_stride;
+ const ptrdiff_t dst_stride = ip->dst_stride;
+
+ const double y = idx1_dst * dy_dst;
+ const size_t idx1_src = idx1_dst * scaley;
+
+ const double ym1 = idx1_src * dy_src;
+ const double ym2 = ym1 - dy_src;
+ const double yp1 = ym1 + dy_src;
+ const double yp2 = yp1 + dy_src;
+
+ const double fact_ym2 = (y - ym1) * (y - yp1) * (y - yp2) / ( (ym2 - ym1) * (ym2 - yp1) * (ym2 - yp2));
+ const double fact_ym1 = (y - ym2) * (y - yp1) * (y - yp2) / ((ym1 - ym2) * (ym1 - yp1) * (ym1 - yp2));
+ const double fact_yp1 = (y - ym2) * (y - ym1) * (y - yp2) / ((yp1 - ym2) * (yp1 - ym1) * (yp1 - yp2));
+ const double fact_yp2 = (y - ym2) * (y - ym1) * (y - yp1) / ((yp2 - ym2) * (yp2 - ym1) * (yp2 - yp1) );
+ const double fact_y[4] = { fact_ym2, fact_ym1, fact_yp1, fact_yp2 };
+
+ for (size_t idx0_dst = 0; idx0_dst < ip->dst_domain_size[0]; idx0_dst++) {
+ const double x = idx0_dst * dx_dst;
+ const size_t idx0_src = idx0_dst * scalex;
+ const size_t idx_src = (idx1_src - 1) * src_stride + (idx0_src - 1);
+
+ const double xm1 = idx0_src * dx_src;
+ const double xm2 = xm1 - dx_src;
+ const double xp1 = xm1 + dx_src;
+ const double xp2 = xp1 + dx_src;
+
+ const double fact_xm2 = (x - xm1) * (x - xp1) * (x - xp2) / ( (xm2 - xm1) * (xm2 - xp1) * (xm2 - xp2));
+ const double fact_xm1 = (x - xm2) * (x - xp1) * (x - xp2) / ((xm1 - xm2) * (xm1 - xp1) * (xm1 - xp2));
+ const double fact_xp1 = (x - xm2) * (x - xm1) * (x - xp2) / ((xp1 - xm2) * (xp1 - xm1) * (xp1 - xp2));
+ const double fact_xp2 = (x - xm2) * (x - xm1) * (x - xp1) / ((xp2 - xm2) * (xp2 - xm1) * (xp2 - xp1) );
+
+ const double fact_x[4] = { fact_xm2, fact_xm1, fact_xp1, fact_xp2 };
+
+ double val = 0.0;
+
+ for (int idx_y = 0; idx_y < 4; idx_y++) {
+ double tmp = 0.0;
+ for (int idx_x = 0; idx_x < 4; idx_x++)
+ tmp += fact_x[idx_x] * src[idx_src + idx_y * src_stride + idx_x];
+ val += fact_y[idx_y] * tmp;
+ }
+
+ dst[idx1_dst * dst_stride + idx0_dst] = val;
+ }
+
+ return 0;
+}
+
+static void mg_interp_bicubic(TPContext *tp,
+ EGSContext *ctx_dst, double *dst, ptrdiff_t dst_stride,
+ EGSContext *ctx_src, const double *src, ptrdiff_t src_stride)
+{
+ InterpParamsBilin ip;
+ ip.dst_domain_size[0] = ctx_dst->domain_size[0];
+ ip.dst_domain_size[1] = ctx_dst->domain_size[1];
+ ip.src_stride = src_stride;
+ ip.dst_stride = dst_stride;
+ ip.dx_dst = ctx_dst->step[0];
+ ip.dy_dst = ctx_dst->step[1];
+ ip.dx_src = ctx_src->step[0];
+ ip.dy_src = ctx_src->step[1];
+ ip.scalex = ip.dx_dst / ip.dx_src;
+ ip.scaley = ip.dy_dst / ip.dy_src;
+ ip.src = src;
+ ip.dst = dst;
+ tp_execute(tp, ctx_dst->domain_size[1], mg_interp_bicubic_kernel, &ip);
+}
+
static int coarse_correct_task(void *arg, unsigned int job_idx, unsigned int thread_idx)
{
MG2DLevel *level = arg;
@@ -233,9 +357,14 @@ static void restrict_residual(MG2DContext *ctx, MG2DLevel *dst, MG2DLevel *src)
mg_restrict_fw(s_dst, s_dst->rhs, s_dst->rhs_stride,
s_src, s_src->residual, s_src->residual_stride);
} else {
- mg_interp_bilinear(ctx->priv->tp,
- s_dst, s_dst->rhs, s_dst->rhs_stride,
- s_src, s_src->residual, s_src->residual_stride);
+ if (ctx->fd_stencil == 1)
+ mg_interp_bilinear(ctx->priv->tp,
+ s_dst, s_dst->rhs, s_dst->rhs_stride,
+ s_src, s_src->residual, s_src->residual_stride);
+ else
+ mg_interp_bicubic(ctx->priv->tp,
+ s_dst, s_dst->rhs, s_dst->rhs_stride,
+ s_src, s_src->residual, s_src->residual_stride);
}
}
@@ -244,12 +373,21 @@ static void prolong_solution(MG2DContext *ctx, MG2DLevel *dst, MG2DLevel *src)
EGSContext *s_src = src->solver;
EGSContext *s_dst = dst->solver;
if (s_dst->domain_size[0] == 2 * (s_src->domain_size[0] - 1) + 1) {
- mg_prolongate(s_dst, dst->prolong_tmp, dst->prolong_tmp_stride,
- s_src, s_src->u, s_src->u_stride);
+ if (ctx->fd_stencil == 1)
+ mg_prolongate(s_dst, dst->prolong_tmp, dst->prolong_tmp_stride,
+ s_src, s_src->u, s_src->u_stride);
+ else
+ mg_prolongate_bicubic(s_dst, dst->prolong_tmp, dst->prolong_tmp_stride,
+ s_src, s_src->u, s_src->u_stride);
} else {
- mg_interp_bilinear(ctx->priv->tp,
- s_dst, dst->prolong_tmp, dst->prolong_tmp_stride,
- s_src, s_src->u, s_src->u_stride);
+ if (ctx->fd_stencil == 1)
+ mg_interp_bilinear(ctx->priv->tp,
+ s_dst, dst->prolong_tmp, dst->prolong_tmp_stride,
+ s_src, s_src->u, s_src->u_stride);
+ else
+ mg_interp_bicubic(ctx->priv->tp,
+ s_dst, dst->prolong_tmp, dst->prolong_tmp_stride,
+ s_src, s_src->u, s_src->u_stride);
}
}