From 53e7613a1111702bb62708d1e5aff8b18fa9c9cb Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Mon, 18 Mar 2019 09:59:25 +0100 Subject: mg2d: add bicubic prolongation and generic interpolation --- mg2d.c | 154 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 146 insertions(+), 8 deletions(-) diff --git a/mg2d.c b/mg2d.c index a6c6f5d..1370aea 100644 --- a/mg2d.c +++ b/mg2d.c @@ -138,6 +138,48 @@ static void mg_prolongate(EGSContext *fine, double *dst, ptrdiff_t dst_s } } +static void mg_prolongate_bicubic(EGSContext *fine, double *dst, ptrdiff_t dst_stride, + EGSContext *coarse, const double *src, ptrdiff_t src_stride) +{ + // for simplicity, this code writes one point over the domain size; + // this allows us to avoid treating the boundary layers specially + // dst is allocated with extra ghost zones for this purpose + for (size_t j = 0; j < coarse->domain_size[1]; j++) + for (size_t i = 0; i < coarse->domain_size[0]; i++) { + const ptrdiff_t idx_src = j * src_stride + i; + const ptrdiff_t idx_dst = 2 * (j * dst_stride + i); + + const double src00 = src[idx_src]; + const double srcm10 = src[idx_src - 1]; + const double srcp10 = src[idx_src + 1]; + const double srcp20 = src[idx_src + 2]; + + const double srcm1m1 = src[idx_src - src_stride - 1]; + const double src0m1 = src[idx_src - src_stride]; + const double srcp1m1 = src[idx_src - src_stride + 1]; + const double srcp2m1 = src[idx_src - src_stride + 2]; + + const double srcm1p1 = src[idx_src + src_stride - 1]; + const double src0p1 = src[idx_src + src_stride]; + const double srcp1p1 = src[idx_src + src_stride + 1]; + const double srcp2p1 = src[idx_src + src_stride + 2]; + + const double srcm1p2 = src[idx_src + 2 * src_stride - 1]; + const double src0p2 = src[idx_src + 2 * src_stride]; + const double srcp1p2 = src[idx_src + 2 * src_stride + 1]; + const double srcp2p2 = src[idx_src + 2 * src_stride + 2]; + + dst[idx_dst] = src00; + dst[idx_dst + 1] = (-1.0 * srcm10 + 9.0 * src00 + 9.0 * srcp10 - 1.0 * srcp20) / 16.0; + dst[idx_dst + dst_stride] = (-1.0 * src0m1 + 9.0 * src00 + 9.0 * src0p1 - 1.0 * src0p2) / 16.0; + dst[idx_dst + dst_stride + 1] = (- 1.0 * (-1.0 * srcm1m1 + 9.0 * src0m1 + 9.0 * srcp1m1 - 1.0 * srcp2m1) + + 9.0 * (-1.0 * srcm10 + 9.0 * src00 + 9.0 * srcp10 - 1.0 * srcp20) + + 9.0 * (-1.0 * srcm1p1 + 9.0 * src0p1 + 9.0 * srcp1p1 - 1.0 * srcp2p1) + - 1.0 * (-1.0 * srcm1p2 + 9.0 * src0p2 + 9.0 * srcp1p2 - 1.0 * srcp2p2)) / (16.0 * 16.0); + } +} + + typedef struct InterpParamsBilin { size_t dst_domain_size[2]; ptrdiff_t src_stride; @@ -213,6 +255,88 @@ static void mg_interp_bilinear(TPContext *tp, tp_execute(tp, ctx_dst->domain_size[1], mg_interp_bilinear_kernel, &ip); } +static int mg_interp_bicubic_kernel(void *arg, unsigned int job_idx, unsigned int thread_idx) +{ + InterpParamsBilin *ip = arg; + unsigned int idx1_dst = job_idx; + + double *dst = ip->dst; + const double *src = ip->src; + const double dx_dst = ip->dx_dst; + const double dy_dst = ip->dy_dst; + const double dx_src = ip->dx_src; + const double dy_src = ip->dy_src; + const double scalex = ip->scalex; + const double scaley = ip->scaley; + const ptrdiff_t src_stride = ip->src_stride; + const ptrdiff_t dst_stride = ip->dst_stride; + + const double y = idx1_dst * dy_dst; + const size_t idx1_src = idx1_dst * scaley; + + const double ym1 = idx1_src * dy_src; + const double ym2 = ym1 - dy_src; + const double yp1 = ym1 + dy_src; + const double yp2 = yp1 + dy_src; + + const double fact_ym2 = (y - ym1) * (y - yp1) * (y - yp2) / ( (ym2 - ym1) * (ym2 - yp1) * (ym2 - yp2)); + const double fact_ym1 = (y - ym2) * (y - yp1) * (y - yp2) / ((ym1 - ym2) * (ym1 - yp1) * (ym1 - yp2)); + const double fact_yp1 = (y - ym2) * (y - ym1) * (y - yp2) / ((yp1 - ym2) * (yp1 - ym1) * (yp1 - yp2)); + const double fact_yp2 = (y - ym2) * (y - ym1) * (y - yp1) / ((yp2 - ym2) * (yp2 - ym1) * (yp2 - yp1) ); + const double fact_y[4] = { fact_ym2, fact_ym1, fact_yp1, fact_yp2 }; + + for (size_t idx0_dst = 0; idx0_dst < ip->dst_domain_size[0]; idx0_dst++) { + const double x = idx0_dst * dx_dst; + const size_t idx0_src = idx0_dst * scalex; + const size_t idx_src = (idx1_src - 1) * src_stride + (idx0_src - 1); + + const double xm1 = idx0_src * dx_src; + const double xm2 = xm1 - dx_src; + const double xp1 = xm1 + dx_src; + const double xp2 = xp1 + dx_src; + + const double fact_xm2 = (x - xm1) * (x - xp1) * (x - xp2) / ( (xm2 - xm1) * (xm2 - xp1) * (xm2 - xp2)); + const double fact_xm1 = (x - xm2) * (x - xp1) * (x - xp2) / ((xm1 - xm2) * (xm1 - xp1) * (xm1 - xp2)); + const double fact_xp1 = (x - xm2) * (x - xm1) * (x - xp2) / ((xp1 - xm2) * (xp1 - xm1) * (xp1 - xp2)); + const double fact_xp2 = (x - xm2) * (x - xm1) * (x - xp1) / ((xp2 - xm2) * (xp2 - xm1) * (xp2 - xp1) ); + + const double fact_x[4] = { fact_xm2, fact_xm1, fact_xp1, fact_xp2 }; + + double val = 0.0; + + for (int idx_y = 0; idx_y < 4; idx_y++) { + double tmp = 0.0; + for (int idx_x = 0; idx_x < 4; idx_x++) + tmp += fact_x[idx_x] * src[idx_src + idx_y * src_stride + idx_x]; + val += fact_y[idx_y] * tmp; + } + + dst[idx1_dst * dst_stride + idx0_dst] = val; + } + + return 0; +} + +static void mg_interp_bicubic(TPContext *tp, + EGSContext *ctx_dst, double *dst, ptrdiff_t dst_stride, + EGSContext *ctx_src, const double *src, ptrdiff_t src_stride) +{ + InterpParamsBilin ip; + ip.dst_domain_size[0] = ctx_dst->domain_size[0]; + ip.dst_domain_size[1] = ctx_dst->domain_size[1]; + ip.src_stride = src_stride; + ip.dst_stride = dst_stride; + ip.dx_dst = ctx_dst->step[0]; + ip.dy_dst = ctx_dst->step[1]; + ip.dx_src = ctx_src->step[0]; + ip.dy_src = ctx_src->step[1]; + ip.scalex = ip.dx_dst / ip.dx_src; + ip.scaley = ip.dy_dst / ip.dy_src; + ip.src = src; + ip.dst = dst; + tp_execute(tp, ctx_dst->domain_size[1], mg_interp_bicubic_kernel, &ip); +} + static int coarse_correct_task(void *arg, unsigned int job_idx, unsigned int thread_idx) { MG2DLevel *level = arg; @@ -233,9 +357,14 @@ static void restrict_residual(MG2DContext *ctx, MG2DLevel *dst, MG2DLevel *src) mg_restrict_fw(s_dst, s_dst->rhs, s_dst->rhs_stride, s_src, s_src->residual, s_src->residual_stride); } else { - mg_interp_bilinear(ctx->priv->tp, - s_dst, s_dst->rhs, s_dst->rhs_stride, - s_src, s_src->residual, s_src->residual_stride); + if (ctx->fd_stencil == 1) + mg_interp_bilinear(ctx->priv->tp, + s_dst, s_dst->rhs, s_dst->rhs_stride, + s_src, s_src->residual, s_src->residual_stride); + else + mg_interp_bicubic(ctx->priv->tp, + s_dst, s_dst->rhs, s_dst->rhs_stride, + s_src, s_src->residual, s_src->residual_stride); } } @@ -244,12 +373,21 @@ static void prolong_solution(MG2DContext *ctx, MG2DLevel *dst, MG2DLevel *src) EGSContext *s_src = src->solver; EGSContext *s_dst = dst->solver; if (s_dst->domain_size[0] == 2 * (s_src->domain_size[0] - 1) + 1) { - mg_prolongate(s_dst, dst->prolong_tmp, dst->prolong_tmp_stride, - s_src, s_src->u, s_src->u_stride); + if (ctx->fd_stencil == 1) + mg_prolongate(s_dst, dst->prolong_tmp, dst->prolong_tmp_stride, + s_src, s_src->u, s_src->u_stride); + else + mg_prolongate_bicubic(s_dst, dst->prolong_tmp, dst->prolong_tmp_stride, + s_src, s_src->u, s_src->u_stride); } else { - mg_interp_bilinear(ctx->priv->tp, - s_dst, dst->prolong_tmp, dst->prolong_tmp_stride, - s_src, s_src->u, s_src->u_stride); + if (ctx->fd_stencil == 1) + mg_interp_bilinear(ctx->priv->tp, + s_dst, dst->prolong_tmp, dst->prolong_tmp_stride, + s_src, s_src->u, s_src->u_stride); + else + mg_interp_bicubic(ctx->priv->tp, + s_dst, dst->prolong_tmp, dst->prolong_tmp_stride, + s_src, s_src->u, s_src->u_stride); } } -- cgit v1.2.3