diff options
Diffstat (limited to 'ell_relax.c')
-rw-r--r-- | ell_relax.c | 133 |
1 files changed, 67 insertions, 66 deletions
diff --git a/ell_relax.c b/ell_relax.c index 891fc27..982a95a 100644 --- a/ell_relax.c +++ b/ell_relax.c @@ -28,6 +28,7 @@ #include "cpu.h" #include "ell_relax.h" #include "log.h" +#include "mg2d_constants.h" static const double relax_factors[FD_STENCIL_MAX] = { [0] = 1.0 / 5, @@ -39,7 +40,7 @@ struct EllRelaxInternal { double *u_base; double *rhs_base; double *residual_base; - double *diff_coeffs_base[ELL_RELAX_DIFF_COEFF_NB]; + double *diff_coeffs_base[MG2D_DIFF_COEFF_NB]; double *boundary_val_base[4]; @@ -48,12 +49,12 @@ struct EllRelaxInternal { void (*residual_calc_line)(size_t linesize, double *dst, double *dst_max, ptrdiff_t stride, const double *u, const double *rhs, - const double * const diff_coeffs[ELL_RELAX_DIFF_COEFF_NB], + const double * const diff_coeffs[MG2D_DIFF_COEFF_NB], const double *fd_factors); size_t calc_blocksize; ptrdiff_t residual_calc_offset; size_t residual_calc_size[2]; - double fd_factors[ELL_RELAX_DIFF_COEFF_NB]; + double fd_factors[MG2D_DIFF_COEFF_NB]; double relax_factor; TPContext *tp_internal; @@ -63,65 +64,65 @@ static const struct { unsigned int stride_idx; unsigned int is_upper; } boundary_def[] = { - [ELL_RELAX_BOUNDARY_0L] = { + [MG2D_BOUNDARY_0L] = { .stride_idx = 0, .is_upper = 0 }, - [ELL_RELAX_BOUNDARY_0U] = { + [MG2D_BOUNDARY_0U] = { .stride_idx = 0, .is_upper = 1, }, - [ELL_RELAX_BOUNDARY_1L] = { + [MG2D_BOUNDARY_1L] = { .stride_idx = 1, .is_upper = 0, }, - [ELL_RELAX_BOUNDARY_1U] = { + [MG2D_BOUNDARY_1U] = { .stride_idx = 1, .is_upper = 1, }, }; -static const double fd_denoms[][ELL_RELAX_DIFF_COEFF_NB] = { +static const double fd_denoms[][MG2D_DIFF_COEFF_NB] = { { - [ELL_RELAX_DIFF_COEFF_00] = 1.0, - [ELL_RELAX_DIFF_COEFF_10] = 2.0, - [ELL_RELAX_DIFF_COEFF_01] = 2.0, - [ELL_RELAX_DIFF_COEFF_20] = 1.0, - [ELL_RELAX_DIFF_COEFF_02] = 1.0, - [ELL_RELAX_DIFF_COEFF_11] = 4.0, + [MG2D_DIFF_COEFF_00] = 1.0, + [MG2D_DIFF_COEFF_10] = 2.0, + [MG2D_DIFF_COEFF_01] = 2.0, + [MG2D_DIFF_COEFF_20] = 1.0, + [MG2D_DIFF_COEFF_02] = 1.0, + [MG2D_DIFF_COEFF_11] = 4.0, }, { - [ELL_RELAX_DIFF_COEFF_00] = 1.0, - [ELL_RELAX_DIFF_COEFF_10] = 12.0, - [ELL_RELAX_DIFF_COEFF_01] = 12.0, - [ELL_RELAX_DIFF_COEFF_20] = 12.0, - [ELL_RELAX_DIFF_COEFF_02] = 12.0, - [ELL_RELAX_DIFF_COEFF_11] = 144.0, + [MG2D_DIFF_COEFF_00] = 1.0, + [MG2D_DIFF_COEFF_10] = 12.0, + [MG2D_DIFF_COEFF_01] = 12.0, + [MG2D_DIFF_COEFF_20] = 12.0, + [MG2D_DIFF_COEFF_02] = 12.0, + [MG2D_DIFF_COEFF_11] = 144.0, }, }; #if HAVE_EXTERNAL_ASM void mg2di_residual_calc_line_s1_fma3(size_t linesize, double *dst, double *dst_max, ptrdiff_t stride, const double *u, const double *rhs, - const double * const diff_coeffs[ELL_RELAX_DIFF_COEFF_NB], + const double * const diff_coeffs[MG2D_DIFF_COEFF_NB], const double *fd_factors); void mg2di_residual_calc_line_s2_fma3(size_t linesize, double *dst, double *dst_max, ptrdiff_t stride, const double *u, const double *rhs, - const double * const diff_coeffs[ELL_RELAX_DIFF_COEFF_NB], + const double * const diff_coeffs[MG2D_DIFF_COEFF_NB], const double *fd_factors); #endif static void derivatives_calc_s1(double *dst, const double *u, const double *fd_factors, ptrdiff_t stride) { - dst[ELL_RELAX_DIFF_COEFF_00] = u[0]; - dst[ELL_RELAX_DIFF_COEFF_10] = (u[1] - u[-1]) * fd_factors[ELL_RELAX_DIFF_COEFF_10]; - dst[ELL_RELAX_DIFF_COEFF_01] = (u[stride] - u[-stride]) * fd_factors[ELL_RELAX_DIFF_COEFF_01]; + dst[MG2D_DIFF_COEFF_00] = u[0]; + dst[MG2D_DIFF_COEFF_10] = (u[1] - u[-1]) * fd_factors[MG2D_DIFF_COEFF_10]; + dst[MG2D_DIFF_COEFF_01] = (u[stride] - u[-stride]) * fd_factors[MG2D_DIFF_COEFF_01]; - dst[ELL_RELAX_DIFF_COEFF_20] = (u[1] - 2.0 * u[0] + u[-1]) * fd_factors[ELL_RELAX_DIFF_COEFF_20]; - dst[ELL_RELAX_DIFF_COEFF_02] = (u[stride] - 2.0 * u[0] + u[-stride]) * fd_factors[ELL_RELAX_DIFF_COEFF_02]; + dst[MG2D_DIFF_COEFF_20] = (u[1] - 2.0 * u[0] + u[-1]) * fd_factors[MG2D_DIFF_COEFF_20]; + dst[MG2D_DIFF_COEFF_02] = (u[stride] - 2.0 * u[0] + u[-stride]) * fd_factors[MG2D_DIFF_COEFF_02]; - dst[ELL_RELAX_DIFF_COEFF_11] = (u[1 + stride] - u[stride - 1] - u[-stride + 1] + u[-stride - 1]) * fd_factors[ELL_RELAX_DIFF_COEFF_11]; + dst[MG2D_DIFF_COEFF_11] = (u[1 + stride] - u[stride - 1] - u[-stride + 1] + u[-stride - 1]) * fd_factors[MG2D_DIFF_COEFF_11]; } static void @@ -158,27 +159,27 @@ derivatives_calc_s2(double *dst, const double *u, const double *fd_factors, ptrd const double valxm2ym1 = u[-2 - 1 * stride]; const double valxm2ym2 = u[-2 - 2 * stride]; - dst[ELL_RELAX_DIFF_COEFF_00] = val; - dst[ELL_RELAX_DIFF_COEFF_10] = (-1.0 * valxp2 + 8.0 * valxp1 - 8.0 * valxm1 + 1.0 * valxm2) * fd_factors[ELL_RELAX_DIFF_COEFF_10]; - dst[ELL_RELAX_DIFF_COEFF_01] = (-1.0 * valyp2 + 8.0 * valyp1 - 8.0 * valym1 + 1.0 * valym2) * fd_factors[ELL_RELAX_DIFF_COEFF_01]; + dst[MG2D_DIFF_COEFF_00] = val; + dst[MG2D_DIFF_COEFF_10] = (-1.0 * valxp2 + 8.0 * valxp1 - 8.0 * valxm1 + 1.0 * valxm2) * fd_factors[MG2D_DIFF_COEFF_10]; + dst[MG2D_DIFF_COEFF_01] = (-1.0 * valyp2 + 8.0 * valyp1 - 8.0 * valym1 + 1.0 * valym2) * fd_factors[MG2D_DIFF_COEFF_01]; - dst[ELL_RELAX_DIFF_COEFF_20] = (-1.0 * valxp2 + 16.0 * valxp1 - 30.0 * val + 16.0 * valxm1 - 1.0 * valxm2) * fd_factors[ELL_RELAX_DIFF_COEFF_20]; - dst[ELL_RELAX_DIFF_COEFF_02] = (-1.0 * valyp2 + 16.0 * valyp1 - 30.0 * val + 16.0 * valym1 - 1.0 * valym2) * fd_factors[ELL_RELAX_DIFF_COEFF_02]; + dst[MG2D_DIFF_COEFF_20] = (-1.0 * valxp2 + 16.0 * valxp1 - 30.0 * val + 16.0 * valxm1 - 1.0 * valxm2) * fd_factors[MG2D_DIFF_COEFF_20]; + dst[MG2D_DIFF_COEFF_02] = (-1.0 * valyp2 + 16.0 * valyp1 - 30.0 * val + 16.0 * valym1 - 1.0 * valym2) * fd_factors[MG2D_DIFF_COEFF_02]; - dst[ELL_RELAX_DIFF_COEFF_11] = ( 1.0 * valxp2yp2 - 8.0 * valxp2yp1 + 8.0 * valxp2ym1 - 1.0 * valxp2ym2 - -8.0 * valxp1yp2 + 64.0 * valxp1yp1 - 64.0 * valxp1ym1 + 8.0 * valxp1ym2 - +8.0 * valxm1yp2 - 64.0 * valxm1yp1 + 64.0 * valxm1ym1 - 8.0 * valxm1ym2 - -1.0 * valxm2yp2 + 8.0 * valxm2yp1 - 8.0 * valxm2ym1 + 1.0 * valxm2ym2) * fd_factors[ELL_RELAX_DIFF_COEFF_11]; + dst[MG2D_DIFF_COEFF_11] = ( 1.0 * valxp2yp2 - 8.0 * valxp2yp1 + 8.0 * valxp2ym1 - 1.0 * valxp2ym2 + -8.0 * valxp1yp2 + 64.0 * valxp1yp1 - 64.0 * valxp1ym1 + 8.0 * valxp1ym2 + +8.0 * valxm1yp2 - 64.0 * valxm1yp1 + 64.0 * valxm1ym1 - 8.0 * valxm1ym2 + -1.0 * valxm2yp2 + 8.0 * valxm2yp1 - 8.0 * valxm2ym1 + 1.0 * valxm2ym2) * fd_factors[MG2D_DIFF_COEFF_11]; } static void residual_calc_line_s1_c(size_t linesize, double *dst, double *dst_max, ptrdiff_t stride, const double *u, const double *rhs, - const double * const diff_coeffs[ELL_RELAX_DIFF_COEFF_NB], + const double * const diff_coeffs[MG2D_DIFF_COEFF_NB], const double *fd_factors) { double res_max = 0.0, res_abs; for (size_t i = 0; i < linesize; i++) { - double u_vals[ELL_RELAX_DIFF_COEFF_NB]; + double u_vals[MG2D_DIFF_COEFF_NB]; double res; derivatives_calc_s1(u_vals, u + i, fd_factors, stride); @@ -197,12 +198,12 @@ static void residual_calc_line_s1_c(size_t linesize, double *dst, double *dst_ma static void residual_calc_line_s2_c(size_t linesize, double *dst, double *dst_max, ptrdiff_t stride, const double *u, const double *rhs, - const double * const diff_coeffs[ELL_RELAX_DIFF_COEFF_NB], + const double * const diff_coeffs[MG2D_DIFF_COEFF_NB], const double *fd_factors) { double res_max = 0.0, res_abs; for (size_t i = 0; i < linesize; i++) { - double u_vals[ELL_RELAX_DIFF_COEFF_NB]; + double u_vals[MG2D_DIFF_COEFF_NB]; double res; derivatives_calc_s2(u_vals, u + i, fd_factors, stride); @@ -224,7 +225,7 @@ static void residual_calc_task(void *arg, unsigned int job_idx, unsigned int thr EllRelaxContext *ctx = arg; EllRelaxInternal *priv = ctx->priv; const ptrdiff_t offset = priv->residual_calc_offset + job_idx * priv->stride; - const double *diff_coeffs[ELL_RELAX_DIFF_COEFF_NB]; + const double *diff_coeffs[MG2D_DIFF_COEFF_NB]; for (int i = 0; i < ARRAY_ELEMS(diff_coeffs); i++) diff_coeffs[i] = ctx->diff_coeffs[i] + offset; @@ -298,11 +299,11 @@ static void boundaries_apply(EllRelaxContext *ctx) (boundary_def[i].is_upper ? 1 : -1) * stride_offset }; switch (ctx->boundaries[i].type) { - case ELL_RELAX_BC_TYPE_FIXVAL: + case MG2D_BC_TYPE_FIXVAL: boundaries_apply_fixval(dst, dst_strides, ctx->boundaries[i].val, ctx->boundaries[i].val_stride, size_boundary); break; - case ELL_RELAX_BC_TYPE_FIXDIFF: + case MG2D_BC_TYPE_FIXDIFF: boundaries_apply_fixdiff(dst, dst_strides, ctx->boundaries[i].val, ctx->boundaries[i].val_stride, size_boundary); break; @@ -310,12 +311,12 @@ static void boundaries_apply(EllRelaxContext *ctx) } /* fill in the corner ghosts */ - if (ctx->boundaries[ELL_RELAX_BOUNDARY_0L].type == ELL_RELAX_BC_TYPE_FIXDIFF || - ctx->boundaries[ELL_RELAX_BOUNDARY_1L].type == ELL_RELAX_BC_TYPE_FIXDIFF) { + if (ctx->boundaries[MG2D_BOUNDARY_0L].type == MG2D_BC_TYPE_FIXDIFF || + ctx->boundaries[MG2D_BOUNDARY_1L].type == MG2D_BC_TYPE_FIXDIFF) { double *dst = ctx->u; int fact_x = -1, fact_z = -1; - if (ctx->boundaries[ELL_RELAX_BOUNDARY_0L].type == ELL_RELAX_BC_TYPE_FIXDIFF) + if (ctx->boundaries[MG2D_BOUNDARY_0L].type == MG2D_BC_TYPE_FIXDIFF) fact_z *= -1; else fact_x *= -1; @@ -327,12 +328,12 @@ static void boundaries_apply(EllRelaxContext *ctx) dst[idx_dst] = dst[idx_src]; } } - if (ctx->boundaries[ELL_RELAX_BOUNDARY_0L].type == ELL_RELAX_BC_TYPE_FIXDIFF || - ctx->boundaries[ELL_RELAX_BOUNDARY_1U].type == ELL_RELAX_BC_TYPE_FIXDIFF) { + if (ctx->boundaries[MG2D_BOUNDARY_0L].type == MG2D_BC_TYPE_FIXDIFF || + ctx->boundaries[MG2D_BOUNDARY_1U].type == MG2D_BC_TYPE_FIXDIFF) { double *dst = ctx->u + ctx->domain_size[0] - 1; int fact_x = 1, fact_z = -1; - if (ctx->boundaries[ELL_RELAX_BOUNDARY_0L].type == ELL_RELAX_BC_TYPE_FIXDIFF) + if (ctx->boundaries[MG2D_BOUNDARY_0L].type == MG2D_BC_TYPE_FIXDIFF) fact_z *= -1; else fact_x *= -1; @@ -344,12 +345,12 @@ static void boundaries_apply(EllRelaxContext *ctx) dst[idx_dst] = dst[idx_src]; } } - if (ctx->boundaries[ELL_RELAX_BOUNDARY_1L].type == ELL_RELAX_BC_TYPE_FIXDIFF || - ctx->boundaries[ELL_RELAX_BOUNDARY_0U].type == ELL_RELAX_BC_TYPE_FIXDIFF) { + if (ctx->boundaries[MG2D_BOUNDARY_1L].type == MG2D_BC_TYPE_FIXDIFF || + ctx->boundaries[MG2D_BOUNDARY_0U].type == MG2D_BC_TYPE_FIXDIFF) { double *dst = ctx->u + strides[1] * (ctx->domain_size[1] - 1); int fact_x = -1, fact_z = 1; - if (ctx->boundaries[ELL_RELAX_BOUNDARY_0U].type == ELL_RELAX_BC_TYPE_FIXDIFF) + if (ctx->boundaries[MG2D_BOUNDARY_0U].type == MG2D_BC_TYPE_FIXDIFF) fact_z *= -1; else fact_x *= -1; @@ -361,12 +362,12 @@ static void boundaries_apply(EllRelaxContext *ctx) dst[idx_dst] = dst[idx_src]; } } - if (ctx->boundaries[ELL_RELAX_BOUNDARY_0U].type == ELL_RELAX_BC_TYPE_FIXDIFF || - ctx->boundaries[ELL_RELAX_BOUNDARY_1U].type == ELL_RELAX_BC_TYPE_FIXDIFF) { + if (ctx->boundaries[MG2D_BOUNDARY_0U].type == MG2D_BC_TYPE_FIXDIFF || + ctx->boundaries[MG2D_BOUNDARY_1U].type == MG2D_BC_TYPE_FIXDIFF) { double *dst = ctx->u + strides[1] * (ctx->domain_size[1] - 1) + ctx->domain_size[0] - 1; int fact_x = 1, fact_z = 1; - if (ctx->boundaries[ELL_RELAX_BOUNDARY_0U].type == ELL_RELAX_BC_TYPE_FIXDIFF) + if (ctx->boundaries[MG2D_BOUNDARY_0U].type == MG2D_BC_TYPE_FIXDIFF) fact_z *= -1; else fact_x *= -1; @@ -457,12 +458,12 @@ int mg2di_ell_relax_init(EllRelaxContext *ctx) priv->relax_factor = ctx->relax_factor; priv->relax_factor *= ctx->step[0] * ctx->step[0]; - priv->fd_factors[ELL_RELAX_DIFF_COEFF_00] = 1.0 / fd_denoms[ctx->fd_stencil - 1][ELL_RELAX_DIFF_COEFF_00]; - priv->fd_factors[ELL_RELAX_DIFF_COEFF_10] = 1.0 / (fd_denoms[ctx->fd_stencil - 1][ELL_RELAX_DIFF_COEFF_10] * ctx->step[0]); - priv->fd_factors[ELL_RELAX_DIFF_COEFF_01] = 1.0 / (fd_denoms[ctx->fd_stencil - 1][ELL_RELAX_DIFF_COEFF_01] * ctx->step[1]); - priv->fd_factors[ELL_RELAX_DIFF_COEFF_20] = 1.0 / (fd_denoms[ctx->fd_stencil - 1][ELL_RELAX_DIFF_COEFF_20] * SQR(ctx->step[0])); - priv->fd_factors[ELL_RELAX_DIFF_COEFF_02] = 1.0 / (fd_denoms[ctx->fd_stencil - 1][ELL_RELAX_DIFF_COEFF_02] * SQR(ctx->step[1])); - priv->fd_factors[ELL_RELAX_DIFF_COEFF_11] = 1.0 / (fd_denoms[ctx->fd_stencil - 1][ELL_RELAX_DIFF_COEFF_11] * ctx->step[0] * ctx->step[1]); + priv->fd_factors[MG2D_DIFF_COEFF_00] = 1.0 / fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_00]; + priv->fd_factors[MG2D_DIFF_COEFF_10] = 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_10] * ctx->step[0]); + priv->fd_factors[MG2D_DIFF_COEFF_01] = 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_01] * ctx->step[1]); + priv->fd_factors[MG2D_DIFF_COEFF_20] = 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_20] * SQR(ctx->step[0])); + priv->fd_factors[MG2D_DIFF_COEFF_02] = 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_02] * SQR(ctx->step[1])); + priv->fd_factors[MG2D_DIFF_COEFF_11] = 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_11] * ctx->step[0] * ctx->step[1]); if (!ctx->tp) { ret = tp_init(&priv->tp_internal, 1); @@ -474,20 +475,20 @@ int mg2di_ell_relax_init(EllRelaxContext *ctx) priv->residual_calc_offset = 0; priv->residual_calc_size[0] = ctx->domain_size[0]; priv->residual_calc_size[1] = ctx->domain_size[1]; - if (ctx->boundaries[ELL_RELAX_BOUNDARY_0L].type == ELL_RELAX_BC_TYPE_FIXVAL) + if (ctx->boundaries[MG2D_BOUNDARY_0L].type == MG2D_BC_TYPE_FIXVAL) priv->residual_calc_offset += ctx->residual_stride; - if (ctx->boundaries[ELL_RELAX_BOUNDARY_1L].type == ELL_RELAX_BC_TYPE_FIXVAL) + if (ctx->boundaries[MG2D_BOUNDARY_1L].type == MG2D_BC_TYPE_FIXVAL) priv->residual_calc_offset++; for (int i = 0; i < ARRAY_ELEMS(ctx->boundaries); i++) { EllRelaxBoundary *bnd = &ctx->boundaries[i]; - if (bnd->type == ELL_RELAX_BC_TYPE_FIXDIFF) { + if (bnd->type == MG2D_BC_TYPE_FIXDIFF) { for (int k = 0; k < ctx->domain_size[boundary_def[i].stride_idx]; k++) if (bnd->val[k] != 0.0) { mg2di_log(&ctx->logger, 0, "Only zero boundary derivative supported\n"); return -ENOSYS; } - } else if (bnd->type == ELL_RELAX_BC_TYPE_FIXVAL) { + } else if (bnd->type == MG2D_BC_TYPE_FIXVAL) { priv->residual_calc_size[!boundary_def[i].stride_idx]--; } } |