summaryrefslogtreecommitdiff
path: root/ell_relax.c
diff options
context:
space:
mode:
Diffstat (limited to 'ell_relax.c')
-rw-r--r--ell_relax.c133
1 files changed, 67 insertions, 66 deletions
diff --git a/ell_relax.c b/ell_relax.c
index 891fc27..982a95a 100644
--- a/ell_relax.c
+++ b/ell_relax.c
@@ -28,6 +28,7 @@
#include "cpu.h"
#include "ell_relax.h"
#include "log.h"
+#include "mg2d_constants.h"
static const double relax_factors[FD_STENCIL_MAX] = {
[0] = 1.0 / 5,
@@ -39,7 +40,7 @@ struct EllRelaxInternal {
double *u_base;
double *rhs_base;
double *residual_base;
- double *diff_coeffs_base[ELL_RELAX_DIFF_COEFF_NB];
+ double *diff_coeffs_base[MG2D_DIFF_COEFF_NB];
double *boundary_val_base[4];
@@ -48,12 +49,12 @@ struct EllRelaxInternal {
void (*residual_calc_line)(size_t linesize, double *dst, double *dst_max,
ptrdiff_t stride, const double *u, const double *rhs,
- const double * const diff_coeffs[ELL_RELAX_DIFF_COEFF_NB],
+ const double * const diff_coeffs[MG2D_DIFF_COEFF_NB],
const double *fd_factors);
size_t calc_blocksize;
ptrdiff_t residual_calc_offset;
size_t residual_calc_size[2];
- double fd_factors[ELL_RELAX_DIFF_COEFF_NB];
+ double fd_factors[MG2D_DIFF_COEFF_NB];
double relax_factor;
TPContext *tp_internal;
@@ -63,65 +64,65 @@ static const struct {
unsigned int stride_idx;
unsigned int is_upper;
} boundary_def[] = {
- [ELL_RELAX_BOUNDARY_0L] = {
+ [MG2D_BOUNDARY_0L] = {
.stride_idx = 0,
.is_upper = 0
},
- [ELL_RELAX_BOUNDARY_0U] = {
+ [MG2D_BOUNDARY_0U] = {
.stride_idx = 0,
.is_upper = 1,
},
- [ELL_RELAX_BOUNDARY_1L] = {
+ [MG2D_BOUNDARY_1L] = {
.stride_idx = 1,
.is_upper = 0,
},
- [ELL_RELAX_BOUNDARY_1U] = {
+ [MG2D_BOUNDARY_1U] = {
.stride_idx = 1,
.is_upper = 1,
},
};
-static const double fd_denoms[][ELL_RELAX_DIFF_COEFF_NB] = {
+static const double fd_denoms[][MG2D_DIFF_COEFF_NB] = {
{
- [ELL_RELAX_DIFF_COEFF_00] = 1.0,
- [ELL_RELAX_DIFF_COEFF_10] = 2.0,
- [ELL_RELAX_DIFF_COEFF_01] = 2.0,
- [ELL_RELAX_DIFF_COEFF_20] = 1.0,
- [ELL_RELAX_DIFF_COEFF_02] = 1.0,
- [ELL_RELAX_DIFF_COEFF_11] = 4.0,
+ [MG2D_DIFF_COEFF_00] = 1.0,
+ [MG2D_DIFF_COEFF_10] = 2.0,
+ [MG2D_DIFF_COEFF_01] = 2.0,
+ [MG2D_DIFF_COEFF_20] = 1.0,
+ [MG2D_DIFF_COEFF_02] = 1.0,
+ [MG2D_DIFF_COEFF_11] = 4.0,
},
{
- [ELL_RELAX_DIFF_COEFF_00] = 1.0,
- [ELL_RELAX_DIFF_COEFF_10] = 12.0,
- [ELL_RELAX_DIFF_COEFF_01] = 12.0,
- [ELL_RELAX_DIFF_COEFF_20] = 12.0,
- [ELL_RELAX_DIFF_COEFF_02] = 12.0,
- [ELL_RELAX_DIFF_COEFF_11] = 144.0,
+ [MG2D_DIFF_COEFF_00] = 1.0,
+ [MG2D_DIFF_COEFF_10] = 12.0,
+ [MG2D_DIFF_COEFF_01] = 12.0,
+ [MG2D_DIFF_COEFF_20] = 12.0,
+ [MG2D_DIFF_COEFF_02] = 12.0,
+ [MG2D_DIFF_COEFF_11] = 144.0,
},
};
#if HAVE_EXTERNAL_ASM
void mg2di_residual_calc_line_s1_fma3(size_t linesize, double *dst, double *dst_max,
ptrdiff_t stride, const double *u, const double *rhs,
- const double * const diff_coeffs[ELL_RELAX_DIFF_COEFF_NB],
+ const double * const diff_coeffs[MG2D_DIFF_COEFF_NB],
const double *fd_factors);
void mg2di_residual_calc_line_s2_fma3(size_t linesize, double *dst, double *dst_max,
ptrdiff_t stride, const double *u, const double *rhs,
- const double * const diff_coeffs[ELL_RELAX_DIFF_COEFF_NB],
+ const double * const diff_coeffs[MG2D_DIFF_COEFF_NB],
const double *fd_factors);
#endif
static void
derivatives_calc_s1(double *dst, const double *u, const double *fd_factors, ptrdiff_t stride)
{
- dst[ELL_RELAX_DIFF_COEFF_00] = u[0];
- dst[ELL_RELAX_DIFF_COEFF_10] = (u[1] - u[-1]) * fd_factors[ELL_RELAX_DIFF_COEFF_10];
- dst[ELL_RELAX_DIFF_COEFF_01] = (u[stride] - u[-stride]) * fd_factors[ELL_RELAX_DIFF_COEFF_01];
+ dst[MG2D_DIFF_COEFF_00] = u[0];
+ dst[MG2D_DIFF_COEFF_10] = (u[1] - u[-1]) * fd_factors[MG2D_DIFF_COEFF_10];
+ dst[MG2D_DIFF_COEFF_01] = (u[stride] - u[-stride]) * fd_factors[MG2D_DIFF_COEFF_01];
- dst[ELL_RELAX_DIFF_COEFF_20] = (u[1] - 2.0 * u[0] + u[-1]) * fd_factors[ELL_RELAX_DIFF_COEFF_20];
- dst[ELL_RELAX_DIFF_COEFF_02] = (u[stride] - 2.0 * u[0] + u[-stride]) * fd_factors[ELL_RELAX_DIFF_COEFF_02];
+ dst[MG2D_DIFF_COEFF_20] = (u[1] - 2.0 * u[0] + u[-1]) * fd_factors[MG2D_DIFF_COEFF_20];
+ dst[MG2D_DIFF_COEFF_02] = (u[stride] - 2.0 * u[0] + u[-stride]) * fd_factors[MG2D_DIFF_COEFF_02];
- dst[ELL_RELAX_DIFF_COEFF_11] = (u[1 + stride] - u[stride - 1] - u[-stride + 1] + u[-stride - 1]) * fd_factors[ELL_RELAX_DIFF_COEFF_11];
+ dst[MG2D_DIFF_COEFF_11] = (u[1 + stride] - u[stride - 1] - u[-stride + 1] + u[-stride - 1]) * fd_factors[MG2D_DIFF_COEFF_11];
}
static void
@@ -158,27 +159,27 @@ derivatives_calc_s2(double *dst, const double *u, const double *fd_factors, ptrd
const double valxm2ym1 = u[-2 - 1 * stride];
const double valxm2ym2 = u[-2 - 2 * stride];
- dst[ELL_RELAX_DIFF_COEFF_00] = val;
- dst[ELL_RELAX_DIFF_COEFF_10] = (-1.0 * valxp2 + 8.0 * valxp1 - 8.0 * valxm1 + 1.0 * valxm2) * fd_factors[ELL_RELAX_DIFF_COEFF_10];
- dst[ELL_RELAX_DIFF_COEFF_01] = (-1.0 * valyp2 + 8.0 * valyp1 - 8.0 * valym1 + 1.0 * valym2) * fd_factors[ELL_RELAX_DIFF_COEFF_01];
+ dst[MG2D_DIFF_COEFF_00] = val;
+ dst[MG2D_DIFF_COEFF_10] = (-1.0 * valxp2 + 8.0 * valxp1 - 8.0 * valxm1 + 1.0 * valxm2) * fd_factors[MG2D_DIFF_COEFF_10];
+ dst[MG2D_DIFF_COEFF_01] = (-1.0 * valyp2 + 8.0 * valyp1 - 8.0 * valym1 + 1.0 * valym2) * fd_factors[MG2D_DIFF_COEFF_01];
- dst[ELL_RELAX_DIFF_COEFF_20] = (-1.0 * valxp2 + 16.0 * valxp1 - 30.0 * val + 16.0 * valxm1 - 1.0 * valxm2) * fd_factors[ELL_RELAX_DIFF_COEFF_20];
- dst[ELL_RELAX_DIFF_COEFF_02] = (-1.0 * valyp2 + 16.0 * valyp1 - 30.0 * val + 16.0 * valym1 - 1.0 * valym2) * fd_factors[ELL_RELAX_DIFF_COEFF_02];
+ dst[MG2D_DIFF_COEFF_20] = (-1.0 * valxp2 + 16.0 * valxp1 - 30.0 * val + 16.0 * valxm1 - 1.0 * valxm2) * fd_factors[MG2D_DIFF_COEFF_20];
+ dst[MG2D_DIFF_COEFF_02] = (-1.0 * valyp2 + 16.0 * valyp1 - 30.0 * val + 16.0 * valym1 - 1.0 * valym2) * fd_factors[MG2D_DIFF_COEFF_02];
- dst[ELL_RELAX_DIFF_COEFF_11] = ( 1.0 * valxp2yp2 - 8.0 * valxp2yp1 + 8.0 * valxp2ym1 - 1.0 * valxp2ym2
- -8.0 * valxp1yp2 + 64.0 * valxp1yp1 - 64.0 * valxp1ym1 + 8.0 * valxp1ym2
- +8.0 * valxm1yp2 - 64.0 * valxm1yp1 + 64.0 * valxm1ym1 - 8.0 * valxm1ym2
- -1.0 * valxm2yp2 + 8.0 * valxm2yp1 - 8.0 * valxm2ym1 + 1.0 * valxm2ym2) * fd_factors[ELL_RELAX_DIFF_COEFF_11];
+ dst[MG2D_DIFF_COEFF_11] = ( 1.0 * valxp2yp2 - 8.0 * valxp2yp1 + 8.0 * valxp2ym1 - 1.0 * valxp2ym2
+ -8.0 * valxp1yp2 + 64.0 * valxp1yp1 - 64.0 * valxp1ym1 + 8.0 * valxp1ym2
+ +8.0 * valxm1yp2 - 64.0 * valxm1yp1 + 64.0 * valxm1ym1 - 8.0 * valxm1ym2
+ -1.0 * valxm2yp2 + 8.0 * valxm2yp1 - 8.0 * valxm2ym1 + 1.0 * valxm2ym2) * fd_factors[MG2D_DIFF_COEFF_11];
}
static void residual_calc_line_s1_c(size_t linesize, double *dst, double *dst_max,
ptrdiff_t stride, const double *u, const double *rhs,
- const double * const diff_coeffs[ELL_RELAX_DIFF_COEFF_NB],
+ const double * const diff_coeffs[MG2D_DIFF_COEFF_NB],
const double *fd_factors)
{
double res_max = 0.0, res_abs;
for (size_t i = 0; i < linesize; i++) {
- double u_vals[ELL_RELAX_DIFF_COEFF_NB];
+ double u_vals[MG2D_DIFF_COEFF_NB];
double res;
derivatives_calc_s1(u_vals, u + i, fd_factors, stride);
@@ -197,12 +198,12 @@ static void residual_calc_line_s1_c(size_t linesize, double *dst, double *dst_ma
static void residual_calc_line_s2_c(size_t linesize, double *dst, double *dst_max,
ptrdiff_t stride, const double *u, const double *rhs,
- const double * const diff_coeffs[ELL_RELAX_DIFF_COEFF_NB],
+ const double * const diff_coeffs[MG2D_DIFF_COEFF_NB],
const double *fd_factors)
{
double res_max = 0.0, res_abs;
for (size_t i = 0; i < linesize; i++) {
- double u_vals[ELL_RELAX_DIFF_COEFF_NB];
+ double u_vals[MG2D_DIFF_COEFF_NB];
double res;
derivatives_calc_s2(u_vals, u + i, fd_factors, stride);
@@ -224,7 +225,7 @@ static void residual_calc_task(void *arg, unsigned int job_idx, unsigned int thr
EllRelaxContext *ctx = arg;
EllRelaxInternal *priv = ctx->priv;
const ptrdiff_t offset = priv->residual_calc_offset + job_idx * priv->stride;
- const double *diff_coeffs[ELL_RELAX_DIFF_COEFF_NB];
+ const double *diff_coeffs[MG2D_DIFF_COEFF_NB];
for (int i = 0; i < ARRAY_ELEMS(diff_coeffs); i++)
diff_coeffs[i] = ctx->diff_coeffs[i] + offset;
@@ -298,11 +299,11 @@ static void boundaries_apply(EllRelaxContext *ctx)
(boundary_def[i].is_upper ? 1 : -1) * stride_offset };
switch (ctx->boundaries[i].type) {
- case ELL_RELAX_BC_TYPE_FIXVAL:
+ case MG2D_BC_TYPE_FIXVAL:
boundaries_apply_fixval(dst, dst_strides, ctx->boundaries[i].val,
ctx->boundaries[i].val_stride, size_boundary);
break;
- case ELL_RELAX_BC_TYPE_FIXDIFF:
+ case MG2D_BC_TYPE_FIXDIFF:
boundaries_apply_fixdiff(dst, dst_strides, ctx->boundaries[i].val,
ctx->boundaries[i].val_stride, size_boundary);
break;
@@ -310,12 +311,12 @@ static void boundaries_apply(EllRelaxContext *ctx)
}
/* fill in the corner ghosts */
- if (ctx->boundaries[ELL_RELAX_BOUNDARY_0L].type == ELL_RELAX_BC_TYPE_FIXDIFF ||
- ctx->boundaries[ELL_RELAX_BOUNDARY_1L].type == ELL_RELAX_BC_TYPE_FIXDIFF) {
+ if (ctx->boundaries[MG2D_BOUNDARY_0L].type == MG2D_BC_TYPE_FIXDIFF ||
+ ctx->boundaries[MG2D_BOUNDARY_1L].type == MG2D_BC_TYPE_FIXDIFF) {
double *dst = ctx->u;
int fact_x = -1, fact_z = -1;
- if (ctx->boundaries[ELL_RELAX_BOUNDARY_0L].type == ELL_RELAX_BC_TYPE_FIXDIFF)
+ if (ctx->boundaries[MG2D_BOUNDARY_0L].type == MG2D_BC_TYPE_FIXDIFF)
fact_z *= -1;
else
fact_x *= -1;
@@ -327,12 +328,12 @@ static void boundaries_apply(EllRelaxContext *ctx)
dst[idx_dst] = dst[idx_src];
}
}
- if (ctx->boundaries[ELL_RELAX_BOUNDARY_0L].type == ELL_RELAX_BC_TYPE_FIXDIFF ||
- ctx->boundaries[ELL_RELAX_BOUNDARY_1U].type == ELL_RELAX_BC_TYPE_FIXDIFF) {
+ if (ctx->boundaries[MG2D_BOUNDARY_0L].type == MG2D_BC_TYPE_FIXDIFF ||
+ ctx->boundaries[MG2D_BOUNDARY_1U].type == MG2D_BC_TYPE_FIXDIFF) {
double *dst = ctx->u + ctx->domain_size[0] - 1;
int fact_x = 1, fact_z = -1;
- if (ctx->boundaries[ELL_RELAX_BOUNDARY_0L].type == ELL_RELAX_BC_TYPE_FIXDIFF)
+ if (ctx->boundaries[MG2D_BOUNDARY_0L].type == MG2D_BC_TYPE_FIXDIFF)
fact_z *= -1;
else
fact_x *= -1;
@@ -344,12 +345,12 @@ static void boundaries_apply(EllRelaxContext *ctx)
dst[idx_dst] = dst[idx_src];
}
}
- if (ctx->boundaries[ELL_RELAX_BOUNDARY_1L].type == ELL_RELAX_BC_TYPE_FIXDIFF ||
- ctx->boundaries[ELL_RELAX_BOUNDARY_0U].type == ELL_RELAX_BC_TYPE_FIXDIFF) {
+ if (ctx->boundaries[MG2D_BOUNDARY_1L].type == MG2D_BC_TYPE_FIXDIFF ||
+ ctx->boundaries[MG2D_BOUNDARY_0U].type == MG2D_BC_TYPE_FIXDIFF) {
double *dst = ctx->u + strides[1] * (ctx->domain_size[1] - 1);
int fact_x = -1, fact_z = 1;
- if (ctx->boundaries[ELL_RELAX_BOUNDARY_0U].type == ELL_RELAX_BC_TYPE_FIXDIFF)
+ if (ctx->boundaries[MG2D_BOUNDARY_0U].type == MG2D_BC_TYPE_FIXDIFF)
fact_z *= -1;
else
fact_x *= -1;
@@ -361,12 +362,12 @@ static void boundaries_apply(EllRelaxContext *ctx)
dst[idx_dst] = dst[idx_src];
}
}
- if (ctx->boundaries[ELL_RELAX_BOUNDARY_0U].type == ELL_RELAX_BC_TYPE_FIXDIFF ||
- ctx->boundaries[ELL_RELAX_BOUNDARY_1U].type == ELL_RELAX_BC_TYPE_FIXDIFF) {
+ if (ctx->boundaries[MG2D_BOUNDARY_0U].type == MG2D_BC_TYPE_FIXDIFF ||
+ ctx->boundaries[MG2D_BOUNDARY_1U].type == MG2D_BC_TYPE_FIXDIFF) {
double *dst = ctx->u + strides[1] * (ctx->domain_size[1] - 1) + ctx->domain_size[0] - 1;
int fact_x = 1, fact_z = 1;
- if (ctx->boundaries[ELL_RELAX_BOUNDARY_0U].type == ELL_RELAX_BC_TYPE_FIXDIFF)
+ if (ctx->boundaries[MG2D_BOUNDARY_0U].type == MG2D_BC_TYPE_FIXDIFF)
fact_z *= -1;
else
fact_x *= -1;
@@ -457,12 +458,12 @@ int mg2di_ell_relax_init(EllRelaxContext *ctx)
priv->relax_factor = ctx->relax_factor;
priv->relax_factor *= ctx->step[0] * ctx->step[0];
- priv->fd_factors[ELL_RELAX_DIFF_COEFF_00] = 1.0 / fd_denoms[ctx->fd_stencil - 1][ELL_RELAX_DIFF_COEFF_00];
- priv->fd_factors[ELL_RELAX_DIFF_COEFF_10] = 1.0 / (fd_denoms[ctx->fd_stencil - 1][ELL_RELAX_DIFF_COEFF_10] * ctx->step[0]);
- priv->fd_factors[ELL_RELAX_DIFF_COEFF_01] = 1.0 / (fd_denoms[ctx->fd_stencil - 1][ELL_RELAX_DIFF_COEFF_01] * ctx->step[1]);
- priv->fd_factors[ELL_RELAX_DIFF_COEFF_20] = 1.0 / (fd_denoms[ctx->fd_stencil - 1][ELL_RELAX_DIFF_COEFF_20] * SQR(ctx->step[0]));
- priv->fd_factors[ELL_RELAX_DIFF_COEFF_02] = 1.0 / (fd_denoms[ctx->fd_stencil - 1][ELL_RELAX_DIFF_COEFF_02] * SQR(ctx->step[1]));
- priv->fd_factors[ELL_RELAX_DIFF_COEFF_11] = 1.0 / (fd_denoms[ctx->fd_stencil - 1][ELL_RELAX_DIFF_COEFF_11] * ctx->step[0] * ctx->step[1]);
+ priv->fd_factors[MG2D_DIFF_COEFF_00] = 1.0 / fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_00];
+ priv->fd_factors[MG2D_DIFF_COEFF_10] = 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_10] * ctx->step[0]);
+ priv->fd_factors[MG2D_DIFF_COEFF_01] = 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_01] * ctx->step[1]);
+ priv->fd_factors[MG2D_DIFF_COEFF_20] = 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_20] * SQR(ctx->step[0]));
+ priv->fd_factors[MG2D_DIFF_COEFF_02] = 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_02] * SQR(ctx->step[1]));
+ priv->fd_factors[MG2D_DIFF_COEFF_11] = 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_11] * ctx->step[0] * ctx->step[1]);
if (!ctx->tp) {
ret = tp_init(&priv->tp_internal, 1);
@@ -474,20 +475,20 @@ int mg2di_ell_relax_init(EllRelaxContext *ctx)
priv->residual_calc_offset = 0;
priv->residual_calc_size[0] = ctx->domain_size[0];
priv->residual_calc_size[1] = ctx->domain_size[1];
- if (ctx->boundaries[ELL_RELAX_BOUNDARY_0L].type == ELL_RELAX_BC_TYPE_FIXVAL)
+ if (ctx->boundaries[MG2D_BOUNDARY_0L].type == MG2D_BC_TYPE_FIXVAL)
priv->residual_calc_offset += ctx->residual_stride;
- if (ctx->boundaries[ELL_RELAX_BOUNDARY_1L].type == ELL_RELAX_BC_TYPE_FIXVAL)
+ if (ctx->boundaries[MG2D_BOUNDARY_1L].type == MG2D_BC_TYPE_FIXVAL)
priv->residual_calc_offset++;
for (int i = 0; i < ARRAY_ELEMS(ctx->boundaries); i++) {
EllRelaxBoundary *bnd = &ctx->boundaries[i];
- if (bnd->type == ELL_RELAX_BC_TYPE_FIXDIFF) {
+ if (bnd->type == MG2D_BC_TYPE_FIXDIFF) {
for (int k = 0; k < ctx->domain_size[boundary_def[i].stride_idx]; k++)
if (bnd->val[k] != 0.0) {
mg2di_log(&ctx->logger, 0, "Only zero boundary derivative supported\n");
return -ENOSYS;
}
- } else if (bnd->type == ELL_RELAX_BC_TYPE_FIXVAL) {
+ } else if (bnd->type == MG2D_BC_TYPE_FIXVAL) {
priv->residual_calc_size[!boundary_def[i].stride_idx]--;
}
}