aboutsummaryrefslogtreecommitdiff
path: root/ell_grid_solve.c
diff options
context:
space:
mode:
authorAnton Khirnov <anton@khirnov.net>2024-04-15 21:44:14 +0200
committerAnton Khirnov <anton@khirnov.net>2024-04-15 21:44:14 +0200
commite30cfde7614be7062249954eab6c3f56eeabbb51 (patch)
tree1a27f188ed94b9ae4d566150ca951a8ac7f0fad1 /ell_grid_solve.c
parent982d71cb08f6ccf564c0558c659ae2756bb39ba1 (diff)
residual_calc: accept all diff coefficients in a single array
Plus an offset parameter that specifies the distance between different coefficients. This avoids passing so many pointers around, which reduces register pressure and simplifies writing SIMD. It also seems to be a little faster.
Diffstat (limited to 'ell_grid_solve.c')
-rw-r--r--ell_grid_solve.c8
1 files changed, 3 insertions, 5 deletions
diff --git a/ell_grid_solve.c b/ell_grid_solve.c
index ecd9d8f..1cf2798 100644
--- a/ell_grid_solve.c
+++ b/ell_grid_solve.c
@@ -160,17 +160,15 @@ static void boundaries_sync(EGSContext *ctx, NDArray *a_dst)
static void residual_calc(EGSContext *ctx, int export_res)
{
EGSInternal *priv = ctx->priv;
- const double *diff_coeffs[MG2D_DIFF_COEFF_NB];
ptrdiff_t *offset = priv->residual_calc_offset[priv->steps_since_sync];
size_t *size = priv->residual_calc_size[priv->steps_since_sync];
+ const double *diff_coeffs0 = NDA_PTR2D(priv->diff_coeffs[0], offset[0], offset[1]);
+ const double *diff_coeffs1 = NDA_PTR2D(priv->diff_coeffs[1], offset[0], offset[1]);
NDArray *dst = export_res ? ctx->residual : priv->u_next;
int reflect_flags = 0;
mg2di_timer_start(&ctx->timer_res_calc);
- for (int i = 0; i < ARRAY_ELEMS(diff_coeffs); i++)
- diff_coeffs[i] = NDA_PTR2D(priv->diff_coeffs[i], offset[0], offset[1]);
-
for (int bnd_idx = 0; bnd_idx < 4; bnd_idx++)
if (ctx->boundaries[bnd_idx]->type == MG2D_BC_TYPE_REFLECT &&
priv->dg->components[priv->local_component].bnd_is_outer[bnd_idx])
@@ -180,7 +178,7 @@ static void residual_calc(EGSContext *ctx, int export_res)
NDA_PTR2D(dst, offset[0], offset[1]), dst->stride[0],
NDA_PTR2D(ctx->u, offset[0], offset[1]), ctx->u->stride[0],
NDA_PTR2D(ctx->rhs, offset[0], offset[1]), ctx->rhs->stride[0],
- diff_coeffs, priv->diff_coeffs[0]->stride[0],
+ diff_coeffs0, priv->diff_coeffs[0]->stride[0], diff_coeffs1 - diff_coeffs0,
export_res ? 0.0 : 1.0, export_res ? 1.0 : priv->r.relax_factor,
reflect_flags, FD_STENCIL_MAX);