diff options
author | Anton Khirnov <anton@khirnov.net> | 2024-04-15 21:44:14 +0200 |
---|---|---|
committer | Anton Khirnov <anton@khirnov.net> | 2024-04-15 21:44:14 +0200 |
commit | e30cfde7614be7062249954eab6c3f56eeabbb51 (patch) | |
tree | 1a27f188ed94b9ae4d566150ca951a8ac7f0fad1 /ell_grid_solve.c | |
parent | 982d71cb08f6ccf564c0558c659ae2756bb39ba1 (diff) |
residual_calc: accept all diff coefficients in a single array
Plus an offset parameter that signals the distance between different
coefficients. This avoids passing so many pointers around,
which reduces register pressure and simplifies writing SIMD. It also
seems to be a little faster.
Diffstat (limited to 'ell_grid_solve.c')
-rw-r--r-- | ell_grid_solve.c | 8 |
1 files changed, 3 insertions, 5 deletions
diff --git a/ell_grid_solve.c b/ell_grid_solve.c index ecd9d8f..1cf2798 100644 --- a/ell_grid_solve.c +++ b/ell_grid_solve.c @@ -160,17 +160,15 @@ static void boundaries_sync(EGSContext *ctx, NDArray *a_dst) static void residual_calc(EGSContext *ctx, int export_res) { EGSInternal *priv = ctx->priv; - const double *diff_coeffs[MG2D_DIFF_COEFF_NB]; ptrdiff_t *offset = priv->residual_calc_offset[priv->steps_since_sync]; size_t *size = priv->residual_calc_size[priv->steps_since_sync]; + const double *diff_coeffs0 = NDA_PTR2D(priv->diff_coeffs[0], offset[0], offset[1]); + const double *diff_coeffs1 = NDA_PTR2D(priv->diff_coeffs[1], offset[0], offset[1]); NDArray *dst = export_res ? ctx->residual : priv->u_next; int reflect_flags = 0; mg2di_timer_start(&ctx->timer_res_calc); - for (int i = 0; i < ARRAY_ELEMS(diff_coeffs); i++) - diff_coeffs[i] = NDA_PTR2D(priv->diff_coeffs[i], offset[0], offset[1]); - for (int bnd_idx = 0; bnd_idx < 4; bnd_idx++) if (ctx->boundaries[bnd_idx]->type == MG2D_BC_TYPE_REFLECT && priv->dg->components[priv->local_component].bnd_is_outer[bnd_idx]) @@ -180,7 +178,7 @@ static void residual_calc(EGSContext *ctx, int export_res) NDA_PTR2D(dst, offset[0], offset[1]), dst->stride[0], NDA_PTR2D(ctx->u, offset[0], offset[1]), ctx->u->stride[0], NDA_PTR2D(ctx->rhs, offset[0], offset[1]), ctx->rhs->stride[0], - diff_coeffs, priv->diff_coeffs[0]->stride[0], + diff_coeffs0, priv->diff_coeffs[0]->stride[0], diff_coeffs1 - diff_coeffs0, export_res ? 0.0 : 1.0, export_res ? 1.0 : priv->r.relax_factor, reflect_flags, FD_STENCIL_MAX); |