From e30cfde7614be7062249954eab6c3f56eeabbb51 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Mon, 15 Apr 2024 21:44:14 +0200 Subject: residual_calc: accept all diff coefficients in a single array Plus an offset parameter that signals the distance between different coefficients. This allows to avoid passing so many pointers around, which reduces register pressure and simplifies writing SIMD. Seems also to be a little faster. --- ell_grid_solve.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'ell_grid_solve.c') diff --git a/ell_grid_solve.c b/ell_grid_solve.c index ecd9d8f..1cf2798 100644 --- a/ell_grid_solve.c +++ b/ell_grid_solve.c @@ -160,17 +160,15 @@ static void boundaries_sync(EGSContext *ctx, NDArray *a_dst) static void residual_calc(EGSContext *ctx, int export_res) { EGSInternal *priv = ctx->priv; - const double *diff_coeffs[MG2D_DIFF_COEFF_NB]; ptrdiff_t *offset = priv->residual_calc_offset[priv->steps_since_sync]; size_t *size = priv->residual_calc_size[priv->steps_since_sync]; + const double *diff_coeffs0 = NDA_PTR2D(priv->diff_coeffs[0], offset[0], offset[1]); + const double *diff_coeffs1 = NDA_PTR2D(priv->diff_coeffs[1], offset[0], offset[1]); NDArray *dst = export_res ? ctx->residual : priv->u_next; int reflect_flags = 0; mg2di_timer_start(&ctx->timer_res_calc); - for (int i = 0; i < ARRAY_ELEMS(diff_coeffs); i++) - diff_coeffs[i] = NDA_PTR2D(priv->diff_coeffs[i], offset[0], offset[1]); - for (int bnd_idx = 0; bnd_idx < 4; bnd_idx++) if (ctx->boundaries[bnd_idx]->type == MG2D_BC_TYPE_REFLECT && priv->dg->components[priv->local_component].bnd_is_outer[bnd_idx]) @@ -180,7 +178,7 @@ static void residual_calc(EGSContext *ctx, int export_res) NDA_PTR2D(dst, offset[0], offset[1]), dst->stride[0], NDA_PTR2D(ctx->u, offset[0], offset[1]), ctx->u->stride[0], NDA_PTR2D(ctx->rhs, offset[0], offset[1]), ctx->rhs->stride[0], - diff_coeffs, priv->diff_coeffs[0]->stride[0], + diff_coeffs0, priv->diff_coeffs[0]->stride[0], diff_coeffs1 - diff_coeffs0, export_res ? 0.0 : 1.0, export_res ? 1.0 : priv->r.relax_factor, reflect_flags, FD_STENCIL_MAX); -- cgit v1.2.3