From c2ead14543d11a532a8ac564119a7a10160fdc41 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Fri, 22 Mar 2019 18:50:55 +0100 Subject: ell_grid_solve: switch to ndarray in its external API --- ell_grid_solve.c | 178 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 96 insertions(+), 82 deletions(-) (limited to 'ell_grid_solve.c') diff --git a/ell_grid_solve.c b/ell_grid_solve.c index 117cbd1..49b2321 100644 --- a/ell_grid_solve.c +++ b/ell_grid_solve.c @@ -47,7 +47,7 @@ typedef struct EGSRelaxInternal { } EGSRelaxInternal; typedef struct EGSExactInternal { - void (*fill_mat)(double *mat_row, double **diff_coeffs, double *fd_factors, + void (*fill_mat)(double *mat_row, NDArray **diff_coeffs, double *fd_factors, ptrdiff_t idx_src, ptrdiff_t row_stride, ptrdiff_t row_offset); size_t N; size_t N_ghosts; @@ -111,11 +111,11 @@ static void residual_calc(EGSContext *ctx) start = gettime(); for (int i = 0; i < ARRAY_ELEMS(diff_coeffs); i++) - diff_coeffs[i] = ctx->diff_coeffs[i] + priv->residual_calc_offset; + diff_coeffs[i] = ctx->diff_coeffs[i]->data + priv->residual_calc_offset; mg2di_residual_calc(priv->rescalc, priv->residual_calc_size, priv->stride, - &ctx->residual_max, ctx->residual + priv->residual_calc_offset, - ctx->u + priv->residual_calc_offset, ctx->rhs + priv->residual_calc_offset, + &ctx->residual_max, ctx->residual->data + priv->residual_calc_offset, + ctx->u->data + priv->residual_calc_offset, ctx->rhs->data + priv->residual_calc_offset, diff_coeffs, priv->fd_factors); ctx->time_res_calc += gettime() - start; @@ -191,7 +191,7 @@ static void boundaries_apply(EGSContext *ctx) const size_t size_boundary = ctx->domain_size[!ci]; const size_t size_offset = ctx->domain_size[ci]; - double *dst = ctx->u + mg2d_bnd_is_upper(i) * ((size_offset - 1) * stride_offset); + double *dst = ctx->u->data + mg2d_bnd_is_upper(i) * ((size_offset - 1) * stride_offset); const ptrdiff_t dst_strides[] = { stride_boundary, mg2d_bnd_out_dir(i) * stride_offset }; switch (ctx->boundaries[i]->type) { @@ -222,7 +222,7 @@ static void boundaries_apply(EGSContext *ctx) const int dir_x = mg2d_bnd_out_dir(loc_x); int fact_x = dir_x, fact_y = dir_y; - double *dst = ctx->u + double *dst = ctx->u->data + mg2d_bnd_is_upper(loc_y) * ((ctx->domain_size[1] - 1) * priv->stride) + mg2d_bnd_is_upper(loc_x) * (ctx->domain_size[0] - 1); @@ -251,9 +251,9 @@ static int residual_add_task(void *arg, unsigned int job_idx, unsigned int threa EGSInternal *priv = ctx->priv; for (int idx0 = 0; idx0 < ctx->domain_size[0]; idx0++) { - ptrdiff_t idx = job_idx * ctx->u_stride + idx0; + ptrdiff_t idx = job_idx * ctx->u->stride[0] + idx0; - ctx->u[idx] += priv->r.relax_factor * ctx->residual[idx]; + ctx->u->data[idx] += priv->r.relax_factor * ctx->residual->data[idx]; } return 0; @@ -277,78 +277,78 @@ static int solve_relax_step(EGSContext *ctx) return 0; } -static void fill_mat_s1(double *mat_row, double **diff_coeffs, double *fd_factors, +static void fill_mat_s1(double *mat_row, NDArray **diff_coeffs, double *fd_factors, ptrdiff_t idx_src, ptrdiff_t row_stride, ptrdiff_t row_offset) { - mat_row[row_offset + 0] += diff_coeffs[MG2D_DIFF_COEFF_00][idx_src]; + mat_row[row_offset + 0] += diff_coeffs[MG2D_DIFF_COEFF_00]->data[idx_src]; - mat_row[row_offset + 1] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_10][idx_src] * fd_factors[MG2D_DIFF_COEFF_10]; - mat_row[row_offset - 1] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_10][idx_src] * fd_factors[MG2D_DIFF_COEFF_10]; + mat_row[row_offset + 1] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10]; + mat_row[row_offset - 1] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10]; - mat_row[row_offset + 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_01][idx_src] * fd_factors[MG2D_DIFF_COEFF_01]; - mat_row[row_offset - 1 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_01][idx_src] * fd_factors[MG2D_DIFF_COEFF_01]; + mat_row[row_offset + 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01]; + mat_row[row_offset - 1 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01]; - mat_row[row_offset + 1] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_20][idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; - mat_row[row_offset + 0] += -2.0 * diff_coeffs[MG2D_DIFF_COEFF_20][idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; - mat_row[row_offset - 1] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_20][idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; + mat_row[row_offset + 1] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; + mat_row[row_offset + 0] += -2.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; + mat_row[row_offset - 1] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; - mat_row[row_offset + 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_02][idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; - mat_row[row_offset + 0 * row_stride] += -2.0 * diff_coeffs[MG2D_DIFF_COEFF_02][idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; - mat_row[row_offset - 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_02][idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; + mat_row[row_offset + 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; + mat_row[row_offset + 0 * row_stride] += -2.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; + mat_row[row_offset - 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; - mat_row[row_offset + 1 + 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; - mat_row[row_offset + 1 - 1 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; - mat_row[row_offset - 1 + 1 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; - mat_row[row_offset - 1 - 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[row_offset + 1 + 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[row_offset + 1 - 1 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[row_offset - 1 + 1 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[row_offset - 1 - 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; } -static void fill_mat_s2(double *mat_row, double **diff_coeffs, double *fd_factors, +static void fill_mat_s2(double *mat_row, NDArray **diff_coeffs, double *fd_factors, ptrdiff_t idx_src, ptrdiff_t row_stride, ptrdiff_t row_offset) { - mat_row[row_offset + 0] += diff_coeffs[MG2D_DIFF_COEFF_00][idx_src]; - - mat_row[row_offset + 2] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_10][idx_src] * fd_factors[MG2D_DIFF_COEFF_10]; - mat_row[row_offset + 1] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_10][idx_src] * fd_factors[MG2D_DIFF_COEFF_10]; - mat_row[row_offset - 1] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_10][idx_src] * fd_factors[MG2D_DIFF_COEFF_10]; - mat_row[row_offset - 2] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_10][idx_src] * fd_factors[MG2D_DIFF_COEFF_10]; - - mat_row[row_offset + 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_01][idx_src] * fd_factors[MG2D_DIFF_COEFF_01]; - mat_row[row_offset + 1 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_01][idx_src] * fd_factors[MG2D_DIFF_COEFF_01]; - mat_row[row_offset - 1 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_01][idx_src] * fd_factors[MG2D_DIFF_COEFF_01]; - mat_row[row_offset - 2 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_01][idx_src] * fd_factors[MG2D_DIFF_COEFF_01]; - - mat_row[row_offset + 2] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_20][idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; - mat_row[row_offset + 1] += 16.0 * diff_coeffs[MG2D_DIFF_COEFF_20][idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; - mat_row[row_offset + 0] += -30.0 * diff_coeffs[MG2D_DIFF_COEFF_20][idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; - mat_row[row_offset - 1] += 16.0 * diff_coeffs[MG2D_DIFF_COEFF_20][idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; - mat_row[row_offset - 2] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_20][idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; - - mat_row[row_offset + 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_02][idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; - mat_row[row_offset + 1 * row_stride] += 16.0 * diff_coeffs[MG2D_DIFF_COEFF_02][idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; - mat_row[row_offset + 0 * row_stride] += -30.0 * diff_coeffs[MG2D_DIFF_COEFF_02][idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; - mat_row[row_offset - 1 * row_stride] += 16.0 * diff_coeffs[MG2D_DIFF_COEFF_02][idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; - mat_row[row_offset - 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_02][idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; - - mat_row[row_offset + 2 + 2 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; - mat_row[row_offset + 2 + 1 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; - mat_row[row_offset + 2 - 1 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; - mat_row[row_offset + 2 - 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; - - mat_row[row_offset + 1 + 2 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; - mat_row[row_offset + 1 + 1 * row_stride] += 64.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; - mat_row[row_offset + 1 - 1 * row_stride] += -64.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; - mat_row[row_offset + 1 - 2 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; - - mat_row[row_offset - 1 + 2 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; - mat_row[row_offset - 1 + 1 * row_stride] += -64.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; - mat_row[row_offset - 1 - 1 * row_stride] += 64.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; - mat_row[row_offset - 1 - 2 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; - - mat_row[row_offset - 2 + 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; - mat_row[row_offset - 2 + 1 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; - mat_row[row_offset - 2 - 1 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; - mat_row[row_offset - 2 - 2 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[row_offset + 0] += diff_coeffs[MG2D_DIFF_COEFF_00]->data[idx_src]; + + mat_row[row_offset + 2] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10]; + mat_row[row_offset + 1] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10]; + mat_row[row_offset - 1] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10]; + mat_row[row_offset - 2] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10]; + + mat_row[row_offset + 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01]; + mat_row[row_offset + 1 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01]; + mat_row[row_offset - 1 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01]; + mat_row[row_offset - 2 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01]; + + mat_row[row_offset + 2] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; + mat_row[row_offset + 1] += 16.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; + mat_row[row_offset + 0] += -30.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; + mat_row[row_offset - 1] += 16.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; + mat_row[row_offset - 2] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; + + mat_row[row_offset + 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; + mat_row[row_offset + 1 * row_stride] += 16.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; + mat_row[row_offset + 0 * row_stride] += -30.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; + mat_row[row_offset - 1 * row_stride] += 16.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; + mat_row[row_offset - 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; + + mat_row[row_offset + 2 + 2 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[row_offset + 2 + 1 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[row_offset + 2 - 1 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[row_offset + 2 - 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + + mat_row[row_offset + 1 + 2 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[row_offset + 1 + 1 * row_stride] += 64.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[row_offset + 1 - 1 * row_stride] += -64.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[row_offset + 1 - 2 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + + mat_row[row_offset - 1 + 2 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[row_offset - 1 + 1 * row_stride] += -64.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[row_offset - 1 - 1 * row_stride] += 64.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[row_offset - 1 - 2 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + + mat_row[row_offset - 2 + 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[row_offset - 2 + 1 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[row_offset - 2 - 1 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[row_offset - 2 - 2 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; } static int solve_exact(EGSContext *ctx) @@ -396,7 +396,7 @@ static int solve_exact(EGSContext *ctx) e->fill_mat(mat_row, ctx->diff_coeffs, ctx->priv->fd_factors, idx_src, row_stride, row_offset); - e->rhs[mat_row_idx] = ctx->rhs[idx_src]; + e->rhs[mat_row_idx] = ctx->rhs->data[idx_src]; if (boundary) { double *mat_dst = e->mat + e->N * mat_row_idx; @@ -554,7 +554,7 @@ static int solve_exact(EGSContext *ctx) } for (size_t idx1 = 0; idx1 < ctx->domain_size[1]; idx1++) - memcpy(ctx->u + idx1 * ctx->u_stride, e->x + idx1 * ctx->domain_size[0], ctx->domain_size[0] * sizeof(*e->x)); + memcpy(ctx->u->data + idx1 * ctx->u->stride[0], e->x + idx1 * ctx->domain_size[0], ctx->domain_size[0] * sizeof(*e->x)); ec->time_lin_solve += gettime() - start; ec->count_lin_solve++; @@ -637,7 +637,7 @@ int mg2di_egs_init(EGSContext *ctx) priv->residual_calc_size[0] = ctx->domain_size[0]; priv->residual_calc_size[1] = ctx->domain_size[1]; if (ctx->boundaries[MG2D_BOUNDARY_1L]->type == MG2D_BC_TYPE_FIXVAL) - priv->residual_calc_offset += ctx->residual_stride; + priv->residual_calc_offset += ctx->residual->stride[0]; if (ctx->boundaries[MG2D_BOUNDARY_0L]->type == MG2D_BC_TYPE_FIXVAL) priv->residual_calc_offset++; @@ -675,7 +675,6 @@ static int arrays_alloc(EGSContext *ctx, const size_t domain_size[2]) domain_size[0] + 2 * ghosts, }; const size_t stride = size_padded[0]; - const size_t start_offset = ghosts * stride + ghosts; const Slice slice[2] = { SLICE(ghosts, -ghosts, 1), SLICE(ghosts, -ghosts, 1) }; int ret; @@ -683,30 +682,38 @@ static int arrays_alloc(EGSContext *ctx, const size_t domain_size[2]) ret = mg2di_ndarray_alloc(&priv->u_base, 2, size_padded, 0); if (ret < 0) return ret; - ctx->u = priv->u_base->data + start_offset; - ctx->u_stride = priv->u_base->stride[0]; + + ret = mg2di_ndarray_slice(&ctx->u, priv->u_base, slice); + if (ret < 0) + return ret; ret = mg2di_ndarray_alloc(&priv->rhs_base, 2, size_padded, 0); if (ret < 0) return ret; - ctx->rhs = priv->rhs_base->data + start_offset; - ctx->rhs_stride = priv->rhs_base->stride[0]; + + ret = mg2di_ndarray_slice(&ctx->rhs, priv->rhs_base, slice); + if (ret < 0) + return ret; ret = mg2di_ndarray_alloc(&priv->residual_base, 2, size_padded, NDARRAY_ALLOC_ZERO); if (ret < 0) return ret; - ctx->residual = priv->residual_base->data + start_offset; - ctx->residual_stride = priv->residual_base->stride[0]; + + ret = mg2di_ndarray_slice(&ctx->residual, priv->residual_base, slice); + if (ret < 0) + return ret; for (int i = 0; i < ARRAY_ELEMS(ctx->diff_coeffs); i++) { ret = mg2di_ndarray_alloc(&priv->diff_coeffs_base[i], 2, size_padded, NDARRAY_ALLOC_ZERO); if (ret < 0) return ret; - ctx->diff_coeffs[i] = priv->diff_coeffs_base[i]->data + start_offset; + + ret = mg2di_ndarray_slice(&ctx->diff_coeffs[i], priv->diff_coeffs_base[i], slice); + if (ret < 0) + return ret; } - ctx->diff_coeffs_stride = stride; priv->stride = stride; @@ -820,13 +827,20 @@ void mg2di_egs_free(EGSContext **pctx) mg2di_bicgstab_context_free(&e->bicgstab); } - mg2di_ndarray_free(&ctx->a_u); + mg2di_ndarray_free(&ctx->u); mg2di_ndarray_free(&ctx->priv->u_base); + mg2di_ndarray_free(&ctx->rhs); mg2di_ndarray_free(&ctx->priv->rhs_base); + + mg2di_ndarray_free(&ctx->residual); mg2di_ndarray_free(&ctx->priv->residual_base); - for (int i = 0; i < ARRAY_ELEMS(ctx->priv->diff_coeffs_base); i++) + + for (int i = 0; i < ARRAY_ELEMS(ctx->priv->diff_coeffs_base); i++) { + mg2di_ndarray_free(&ctx->diff_coeffs[i]); mg2di_ndarray_free(&ctx->priv->diff_coeffs_base[i]); + } + for (int i = 0; i < ARRAY_ELEMS(ctx->boundaries); i++) mg2di_bc_free(&ctx->boundaries[i]); -- cgit v1.2.3