From c2ead14543d11a532a8ac564119a7a10160fdc41 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Fri, 22 Mar 2019 18:50:55 +0100 Subject: ell_grid_solve: switch to ndarray in its external API --- ell_grid_solve.c | 178 ++++++++++++++++++++++++++++++------------------------- ell_grid_solve.h | 27 ++------- mg2d.c | 43 +++++++------- relax_test.c | 29 ++++----- 4 files changed, 139 insertions(+), 138 deletions(-) diff --git a/ell_grid_solve.c b/ell_grid_solve.c index 117cbd1..49b2321 100644 --- a/ell_grid_solve.c +++ b/ell_grid_solve.c @@ -47,7 +47,7 @@ typedef struct EGSRelaxInternal { } EGSRelaxInternal; typedef struct EGSExactInternal { - void (*fill_mat)(double *mat_row, double **diff_coeffs, double *fd_factors, + void (*fill_mat)(double *mat_row, NDArray **diff_coeffs, double *fd_factors, ptrdiff_t idx_src, ptrdiff_t row_stride, ptrdiff_t row_offset); size_t N; size_t N_ghosts; @@ -111,11 +111,11 @@ static void residual_calc(EGSContext *ctx) start = gettime(); for (int i = 0; i < ARRAY_ELEMS(diff_coeffs); i++) - diff_coeffs[i] = ctx->diff_coeffs[i] + priv->residual_calc_offset; + diff_coeffs[i] = ctx->diff_coeffs[i]->data + priv->residual_calc_offset; mg2di_residual_calc(priv->rescalc, priv->residual_calc_size, priv->stride, - &ctx->residual_max, ctx->residual + priv->residual_calc_offset, - ctx->u + priv->residual_calc_offset, ctx->rhs + priv->residual_calc_offset, + &ctx->residual_max, ctx->residual->data + priv->residual_calc_offset, + ctx->u->data + priv->residual_calc_offset, ctx->rhs->data + priv->residual_calc_offset, diff_coeffs, priv->fd_factors); ctx->time_res_calc += gettime() - start; @@ -191,7 +191,7 @@ static void boundaries_apply(EGSContext *ctx) const size_t size_boundary = ctx->domain_size[!ci]; const size_t size_offset = ctx->domain_size[ci]; - double *dst = ctx->u + mg2d_bnd_is_upper(i) * ((size_offset - 1) * stride_offset); + double *dst = ctx->u->data + mg2d_bnd_is_upper(i) * ((size_offset - 1) * stride_offset); const ptrdiff_t dst_strides[] = { stride_boundary, mg2d_bnd_out_dir(i) * stride_offset }; switch (ctx->boundaries[i]->type) { @@ -222,7 +222,7 @@ static void boundaries_apply(EGSContext *ctx) const int dir_x = mg2d_bnd_out_dir(loc_x); int fact_x = dir_x, fact_y = dir_y; - double *dst = ctx->u + double *dst = ctx->u->data + mg2d_bnd_is_upper(loc_y) * ((ctx->domain_size[1] - 1) * priv->stride) + mg2d_bnd_is_upper(loc_x) * (ctx->domain_size[0] - 1); @@ -251,9 +251,9 @@ static int residual_add_task(void *arg, unsigned int job_idx, unsigned int threa EGSInternal *priv = ctx->priv; for (int idx0 = 0; idx0 < ctx->domain_size[0]; idx0++) { - ptrdiff_t idx = job_idx * ctx->u_stride + idx0; + ptrdiff_t idx = job_idx * ctx->u->stride[0] + idx0; - ctx->u[idx] += priv->r.relax_factor * ctx->residual[idx]; + ctx->u->data[idx] += priv->r.relax_factor * ctx->residual->data[idx]; } return 0; @@ -277,78 +277,78 @@ static int solve_relax_step(EGSContext *ctx) return 0; } -static void fill_mat_s1(double *mat_row, double **diff_coeffs, double *fd_factors, +static void fill_mat_s1(double *mat_row, NDArray **diff_coeffs, double *fd_factors, ptrdiff_t idx_src, ptrdiff_t row_stride, ptrdiff_t row_offset) { - mat_row[row_offset + 0] += diff_coeffs[MG2D_DIFF_COEFF_00][idx_src]; + mat_row[row_offset + 0] += diff_coeffs[MG2D_DIFF_COEFF_00]->data[idx_src]; - mat_row[row_offset + 1] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_10][idx_src] * fd_factors[MG2D_DIFF_COEFF_10]; - mat_row[row_offset - 1] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_10][idx_src] * fd_factors[MG2D_DIFF_COEFF_10]; + mat_row[row_offset + 1] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10]; + mat_row[row_offset - 1] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10]; - mat_row[row_offset + 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_01][idx_src] * fd_factors[MG2D_DIFF_COEFF_01]; - mat_row[row_offset - 1 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_01][idx_src] * fd_factors[MG2D_DIFF_COEFF_01]; + mat_row[row_offset + 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01]; + mat_row[row_offset - 1 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01]; - mat_row[row_offset + 1] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_20][idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; - mat_row[row_offset + 0] += -2.0 * diff_coeffs[MG2D_DIFF_COEFF_20][idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; - mat_row[row_offset - 1] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_20][idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; + mat_row[row_offset + 1] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; + mat_row[row_offset + 0] += -2.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; + mat_row[row_offset - 1] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; - mat_row[row_offset + 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_02][idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; - mat_row[row_offset + 0 * row_stride] += -2.0 * diff_coeffs[MG2D_DIFF_COEFF_02][idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; - mat_row[row_offset - 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_02][idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; + mat_row[row_offset + 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; + mat_row[row_offset + 0 * row_stride] += -2.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; + mat_row[row_offset - 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; - mat_row[row_offset + 1 + 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; - mat_row[row_offset + 1 - 1 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; - mat_row[row_offset - 1 + 1 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; - mat_row[row_offset - 1 - 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[row_offset + 1 + 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[row_offset + 1 - 1 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[row_offset - 1 + 1 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[row_offset - 1 - 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; } -static void fill_mat_s2(double *mat_row, double **diff_coeffs, double *fd_factors, +static void fill_mat_s2(double *mat_row, NDArray **diff_coeffs, double *fd_factors, ptrdiff_t idx_src, ptrdiff_t row_stride, ptrdiff_t row_offset) { - mat_row[row_offset + 0] += diff_coeffs[MG2D_DIFF_COEFF_00][idx_src]; - - mat_row[row_offset + 2] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_10][idx_src] * fd_factors[MG2D_DIFF_COEFF_10]; - mat_row[row_offset + 1] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_10][idx_src] * fd_factors[MG2D_DIFF_COEFF_10]; - mat_row[row_offset - 1] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_10][idx_src] * fd_factors[MG2D_DIFF_COEFF_10]; - mat_row[row_offset - 2] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_10][idx_src] * fd_factors[MG2D_DIFF_COEFF_10]; - - mat_row[row_offset + 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_01][idx_src] * fd_factors[MG2D_DIFF_COEFF_01]; - mat_row[row_offset + 1 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_01][idx_src] * fd_factors[MG2D_DIFF_COEFF_01]; - mat_row[row_offset - 1 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_01][idx_src] * fd_factors[MG2D_DIFF_COEFF_01]; - mat_row[row_offset - 2 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_01][idx_src] * fd_factors[MG2D_DIFF_COEFF_01]; - - mat_row[row_offset + 2] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_20][idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; - mat_row[row_offset + 1] += 16.0 * diff_coeffs[MG2D_DIFF_COEFF_20][idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; - mat_row[row_offset + 0] += -30.0 * diff_coeffs[MG2D_DIFF_COEFF_20][idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; - mat_row[row_offset - 1] += 16.0 * diff_coeffs[MG2D_DIFF_COEFF_20][idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; - mat_row[row_offset - 2] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_20][idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; - - mat_row[row_offset + 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_02][idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; - mat_row[row_offset + 1 * row_stride] += 16.0 * diff_coeffs[MG2D_DIFF_COEFF_02][idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; - mat_row[row_offset + 0 * row_stride] += -30.0 * diff_coeffs[MG2D_DIFF_COEFF_02][idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; - mat_row[row_offset - 1 * row_stride] += 16.0 * diff_coeffs[MG2D_DIFF_COEFF_02][idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; - mat_row[row_offset - 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_02][idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; - - mat_row[row_offset + 2 + 2 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; - mat_row[row_offset + 2 + 1 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; - mat_row[row_offset + 2 - 1 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; - mat_row[row_offset + 2 - 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; - - mat_row[row_offset + 1 + 2 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; - mat_row[row_offset + 1 + 1 * row_stride] += 64.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; - mat_row[row_offset + 1 - 1 * row_stride] += -64.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; - mat_row[row_offset + 1 - 2 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; - - mat_row[row_offset - 1 + 2 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; - mat_row[row_offset - 1 + 1 * row_stride] += -64.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; - mat_row[row_offset - 1 - 1 * row_stride] += 64.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; - mat_row[row_offset - 1 - 2 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; - - mat_row[row_offset - 2 + 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; - mat_row[row_offset - 2 + 1 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; - mat_row[row_offset - 2 - 1 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; - mat_row[row_offset - 2 - 2 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[row_offset + 0] += diff_coeffs[MG2D_DIFF_COEFF_00]->data[idx_src]; + + mat_row[row_offset + 2] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10]; + mat_row[row_offset + 1] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10]; + mat_row[row_offset - 1] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10]; + mat_row[row_offset - 2] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10]; + + mat_row[row_offset + 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01]; + mat_row[row_offset + 1 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01]; + mat_row[row_offset - 1 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01]; + mat_row[row_offset - 2 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01]; + + mat_row[row_offset + 2] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; + mat_row[row_offset + 1] += 16.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; + mat_row[row_offset + 0] += -30.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; + mat_row[row_offset - 1] += 16.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; + mat_row[row_offset - 2] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; + + mat_row[row_offset + 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; + mat_row[row_offset + 1 * row_stride] += 16.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; + mat_row[row_offset + 0 * row_stride] += -30.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; + mat_row[row_offset - 1 * row_stride] += 16.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; + mat_row[row_offset - 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; + + mat_row[row_offset + 2 + 2 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[row_offset + 2 + 1 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[row_offset + 2 - 1 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[row_offset + 2 - 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + + mat_row[row_offset + 1 + 2 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[row_offset + 1 + 1 * row_stride] += 64.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[row_offset + 1 - 1 * row_stride] += -64.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[row_offset + 1 - 2 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + + mat_row[row_offset - 1 + 2 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[row_offset - 1 + 1 * row_stride] += -64.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[row_offset - 1 - 1 * row_stride] += 64.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[row_offset - 1 - 2 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + + mat_row[row_offset - 2 + 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[row_offset - 2 + 1 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[row_offset - 2 - 1 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[row_offset - 2 - 2 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; } static int solve_exact(EGSContext *ctx) @@ -396,7 +396,7 @@ static int solve_exact(EGSContext *ctx) e->fill_mat(mat_row, ctx->diff_coeffs, ctx->priv->fd_factors, idx_src, row_stride, row_offset); - e->rhs[mat_row_idx] = ctx->rhs[idx_src]; + e->rhs[mat_row_idx] = ctx->rhs->data[idx_src]; if (boundary) { double *mat_dst = e->mat + e->N * mat_row_idx; @@ -554,7 +554,7 @@ static int solve_exact(EGSContext *ctx) } for (size_t idx1 = 0; idx1 < ctx->domain_size[1]; idx1++) - memcpy(ctx->u + idx1 * ctx->u_stride, e->x + idx1 * ctx->domain_size[0], ctx->domain_size[0] * sizeof(*e->x)); + memcpy(ctx->u->data + idx1 * ctx->u->stride[0], e->x + idx1 * ctx->domain_size[0], ctx->domain_size[0] * sizeof(*e->x)); ec->time_lin_solve += gettime() - start; ec->count_lin_solve++; @@ -637,7 +637,7 @@ int mg2di_egs_init(EGSContext *ctx) priv->residual_calc_size[0] = ctx->domain_size[0]; priv->residual_calc_size[1] = ctx->domain_size[1]; if (ctx->boundaries[MG2D_BOUNDARY_1L]->type == MG2D_BC_TYPE_FIXVAL) - priv->residual_calc_offset += ctx->residual_stride; + priv->residual_calc_offset += ctx->residual->stride[0]; if (ctx->boundaries[MG2D_BOUNDARY_0L]->type == MG2D_BC_TYPE_FIXVAL) priv->residual_calc_offset++; @@ -675,7 +675,6 @@ static int arrays_alloc(EGSContext *ctx, const size_t domain_size[2]) domain_size[0] + 2 * ghosts, }; const size_t stride = size_padded[0]; - const size_t start_offset = ghosts * stride + ghosts; const Slice slice[2] = { SLICE(ghosts, -ghosts, 1), SLICE(ghosts, -ghosts, 1) }; int ret; @@ -683,30 +682,38 @@ static int arrays_alloc(EGSContext *ctx, const size_t domain_size[2]) ret = mg2di_ndarray_alloc(&priv->u_base, 2, size_padded, 0); if (ret < 0) return ret; - ctx->u = priv->u_base->data + start_offset; - ctx->u_stride = priv->u_base->stride[0]; + + ret = mg2di_ndarray_slice(&ctx->u, priv->u_base, slice); + if (ret < 0) + return ret; ret = mg2di_ndarray_alloc(&priv->rhs_base, 2, size_padded, 0); if (ret < 0) return ret; - ctx->rhs = priv->rhs_base->data + start_offset; - ctx->rhs_stride = priv->rhs_base->stride[0]; + + ret = mg2di_ndarray_slice(&ctx->rhs, priv->rhs_base, slice); + if (ret < 0) + return ret; ret = mg2di_ndarray_alloc(&priv->residual_base, 2, size_padded, NDARRAY_ALLOC_ZERO); if (ret < 0) return ret; - ctx->residual = priv->residual_base->data + start_offset; - ctx->residual_stride = priv->residual_base->stride[0]; + + ret = mg2di_ndarray_slice(&ctx->residual, priv->residual_base, slice); + if (ret < 0) + return ret; for (int i = 0; i < ARRAY_ELEMS(ctx->diff_coeffs); i++) { ret = mg2di_ndarray_alloc(&priv->diff_coeffs_base[i], 2, size_padded, NDARRAY_ALLOC_ZERO); if (ret < 0) return ret; - ctx->diff_coeffs[i] = priv->diff_coeffs_base[i]->data + start_offset; + + ret = mg2di_ndarray_slice(&ctx->diff_coeffs[i], priv->diff_coeffs_base[i], slice); + if (ret < 0) + return ret; } - ctx->diff_coeffs_stride = stride; priv->stride = stride; @@ -820,13 +827,20 @@ void mg2di_egs_free(EGSContext **pctx) mg2di_bicgstab_context_free(&e->bicgstab); } - mg2di_ndarray_free(&ctx->a_u); + mg2di_ndarray_free(&ctx->u); mg2di_ndarray_free(&ctx->priv->u_base); + mg2di_ndarray_free(&ctx->rhs); mg2di_ndarray_free(&ctx->priv->rhs_base); + + mg2di_ndarray_free(&ctx->residual); mg2di_ndarray_free(&ctx->priv->residual_base); - for (int i = 0; i < ARRAY_ELEMS(ctx->priv->diff_coeffs_base); i++) + + for (int i = 0; i < ARRAY_ELEMS(ctx->priv->diff_coeffs_base); i++) { + mg2di_ndarray_free(&ctx->diff_coeffs[i]); mg2di_ndarray_free(&ctx->priv->diff_coeffs_base[i]); + } + for (int i = 0; i < ARRAY_ELEMS(ctx->boundaries); i++) mg2di_bc_free(&ctx->boundaries[i]); diff --git a/ell_grid_solve.h b/ell_grid_solve.h index 5455c13..d56616b 100644 --- a/ell_grid_solve.h +++ b/ell_grid_solve.h @@ -43,6 +43,7 @@ #include "log.h" #include "mg2d_boundary.h" #include "mg2d_constants.h" +#include "ndarray.h" enum EGSType { /** @@ -156,11 +157,7 @@ typedef struct EGSContext { * initial guess. * Afterwards updated in mg2di_egs_step(). */ - double *u; - /** - * Distance between neighbouring gridpoints along coord1. - */ - ptrdiff_t u_stride; + NDArray *u; /** * Values of the right-hand side. @@ -168,24 +165,16 @@ typedef struct EGSContext { * Allocated by the solver in mg2di_egs_alloc(), owned by the solver. * Must be filled by the caller before mg2di_egs_init(). */ - double *rhs; - /** - * Distance between neighbouring gridpoints along coord1. - */ - ptrdiff_t rhs_stride; + NDArray *rhs; /** - * Values of the right-hand side. + * Values of the residual. * * Allocated by the solver in mg2di_egs_alloc(), owned by the solver. * Read-only for the caller. Initialized after mg2di_egs_init(), * afterwards updated in mg2di_egs_step(). */ - double *residual; - /** - * Distance between neighbouring gridpoints along coord1. - */ - ptrdiff_t residual_stride; + NDArray *residual; /** * Maximum of the absolute value of residual. @@ -198,11 +187,7 @@ typedef struct EGSContext { * Allocated by the solver in mg2di_egs_alloc(), owned by the solver. * Must be filled by the caller before mg2di_egs_init(). */ - double *diff_coeffs[MG2D_DIFF_COEFF_NB]; - /** - * Distance between neighbouring gridpoints along coord1. - */ - ptrdiff_t diff_coeffs_stride; + NDArray *diff_coeffs[MG2D_DIFF_COEFF_NB]; /* timings */ int64_t time_boundaries; diff --git a/mg2d.c b/mg2d.c index 9949eee..a9b98a3 100644 --- a/mg2d.c +++ b/mg2d.c @@ -33,6 +33,7 @@ #include "mg2d_boundary.h" #include "mg2d_boundary_internal.h" #include "mg2d_constants.h" +#include "ndarray.h" typedef struct MG2DLevel { unsigned int depth; @@ -342,9 +343,9 @@ static int coarse_correct_task(void *arg, unsigned int job_idx, unsigned int thr MG2DLevel *level = arg; for (size_t idx0 = 0; idx0 < level->solver->domain_size[0]; idx0++) { - const ptrdiff_t idx_dst = job_idx * level->solver->u_stride + idx0; + const ptrdiff_t idx_dst = job_idx * level->solver->u->stride[0] + idx0; const ptrdiff_t idx_src = job_idx * level->prolong_tmp_stride + idx0; - level->solver->u[idx_dst] -= level->prolong_tmp[idx_src]; + level->solver->u->data[idx_dst] -= level->prolong_tmp[idx_src]; } return 0; } @@ -354,17 +355,17 @@ static void restrict_residual(MG2DContext *ctx, MG2DLevel *dst, MG2DLevel *src) EGSContext *s_src = src->solver; EGSContext *s_dst = dst->solver; if (s_src->domain_size[0] == 2 * (s_dst->domain_size[0] - 1) + 1) { - mg_restrict_fw(s_dst, s_dst->rhs, s_dst->rhs_stride, - s_src, s_src->residual, s_src->residual_stride); + mg_restrict_fw(s_dst, s_dst->rhs->data, s_dst->rhs->stride[0], + s_src, s_src->residual->data, s_src->residual->stride[0]); } else { if (ctx->fd_stencil == 1) mg_interp_bilinear(ctx->priv->tp, - s_dst, s_dst->rhs, s_dst->rhs_stride, - s_src, s_src->residual, s_src->residual_stride); + s_dst, s_dst->rhs->data, s_dst->rhs->stride[0], + s_src, s_src->residual->data, s_src->residual->stride[0]); else mg_interp_bicubic(ctx->priv->tp, - s_dst, s_dst->rhs, s_dst->rhs_stride, - s_src, s_src->residual, s_src->residual_stride); + s_dst, s_dst->rhs->data, s_dst->rhs->stride[0], + s_src, s_src->residual->data, s_src->residual->stride[0]); } } @@ -375,19 +376,19 @@ static void prolong_solution(MG2DContext *ctx, MG2DLevel *dst, MG2DLevel *src) if (s_dst->domain_size[0] == 2 * (s_src->domain_size[0] - 1) + 1) { if (ctx->fd_stencil == 1) mg_prolongate(s_dst, dst->prolong_tmp, dst->prolong_tmp_stride, - s_src, s_src->u, s_src->u_stride); + s_src, s_src->u->data, s_src->u->stride[0]); else mg_prolongate_bicubic(s_dst, dst->prolong_tmp, dst->prolong_tmp_stride, - s_src, s_src->u, s_src->u_stride); + s_src, s_src->u->data, s_src->u->stride[0]); } else { if (ctx->fd_stencil == 1) mg_interp_bilinear(ctx->priv->tp, s_dst, dst->prolong_tmp, dst->prolong_tmp_stride, - s_src, s_src->u, s_src->u_stride); + s_src, s_src->u->data, s_src->u->stride[0]); else mg_interp_bicubic(ctx->priv->tp, s_dst, dst->prolong_tmp, dst->prolong_tmp_stride, - s_src, s_src->u, s_src->u_stride); + s_src, s_src->u->data, s_src->u->stride[0]); } } @@ -421,7 +422,7 @@ static int mg_solve_subgrid(MG2DContext *ctx, MG2DLevel *level) /* on the refined levels, use zero as the initial guess for the * solution (correction for the upper level) */ if (level->depth > 0) { - memset(level->solver->u, 0, sizeof(*level->solver->u) * level->solver->u_stride * + memset(level->solver->u->data, 0, sizeof(*level->solver->u->data) * level->solver->u->stride[0] * level->solver->domain_size[1]); /* re-init the current-level solver (re-calc the residual) */ @@ -539,14 +540,14 @@ static void restrict_diff_coeffs(MG2DContext *ctx, MG2DLevel *dst, MG2DLevel *sr EGSContext *s_dst = dst->solver; if (s_src->domain_size[0] == 2 * (s_dst->domain_size[0] - 1) + 1) { for (int i = 0; i < ARRAY_ELEMS(s_src->diff_coeffs); i++) { - mg_restrict_inject(s_dst, s_dst->diff_coeffs[i], s_dst->diff_coeffs_stride, - s_src, s_src->diff_coeffs[i], s_src->diff_coeffs_stride); + mg_restrict_inject(s_dst, s_dst->diff_coeffs[i]->data, s_dst->diff_coeffs[i]->stride[0], + s_src, s_src->diff_coeffs[i]->data, s_src->diff_coeffs[i]->stride[0]); } } else { for (int i = 0; i < ARRAY_ELEMS(s_src->diff_coeffs); i++) { mg_interp_bilinear(ctx->priv->tp, - s_dst, s_dst->diff_coeffs[i], s_dst->diff_coeffs_stride, - s_src, s_src->diff_coeffs[i], s_src->diff_coeffs_stride); + s_dst, s_dst->diff_coeffs[i]->data, s_dst->diff_coeffs[i]->stride[0], + s_src, s_src->diff_coeffs[i]->data, s_src->diff_coeffs[i]->stride[0]); } } } @@ -580,10 +581,10 @@ static int mg_levels_init(MG2DContext *ctx) cur = priv->root; prev = NULL; - array_copy(cur->solver->u, cur->solver->u_stride, ctx->u, ctx->u_stride, ctx->domain_size, ctx->domain_size); - array_copy(cur->solver->rhs, cur->solver->rhs_stride, ctx->rhs, ctx->rhs_stride, ctx->domain_size, ctx->domain_size); + array_copy(cur->solver->u->data, cur->solver->u->stride[0], ctx->u, ctx->u_stride, ctx->domain_size, ctx->domain_size); + array_copy(cur->solver->rhs->data, cur->solver->rhs->stride[0], ctx->rhs, ctx->rhs_stride, ctx->domain_size, ctx->domain_size); for (int i = 0; i < ARRAY_ELEMS(ctx->diff_coeffs); i++) { - array_copy(cur->solver->diff_coeffs[i], cur->solver->diff_coeffs_stride, + array_copy(cur->solver->diff_coeffs[i]->data, cur->solver->diff_coeffs[i]->stride[0], ctx->diff_coeffs[i], ctx->diff_coeffs_stride, ctx->domain_size, ctx->domain_size); } @@ -709,7 +710,7 @@ int mg2d_solve(MG2DContext *ctx) mg2di_log(&priv->logger, MG2D_LOG_INFO, "converged on iteration %d, residual %g\n", i, res_cur); - array_copy(ctx->u, ctx->u_stride, s_root->u, s_root->u_stride, ctx->domain_size, ctx->domain_size); + array_copy(ctx->u, ctx->u_stride, s_root->u->data, s_root->u->stride[0], ctx->domain_size, ctx->domain_size); priv->time_solve += gettime() - time_start; priv->count_solve++; diff --git a/relax_test.c b/relax_test.c index 1e9b962..54155ca 100644 --- a/relax_test.c +++ b/relax_test.c @@ -8,6 +8,7 @@ #include "log.h" #include "mg2d_boundary.h" #include "mg2d_constants.h" +#include "ndarray.h" #define ARRAY_ELEMS(x) (sizeof(x) / sizeof(*x)) #define DOMAIN_SIZE 1.0 @@ -113,24 +114,24 @@ int main(int argc, char **argv) for (size_t y = 0; y < ctx->domain_size[1]; y++) { const double y_coord = y * ctx->step[1]; - memset(ctx->u + y * ctx->u_stride, 0, sizeof(*ctx->u) * ctx->domain_size[0]); + memset(NDPTR2D(ctx->u, 0, y), 0, sizeof(*ctx->u->data) * ctx->domain_size[0]); for (size_t x = 0; x < ctx->domain_size[0]; x++) { const double x_coord = x * ctx->step[0]; - ctx->diff_coeffs[MG2D_DIFF_COEFF_02][ctx->diff_coeffs_stride * y + x] = 1.0; - ctx->diff_coeffs[MG2D_DIFF_COEFF_20][ctx->diff_coeffs_stride * y + x] = 1.0; - ctx->diff_coeffs[MG2D_DIFF_COEFF_11][ctx->diff_coeffs_stride * y + x] = 1.0; + *NDPTR2D(ctx->diff_coeffs[MG2D_DIFF_COEFF_02], x, y) = 1.0; + *NDPTR2D(ctx->diff_coeffs[MG2D_DIFF_COEFF_20], x, y) = 1.0; + *NDPTR2D(ctx->diff_coeffs[MG2D_DIFF_COEFF_11], x, y) = 1.0; - ctx->rhs[y * ctx->rhs_stride + x] = sol_dxx(x_coord, y_coord) + sol_dyy(x_coord, y_coord) + sol_dxy(x_coord, y_coord); + *NDPTR2D(ctx->rhs, x, y) = sol_dxx(x_coord, y_coord) + sol_dyy(x_coord, y_coord) + sol_dxy(x_coord, y_coord); } - memset(ctx->diff_coeffs[MG2D_DIFF_COEFF_00] + y * ctx->diff_coeffs_stride, 0, - sizeof(*ctx->diff_coeffs[0]) * ctx->domain_size[0]); - memset(ctx->diff_coeffs[MG2D_DIFF_COEFF_01] + y * ctx->diff_coeffs_stride, 0, - sizeof(*ctx->diff_coeffs[0]) * ctx->domain_size[0]); - memset(ctx->diff_coeffs[MG2D_DIFF_COEFF_10] + y * ctx->diff_coeffs_stride, 0, - sizeof(*ctx->diff_coeffs[0]) * ctx->domain_size[0]); + memset(NDPTR2D(ctx->diff_coeffs[MG2D_DIFF_COEFF_00], 0, y), 0, + sizeof(*ctx->diff_coeffs[0]->data) * ctx->domain_size[0]); + memset(NDPTR2D(ctx->diff_coeffs[MG2D_DIFF_COEFF_01], 0, y), 0, + sizeof(*ctx->diff_coeffs[0]->data) * ctx->domain_size[0]); + memset(NDPTR2D(ctx->diff_coeffs[MG2D_DIFF_COEFF_10], 0, y), 0, + sizeof(*ctx->diff_coeffs[0]->data) * ctx->domain_size[0]); } ret = mg2di_egs_init(ctx); @@ -140,7 +141,7 @@ int main(int argc, char **argv) goto fail; } - res_old = findmax(ctx->residual, ctx->residual_stride * ctx->domain_size[1]); + res_old = findmax(ctx->residual->data, ctx->residual->stride[0] * ctx->domain_size[1]); for (int i = 0; i < maxiter; i++) { ret = mg2di_egs_solve(ctx); @@ -149,7 +150,7 @@ int main(int argc, char **argv) ret = 1; goto fail; } - res_new = findmax(ctx->residual, ctx->residual_stride * ctx->domain_size[1]); + res_new = findmax(ctx->residual->data, ctx->residual->stride[0] * ctx->domain_size[1]); if (res_new > 1e3) { fprintf(stderr, "Diverged at step %d: %g -> %g\n", i, res_old, res_new); goto fail; @@ -166,7 +167,7 @@ int main(int argc, char **argv) for (size_t x = 0; x < ctx->domain_size[0]; x++) { const double x_coord = x * ctx->step[0]; - double err = fabs(ctx->u[y * ctx->u_stride + x] - sol(x_coord, y_coord)); + double err = fabs(ctx->u->data[y * ctx->u->stride[0] + x] - sol(x_coord, y_coord)); if (err > max_err) max_err = err; } -- cgit v1.2.3