aboutsummaryrefslogtreecommitdiff
path: root/ell_grid_solve.c
diff options
context:
space:
mode:
authorAnton Khirnov <anton@khirnov.net>2019-03-22 18:50:55 +0100
committerAnton Khirnov <anton@khirnov.net>2019-03-22 20:12:10 +0100
commitc2ead14543d11a532a8ac564119a7a10160fdc41 (patch)
treec6c6da6d323f47896b85acc4cb75e01dc516c9a1 /ell_grid_solve.c
parentaa0b903240d5c0ea5b0af8f5dc5aeb0845fd69b7 (diff)
ell_grid_solve: switch to ndarray in its external API
Diffstat (limited to 'ell_grid_solve.c')
-rw-r--r--ell_grid_solve.c178
1 files changed, 96 insertions, 82 deletions
diff --git a/ell_grid_solve.c b/ell_grid_solve.c
index 117cbd1..49b2321 100644
--- a/ell_grid_solve.c
+++ b/ell_grid_solve.c
@@ -47,7 +47,7 @@ typedef struct EGSRelaxInternal {
} EGSRelaxInternal;
typedef struct EGSExactInternal {
- void (*fill_mat)(double *mat_row, double **diff_coeffs, double *fd_factors,
+ void (*fill_mat)(double *mat_row, NDArray **diff_coeffs, double *fd_factors,
ptrdiff_t idx_src, ptrdiff_t row_stride, ptrdiff_t row_offset);
size_t N;
size_t N_ghosts;
@@ -111,11 +111,11 @@ static void residual_calc(EGSContext *ctx)
start = gettime();
for (int i = 0; i < ARRAY_ELEMS(diff_coeffs); i++)
- diff_coeffs[i] = ctx->diff_coeffs[i] + priv->residual_calc_offset;
+ diff_coeffs[i] = ctx->diff_coeffs[i]->data + priv->residual_calc_offset;
mg2di_residual_calc(priv->rescalc, priv->residual_calc_size, priv->stride,
- &ctx->residual_max, ctx->residual + priv->residual_calc_offset,
- ctx->u + priv->residual_calc_offset, ctx->rhs + priv->residual_calc_offset,
+ &ctx->residual_max, ctx->residual->data + priv->residual_calc_offset,
+ ctx->u->data + priv->residual_calc_offset, ctx->rhs->data + priv->residual_calc_offset,
diff_coeffs, priv->fd_factors);
ctx->time_res_calc += gettime() - start;
@@ -191,7 +191,7 @@ static void boundaries_apply(EGSContext *ctx)
const size_t size_boundary = ctx->domain_size[!ci];
const size_t size_offset = ctx->domain_size[ci];
- double *dst = ctx->u + mg2d_bnd_is_upper(i) * ((size_offset - 1) * stride_offset);
+ double *dst = ctx->u->data + mg2d_bnd_is_upper(i) * ((size_offset - 1) * stride_offset);
const ptrdiff_t dst_strides[] = { stride_boundary, mg2d_bnd_out_dir(i) * stride_offset };
switch (ctx->boundaries[i]->type) {
@@ -222,7 +222,7 @@ static void boundaries_apply(EGSContext *ctx)
const int dir_x = mg2d_bnd_out_dir(loc_x);
int fact_x = dir_x, fact_y = dir_y;
- double *dst = ctx->u
+ double *dst = ctx->u->data
+ mg2d_bnd_is_upper(loc_y) * ((ctx->domain_size[1] - 1) * priv->stride)
+ mg2d_bnd_is_upper(loc_x) * (ctx->domain_size[0] - 1);
@@ -251,9 +251,9 @@ static int residual_add_task(void *arg, unsigned int job_idx, unsigned int threa
EGSInternal *priv = ctx->priv;
for (int idx0 = 0; idx0 < ctx->domain_size[0]; idx0++) {
- ptrdiff_t idx = job_idx * ctx->u_stride + idx0;
+ ptrdiff_t idx = job_idx * ctx->u->stride[0] + idx0;
- ctx->u[idx] += priv->r.relax_factor * ctx->residual[idx];
+ ctx->u->data[idx] += priv->r.relax_factor * ctx->residual->data[idx];
}
return 0;
@@ -277,78 +277,78 @@ static int solve_relax_step(EGSContext *ctx)
return 0;
}
-static void fill_mat_s1(double *mat_row, double **diff_coeffs, double *fd_factors,
+static void fill_mat_s1(double *mat_row, NDArray **diff_coeffs, double *fd_factors,
ptrdiff_t idx_src, ptrdiff_t row_stride, ptrdiff_t row_offset)
{
- mat_row[row_offset + 0] += diff_coeffs[MG2D_DIFF_COEFF_00][idx_src];
+ mat_row[row_offset + 0] += diff_coeffs[MG2D_DIFF_COEFF_00]->data[idx_src];
- mat_row[row_offset + 1] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_10][idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
- mat_row[row_offset - 1] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_10][idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
+ mat_row[row_offset + 1] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
+ mat_row[row_offset - 1] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
- mat_row[row_offset + 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_01][idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
- mat_row[row_offset - 1 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_01][idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
+ mat_row[row_offset + 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
+ mat_row[row_offset - 1 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
- mat_row[row_offset + 1] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_20][idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
- mat_row[row_offset + 0] += -2.0 * diff_coeffs[MG2D_DIFF_COEFF_20][idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
- mat_row[row_offset - 1] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_20][idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
+ mat_row[row_offset + 1] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
+ mat_row[row_offset + 0] += -2.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
+ mat_row[row_offset - 1] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
- mat_row[row_offset + 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_02][idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
- mat_row[row_offset + 0 * row_stride] += -2.0 * diff_coeffs[MG2D_DIFF_COEFF_02][idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
- mat_row[row_offset - 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_02][idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
+ mat_row[row_offset + 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
+ mat_row[row_offset + 0 * row_stride] += -2.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
+ mat_row[row_offset - 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
- mat_row[row_offset + 1 + 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[row_offset + 1 - 1 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[row_offset - 1 + 1 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[row_offset - 1 - 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[row_offset + 1 + 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[row_offset + 1 - 1 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[row_offset - 1 + 1 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[row_offset - 1 - 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
}
-static void fill_mat_s2(double *mat_row, double **diff_coeffs, double *fd_factors,
+static void fill_mat_s2(double *mat_row, NDArray **diff_coeffs, double *fd_factors,
ptrdiff_t idx_src, ptrdiff_t row_stride, ptrdiff_t row_offset)
{
- mat_row[row_offset + 0] += diff_coeffs[MG2D_DIFF_COEFF_00][idx_src];
-
- mat_row[row_offset + 2] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_10][idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
- mat_row[row_offset + 1] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_10][idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
- mat_row[row_offset - 1] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_10][idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
- mat_row[row_offset - 2] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_10][idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
-
- mat_row[row_offset + 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_01][idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
- mat_row[row_offset + 1 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_01][idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
- mat_row[row_offset - 1 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_01][idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
- mat_row[row_offset - 2 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_01][idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
-
- mat_row[row_offset + 2] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_20][idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
- mat_row[row_offset + 1] += 16.0 * diff_coeffs[MG2D_DIFF_COEFF_20][idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
- mat_row[row_offset + 0] += -30.0 * diff_coeffs[MG2D_DIFF_COEFF_20][idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
- mat_row[row_offset - 1] += 16.0 * diff_coeffs[MG2D_DIFF_COEFF_20][idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
- mat_row[row_offset - 2] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_20][idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
-
- mat_row[row_offset + 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_02][idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
- mat_row[row_offset + 1 * row_stride] += 16.0 * diff_coeffs[MG2D_DIFF_COEFF_02][idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
- mat_row[row_offset + 0 * row_stride] += -30.0 * diff_coeffs[MG2D_DIFF_COEFF_02][idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
- mat_row[row_offset - 1 * row_stride] += 16.0 * diff_coeffs[MG2D_DIFF_COEFF_02][idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
- mat_row[row_offset - 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_02][idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
-
- mat_row[row_offset + 2 + 2 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[row_offset + 2 + 1 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[row_offset + 2 - 1 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[row_offset + 2 - 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
-
- mat_row[row_offset + 1 + 2 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[row_offset + 1 + 1 * row_stride] += 64.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[row_offset + 1 - 1 * row_stride] += -64.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[row_offset + 1 - 2 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
-
- mat_row[row_offset - 1 + 2 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[row_offset - 1 + 1 * row_stride] += -64.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[row_offset - 1 - 1 * row_stride] += 64.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[row_offset - 1 - 2 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
-
- mat_row[row_offset - 2 + 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[row_offset - 2 + 1 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[row_offset - 2 - 1 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[row_offset - 2 - 2 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[row_offset + 0] += diff_coeffs[MG2D_DIFF_COEFF_00]->data[idx_src];
+
+ mat_row[row_offset + 2] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
+ mat_row[row_offset + 1] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
+ mat_row[row_offset - 1] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
+ mat_row[row_offset - 2] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
+
+ mat_row[row_offset + 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
+ mat_row[row_offset + 1 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
+ mat_row[row_offset - 1 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
+ mat_row[row_offset - 2 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
+
+ mat_row[row_offset + 2] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
+ mat_row[row_offset + 1] += 16.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
+ mat_row[row_offset + 0] += -30.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
+ mat_row[row_offset - 1] += 16.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
+ mat_row[row_offset - 2] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
+
+ mat_row[row_offset + 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
+ mat_row[row_offset + 1 * row_stride] += 16.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
+ mat_row[row_offset + 0 * row_stride] += -30.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
+ mat_row[row_offset - 1 * row_stride] += 16.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
+ mat_row[row_offset - 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
+
+ mat_row[row_offset + 2 + 2 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[row_offset + 2 + 1 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[row_offset + 2 - 1 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[row_offset + 2 - 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+
+ mat_row[row_offset + 1 + 2 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[row_offset + 1 + 1 * row_stride] += 64.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[row_offset + 1 - 1 * row_stride] += -64.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[row_offset + 1 - 2 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+
+ mat_row[row_offset - 1 + 2 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[row_offset - 1 + 1 * row_stride] += -64.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[row_offset - 1 - 1 * row_stride] += 64.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[row_offset - 1 - 2 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+
+ mat_row[row_offset - 2 + 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[row_offset - 2 + 1 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[row_offset - 2 - 1 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[row_offset - 2 - 2 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
}
static int solve_exact(EGSContext *ctx)
@@ -396,7 +396,7 @@ static int solve_exact(EGSContext *ctx)
e->fill_mat(mat_row, ctx->diff_coeffs, ctx->priv->fd_factors, idx_src, row_stride, row_offset);
- e->rhs[mat_row_idx] = ctx->rhs[idx_src];
+ e->rhs[mat_row_idx] = ctx->rhs->data[idx_src];
if (boundary) {
double *mat_dst = e->mat + e->N * mat_row_idx;
@@ -554,7 +554,7 @@ static int solve_exact(EGSContext *ctx)
}
for (size_t idx1 = 0; idx1 < ctx->domain_size[1]; idx1++)
- memcpy(ctx->u + idx1 * ctx->u_stride, e->x + idx1 * ctx->domain_size[0], ctx->domain_size[0] * sizeof(*e->x));
+ memcpy(ctx->u->data + idx1 * ctx->u->stride[0], e->x + idx1 * ctx->domain_size[0], ctx->domain_size[0] * sizeof(*e->x));
ec->time_lin_solve += gettime() - start;
ec->count_lin_solve++;
@@ -637,7 +637,7 @@ int mg2di_egs_init(EGSContext *ctx)
priv->residual_calc_size[0] = ctx->domain_size[0];
priv->residual_calc_size[1] = ctx->domain_size[1];
if (ctx->boundaries[MG2D_BOUNDARY_1L]->type == MG2D_BC_TYPE_FIXVAL)
- priv->residual_calc_offset += ctx->residual_stride;
+ priv->residual_calc_offset += ctx->residual->stride[0];
if (ctx->boundaries[MG2D_BOUNDARY_0L]->type == MG2D_BC_TYPE_FIXVAL)
priv->residual_calc_offset++;
@@ -675,7 +675,6 @@ static int arrays_alloc(EGSContext *ctx, const size_t domain_size[2])
domain_size[0] + 2 * ghosts,
};
const size_t stride = size_padded[0];
- const size_t start_offset = ghosts * stride + ghosts;
const Slice slice[2] = { SLICE(ghosts, -ghosts, 1),
SLICE(ghosts, -ghosts, 1) };
int ret;
@@ -683,30 +682,38 @@ static int arrays_alloc(EGSContext *ctx, const size_t domain_size[2])
ret = mg2di_ndarray_alloc(&priv->u_base, 2, size_padded, 0);
if (ret < 0)
return ret;
- ctx->u = priv->u_base->data + start_offset;
- ctx->u_stride = priv->u_base->stride[0];
+
+ ret = mg2di_ndarray_slice(&ctx->u, priv->u_base, slice);
+ if (ret < 0)
+ return ret;
ret = mg2di_ndarray_alloc(&priv->rhs_base, 2, size_padded, 0);
if (ret < 0)
return ret;
- ctx->rhs = priv->rhs_base->data + start_offset;
- ctx->rhs_stride = priv->rhs_base->stride[0];
+
+ ret = mg2di_ndarray_slice(&ctx->rhs, priv->rhs_base, slice);
+ if (ret < 0)
+ return ret;
ret = mg2di_ndarray_alloc(&priv->residual_base, 2, size_padded,
NDARRAY_ALLOC_ZERO);
if (ret < 0)
return ret;
- ctx->residual = priv->residual_base->data + start_offset;
- ctx->residual_stride = priv->residual_base->stride[0];
+
+ ret = mg2di_ndarray_slice(&ctx->residual, priv->residual_base, slice);
+ if (ret < 0)
+ return ret;
for (int i = 0; i < ARRAY_ELEMS(ctx->diff_coeffs); i++) {
ret = mg2di_ndarray_alloc(&priv->diff_coeffs_base[i], 2,
size_padded, NDARRAY_ALLOC_ZERO);
if (ret < 0)
return ret;
- ctx->diff_coeffs[i] = priv->diff_coeffs_base[i]->data + start_offset;
+
+ ret = mg2di_ndarray_slice(&ctx->diff_coeffs[i], priv->diff_coeffs_base[i], slice);
+ if (ret < 0)
+ return ret;
}
- ctx->diff_coeffs_stride = stride;
priv->stride = stride;
@@ -820,13 +827,20 @@ void mg2di_egs_free(EGSContext **pctx)
mg2di_bicgstab_context_free(&e->bicgstab);
}
- mg2di_ndarray_free(&ctx->a_u);
+ mg2di_ndarray_free(&ctx->u);
mg2di_ndarray_free(&ctx->priv->u_base);
+ mg2di_ndarray_free(&ctx->rhs);
mg2di_ndarray_free(&ctx->priv->rhs_base);
+
+ mg2di_ndarray_free(&ctx->residual);
mg2di_ndarray_free(&ctx->priv->residual_base);
- for (int i = 0; i < ARRAY_ELEMS(ctx->priv->diff_coeffs_base); i++)
+
+ for (int i = 0; i < ARRAY_ELEMS(ctx->priv->diff_coeffs_base); i++) {
+ mg2di_ndarray_free(&ctx->diff_coeffs[i]);
mg2di_ndarray_free(&ctx->priv->diff_coeffs_base[i]);
+ }
+
for (int i = 0; i < ARRAY_ELEMS(ctx->boundaries); i++)
mg2di_bc_free(&ctx->boundaries[i]);