aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnton Khirnov <anton@khirnov.net>2019-03-22 18:50:55 +0100
committerAnton Khirnov <anton@khirnov.net>2019-03-22 20:12:10 +0100
commitc2ead14543d11a532a8ac564119a7a10160fdc41 (patch)
treec6c6da6d323f47896b85acc4cb75e01dc516c9a1
parentaa0b903240d5c0ea5b0af8f5dc5aeb0845fd69b7 (diff)
ell_grid_solve: switch to ndarray in its external API
-rw-r--r--ell_grid_solve.c178
-rw-r--r--ell_grid_solve.h27
-rw-r--r--mg2d.c43
-rw-r--r--relax_test.c29
4 files changed, 139 insertions, 138 deletions
diff --git a/ell_grid_solve.c b/ell_grid_solve.c
index 117cbd1..49b2321 100644
--- a/ell_grid_solve.c
+++ b/ell_grid_solve.c
@@ -47,7 +47,7 @@ typedef struct EGSRelaxInternal {
} EGSRelaxInternal;
typedef struct EGSExactInternal {
- void (*fill_mat)(double *mat_row, double **diff_coeffs, double *fd_factors,
+ void (*fill_mat)(double *mat_row, NDArray **diff_coeffs, double *fd_factors,
ptrdiff_t idx_src, ptrdiff_t row_stride, ptrdiff_t row_offset);
size_t N;
size_t N_ghosts;
@@ -111,11 +111,11 @@ static void residual_calc(EGSContext *ctx)
start = gettime();
for (int i = 0; i < ARRAY_ELEMS(diff_coeffs); i++)
- diff_coeffs[i] = ctx->diff_coeffs[i] + priv->residual_calc_offset;
+ diff_coeffs[i] = ctx->diff_coeffs[i]->data + priv->residual_calc_offset;
mg2di_residual_calc(priv->rescalc, priv->residual_calc_size, priv->stride,
- &ctx->residual_max, ctx->residual + priv->residual_calc_offset,
- ctx->u + priv->residual_calc_offset, ctx->rhs + priv->residual_calc_offset,
+ &ctx->residual_max, ctx->residual->data + priv->residual_calc_offset,
+ ctx->u->data + priv->residual_calc_offset, ctx->rhs->data + priv->residual_calc_offset,
diff_coeffs, priv->fd_factors);
ctx->time_res_calc += gettime() - start;
@@ -191,7 +191,7 @@ static void boundaries_apply(EGSContext *ctx)
const size_t size_boundary = ctx->domain_size[!ci];
const size_t size_offset = ctx->domain_size[ci];
- double *dst = ctx->u + mg2d_bnd_is_upper(i) * ((size_offset - 1) * stride_offset);
+ double *dst = ctx->u->data + mg2d_bnd_is_upper(i) * ((size_offset - 1) * stride_offset);
const ptrdiff_t dst_strides[] = { stride_boundary, mg2d_bnd_out_dir(i) * stride_offset };
switch (ctx->boundaries[i]->type) {
@@ -222,7 +222,7 @@ static void boundaries_apply(EGSContext *ctx)
const int dir_x = mg2d_bnd_out_dir(loc_x);
int fact_x = dir_x, fact_y = dir_y;
- double *dst = ctx->u
+ double *dst = ctx->u->data
+ mg2d_bnd_is_upper(loc_y) * ((ctx->domain_size[1] - 1) * priv->stride)
+ mg2d_bnd_is_upper(loc_x) * (ctx->domain_size[0] - 1);
@@ -251,9 +251,9 @@ static int residual_add_task(void *arg, unsigned int job_idx, unsigned int threa
EGSInternal *priv = ctx->priv;
for (int idx0 = 0; idx0 < ctx->domain_size[0]; idx0++) {
- ptrdiff_t idx = job_idx * ctx->u_stride + idx0;
+ ptrdiff_t idx = job_idx * ctx->u->stride[0] + idx0;
- ctx->u[idx] += priv->r.relax_factor * ctx->residual[idx];
+ ctx->u->data[idx] += priv->r.relax_factor * ctx->residual->data[idx];
}
return 0;
@@ -277,78 +277,78 @@ static int solve_relax_step(EGSContext *ctx)
return 0;
}
-static void fill_mat_s1(double *mat_row, double **diff_coeffs, double *fd_factors,
+static void fill_mat_s1(double *mat_row, NDArray **diff_coeffs, double *fd_factors,
ptrdiff_t idx_src, ptrdiff_t row_stride, ptrdiff_t row_offset)
{
- mat_row[row_offset + 0] += diff_coeffs[MG2D_DIFF_COEFF_00][idx_src];
+ mat_row[row_offset + 0] += diff_coeffs[MG2D_DIFF_COEFF_00]->data[idx_src];
- mat_row[row_offset + 1] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_10][idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
- mat_row[row_offset - 1] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_10][idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
+ mat_row[row_offset + 1] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
+ mat_row[row_offset - 1] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
- mat_row[row_offset + 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_01][idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
- mat_row[row_offset - 1 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_01][idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
+ mat_row[row_offset + 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
+ mat_row[row_offset - 1 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
- mat_row[row_offset + 1] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_20][idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
- mat_row[row_offset + 0] += -2.0 * diff_coeffs[MG2D_DIFF_COEFF_20][idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
- mat_row[row_offset - 1] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_20][idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
+ mat_row[row_offset + 1] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
+ mat_row[row_offset + 0] += -2.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
+ mat_row[row_offset - 1] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
- mat_row[row_offset + 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_02][idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
- mat_row[row_offset + 0 * row_stride] += -2.0 * diff_coeffs[MG2D_DIFF_COEFF_02][idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
- mat_row[row_offset - 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_02][idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
+ mat_row[row_offset + 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
+ mat_row[row_offset + 0 * row_stride] += -2.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
+ mat_row[row_offset - 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
- mat_row[row_offset + 1 + 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[row_offset + 1 - 1 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[row_offset - 1 + 1 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[row_offset - 1 - 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[row_offset + 1 + 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[row_offset + 1 - 1 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[row_offset - 1 + 1 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[row_offset - 1 - 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
}
-static void fill_mat_s2(double *mat_row, double **diff_coeffs, double *fd_factors,
+static void fill_mat_s2(double *mat_row, NDArray **diff_coeffs, double *fd_factors,
ptrdiff_t idx_src, ptrdiff_t row_stride, ptrdiff_t row_offset)
{
- mat_row[row_offset + 0] += diff_coeffs[MG2D_DIFF_COEFF_00][idx_src];
-
- mat_row[row_offset + 2] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_10][idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
- mat_row[row_offset + 1] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_10][idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
- mat_row[row_offset - 1] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_10][idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
- mat_row[row_offset - 2] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_10][idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
-
- mat_row[row_offset + 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_01][idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
- mat_row[row_offset + 1 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_01][idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
- mat_row[row_offset - 1 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_01][idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
- mat_row[row_offset - 2 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_01][idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
-
- mat_row[row_offset + 2] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_20][idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
- mat_row[row_offset + 1] += 16.0 * diff_coeffs[MG2D_DIFF_COEFF_20][idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
- mat_row[row_offset + 0] += -30.0 * diff_coeffs[MG2D_DIFF_COEFF_20][idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
- mat_row[row_offset - 1] += 16.0 * diff_coeffs[MG2D_DIFF_COEFF_20][idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
- mat_row[row_offset - 2] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_20][idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
-
- mat_row[row_offset + 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_02][idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
- mat_row[row_offset + 1 * row_stride] += 16.0 * diff_coeffs[MG2D_DIFF_COEFF_02][idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
- mat_row[row_offset + 0 * row_stride] += -30.0 * diff_coeffs[MG2D_DIFF_COEFF_02][idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
- mat_row[row_offset - 1 * row_stride] += 16.0 * diff_coeffs[MG2D_DIFF_COEFF_02][idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
- mat_row[row_offset - 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_02][idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
-
- mat_row[row_offset + 2 + 2 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[row_offset + 2 + 1 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[row_offset + 2 - 1 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[row_offset + 2 - 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
-
- mat_row[row_offset + 1 + 2 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[row_offset + 1 + 1 * row_stride] += 64.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[row_offset + 1 - 1 * row_stride] += -64.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[row_offset + 1 - 2 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
-
- mat_row[row_offset - 1 + 2 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[row_offset - 1 + 1 * row_stride] += -64.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[row_offset - 1 - 1 * row_stride] += 64.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[row_offset - 1 - 2 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
-
- mat_row[row_offset - 2 + 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[row_offset - 2 + 1 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[row_offset - 2 - 1 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[row_offset - 2 - 2 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[row_offset + 0] += diff_coeffs[MG2D_DIFF_COEFF_00]->data[idx_src];
+
+ mat_row[row_offset + 2] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
+ mat_row[row_offset + 1] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
+ mat_row[row_offset - 1] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
+ mat_row[row_offset - 2] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
+
+ mat_row[row_offset + 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
+ mat_row[row_offset + 1 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
+ mat_row[row_offset - 1 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
+ mat_row[row_offset - 2 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
+
+ mat_row[row_offset + 2] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
+ mat_row[row_offset + 1] += 16.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
+ mat_row[row_offset + 0] += -30.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
+ mat_row[row_offset - 1] += 16.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
+ mat_row[row_offset - 2] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
+
+ mat_row[row_offset + 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
+ mat_row[row_offset + 1 * row_stride] += 16.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
+ mat_row[row_offset + 0 * row_stride] += -30.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
+ mat_row[row_offset - 1 * row_stride] += 16.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
+ mat_row[row_offset - 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
+
+ mat_row[row_offset + 2 + 2 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[row_offset + 2 + 1 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[row_offset + 2 - 1 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[row_offset + 2 - 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+
+ mat_row[row_offset + 1 + 2 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[row_offset + 1 + 1 * row_stride] += 64.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[row_offset + 1 - 1 * row_stride] += -64.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[row_offset + 1 - 2 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+
+ mat_row[row_offset - 1 + 2 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[row_offset - 1 + 1 * row_stride] += -64.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[row_offset - 1 - 1 * row_stride] += 64.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[row_offset - 1 - 2 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+
+ mat_row[row_offset - 2 + 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[row_offset - 2 + 1 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[row_offset - 2 - 1 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[row_offset - 2 - 2 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
}
static int solve_exact(EGSContext *ctx)
@@ -396,7 +396,7 @@ static int solve_exact(EGSContext *ctx)
e->fill_mat(mat_row, ctx->diff_coeffs, ctx->priv->fd_factors, idx_src, row_stride, row_offset);
- e->rhs[mat_row_idx] = ctx->rhs[idx_src];
+ e->rhs[mat_row_idx] = ctx->rhs->data[idx_src];
if (boundary) {
double *mat_dst = e->mat + e->N * mat_row_idx;
@@ -554,7 +554,7 @@ static int solve_exact(EGSContext *ctx)
}
for (size_t idx1 = 0; idx1 < ctx->domain_size[1]; idx1++)
- memcpy(ctx->u + idx1 * ctx->u_stride, e->x + idx1 * ctx->domain_size[0], ctx->domain_size[0] * sizeof(*e->x));
+ memcpy(ctx->u->data + idx1 * ctx->u->stride[0], e->x + idx1 * ctx->domain_size[0], ctx->domain_size[0] * sizeof(*e->x));
ec->time_lin_solve += gettime() - start;
ec->count_lin_solve++;
@@ -637,7 +637,7 @@ int mg2di_egs_init(EGSContext *ctx)
priv->residual_calc_size[0] = ctx->domain_size[0];
priv->residual_calc_size[1] = ctx->domain_size[1];
if (ctx->boundaries[MG2D_BOUNDARY_1L]->type == MG2D_BC_TYPE_FIXVAL)
- priv->residual_calc_offset += ctx->residual_stride;
+ priv->residual_calc_offset += ctx->residual->stride[0];
if (ctx->boundaries[MG2D_BOUNDARY_0L]->type == MG2D_BC_TYPE_FIXVAL)
priv->residual_calc_offset++;
@@ -675,7 +675,6 @@ static int arrays_alloc(EGSContext *ctx, const size_t domain_size[2])
domain_size[0] + 2 * ghosts,
};
const size_t stride = size_padded[0];
- const size_t start_offset = ghosts * stride + ghosts;
const Slice slice[2] = { SLICE(ghosts, -ghosts, 1),
SLICE(ghosts, -ghosts, 1) };
int ret;
@@ -683,30 +682,38 @@ static int arrays_alloc(EGSContext *ctx, const size_t domain_size[2])
ret = mg2di_ndarray_alloc(&priv->u_base, 2, size_padded, 0);
if (ret < 0)
return ret;
- ctx->u = priv->u_base->data + start_offset;
- ctx->u_stride = priv->u_base->stride[0];
+
+ ret = mg2di_ndarray_slice(&ctx->u, priv->u_base, slice);
+ if (ret < 0)
+ return ret;
ret = mg2di_ndarray_alloc(&priv->rhs_base, 2, size_padded, 0);
if (ret < 0)
return ret;
- ctx->rhs = priv->rhs_base->data + start_offset;
- ctx->rhs_stride = priv->rhs_base->stride[0];
+
+ ret = mg2di_ndarray_slice(&ctx->rhs, priv->rhs_base, slice);
+ if (ret < 0)
+ return ret;
ret = mg2di_ndarray_alloc(&priv->residual_base, 2, size_padded,
NDARRAY_ALLOC_ZERO);
if (ret < 0)
return ret;
- ctx->residual = priv->residual_base->data + start_offset;
- ctx->residual_stride = priv->residual_base->stride[0];
+
+ ret = mg2di_ndarray_slice(&ctx->residual, priv->residual_base, slice);
+ if (ret < 0)
+ return ret;
for (int i = 0; i < ARRAY_ELEMS(ctx->diff_coeffs); i++) {
ret = mg2di_ndarray_alloc(&priv->diff_coeffs_base[i], 2,
size_padded, NDARRAY_ALLOC_ZERO);
if (ret < 0)
return ret;
- ctx->diff_coeffs[i] = priv->diff_coeffs_base[i]->data + start_offset;
+
+ ret = mg2di_ndarray_slice(&ctx->diff_coeffs[i], priv->diff_coeffs_base[i], slice);
+ if (ret < 0)
+ return ret;
}
- ctx->diff_coeffs_stride = stride;
priv->stride = stride;
@@ -820,13 +827,20 @@ void mg2di_egs_free(EGSContext **pctx)
mg2di_bicgstab_context_free(&e->bicgstab);
}
- mg2di_ndarray_free(&ctx->a_u);
+ mg2di_ndarray_free(&ctx->u);
mg2di_ndarray_free(&ctx->priv->u_base);
+ mg2di_ndarray_free(&ctx->rhs);
mg2di_ndarray_free(&ctx->priv->rhs_base);
+
+ mg2di_ndarray_free(&ctx->residual);
mg2di_ndarray_free(&ctx->priv->residual_base);
- for (int i = 0; i < ARRAY_ELEMS(ctx->priv->diff_coeffs_base); i++)
+
+ for (int i = 0; i < ARRAY_ELEMS(ctx->priv->diff_coeffs_base); i++) {
+ mg2di_ndarray_free(&ctx->diff_coeffs[i]);
mg2di_ndarray_free(&ctx->priv->diff_coeffs_base[i]);
+ }
+
for (int i = 0; i < ARRAY_ELEMS(ctx->boundaries); i++)
mg2di_bc_free(&ctx->boundaries[i]);
diff --git a/ell_grid_solve.h b/ell_grid_solve.h
index 5455c13..d56616b 100644
--- a/ell_grid_solve.h
+++ b/ell_grid_solve.h
@@ -43,6 +43,7 @@
#include "log.h"
#include "mg2d_boundary.h"
#include "mg2d_constants.h"
+#include "ndarray.h"
enum EGSType {
/**
@@ -156,11 +157,7 @@ typedef struct EGSContext {
* initial guess.
* Afterwards updated in mg2di_egs_step().
*/
- double *u;
- /**
- * Distance between neighbouring gridpoints along coord1.
- */
- ptrdiff_t u_stride;
+ NDArray *u;
/**
* Values of the right-hand side.
@@ -168,24 +165,16 @@ typedef struct EGSContext {
* Allocated by the solver in mg2di_egs_alloc(), owned by the solver.
* Must be filled by the caller before mg2di_egs_init().
*/
- double *rhs;
- /**
- * Distance between neighbouring gridpoints along coord1.
- */
- ptrdiff_t rhs_stride;
+ NDArray *rhs;
/**
- * Values of the right-hand side.
+ * Values of the residual.
*
* Allocated by the solver in mg2di_egs_alloc(), owned by the solver.
* Read-only for the caller. Initialized after mg2di_egs_init(),
* afterwards updated in mg2di_egs_step().
*/
- double *residual;
- /**
- * Distance between neighbouring gridpoints along coord1.
- */
- ptrdiff_t residual_stride;
+ NDArray *residual;
/**
* Maximum of the absolute value of residual.
@@ -198,11 +187,7 @@ typedef struct EGSContext {
* Allocated by the solver in mg2di_egs_alloc(), owned by the solver.
* Must be filled by the caller before mg2di_egs_init().
*/
- double *diff_coeffs[MG2D_DIFF_COEFF_NB];
- /**
- * Distance between neighbouring gridpoints along coord1.
- */
- ptrdiff_t diff_coeffs_stride;
+ NDArray *diff_coeffs[MG2D_DIFF_COEFF_NB];
/* timings */
int64_t time_boundaries;
diff --git a/mg2d.c b/mg2d.c
index 9949eee..a9b98a3 100644
--- a/mg2d.c
+++ b/mg2d.c
@@ -33,6 +33,7 @@
#include "mg2d_boundary.h"
#include "mg2d_boundary_internal.h"
#include "mg2d_constants.h"
+#include "ndarray.h"
typedef struct MG2DLevel {
unsigned int depth;
@@ -342,9 +343,9 @@ static int coarse_correct_task(void *arg, unsigned int job_idx, unsigned int thr
MG2DLevel *level = arg;
for (size_t idx0 = 0; idx0 < level->solver->domain_size[0]; idx0++) {
- const ptrdiff_t idx_dst = job_idx * level->solver->u_stride + idx0;
+ const ptrdiff_t idx_dst = job_idx * level->solver->u->stride[0] + idx0;
const ptrdiff_t idx_src = job_idx * level->prolong_tmp_stride + idx0;
- level->solver->u[idx_dst] -= level->prolong_tmp[idx_src];
+ level->solver->u->data[idx_dst] -= level->prolong_tmp[idx_src];
}
return 0;
}
@@ -354,17 +355,17 @@ static void restrict_residual(MG2DContext *ctx, MG2DLevel *dst, MG2DLevel *src)
EGSContext *s_src = src->solver;
EGSContext *s_dst = dst->solver;
if (s_src->domain_size[0] == 2 * (s_dst->domain_size[0] - 1) + 1) {
- mg_restrict_fw(s_dst, s_dst->rhs, s_dst->rhs_stride,
- s_src, s_src->residual, s_src->residual_stride);
+ mg_restrict_fw(s_dst, s_dst->rhs->data, s_dst->rhs->stride[0],
+ s_src, s_src->residual->data, s_src->residual->stride[0]);
} else {
if (ctx->fd_stencil == 1)
mg_interp_bilinear(ctx->priv->tp,
- s_dst, s_dst->rhs, s_dst->rhs_stride,
- s_src, s_src->residual, s_src->residual_stride);
+ s_dst, s_dst->rhs->data, s_dst->rhs->stride[0],
+ s_src, s_src->residual->data, s_src->residual->stride[0]);
else
mg_interp_bicubic(ctx->priv->tp,
- s_dst, s_dst->rhs, s_dst->rhs_stride,
- s_src, s_src->residual, s_src->residual_stride);
+ s_dst, s_dst->rhs->data, s_dst->rhs->stride[0],
+ s_src, s_src->residual->data, s_src->residual->stride[0]);
}
}
@@ -375,19 +376,19 @@ static void prolong_solution(MG2DContext *ctx, MG2DLevel *dst, MG2DLevel *src)
if (s_dst->domain_size[0] == 2 * (s_src->domain_size[0] - 1) + 1) {
if (ctx->fd_stencil == 1)
mg_prolongate(s_dst, dst->prolong_tmp, dst->prolong_tmp_stride,
- s_src, s_src->u, s_src->u_stride);
+ s_src, s_src->u->data, s_src->u->stride[0]);
else
mg_prolongate_bicubic(s_dst, dst->prolong_tmp, dst->prolong_tmp_stride,
- s_src, s_src->u, s_src->u_stride);
+ s_src, s_src->u->data, s_src->u->stride[0]);
} else {
if (ctx->fd_stencil == 1)
mg_interp_bilinear(ctx->priv->tp,
s_dst, dst->prolong_tmp, dst->prolong_tmp_stride,
- s_src, s_src->u, s_src->u_stride);
+ s_src, s_src->u->data, s_src->u->stride[0]);
else
mg_interp_bicubic(ctx->priv->tp,
s_dst, dst->prolong_tmp, dst->prolong_tmp_stride,
- s_src, s_src->u, s_src->u_stride);
+ s_src, s_src->u->data, s_src->u->stride[0]);
}
}
@@ -421,7 +422,7 @@ static int mg_solve_subgrid(MG2DContext *ctx, MG2DLevel *level)
/* on the refined levels, use zero as the initial guess for the
* solution (correction for the upper level) */
if (level->depth > 0) {
- memset(level->solver->u, 0, sizeof(*level->solver->u) * level->solver->u_stride *
+ memset(level->solver->u->data, 0, sizeof(*level->solver->u->data) * level->solver->u->stride[0] *
level->solver->domain_size[1]);
/* re-init the current-level solver (re-calc the residual) */
@@ -539,14 +540,14 @@ static void restrict_diff_coeffs(MG2DContext *ctx, MG2DLevel *dst, MG2DLevel *sr
EGSContext *s_dst = dst->solver;
if (s_src->domain_size[0] == 2 * (s_dst->domain_size[0] - 1) + 1) {
for (int i = 0; i < ARRAY_ELEMS(s_src->diff_coeffs); i++) {
- mg_restrict_inject(s_dst, s_dst->diff_coeffs[i], s_dst->diff_coeffs_stride,
- s_src, s_src->diff_coeffs[i], s_src->diff_coeffs_stride);
+ mg_restrict_inject(s_dst, s_dst->diff_coeffs[i]->data, s_dst->diff_coeffs[i]->stride[0],
+ s_src, s_src->diff_coeffs[i]->data, s_src->diff_coeffs[i]->stride[0]);
}
} else {
for (int i = 0; i < ARRAY_ELEMS(s_src->diff_coeffs); i++) {
mg_interp_bilinear(ctx->priv->tp,
- s_dst, s_dst->diff_coeffs[i], s_dst->diff_coeffs_stride,
- s_src, s_src->diff_coeffs[i], s_src->diff_coeffs_stride);
+ s_dst, s_dst->diff_coeffs[i]->data, s_dst->diff_coeffs[i]->stride[0],
+ s_src, s_src->diff_coeffs[i]->data, s_src->diff_coeffs[i]->stride[0]);
}
}
}
@@ -580,10 +581,10 @@ static int mg_levels_init(MG2DContext *ctx)
cur = priv->root;
prev = NULL;
- array_copy(cur->solver->u, cur->solver->u_stride, ctx->u, ctx->u_stride, ctx->domain_size, ctx->domain_size);
- array_copy(cur->solver->rhs, cur->solver->rhs_stride, ctx->rhs, ctx->rhs_stride, ctx->domain_size, ctx->domain_size);
+ array_copy(cur->solver->u->data, cur->solver->u->stride[0], ctx->u, ctx->u_stride, ctx->domain_size, ctx->domain_size);
+ array_copy(cur->solver->rhs->data, cur->solver->rhs->stride[0], ctx->rhs, ctx->rhs_stride, ctx->domain_size, ctx->domain_size);
for (int i = 0; i < ARRAY_ELEMS(ctx->diff_coeffs); i++) {
- array_copy(cur->solver->diff_coeffs[i], cur->solver->diff_coeffs_stride,
+ array_copy(cur->solver->diff_coeffs[i]->data, cur->solver->diff_coeffs[i]->stride[0],
ctx->diff_coeffs[i], ctx->diff_coeffs_stride,
ctx->domain_size, ctx->domain_size);
}
@@ -709,7 +710,7 @@ int mg2d_solve(MG2DContext *ctx)
mg2di_log(&priv->logger, MG2D_LOG_INFO, "converged on iteration %d, residual %g\n",
i, res_cur);
- array_copy(ctx->u, ctx->u_stride, s_root->u, s_root->u_stride, ctx->domain_size, ctx->domain_size);
+ array_copy(ctx->u, ctx->u_stride, s_root->u->data, s_root->u->stride[0], ctx->domain_size, ctx->domain_size);
priv->time_solve += gettime() - time_start;
priv->count_solve++;
diff --git a/relax_test.c b/relax_test.c
index 1e9b962..54155ca 100644
--- a/relax_test.c
+++ b/relax_test.c
@@ -8,6 +8,7 @@
#include "log.h"
#include "mg2d_boundary.h"
#include "mg2d_constants.h"
+#include "ndarray.h"
#define ARRAY_ELEMS(x) (sizeof(x) / sizeof(*x))
#define DOMAIN_SIZE 1.0
@@ -113,24 +114,24 @@ int main(int argc, char **argv)
for (size_t y = 0; y < ctx->domain_size[1]; y++) {
const double y_coord = y * ctx->step[1];
- memset(ctx->u + y * ctx->u_stride, 0, sizeof(*ctx->u) * ctx->domain_size[0]);
+ memset(NDPTR2D(ctx->u, 0, y), 0, sizeof(*ctx->u->data) * ctx->domain_size[0]);
for (size_t x = 0; x < ctx->domain_size[0]; x++) {
const double x_coord = x * ctx->step[0];
- ctx->diff_coeffs[MG2D_DIFF_COEFF_02][ctx->diff_coeffs_stride * y + x] = 1.0;
- ctx->diff_coeffs[MG2D_DIFF_COEFF_20][ctx->diff_coeffs_stride * y + x] = 1.0;
- ctx->diff_coeffs[MG2D_DIFF_COEFF_11][ctx->diff_coeffs_stride * y + x] = 1.0;
+ *NDPTR2D(ctx->diff_coeffs[MG2D_DIFF_COEFF_02], x, y) = 1.0;
+ *NDPTR2D(ctx->diff_coeffs[MG2D_DIFF_COEFF_20], x, y) = 1.0;
+ *NDPTR2D(ctx->diff_coeffs[MG2D_DIFF_COEFF_11], x, y) = 1.0;
- ctx->rhs[y * ctx->rhs_stride + x] = sol_dxx(x_coord, y_coord) + sol_dyy(x_coord, y_coord) + sol_dxy(x_coord, y_coord);
+ *NDPTR2D(ctx->rhs, x, y) = sol_dxx(x_coord, y_coord) + sol_dyy(x_coord, y_coord) + sol_dxy(x_coord, y_coord);
}
- memset(ctx->diff_coeffs[MG2D_DIFF_COEFF_00] + y * ctx->diff_coeffs_stride, 0,
- sizeof(*ctx->diff_coeffs[0]) * ctx->domain_size[0]);
- memset(ctx->diff_coeffs[MG2D_DIFF_COEFF_01] + y * ctx->diff_coeffs_stride, 0,
- sizeof(*ctx->diff_coeffs[0]) * ctx->domain_size[0]);
- memset(ctx->diff_coeffs[MG2D_DIFF_COEFF_10] + y * ctx->diff_coeffs_stride, 0,
- sizeof(*ctx->diff_coeffs[0]) * ctx->domain_size[0]);
+ memset(NDPTR2D(ctx->diff_coeffs[MG2D_DIFF_COEFF_00], 0, y), 0,
+ sizeof(*ctx->diff_coeffs[0]->data) * ctx->domain_size[0]);
+ memset(NDPTR2D(ctx->diff_coeffs[MG2D_DIFF_COEFF_01], 0, y), 0,
+ sizeof(*ctx->diff_coeffs[0]->data) * ctx->domain_size[0]);
+ memset(NDPTR2D(ctx->diff_coeffs[MG2D_DIFF_COEFF_10], 0, y), 0,
+ sizeof(*ctx->diff_coeffs[0]->data) * ctx->domain_size[0]);
}
ret = mg2di_egs_init(ctx);
@@ -140,7 +141,7 @@ int main(int argc, char **argv)
goto fail;
}
- res_old = findmax(ctx->residual, ctx->residual_stride * ctx->domain_size[1]);
+ res_old = findmax(ctx->residual->data, ctx->residual->stride[0] * ctx->domain_size[1]);
for (int i = 0; i < maxiter; i++) {
ret = mg2di_egs_solve(ctx);
@@ -149,7 +150,7 @@ int main(int argc, char **argv)
ret = 1;
goto fail;
}
- res_new = findmax(ctx->residual, ctx->residual_stride * ctx->domain_size[1]);
+ res_new = findmax(ctx->residual->data, ctx->residual->stride[0] * ctx->domain_size[1]);
if (res_new > 1e3) {
fprintf(stderr, "Diverged at step %d: %g -> %g\n", i, res_old, res_new);
goto fail;
@@ -166,7 +167,7 @@ int main(int argc, char **argv)
for (size_t x = 0; x < ctx->domain_size[0]; x++) {
const double x_coord = x * ctx->step[0];
- double err = fabs(ctx->u[y * ctx->u_stride + x] - sol(x_coord, y_coord));
+ double err = fabs(ctx->u->data[y * ctx->u->stride[0] + x] - sol(x_coord, y_coord));
if (err > max_err)
max_err = err;
}