From c2ead14543d11a532a8ac564119a7a10160fdc41 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Fri, 22 Mar 2019 18:50:55 +0100
Subject: ell_grid_solve: switch to ndarray in its external API

---
 ell_grid_solve.c | 178 ++++++++++++++++++++++++++++++-------------------------
 ell_grid_solve.h |  27 ++-------
 mg2d.c           |  43 +++++++-------
 relax_test.c     |  29 ++++-----
 4 files changed, 139 insertions(+), 138 deletions(-)

diff --git a/ell_grid_solve.c b/ell_grid_solve.c
index 117cbd1..49b2321 100644
--- a/ell_grid_solve.c
+++ b/ell_grid_solve.c
@@ -47,7 +47,7 @@ typedef struct EGSRelaxInternal {
 } EGSRelaxInternal;
 
 typedef struct EGSExactInternal {
-    void (*fill_mat)(double *mat_row, double **diff_coeffs, double *fd_factors,
+    void (*fill_mat)(double *mat_row, NDArray **diff_coeffs, double *fd_factors,
                      ptrdiff_t idx_src, ptrdiff_t row_stride, ptrdiff_t row_offset);
     size_t N;
     size_t N_ghosts;
@@ -111,11 +111,11 @@ static void residual_calc(EGSContext *ctx)
     start = gettime();
 
     for (int i = 0; i < ARRAY_ELEMS(diff_coeffs); i++)
-        diff_coeffs[i] = ctx->diff_coeffs[i] + priv->residual_calc_offset;
+        diff_coeffs[i] = ctx->diff_coeffs[i]->data + priv->residual_calc_offset;
 
     mg2di_residual_calc(priv->rescalc, priv->residual_calc_size, priv->stride,
-                        &ctx->residual_max, ctx->residual + priv->residual_calc_offset,
-                        ctx->u + priv->residual_calc_offset, ctx->rhs + priv->residual_calc_offset,
+                        &ctx->residual_max, ctx->residual->data + priv->residual_calc_offset,
+                        ctx->u->data + priv->residual_calc_offset, ctx->rhs->data + priv->residual_calc_offset,
                         diff_coeffs, priv->fd_factors);
 
     ctx->time_res_calc += gettime() - start;
@@ -191,7 +191,7 @@ static void boundaries_apply(EGSContext *ctx)
         const size_t size_boundary      = ctx->domain_size[!ci];
         const size_t size_offset        = ctx->domain_size[ci];
 
-        double *dst = ctx->u + mg2d_bnd_is_upper(i) * ((size_offset - 1) * stride_offset);
+        double *dst = ctx->u->data + mg2d_bnd_is_upper(i) * ((size_offset - 1) * stride_offset);
         const ptrdiff_t dst_strides[] = { stride_boundary, mg2d_bnd_out_dir(i) * stride_offset };
 
         switch (ctx->boundaries[i]->type) {
@@ -222,7 +222,7 @@ static void boundaries_apply(EGSContext *ctx)
             const int            dir_x = mg2d_bnd_out_dir(loc_x);
             int fact_x = dir_x, fact_y = dir_y;
 
-            double *dst = ctx->u
+            double *dst = ctx->u->data
                           + mg2d_bnd_is_upper(loc_y) * ((ctx->domain_size[1] - 1) * priv->stride)
                           + mg2d_bnd_is_upper(loc_x) * (ctx->domain_size[0] - 1);
 
@@ -251,9 +251,9 @@ static int residual_add_task(void *arg, unsigned int job_idx, unsigned int threa
     EGSInternal *priv = ctx->priv;
 
     for (int idx0 = 0; idx0 < ctx->domain_size[0]; idx0++) {
-        ptrdiff_t idx = job_idx * ctx->u_stride + idx0;
+        ptrdiff_t idx = job_idx * ctx->u->stride[0] + idx0;
 
-        ctx->u[idx] += priv->r.relax_factor * ctx->residual[idx];
+        ctx->u->data[idx] += priv->r.relax_factor * ctx->residual->data[idx];
     }
 
     return 0;
@@ -277,78 +277,78 @@ static int solve_relax_step(EGSContext *ctx)
     return 0;
 }
 
-static void fill_mat_s1(double *mat_row, double **diff_coeffs, double *fd_factors,
+static void fill_mat_s1(double *mat_row, NDArray **diff_coeffs, double *fd_factors,
                         ptrdiff_t idx_src, ptrdiff_t row_stride, ptrdiff_t row_offset)
 {
-    mat_row[row_offset + 0] += diff_coeffs[MG2D_DIFF_COEFF_00][idx_src];
+    mat_row[row_offset + 0] += diff_coeffs[MG2D_DIFF_COEFF_00]->data[idx_src];
 
-    mat_row[row_offset + 1] +=  1.0 * diff_coeffs[MG2D_DIFF_COEFF_10][idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
-    mat_row[row_offset - 1] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_10][idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
+    mat_row[row_offset + 1] +=  1.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
+    mat_row[row_offset - 1] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
 
-    mat_row[row_offset + 1 * row_stride] +=  1.0 * diff_coeffs[MG2D_DIFF_COEFF_01][idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
-    mat_row[row_offset - 1 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_01][idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
+    mat_row[row_offset + 1 * row_stride] +=  1.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
+    mat_row[row_offset - 1 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
 
-    mat_row[row_offset + 1] +=  1.0 * diff_coeffs[MG2D_DIFF_COEFF_20][idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
-    mat_row[row_offset + 0] += -2.0 * diff_coeffs[MG2D_DIFF_COEFF_20][idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
-    mat_row[row_offset - 1] +=  1.0 * diff_coeffs[MG2D_DIFF_COEFF_20][idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
+    mat_row[row_offset + 1] +=  1.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
+    mat_row[row_offset + 0] += -2.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
+    mat_row[row_offset - 1] +=  1.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
 
-    mat_row[row_offset + 1 * row_stride] +=  1.0 * diff_coeffs[MG2D_DIFF_COEFF_02][idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
-    mat_row[row_offset + 0 * row_stride] += -2.0 * diff_coeffs[MG2D_DIFF_COEFF_02][idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
-    mat_row[row_offset - 1 * row_stride] +=  1.0 * diff_coeffs[MG2D_DIFF_COEFF_02][idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
+    mat_row[row_offset + 1 * row_stride] +=  1.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
+    mat_row[row_offset + 0 * row_stride] += -2.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
+    mat_row[row_offset - 1 * row_stride] +=  1.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
 
 
-    mat_row[row_offset + 1 + 1 * row_stride] +=  1.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
-    mat_row[row_offset + 1 - 1 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
-    mat_row[row_offset - 1 + 1 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
-    mat_row[row_offset - 1 - 1 * row_stride] +=  1.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+    mat_row[row_offset + 1 + 1 * row_stride] +=  1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+    mat_row[row_offset + 1 - 1 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+    mat_row[row_offset - 1 + 1 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+    mat_row[row_offset - 1 - 1 * row_stride] +=  1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
 }
 
-static void fill_mat_s2(double *mat_row, double **diff_coeffs, double *fd_factors,
+static void fill_mat_s2(double *mat_row, NDArray **diff_coeffs, double *fd_factors,
                         ptrdiff_t idx_src, ptrdiff_t row_stride, ptrdiff_t row_offset)
 {
-    mat_row[row_offset + 0] += diff_coeffs[MG2D_DIFF_COEFF_00][idx_src];
-
-    mat_row[row_offset + 2] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_10][idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
-    mat_row[row_offset + 1] +=  8.0 * diff_coeffs[MG2D_DIFF_COEFF_10][idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
-    mat_row[row_offset - 1] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_10][idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
-    mat_row[row_offset - 2] +=  1.0 * diff_coeffs[MG2D_DIFF_COEFF_10][idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
-
-    mat_row[row_offset + 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_01][idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
-    mat_row[row_offset + 1 * row_stride] +=  8.0 * diff_coeffs[MG2D_DIFF_COEFF_01][idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
-    mat_row[row_offset - 1 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_01][idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
-    mat_row[row_offset - 2 * row_stride] +=  1.0 * diff_coeffs[MG2D_DIFF_COEFF_01][idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
-
-    mat_row[row_offset + 2] +=  -1.0 * diff_coeffs[MG2D_DIFF_COEFF_20][idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
-    mat_row[row_offset + 1] +=  16.0 * diff_coeffs[MG2D_DIFF_COEFF_20][idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
-    mat_row[row_offset + 0] += -30.0 * diff_coeffs[MG2D_DIFF_COEFF_20][idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
-    mat_row[row_offset - 1] +=  16.0 * diff_coeffs[MG2D_DIFF_COEFF_20][idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
-    mat_row[row_offset - 2] +=  -1.0 * diff_coeffs[MG2D_DIFF_COEFF_20][idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
-
-    mat_row[row_offset + 2 * row_stride] +=  -1.0 * diff_coeffs[MG2D_DIFF_COEFF_02][idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
-    mat_row[row_offset + 1 * row_stride] +=  16.0 * diff_coeffs[MG2D_DIFF_COEFF_02][idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
-    mat_row[row_offset + 0 * row_stride] += -30.0 * diff_coeffs[MG2D_DIFF_COEFF_02][idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
-    mat_row[row_offset - 1 * row_stride] +=  16.0 * diff_coeffs[MG2D_DIFF_COEFF_02][idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
-    mat_row[row_offset - 2 * row_stride] +=  -1.0 * diff_coeffs[MG2D_DIFF_COEFF_02][idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
-
-    mat_row[row_offset + 2 + 2 * row_stride] +=   1.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
-    mat_row[row_offset + 2 + 1 * row_stride] +=  -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
-    mat_row[row_offset + 2 - 1 * row_stride] +=   8.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
-    mat_row[row_offset + 2 - 2 * row_stride] +=  -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
-
-    mat_row[row_offset + 1 + 2 * row_stride] +=  -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
-    mat_row[row_offset + 1 + 1 * row_stride] +=  64.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
-    mat_row[row_offset + 1 - 1 * row_stride] += -64.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
-    mat_row[row_offset + 1 - 2 * row_stride] +=   8.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
-
-    mat_row[row_offset - 1 + 2 * row_stride] +=   8.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
-    mat_row[row_offset - 1 + 1 * row_stride] += -64.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
-    mat_row[row_offset - 1 - 1 * row_stride] +=  64.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
-    mat_row[row_offset - 1 - 2 * row_stride] +=  -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
-
-    mat_row[row_offset - 2 + 2 * row_stride] +=  -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
-    mat_row[row_offset - 2 + 1 * row_stride] +=   8.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
-    mat_row[row_offset - 2 - 1 * row_stride] +=  -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
-    mat_row[row_offset - 2 - 2 * row_stride] +=   1.0 * diff_coeffs[MG2D_DIFF_COEFF_11][idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+    mat_row[row_offset + 0] += diff_coeffs[MG2D_DIFF_COEFF_00]->data[idx_src];
+
+    mat_row[row_offset + 2] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
+    mat_row[row_offset + 1] +=  8.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
+    mat_row[row_offset - 1] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
+    mat_row[row_offset - 2] +=  1.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
+
+    mat_row[row_offset + 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
+    mat_row[row_offset + 1 * row_stride] +=  8.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
+    mat_row[row_offset - 1 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
+    mat_row[row_offset - 2 * row_stride] +=  1.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
+
+    mat_row[row_offset + 2] +=  -1.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
+    mat_row[row_offset + 1] +=  16.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
+    mat_row[row_offset + 0] += -30.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
+    mat_row[row_offset - 1] +=  16.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
+    mat_row[row_offset - 2] +=  -1.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
+
+    mat_row[row_offset + 2 * row_stride] +=  -1.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
+    mat_row[row_offset + 1 * row_stride] +=  16.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
+    mat_row[row_offset + 0 * row_stride] += -30.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
+    mat_row[row_offset - 1 * row_stride] +=  16.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
+    mat_row[row_offset - 2 * row_stride] +=  -1.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
+
+    mat_row[row_offset + 2 + 2 * row_stride] +=   1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+    mat_row[row_offset + 2 + 1 * row_stride] +=  -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+    mat_row[row_offset + 2 - 1 * row_stride] +=   8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+    mat_row[row_offset + 2 - 2 * row_stride] +=  -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+
+    mat_row[row_offset + 1 + 2 * row_stride] +=  -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+    mat_row[row_offset + 1 + 1 * row_stride] +=  64.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+    mat_row[row_offset + 1 - 1 * row_stride] += -64.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+    mat_row[row_offset + 1 - 2 * row_stride] +=   8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+
+    mat_row[row_offset - 1 + 2 * row_stride] +=   8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+    mat_row[row_offset - 1 + 1 * row_stride] += -64.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+    mat_row[row_offset - 1 - 1 * row_stride] +=  64.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+    mat_row[row_offset - 1 - 2 * row_stride] +=  -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+
+    mat_row[row_offset - 2 + 2 * row_stride] +=  -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+    mat_row[row_offset - 2 + 1 * row_stride] +=   8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+    mat_row[row_offset - 2 - 1 * row_stride] +=  -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+    mat_row[row_offset - 2 - 2 * row_stride] +=   1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
 }
 
 static int solve_exact(EGSContext *ctx)
@@ -396,7 +396,7 @@ static int solve_exact(EGSContext *ctx)
 
             e->fill_mat(mat_row, ctx->diff_coeffs, ctx->priv->fd_factors, idx_src, row_stride, row_offset);
 
-            e->rhs[mat_row_idx] = ctx->rhs[idx_src];
+            e->rhs[mat_row_idx] = ctx->rhs->data[idx_src];
 
             if (boundary) {
                 double *mat_dst = e->mat + e->N * mat_row_idx;
@@ -554,7 +554,7 @@ static int solve_exact(EGSContext *ctx)
     }
 
     for (size_t idx1 = 0; idx1 < ctx->domain_size[1]; idx1++)
-        memcpy(ctx->u + idx1 * ctx->u_stride, e->x + idx1 * ctx->domain_size[0], ctx->domain_size[0] * sizeof(*e->x));
+        memcpy(ctx->u->data + idx1 * ctx->u->stride[0], e->x + idx1 * ctx->domain_size[0], ctx->domain_size[0] * sizeof(*e->x));
 
     ec->time_lin_solve += gettime() - start;
     ec->count_lin_solve++;
@@ -637,7 +637,7 @@ int mg2di_egs_init(EGSContext *ctx)
     priv->residual_calc_size[0] = ctx->domain_size[0];
     priv->residual_calc_size[1] = ctx->domain_size[1];
     if (ctx->boundaries[MG2D_BOUNDARY_1L]->type == MG2D_BC_TYPE_FIXVAL)
-        priv->residual_calc_offset += ctx->residual_stride;
+        priv->residual_calc_offset += ctx->residual->stride[0];
     if (ctx->boundaries[MG2D_BOUNDARY_0L]->type == MG2D_BC_TYPE_FIXVAL)
         priv->residual_calc_offset++;
 
@@ -675,7 +675,6 @@ static int arrays_alloc(EGSContext *ctx, const size_t domain_size[2])
         domain_size[0] + 2 * ghosts,
     };
     const size_t stride = size_padded[0];
-    const size_t start_offset = ghosts * stride + ghosts;
     const Slice slice[2] = { SLICE(ghosts, -ghosts, 1),
                              SLICE(ghosts, -ghosts, 1) };
     int ret;
@@ -683,30 +682,38 @@ static int arrays_alloc(EGSContext *ctx, const size_t domain_size[2])
     ret = mg2di_ndarray_alloc(&priv->u_base, 2, size_padded, 0);
     if (ret < 0)
         return ret;
-    ctx->u        = priv->u_base->data + start_offset;
-    ctx->u_stride = priv->u_base->stride[0];
+
+    ret = mg2di_ndarray_slice(&ctx->u, priv->u_base, slice);
+    if (ret < 0)
+        return ret;
 
     ret = mg2di_ndarray_alloc(&priv->rhs_base, 2, size_padded, 0);
     if (ret < 0)
         return ret;
-    ctx->rhs        = priv->rhs_base->data + start_offset;
-    ctx->rhs_stride = priv->rhs_base->stride[0];
+
+    ret = mg2di_ndarray_slice(&ctx->rhs, priv->rhs_base, slice);
+    if (ret < 0)
+        return ret;
 
     ret = mg2di_ndarray_alloc(&priv->residual_base, 2, size_padded,
                               NDARRAY_ALLOC_ZERO);
     if (ret < 0)
         return ret;
-    ctx->residual        = priv->residual_base->data + start_offset;
-    ctx->residual_stride = priv->residual_base->stride[0];
+
+    ret = mg2di_ndarray_slice(&ctx->residual, priv->residual_base, slice);
+    if (ret < 0)
+        return ret;
 
     for (int i = 0; i < ARRAY_ELEMS(ctx->diff_coeffs); i++) {
         ret = mg2di_ndarray_alloc(&priv->diff_coeffs_base[i], 2,
                                   size_padded, NDARRAY_ALLOC_ZERO);
         if (ret < 0)
             return ret;
-        ctx->diff_coeffs[i] = priv->diff_coeffs_base[i]->data + start_offset;
+
+        ret = mg2di_ndarray_slice(&ctx->diff_coeffs[i], priv->diff_coeffs_base[i], slice);
+        if (ret < 0)
+            return ret;
     }
-    ctx->diff_coeffs_stride = stride;
 
     priv->stride = stride;
 
@@ -820,13 +827,20 @@ void mg2di_egs_free(EGSContext **pctx)
         mg2di_bicgstab_context_free(&e->bicgstab);
     }
 
-    mg2di_ndarray_free(&ctx->a_u);
+    mg2di_ndarray_free(&ctx->u);
     mg2di_ndarray_free(&ctx->priv->u_base);
 
+    mg2di_ndarray_free(&ctx->rhs);
     mg2di_ndarray_free(&ctx->priv->rhs_base);
+
+    mg2di_ndarray_free(&ctx->residual);
     mg2di_ndarray_free(&ctx->priv->residual_base);
-    for (int i = 0; i < ARRAY_ELEMS(ctx->priv->diff_coeffs_base); i++)
+
+    for (int i = 0; i < ARRAY_ELEMS(ctx->priv->diff_coeffs_base); i++) {
+        mg2di_ndarray_free(&ctx->diff_coeffs[i]);
         mg2di_ndarray_free(&ctx->priv->diff_coeffs_base[i]);
+    }
+
     for (int i = 0; i < ARRAY_ELEMS(ctx->boundaries); i++)
         mg2di_bc_free(&ctx->boundaries[i]);
 
diff --git a/ell_grid_solve.h b/ell_grid_solve.h
index 5455c13..d56616b 100644
--- a/ell_grid_solve.h
+++ b/ell_grid_solve.h
@@ -43,6 +43,7 @@
 #include "log.h"
 #include "mg2d_boundary.h"
 #include "mg2d_constants.h"
+#include "ndarray.h"
 
 enum EGSType {
     /**
@@ -156,11 +157,7 @@ typedef struct EGSContext {
      * initial guess.
      * Afterwards updated in mg2di_egs_step().
      */
-    double   *u;
-    /**
-     * Distance between neighbouring gridpoints along coord1.
-     */
-    ptrdiff_t u_stride;
+    NDArray *u;
 
     /**
      * Values of the right-hand side.
@@ -168,24 +165,16 @@ typedef struct EGSContext {
      * Allocated by the solver in mg2di_egs_alloc(), owned by the solver.
      * Must be filled by the caller before mg2di_egs_init().
      */
-    double    *rhs;
-    /**
-     * Distance between neighbouring gridpoints along coord1.
-     */
-    ptrdiff_t  rhs_stride;
+    NDArray *rhs;
 
     /**
-     * Values of the right-hand side.
+     * Values of the residual.
      *
      * Allocated by the solver in mg2di_egs_alloc(), owned by the solver.
      * Read-only for the caller. Initialized after mg2di_egs_init(),
      * afterwards updated in mg2di_egs_step().
      */
-    double   *residual;
-    /**
-     * Distance between neighbouring gridpoints along coord1.
-     */
-    ptrdiff_t residual_stride;
+    NDArray *residual;
 
     /**
      * Maximum of the absolute value of residual.
@@ -198,11 +187,7 @@ typedef struct EGSContext {
      * Allocated by the solver in mg2di_egs_alloc(), owned by the solver.
      * Must be filled by the caller before mg2di_egs_init().
      */
-    double   *diff_coeffs[MG2D_DIFF_COEFF_NB];
-    /**
-     * Distance between neighbouring gridpoints along coord1.
-     */
-    ptrdiff_t diff_coeffs_stride;
+    NDArray *diff_coeffs[MG2D_DIFF_COEFF_NB];
 
     /* timings */
     int64_t time_boundaries;
diff --git a/mg2d.c b/mg2d.c
index 9949eee..a9b98a3 100644
--- a/mg2d.c
+++ b/mg2d.c
@@ -33,6 +33,7 @@
 #include "mg2d_boundary.h"
 #include "mg2d_boundary_internal.h"
 #include "mg2d_constants.h"
+#include "ndarray.h"
 
 typedef struct MG2DLevel {
     unsigned int  depth;
@@ -342,9 +343,9 @@ static int coarse_correct_task(void *arg, unsigned int job_idx, unsigned int thr
     MG2DLevel *level = arg;
 
     for (size_t idx0 = 0; idx0 < level->solver->domain_size[0]; idx0++) {
-        const ptrdiff_t idx_dst = job_idx * level->solver->u_stride + idx0;
+        const ptrdiff_t idx_dst = job_idx * level->solver->u->stride[0] + idx0;
         const ptrdiff_t idx_src = job_idx * level->prolong_tmp_stride + idx0;
-        level->solver->u[idx_dst] -= level->prolong_tmp[idx_src];
+        level->solver->u->data[idx_dst] -= level->prolong_tmp[idx_src];
     }
     return 0;
 }
@@ -354,17 +355,17 @@ static void restrict_residual(MG2DContext *ctx, MG2DLevel *dst, MG2DLevel *src)
     EGSContext *s_src = src->solver;
     EGSContext *s_dst = dst->solver;
     if (s_src->domain_size[0] == 2 * (s_dst->domain_size[0] - 1) + 1) {
-        mg_restrict_fw(s_dst, s_dst->rhs, s_dst->rhs_stride,
-                       s_src, s_src->residual, s_src->residual_stride);
+        mg_restrict_fw(s_dst, s_dst->rhs->data, s_dst->rhs->stride[0],
+                       s_src, s_src->residual->data, s_src->residual->stride[0]);
     } else {
         if (ctx->fd_stencil == 1)
             mg_interp_bilinear(ctx->priv->tp,
-                               s_dst, s_dst->rhs, s_dst->rhs_stride,
-                               s_src, s_src->residual, s_src->residual_stride);
+                               s_dst, s_dst->rhs->data, s_dst->rhs->stride[0],
+                               s_src, s_src->residual->data, s_src->residual->stride[0]);
         else
             mg_interp_bicubic(ctx->priv->tp,
-                              s_dst, s_dst->rhs, s_dst->rhs_stride,
-                              s_src, s_src->residual, s_src->residual_stride);
+                              s_dst, s_dst->rhs->data, s_dst->rhs->stride[0],
+                              s_src, s_src->residual->data, s_src->residual->stride[0]);
     }
 }
 
@@ -375,19 +376,19 @@ static void prolong_solution(MG2DContext *ctx, MG2DLevel *dst, MG2DLevel *src)
     if (s_dst->domain_size[0] == 2 * (s_src->domain_size[0] - 1) + 1) {
         if (ctx->fd_stencil == 1)
             mg_prolongate(s_dst, dst->prolong_tmp, dst->prolong_tmp_stride,
-                          s_src, s_src->u, s_src->u_stride);
+                          s_src, s_src->u->data, s_src->u->stride[0]);
         else
             mg_prolongate_bicubic(s_dst, dst->prolong_tmp, dst->prolong_tmp_stride,
-                                  s_src, s_src->u, s_src->u_stride);
+                                  s_src, s_src->u->data, s_src->u->stride[0]);
     } else {
         if (ctx->fd_stencil == 1)
             mg_interp_bilinear(ctx->priv->tp,
                                s_dst, dst->prolong_tmp, dst->prolong_tmp_stride,
-                               s_src, s_src->u, s_src->u_stride);
+                               s_src, s_src->u->data, s_src->u->stride[0]);
         else
             mg_interp_bicubic(ctx->priv->tp,
                               s_dst, dst->prolong_tmp, dst->prolong_tmp_stride,
-                              s_src, s_src->u, s_src->u_stride);
+                              s_src, s_src->u->data, s_src->u->stride[0]);
     }
 }
 
@@ -421,7 +422,7 @@ static int mg_solve_subgrid(MG2DContext *ctx, MG2DLevel *level)
     /* on the refined levels, use zero as the initial guess for the
      * solution (correction for the upper level) */
     if (level->depth > 0) {
-        memset(level->solver->u, 0, sizeof(*level->solver->u) * level->solver->u_stride *
+        memset(level->solver->u->data, 0, sizeof(*level->solver->u->data) * level->solver->u->stride[0] *
                 level->solver->domain_size[1]);
 
         /* re-init the current-level solver (re-calc the residual) */
@@ -539,14 +540,14 @@ static void restrict_diff_coeffs(MG2DContext *ctx, MG2DLevel *dst, MG2DLevel *sr
     EGSContext *s_dst = dst->solver;
     if (s_src->domain_size[0] == 2 * (s_dst->domain_size[0] - 1) + 1) {
         for (int i = 0; i < ARRAY_ELEMS(s_src->diff_coeffs); i++) {
-            mg_restrict_inject(s_dst, s_dst->diff_coeffs[i], s_dst->diff_coeffs_stride,
-                               s_src, s_src->diff_coeffs[i], s_src->diff_coeffs_stride);
+            mg_restrict_inject(s_dst, s_dst->diff_coeffs[i]->data, s_dst->diff_coeffs[i]->stride[0],
+                               s_src, s_src->diff_coeffs[i]->data, s_src->diff_coeffs[i]->stride[0]);
         }
     } else {
         for (int i = 0; i < ARRAY_ELEMS(s_src->diff_coeffs); i++) {
             mg_interp_bilinear(ctx->priv->tp,
-                               s_dst, s_dst->diff_coeffs[i], s_dst->diff_coeffs_stride,
-                               s_src, s_src->diff_coeffs[i], s_src->diff_coeffs_stride);
+                               s_dst, s_dst->diff_coeffs[i]->data, s_dst->diff_coeffs[i]->stride[0],
+                               s_src, s_src->diff_coeffs[i]->data, s_src->diff_coeffs[i]->stride[0]);
         }
     }
 }
@@ -580,10 +581,10 @@ static int mg_levels_init(MG2DContext *ctx)
     cur  = priv->root;
     prev = NULL;
 
-    array_copy(cur->solver->u,   cur->solver->u_stride,   ctx->u,   ctx->u_stride,   ctx->domain_size, ctx->domain_size);
-    array_copy(cur->solver->rhs, cur->solver->rhs_stride, ctx->rhs, ctx->rhs_stride, ctx->domain_size, ctx->domain_size);
+    array_copy(cur->solver->u->data,   cur->solver->u->stride[0],   ctx->u,   ctx->u_stride,   ctx->domain_size, ctx->domain_size);
+    array_copy(cur->solver->rhs->data, cur->solver->rhs->stride[0], ctx->rhs, ctx->rhs_stride, ctx->domain_size, ctx->domain_size);
     for (int i = 0; i < ARRAY_ELEMS(ctx->diff_coeffs); i++) {
-        array_copy(cur->solver->diff_coeffs[i], cur->solver->diff_coeffs_stride,
+        array_copy(cur->solver->diff_coeffs[i]->data, cur->solver->diff_coeffs[i]->stride[0],
                    ctx->diff_coeffs[i], ctx->diff_coeffs_stride,
                    ctx->domain_size, ctx->domain_size);
     }
@@ -709,7 +710,7 @@ int mg2d_solve(MG2DContext *ctx)
             mg2di_log(&priv->logger, MG2D_LOG_INFO, "converged on iteration %d, residual %g\n",
                       i, res_cur);
 
-            array_copy(ctx->u, ctx->u_stride, s_root->u, s_root->u_stride, ctx->domain_size, ctx->domain_size);
+            array_copy(ctx->u, ctx->u_stride, s_root->u->data, s_root->u->stride[0], ctx->domain_size, ctx->domain_size);
 
             priv->time_solve += gettime() - time_start;
             priv->count_solve++;
diff --git a/relax_test.c b/relax_test.c
index 1e9b962..54155ca 100644
--- a/relax_test.c
+++ b/relax_test.c
@@ -8,6 +8,7 @@
 #include "log.h"
 #include "mg2d_boundary.h"
 #include "mg2d_constants.h"
+#include "ndarray.h"
 
 #define ARRAY_ELEMS(x) (sizeof(x) / sizeof(*x))
 #define DOMAIN_SIZE 1.0
@@ -113,24 +114,24 @@ int main(int argc, char **argv)
     for (size_t y = 0; y < ctx->domain_size[1]; y++) {
         const double y_coord = y * ctx->step[1];
 
-        memset(ctx->u + y * ctx->u_stride, 0, sizeof(*ctx->u) * ctx->domain_size[0]);
+        memset(NDPTR2D(ctx->u, 0, y), 0, sizeof(*ctx->u->data) * ctx->domain_size[0]);
 
         for (size_t x = 0; x < ctx->domain_size[0]; x++) {
             const double x_coord = x * ctx->step[0];
 
-            ctx->diff_coeffs[MG2D_DIFF_COEFF_02][ctx->diff_coeffs_stride * y + x] = 1.0;
-            ctx->diff_coeffs[MG2D_DIFF_COEFF_20][ctx->diff_coeffs_stride * y + x] = 1.0;
-            ctx->diff_coeffs[MG2D_DIFF_COEFF_11][ctx->diff_coeffs_stride * y + x] = 1.0;
+            *NDPTR2D(ctx->diff_coeffs[MG2D_DIFF_COEFF_02], x, y) =  1.0;
+            *NDPTR2D(ctx->diff_coeffs[MG2D_DIFF_COEFF_20], x, y) =  1.0;
+            *NDPTR2D(ctx->diff_coeffs[MG2D_DIFF_COEFF_11], x, y) =  1.0;
 
-            ctx->rhs[y * ctx->rhs_stride + x] = sol_dxx(x_coord, y_coord) + sol_dyy(x_coord, y_coord) + sol_dxy(x_coord, y_coord);
+            *NDPTR2D(ctx->rhs, x, y) = sol_dxx(x_coord, y_coord) + sol_dyy(x_coord, y_coord) + sol_dxy(x_coord, y_coord);
         }
 
-        memset(ctx->diff_coeffs[MG2D_DIFF_COEFF_00] + y * ctx->diff_coeffs_stride, 0,
-               sizeof(*ctx->diff_coeffs[0]) * ctx->domain_size[0]);
-        memset(ctx->diff_coeffs[MG2D_DIFF_COEFF_01] + y * ctx->diff_coeffs_stride, 0,
-               sizeof(*ctx->diff_coeffs[0]) * ctx->domain_size[0]);
-        memset(ctx->diff_coeffs[MG2D_DIFF_COEFF_10] + y * ctx->diff_coeffs_stride, 0,
-               sizeof(*ctx->diff_coeffs[0]) * ctx->domain_size[0]);
+        memset(NDPTR2D(ctx->diff_coeffs[MG2D_DIFF_COEFF_00], 0, y), 0,
+               sizeof(*ctx->diff_coeffs[0]->data) * ctx->domain_size[0]);
+        memset(NDPTR2D(ctx->diff_coeffs[MG2D_DIFF_COEFF_01], 0, y), 0,
+               sizeof(*ctx->diff_coeffs[0]->data) * ctx->domain_size[0]);
+        memset(NDPTR2D(ctx->diff_coeffs[MG2D_DIFF_COEFF_10], 0, y), 0,
+               sizeof(*ctx->diff_coeffs[0]->data) * ctx->domain_size[0]);
     }
 
     ret = mg2di_egs_init(ctx);
@@ -140,7 +141,7 @@ int main(int argc, char **argv)
         goto fail;
     }
 
-    res_old = findmax(ctx->residual, ctx->residual_stride * ctx->domain_size[1]);
+    res_old = findmax(ctx->residual->data, ctx->residual->stride[0] * ctx->domain_size[1]);
 
     for (int i = 0; i < maxiter; i++) {
         ret = mg2di_egs_solve(ctx);
@@ -149,7 +150,7 @@ int main(int argc, char **argv)
             ret = 1;
             goto fail;
         }
-        res_new = findmax(ctx->residual, ctx->residual_stride * ctx->domain_size[1]);
+        res_new = findmax(ctx->residual->data, ctx->residual->stride[0] * ctx->domain_size[1]);
         if (res_new > 1e3) {
             fprintf(stderr, "Diverged at step %d: %g -> %g\n", i, res_old, res_new);
             goto fail;
@@ -166,7 +167,7 @@ int main(int argc, char **argv)
 
             for (size_t x = 0; x < ctx->domain_size[0]; x++) {
                 const double x_coord = x * ctx->step[0];
-                double err = fabs(ctx->u[y * ctx->u_stride + x] - sol(x_coord, y_coord));
+                double err = fabs(ctx->u->data[y * ctx->u->stride[0] + x] - sol(x_coord, y_coord));
                 if (err > max_err)
                     max_err = err;
             }
-- 
cgit v1.2.3