summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Anton Khirnov <anton@khirnov.net> 2019-03-26 17:29:53 +0100
committer Anton Khirnov <anton@khirnov.net> 2019-03-26 17:30:25 +0100
commit 0e3f76cc16fa93a2b8d58922bae090828bfbf05a (patch)
tree 0ad6ef2562b87ff75c834065341eb0751095e2e6
parent 9dc3b1d7f2a3cb2a072b62370ea192b4ebad9a36 (diff)
egs_exact: avoid explicit matrix transpose
-rw-r--r-- ell_grid_solve.c 148
-rw-r--r-- ell_grid_solve.h 2
-rw-r--r-- mg2d.c 3
3 files changed, 73 insertions, 80 deletions
diff --git a/ell_grid_solve.c b/ell_grid_solve.c
index adc7833..dc0210e 100644
--- a/ell_grid_solve.c
+++ b/ell_grid_solve.c
@@ -47,8 +47,9 @@ typedef struct EGSRelaxInternal {
} EGSRelaxInternal;
typedef struct EGSExactInternal {
- void (*fill_mat)(double *mat_row, NDArray **diff_coeffs, double *fd_factors,
- ptrdiff_t idx_src, ptrdiff_t row_stride);
+ void (*fill_mat)(double *mat_row, ptrdiff_t mat_stride, ptrdiff_t row_stride,
+ NDArray **diff_coeffs, double *fd_factors,
+ ptrdiff_t idx_src);
size_t N;
size_t N_ghosts;
@@ -303,78 +304,79 @@ static int solve_relax_step(EGSContext *ctx)
return 0;
}
-static void fill_mat_s1(double *mat_row, NDArray **diff_coeffs, double *fd_factors,
- ptrdiff_t idx_src, ptrdiff_t row_stride)
+static void fill_mat_s1(double *mat_row, ptrdiff_t mat_stride, ptrdiff_t row_stride,
+ NDArray **diff_coeffs, double *fd_factors, ptrdiff_t idx_src)
{
mat_row[0] += diff_coeffs[MG2D_DIFF_COEFF_00]->data[idx_src];
- mat_row[ 1] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
- mat_row[-1] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
+ mat_row[ 1 * mat_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
+ mat_row[-1 * mat_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
- mat_row[ 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
- mat_row[-1 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
+ mat_row[ 1 * row_stride * mat_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
+ mat_row[-1 * row_stride * mat_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
- mat_row[ 1] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
- mat_row[ 0] += -2.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
- mat_row[-1] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
+ mat_row[ 1 * mat_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
+ mat_row[ 0 * mat_stride] += -2.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
+ mat_row[-1 * mat_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
- mat_row[ 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
- mat_row[ 0 * row_stride] += -2.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
- mat_row[-1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
+ mat_row[ 1 * row_stride * mat_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
+ mat_row[ 0 * row_stride * mat_stride] += -2.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
+ mat_row[-1 * row_stride * mat_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
- mat_row[ 1 + 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[ 1 - 1 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[-1 + 1 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[-1 - 1 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[( 1 + 1 * row_stride) * mat_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[( 1 - 1 * row_stride) * mat_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[(-1 + 1 * row_stride) * mat_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[(-1 - 1 * row_stride) * mat_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
}
-static void fill_mat_s2(double *mat_row, NDArray **diff_coeffs, double *fd_factors,
- ptrdiff_t idx_src, ptrdiff_t row_stride)
+static void fill_mat_s2(double *mat_row, ptrdiff_t mat_stride, ptrdiff_t row_stride,
+ NDArray **diff_coeffs, double *fd_factors,
+ ptrdiff_t idx_src)
{
mat_row[0] += diff_coeffs[MG2D_DIFF_COEFF_00]->data[idx_src];
- mat_row[ 2] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
- mat_row[ 1] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
- mat_row[-1] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
- mat_row[-2] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
-
- mat_row[ 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
- mat_row[ 1 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
- mat_row[-1 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
- mat_row[-2 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
-
- mat_row[ 2] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
- mat_row[ 1] += 16.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
- mat_row[ 0] += -30.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
- mat_row[-1] += 16.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
- mat_row[-2] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
-
- mat_row[ 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
- mat_row[ 1 * row_stride] += 16.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
- mat_row[ 0 * row_stride] += -30.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
- mat_row[-1 * row_stride] += 16.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
- mat_row[-2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
-
- mat_row[ 2 + 2 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[ 2 + 1 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[ 2 - 1 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[ 2 - 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
-
- mat_row[ 1 + 2 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[ 1 + 1 * row_stride] += 64.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[ 1 - 1 * row_stride] += -64.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[ 1 - 2 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
-
- mat_row[-1 + 2 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[-1 + 1 * row_stride] += -64.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[-1 - 1 * row_stride] += 64.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[-1 - 2 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
-
- mat_row[-2 + 2 * row_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[-2 + 1 * row_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[-2 - 1 * row_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
- mat_row[-2 - 2 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[ 2 * mat_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
+ mat_row[ 1 * mat_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
+ mat_row[-1 * mat_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
+ mat_row[-2 * mat_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10];
+
+ mat_row[ 2 * row_stride * mat_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
+ mat_row[ 1 * row_stride * mat_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
+ mat_row[-1 * row_stride * mat_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
+ mat_row[-2 * row_stride * mat_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01];
+
+ mat_row[ 2 * mat_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
+ mat_row[ 1 * mat_stride] += 16.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
+ mat_row[ 0 * mat_stride] += -30.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
+ mat_row[-1 * mat_stride] += 16.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
+ mat_row[-2 * mat_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20];
+
+ mat_row[ 2 * row_stride * mat_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
+ mat_row[ 1 * row_stride * mat_stride] += 16.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
+ mat_row[ 0 * row_stride * mat_stride] += -30.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
+ mat_row[-1 * row_stride * mat_stride] += 16.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
+ mat_row[-2 * row_stride * mat_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02];
+
+ mat_row[( 2 + 2 * row_stride) * mat_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[( 2 + 1 * row_stride) * mat_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[( 2 - 1 * row_stride) * mat_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[( 2 - 2 * row_stride) * mat_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+
+ mat_row[( 1 + 2 * row_stride) * mat_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[( 1 + 1 * row_stride) * mat_stride] += 64.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[( 1 - 1 * row_stride) * mat_stride] += -64.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[( 1 - 2 * row_stride) * mat_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+
+ mat_row[(-1 + 2 * row_stride) * mat_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[(-1 + 1 * row_stride) * mat_stride] += -64.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[(-1 - 1 * row_stride) * mat_stride] += 64.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[(-1 - 2 * row_stride) * mat_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+
+ mat_row[(-2 + 2 * row_stride) * mat_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[(-2 + 1 * row_stride) * mat_stride] += 8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[(-2 - 1 * row_stride) * mat_stride] += -8.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
+ mat_row[(-2 - 2 * row_stride) * mat_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
}
static void mat_fill_row(EGSContext *ctx, double *scratch_line,
@@ -387,7 +389,7 @@ static void mat_fill_row(EGSContext *ctx, double *scratch_line,
const ptrdiff_t mat_row_idx = idx1 * ctx->domain_size[0] + idx0;
int is_bnd[4], boundary;
- ptrdiff_t row_stride;
+ ptrdiff_t row_stride, mat_stride;
ptrdiff_t row_offset;
double *mat_row;
@@ -403,19 +405,22 @@ static void mat_fill_row(EGSContext *ctx, double *scratch_line,
memset(scratch_line, 0, e->N_ghosts * sizeof(*scratch_line));
row_stride = ctx->domain_size[0] + 2 * ctx->fd_stencil;
mat_row = scratch_line + row_stride * ctx->fd_stencil + ctx->fd_stencil;
+ mat_stride = 1;
} else {
- mat_row = e->mat + e->N * mat_row_idx;
+ mat_row = e->mat + mat_row_idx;
row_stride = ctx->domain_size[0];
+ mat_stride = e->N;
}
row_offset = idx1 * row_stride + idx0;
- e->fill_mat(mat_row + row_offset, ctx->diff_coeffs, ctx->priv->fd_factors, idx_src, row_stride);
+ e->fill_mat(mat_row + row_offset * mat_stride, mat_stride, row_stride, ctx->diff_coeffs, ctx->priv->fd_factors, idx_src);
e->rhs[mat_row_idx] = ctx->rhs->data[idx_src];
if (boundary) {
- double *mat_dst = e->mat + e->N * mat_row_idx;
+ double *mat_dst = e->mat + mat_row_idx;
+ ptrdiff_t mat_stride_dst = e->N;
const MG2DBoundary *bnd_fixval = NULL;
@@ -429,7 +434,7 @@ static void mat_fill_row(EGSContext *ctx, double *scratch_line,
bnd_fixval = ctx->boundaries[bnd_loc];
- mat_dst[mat_row_idx] = 1.0;
+ mat_dst[mat_row_idx * mat_stride_dst] = 1.0;
e->rhs[mat_row_idx] = bnd_fixval->val[idx[!ci]];
memset(scratch_line, 0, e->N_ghosts * sizeof(*scratch_line));
break;
@@ -519,7 +524,7 @@ static void mat_fill_row(EGSContext *ctx, double *scratch_line,
/* copy the interior values */
for (ptrdiff_t idx1_col = 0; idx1_col < ctx->domain_size[1]; idx1_col++)
for (ptrdiff_t idx0_col = 0; idx0_col < ctx->domain_size[0]; idx0_col++) {
- mat_dst[idx1_col * ctx->domain_size[0] + idx0_col] = mat_row[idx1_col * row_stride + idx0_col];
+ mat_dst[(idx1_col * ctx->domain_size[0] + idx0_col) * mat_stride_dst] = mat_row[idx1_col * row_stride + idx0_col];
mat_row[idx1_col * row_stride + idx0_col] = 0.0;
}
@@ -563,14 +568,6 @@ static int solve_exact(EGSContext *ctx)
start = gettime();
- for (int i = 0; i < e->N; i++)
- for (int j = i + 1; j < e->N; j++) {
- double tmp = e->mat[j * e->N + i];
- e->mat[j * e->N + i] = e->mat[i * e->N + j];
- e->mat[i * e->N + j] = tmp;
- }
- ec->time_mat_transpose += gettime() - start;
- ec->count_mat_transpose++;
start = gettime();
@@ -608,7 +605,6 @@ static int solve_exact(EGSContext *ctx)
return ret;
}
- start = gettime();
for (size_t idx1 = 0; idx1 < ctx->domain_size[1]; idx1++)
memcpy(ctx->u->data + idx1 * ctx->u->stride[0], e->x + idx1 * ctx->domain_size[0], ctx->domain_size[0] * sizeof(*e->x));
diff --git a/ell_grid_solve.h b/ell_grid_solve.h
index abe3958..182b087 100644
--- a/ell_grid_solve.h
+++ b/ell_grid_solve.h
@@ -91,8 +91,6 @@ typedef struct EGSRelaxContext {
typedef struct EGSExactContext {
int64_t count_mat_construct;
int64_t time_mat_construct;
- int64_t count_mat_transpose;
- int64_t time_mat_transpose;
int64_t count_bicgstab_solve;
int64_t time_bicgstab_solve;
int64_t count_lu_solve;
diff --git a/mg2d.c b/mg2d.c
index a1d6950..01c7624 100644
--- a/mg2d.c
+++ b/mg2d.c
@@ -814,9 +814,8 @@ void mg2d_print_stats(MG2DContext *ctx, const char *prefix)
p += ret;
} else if (e) {
ret = snprintf(p, sizeof(buf) - (p - buf),
- " %2.2f%% const %2.2f%% transp %2.2f%% bicgstab %2.2f%% lu %2.2f%% export",
+ " %2.2f%% const %2.2f%% bicgstab %2.2f%% lu %2.2f%% export",
e->time_mat_construct * 100.0 / level->solver->time_total,
- e->time_mat_transpose * 100.0 / level->solver->time_total,
e->time_bicgstab_solve * 100.0 / level->solver->time_total,
e->time_lu_solve * 100.0 / level->solver->time_total,
e->time_export * 100.0 / level->solver->time_total);