summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnton Khirnov <anton@khirnov.net>2019-04-17 10:26:30 +0200
committerAnton Khirnov <anton@khirnov.net>2019-04-17 10:27:53 +0200
commitf9285315eccc26354eef6d8349db777727a63394 (patch)
tree99fff854450394d781126481e8b2fc48f4508c64
parent6a92edee9bd679d1f71e66b2cc96456bead219fd (diff)
egs: do not assume the same stride for all arrays
Also, allocate all the diff coeffs together.
-rw-r--r--ell_grid_solve.c64
-rw-r--r--residual_calc.c43
-rw-r--r--residual_calc.h9
3 files changed, 68 insertions, 48 deletions
diff --git a/ell_grid_solve.c b/ell_grid_solve.c
index dfc48c6..c29f69b 100644
--- a/ell_grid_solve.c
+++ b/ell_grid_solve.c
@@ -72,13 +72,14 @@ typedef struct EGSExactInternal {
} EGSExactInternal;
struct EGSInternal {
- ptrdiff_t stride;
NDArray *u_base;
NDArray *rhs_base;
NDArray *residual_base;
- NDArray *diff_coeffs_base[MG2D_DIFF_COEFF_NB];
- ptrdiff_t residual_calc_offset;
+ /* all the diff coeffs concatenated */
+ NDArray *diff_coeffs_base;
+
+ ptrdiff_t residual_calc_offset[2];
size_t residual_calc_size[2];
double fd_factors[MG2D_DIFF_COEFF_NB];
@@ -122,12 +123,16 @@ static void residual_calc(EGSContext *ctx)
start = gettime();
for (int i = 0; i < ARRAY_ELEMS(diff_coeffs); i++)
- diff_coeffs[i] = ctx->diff_coeffs[i]->data + priv->residual_calc_offset;
+ diff_coeffs[i] = NDPTR2D(ctx->diff_coeffs[i], priv->residual_calc_offset[1], priv->residual_calc_offset[0]);
- mg2di_residual_calc(priv->rescalc, priv->residual_calc_size, priv->stride,
- &ctx->residual_max, ctx->residual->data + priv->residual_calc_offset,
- ctx->u->data + priv->residual_calc_offset, ctx->rhs->data + priv->residual_calc_offset,
- diff_coeffs, priv->fd_factors);
+ mg2di_residual_calc(priv->rescalc, priv->residual_calc_size, &ctx->residual_max,
+ NDPTR2D(ctx->residual, priv->residual_calc_offset[1], priv->residual_calc_offset[0]),
+ ctx->residual->stride[0],
+ NDPTR2D(ctx->u, priv->residual_calc_offset[1], priv->residual_calc_offset[0]),
+ ctx->u->stride[0],
+ NDPTR2D(ctx->rhs, priv->residual_calc_offset[1], priv->residual_calc_offset[0]),
+ ctx->rhs->stride[0],
+ diff_coeffs, ctx->diff_coeffs[0]->stride[0], priv->fd_factors);
ctx->time_res_calc += gettime() - start;
ctx->count_res++;
@@ -196,7 +201,7 @@ static void boundaries_apply(EGSContext *ctx, int init)
{
static const enum MG2DBCType bnd_type_order[] = { MG2D_BC_TYPE_FIXVAL, MG2D_BC_TYPE_REFLECT, MG2D_BC_TYPE_FALLOFF };
EGSInternal *priv = ctx->priv;
- const ptrdiff_t strides[2] = { 1, priv->stride };
+ const ptrdiff_t strides[2] = { 1, ctx->u->stride[0] };
int64_t start, start_bnd;
start = gettime();
@@ -258,7 +263,7 @@ static void boundaries_apply(EGSContext *ctx, int init)
int fact_x = dir_x, fact_y = dir_y;
double *dst = ctx->u->data
- + mg2d_bnd_is_upper(loc_y) * ((ctx->domain_size[1] - 1) * priv->stride)
+ + mg2d_bnd_is_upper(loc_y) * ((ctx->domain_size[1] - 1) * ctx->u->stride[0])
+ mg2d_bnd_is_upper(loc_x) * (ctx->domain_size[0] - 1);
if (bnd_y->type == MG2D_BC_TYPE_REFLECT)
@@ -411,7 +416,7 @@ static void mat_fill_row(EGSContext *ctx, double *scratch_line,
EGSInternal *priv = ctx->priv;
EGSExactInternal *e = &priv->e;
- const ptrdiff_t idx_src = idx1 * priv->stride + idx0;
+ const ptrdiff_t idx_src_dc = NDIDX2D(ctx->diff_coeffs[0], idx0, idx1);
const ptrdiff_t mat_row_idx = idx1 * ctx->domain_size[0] + idx0;
int is_bnd[4], boundary;
@@ -440,7 +445,7 @@ static void mat_fill_row(EGSContext *ctx, double *scratch_line,
row_offset = idx1 * row_stride + idx0;
- e->fill_mat(mat_row + row_offset * mat_stride, mat_stride, row_stride, ctx->diff_coeffs, ctx->priv->fd_factors, idx_src);
+ e->fill_mat(mat_row + row_offset * mat_stride, mat_stride, row_stride, ctx->diff_coeffs, ctx->priv->fd_factors, idx_src_dc);
e->rhs_factor[mat_row_idx] = 1.0;
@@ -597,9 +602,8 @@ static int solve_exact(EGSContext *ctx)
for (ptrdiff_t idx1 = 0; idx1 < ctx->domain_size[1]; idx1++)
for (ptrdiff_t idx0 = 0; idx0 < ctx->domain_size[0]; idx0++) {
- const ptrdiff_t idx_src = idx1 * priv->stride + idx0;
const ptrdiff_t mat_row_idx = idx1 * ctx->domain_size[0] + idx0;
- e->rhs[mat_row_idx] = e->rhs_factor[mat_row_idx] * ctx->rhs->data[idx_src] + e->rhs_mat[mat_row_idx];
+ e->rhs[mat_row_idx] = e->rhs_factor[mat_row_idx] * (*NDPTR2D(ctx->rhs, idx0, idx1)) + e->rhs_mat[mat_row_idx];
}
ec->time_mat_construct += gettime() - start;
@@ -751,13 +755,10 @@ int mg2di_egs_init(EGSContext *ctx, int flags)
}
}
- priv->residual_calc_offset = 0;
priv->residual_calc_size[0] = ctx->domain_size[0];
priv->residual_calc_size[1] = ctx->domain_size[1];
- if (ctx->boundaries[MG2D_BOUNDARY_1L]->type == MG2D_BC_TYPE_FIXVAL)
- priv->residual_calc_offset += ctx->residual->stride[0];
- if (ctx->boundaries[MG2D_BOUNDARY_0L]->type == MG2D_BC_TYPE_FIXVAL)
- priv->residual_calc_offset++;
+ priv->residual_calc_offset[0] = ctx->boundaries[MG2D_BOUNDARY_1L]->type == MG2D_BC_TYPE_FIXVAL;
+ priv->residual_calc_offset[1] = ctx->boundaries[MG2D_BOUNDARY_0L]->type == MG2D_BC_TYPE_FIXVAL;
for (int bnd_idx = 0; bnd_idx < ARRAY_ELEMS(ctx->boundaries); bnd_idx++) {
MG2DBoundary *bnd = ctx->boundaries[bnd_idx];
@@ -807,7 +808,10 @@ static int arrays_alloc(EGSContext *ctx, const size_t domain_size[2])
domain_size[1] + 2 * ghosts,
domain_size[0] + 2 * ghosts,
};
- const size_t stride = size_padded[0];
+ const size_t size_dc[2] = {
+ size_padded[0] * MG2D_DIFF_COEFF_NB,
+ size_padded[1],
+ };
const Slice slice[2] = { SLICE(ghosts, -ghosts, 1),
SLICE(ghosts, -ghosts, 1) };
int ret;
@@ -837,19 +841,19 @@ static int arrays_alloc(EGSContext *ctx, const size_t domain_size[2])
if (ret < 0)
return ret;
- for (int i = 0; i < ARRAY_ELEMS(ctx->diff_coeffs); i++) {
- ret = mg2di_ndarray_alloc(&priv->diff_coeffs_base[i], 2,
- size_padded, NDARRAY_ALLOC_ZERO);
- if (ret < 0)
- return ret;
+ ret = mg2di_ndarray_alloc(&priv->diff_coeffs_base, 2,
+ size_dc, NDARRAY_ALLOC_ZERO);
+ if (ret < 0)
+ return ret;
- ret = mg2di_ndarray_slice(&ctx->diff_coeffs[i], priv->diff_coeffs_base[i], slice);
+ for (int i = 0; i < ARRAY_ELEMS(ctx->diff_coeffs); i++) {
+ const Slice slice_dc[2] = { SLICE(i * size_padded[0] + ghosts, (i + 1) * size_padded[0] -ghosts, 1),
+ SLICE(ghosts, -ghosts, 1) };
+ ret = mg2di_ndarray_slice(&ctx->diff_coeffs[i], priv->diff_coeffs_base, slice_dc);
if (ret < 0)
return ret;
}
- priv->stride = stride;
-
for (int i = 0; i < ARRAY_ELEMS(ctx->boundaries); i++) {
const int ci = mg2d_bnd_coord_idx(i);
ctx->boundaries[i] = mg2di_bc_alloc(domain_size[!ci]);
@@ -972,10 +976,10 @@ void mg2di_egs_free(EGSContext **pctx)
mg2di_ndarray_free(&ctx->residual);
mg2di_ndarray_free(&ctx->priv->residual_base);
- for (int i = 0; i < ARRAY_ELEMS(ctx->priv->diff_coeffs_base); i++) {
+ for (int i = 0; i < ARRAY_ELEMS(ctx->diff_coeffs); i++) {
mg2di_ndarray_free(&ctx->diff_coeffs[i]);
- mg2di_ndarray_free(&ctx->priv->diff_coeffs_base[i]);
}
+ mg2di_ndarray_free(&ctx->priv->diff_coeffs_base);
for (int i = 0; i < ARRAY_ELEMS(ctx->boundaries); i++)
mg2di_bc_free(&ctx->boundaries[i]);
diff --git a/residual_calc.c b/residual_calc.c
index 2fc1a66..8f67823 100644
--- a/residual_calc.c
+++ b/residual_calc.c
@@ -32,12 +32,19 @@
typedef struct ResidualCalcTask {
size_t line_size;
- ptrdiff_t stride;
double *dst;
+ ptrdiff_t dst_stride;
+
const double *u;
+ ptrdiff_t u_stride;
+
const double *rhs;
+ ptrdiff_t rhs_stride;
+
const double * const *diff_coeffs;
+ ptrdiff_t diff_coeffs_stride;
+
const double *fd_factors;
} ResidualCalcTask;
@@ -178,24 +185,27 @@ static int residual_calc_task(void *arg, unsigned int job_idx, unsigned int thre
ResidualCalcInternal *priv = arg;
ResidualCalcTask *task = &priv->task;
- const ptrdiff_t offset = job_idx * task->stride;
const double *diff_coeffs[MG2D_DIFF_COEFF_NB];
for (int i = 0; i < ARRAY_ELEMS(diff_coeffs); i++)
- diff_coeffs[i] = task->diff_coeffs[i] + offset;
+ diff_coeffs[i] = task->diff_coeffs[i] + job_idx * task->diff_coeffs_stride;
- priv->residual_calc_line(task->line_size, task->dst + offset,
+ priv->residual_calc_line(task->line_size, task->dst + job_idx * task->dst_stride,
priv->residual_max + thread_idx * priv->calc_blocksize,
- task->stride, task->u + offset, task->rhs + offset,
+ task->u_stride, task->u + job_idx * task->u_stride,
+ task->rhs + job_idx * task->rhs_stride,
diff_coeffs, task->fd_factors);
return 0;
}
-int mg2di_residual_calc(ResidualCalcContext *ctx, size_t size[2], ptrdiff_t stride,
- double *residual_max,
- double *dst, const double *u, const double *rhs,
+int mg2di_residual_calc(ResidualCalcContext *ctx, size_t size[2],
+ double *residual_max,
+ double *dst, ptrdiff_t dst_stride,
+ const double *u, ptrdiff_t u_stride,
+ const double *rhs, ptrdiff_t rhs_stride,
const double * const diff_coeffs[MG2D_DIFF_COEFF_NB],
+ ptrdiff_t diff_coeffs_stride,
const double *fd_factors)
{
ResidualCalcInternal *priv = ctx->priv;
@@ -204,13 +214,16 @@ int mg2di_residual_calc(ResidualCalcContext *ctx, size_t size[2], ptrdiff_t stri
memset(priv->residual_max, 0, sizeof(*priv->residual_max) * priv->residual_max_size);
- task->line_size = size[0];
- task->stride = stride;
- task->dst = dst;
- task->u = u;
- task->rhs = rhs;
- task->diff_coeffs = diff_coeffs;
- task->fd_factors = fd_factors;
+ task->line_size = size[0];
+ task->dst = dst;
+ task->dst_stride = dst_stride;
+ task->u = u;
+ task->u_stride = u_stride;
+ task->rhs = rhs;
+ task->rhs_stride = rhs_stride;
+ task->diff_coeffs = diff_coeffs;
+ task->diff_coeffs_stride = diff_coeffs_stride;
+ task->fd_factors = fd_factors;
tp_execute(ctx->tp, size[1], residual_calc_task, priv);
diff --git a/residual_calc.h b/residual_calc.h
index af1bf39..c466214 100644
--- a/residual_calc.h
+++ b/residual_calc.h
@@ -68,10 +68,13 @@ int mg2di_residual_calc_init(ResidualCalcContext *ctx);
/**
* Calculate the residual.
*/
-int mg2di_residual_calc(ResidualCalcContext *ctx, size_t size[2], ptrdiff_t stride,
- double *res_max,
- double *dst, const double *u, const double *rhs,
+int mg2di_residual_calc(ResidualCalcContext *ctx, size_t size[2],
+ double *residual_max,
+ double *dst, ptrdiff_t dst_stride,
+ const double *u, ptrdiff_t u_stride,
+ const double *rhs, ptrdiff_t rhs_stride,
const double * const diff_coeffs[MG2D_DIFF_COEFF_NB],
+ ptrdiff_t diff_coeffs_stride,
const double *fd_factors);
#endif // MG2D_RESIDUAL_CALC_H