From f9285315eccc26354eef6d8349db777727a63394 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Wed, 17 Apr 2019 10:26:30 +0200 Subject: egs: do not assume the same stride for all arrays Also, allocate all the diff coeffs together. --- ell_grid_solve.c | 64 ++++++++++++++++++++++++++++++-------------------------- residual_calc.c | 43 ++++++++++++++++++++++++------------- residual_calc.h | 9 +++++--- 3 files changed, 68 insertions(+), 48 deletions(-) diff --git a/ell_grid_solve.c b/ell_grid_solve.c index dfc48c6..c29f69b 100644 --- a/ell_grid_solve.c +++ b/ell_grid_solve.c @@ -72,13 +72,14 @@ typedef struct EGSExactInternal { } EGSExactInternal; struct EGSInternal { - ptrdiff_t stride; NDArray *u_base; NDArray *rhs_base; NDArray *residual_base; - NDArray *diff_coeffs_base[MG2D_DIFF_COEFF_NB]; - ptrdiff_t residual_calc_offset; + /* all the diff coeffs concatenated */ + NDArray *diff_coeffs_base; + + ptrdiff_t residual_calc_offset[2]; size_t residual_calc_size[2]; double fd_factors[MG2D_DIFF_COEFF_NB]; @@ -122,12 +123,16 @@ static void residual_calc(EGSContext *ctx) start = gettime(); for (int i = 0; i < ARRAY_ELEMS(diff_coeffs); i++) - diff_coeffs[i] = ctx->diff_coeffs[i]->data + priv->residual_calc_offset; + diff_coeffs[i] = NDPTR2D(ctx->diff_coeffs[i], priv->residual_calc_offset[1], priv->residual_calc_offset[0]); - mg2di_residual_calc(priv->rescalc, priv->residual_calc_size, priv->stride, - &ctx->residual_max, ctx->residual->data + priv->residual_calc_offset, - ctx->u->data + priv->residual_calc_offset, ctx->rhs->data + priv->residual_calc_offset, - diff_coeffs, priv->fd_factors); + mg2di_residual_calc(priv->rescalc, priv->residual_calc_size, &ctx->residual_max, + NDPTR2D(ctx->residual, priv->residual_calc_offset[1], priv->residual_calc_offset[0]), + ctx->residual->stride[0], + NDPTR2D(ctx->u, priv->residual_calc_offset[1], priv->residual_calc_offset[0]), + ctx->u->stride[0], + NDPTR2D(ctx->rhs, priv->residual_calc_offset[1], priv->residual_calc_offset[0]), + ctx->rhs->stride[0], + diff_coeffs, ctx->diff_coeffs[0]->stride[0], priv->fd_factors); ctx->time_res_calc += gettime() - start; ctx->count_res++; @@ -196,7 +201,7 @@ static void boundaries_apply(EGSContext *ctx, int init) { static const enum MG2DBCType bnd_type_order[] = { MG2D_BC_TYPE_FIXVAL, MG2D_BC_TYPE_REFLECT, MG2D_BC_TYPE_FALLOFF }; EGSInternal *priv = ctx->priv; - const ptrdiff_t strides[2] = { 1, priv->stride }; + const ptrdiff_t strides[2] = { 1, ctx->u->stride[0] }; int64_t start, start_bnd; start = gettime(); @@ -258,7 +263,7 @@ static void boundaries_apply(EGSContext *ctx, int init) int fact_x = dir_x, fact_y = dir_y; double *dst = ctx->u->data - + mg2d_bnd_is_upper(loc_y) * ((ctx->domain_size[1] - 1) * priv->stride) + + mg2d_bnd_is_upper(loc_y) * ((ctx->domain_size[1] - 1) * ctx->u->stride[0]) + mg2d_bnd_is_upper(loc_x) * (ctx->domain_size[0] - 1); if (bnd_y->type == MG2D_BC_TYPE_REFLECT) @@ -411,7 +416,7 @@ static void mat_fill_row(EGSContext *ctx, double *scratch_line, EGSInternal *priv = ctx->priv; EGSExactInternal *e = &priv->e; - const ptrdiff_t idx_src = idx1 * priv->stride + idx0; + const ptrdiff_t idx_src_dc = NDIDX2D(ctx->diff_coeffs[0], idx0, idx1); const ptrdiff_t mat_row_idx = idx1 * ctx->domain_size[0] + idx0; int is_bnd[4], boundary; @@ -440,7 +445,7 @@ static void mat_fill_row(EGSContext *ctx, double *scratch_line, row_offset = idx1 * row_stride + idx0; - e->fill_mat(mat_row + row_offset * mat_stride, mat_stride, row_stride, ctx->diff_coeffs, ctx->priv->fd_factors, idx_src); + e->fill_mat(mat_row + row_offset * mat_stride, mat_stride, row_stride, ctx->diff_coeffs, ctx->priv->fd_factors, idx_src_dc); e->rhs_factor[mat_row_idx] = 1.0; @@ -597,9 +602,8 @@ static int solve_exact(EGSContext *ctx) for (ptrdiff_t idx1 = 0; idx1 < ctx->domain_size[1]; idx1++) for (ptrdiff_t idx0 = 0; idx0 < ctx->domain_size[0]; idx0++) { - const ptrdiff_t idx_src = idx1 * priv->stride + idx0; const ptrdiff_t mat_row_idx = idx1 * ctx->domain_size[0] + idx0; - e->rhs[mat_row_idx] = e->rhs_factor[mat_row_idx] * ctx->rhs->data[idx_src] + e->rhs_mat[mat_row_idx]; + e->rhs[mat_row_idx] = e->rhs_factor[mat_row_idx] * (*NDPTR2D(ctx->rhs, idx0, idx1)) + e->rhs_mat[mat_row_idx]; } ec->time_mat_construct += gettime() - start; @@ -751,13 +755,10 @@ int mg2di_egs_init(EGSContext *ctx, int flags) } } - priv->residual_calc_offset = 0; priv->residual_calc_size[0] = ctx->domain_size[0]; priv->residual_calc_size[1] = ctx->domain_size[1]; - if (ctx->boundaries[MG2D_BOUNDARY_1L]->type == MG2D_BC_TYPE_FIXVAL) - priv->residual_calc_offset += ctx->residual->stride[0]; - if (ctx->boundaries[MG2D_BOUNDARY_0L]->type == MG2D_BC_TYPE_FIXVAL) - priv->residual_calc_offset++; + priv->residual_calc_offset[0] = ctx->boundaries[MG2D_BOUNDARY_1L]->type == MG2D_BC_TYPE_FIXVAL; + priv->residual_calc_offset[1] = ctx->boundaries[MG2D_BOUNDARY_0L]->type == MG2D_BC_TYPE_FIXVAL; for (int bnd_idx = 0; bnd_idx < ARRAY_ELEMS(ctx->boundaries); bnd_idx++) { MG2DBoundary *bnd = ctx->boundaries[bnd_idx]; @@ -807,7 +808,10 @@ static int arrays_alloc(EGSContext *ctx, const size_t domain_size[2]) domain_size[1] + 2 * ghosts, domain_size[0] + 2 * ghosts, }; - const size_t stride = size_padded[0]; + const size_t size_dc[2] = { + size_padded[0] * MG2D_DIFF_COEFF_NB, + size_padded[1], + }; const Slice slice[2] = { SLICE(ghosts, -ghosts, 1), SLICE(ghosts, -ghosts, 1) }; int ret; @@ -837,19 +841,19 @@ static int arrays_alloc(EGSContext *ctx, const size_t domain_size[2]) if (ret < 0) return ret; - for (int i = 0; i < ARRAY_ELEMS(ctx->diff_coeffs); i++) { - ret = mg2di_ndarray_alloc(&priv->diff_coeffs_base[i], 2, - size_padded, NDARRAY_ALLOC_ZERO); - if (ret < 0) - return ret; + ret = mg2di_ndarray_alloc(&priv->diff_coeffs_base, 2, + size_dc, NDARRAY_ALLOC_ZERO); + if (ret < 0) + return ret; - ret = mg2di_ndarray_slice(&ctx->diff_coeffs[i], priv->diff_coeffs_base[i], slice); + for (int i = 0; i < ARRAY_ELEMS(ctx->diff_coeffs); i++) { + const Slice slice_dc[2] = { SLICE(i * size_padded[0] + ghosts, (i + 1) * size_padded[0] -ghosts, 1), + SLICE(ghosts, -ghosts, 1) }; + ret = mg2di_ndarray_slice(&ctx->diff_coeffs[i], priv->diff_coeffs_base, slice_dc); if (ret < 0) return ret; } - priv->stride = stride; - for (int i = 0; i < ARRAY_ELEMS(ctx->boundaries); i++) { const int ci = mg2d_bnd_coord_idx(i); ctx->boundaries[i] = mg2di_bc_alloc(domain_size[!ci]); @@ -972,10 +976,10 @@ void mg2di_egs_free(EGSContext **pctx) mg2di_ndarray_free(&ctx->residual); mg2di_ndarray_free(&ctx->priv->residual_base); - for (int i = 0; i < ARRAY_ELEMS(ctx->priv->diff_coeffs_base); i++) { + for (int i = 0; i < ARRAY_ELEMS(ctx->diff_coeffs); i++) { mg2di_ndarray_free(&ctx->diff_coeffs[i]); - mg2di_ndarray_free(&ctx->priv->diff_coeffs_base[i]); } + mg2di_ndarray_free(&ctx->priv->diff_coeffs_base); for (int i = 0; i < ARRAY_ELEMS(ctx->boundaries); i++) mg2di_bc_free(&ctx->boundaries[i]); diff --git a/residual_calc.c b/residual_calc.c index 2fc1a66..8f67823 100644 --- a/residual_calc.c +++ b/residual_calc.c @@ -32,12 +32,19 @@ typedef struct ResidualCalcTask { size_t line_size; - ptrdiff_t stride; double *dst; + ptrdiff_t dst_stride; + const double *u; + ptrdiff_t u_stride; + const double *rhs; + ptrdiff_t rhs_stride; + const double * const *diff_coeffs; + ptrdiff_t diff_coeffs_stride; + const double *fd_factors; } ResidualCalcTask; @@ -178,24 +185,27 @@ static int residual_calc_task(void *arg, unsigned int job_idx, unsigned int thre ResidualCalcInternal *priv = arg; ResidualCalcTask *task = &priv->task; - const ptrdiff_t offset = job_idx * task->stride; const double *diff_coeffs[MG2D_DIFF_COEFF_NB]; for (int i = 0; i < ARRAY_ELEMS(diff_coeffs); i++) - diff_coeffs[i] = task->diff_coeffs[i] + offset; + diff_coeffs[i] = task->diff_coeffs[i] + job_idx * task->diff_coeffs_stride; - priv->residual_calc_line(task->line_size, task->dst + offset, + priv->residual_calc_line(task->line_size, task->dst + job_idx * task->dst_stride, priv->residual_max + thread_idx * priv->calc_blocksize, - task->stride, task->u + offset, task->rhs + offset, + task->u_stride, task->u + job_idx * task->u_stride, + task->rhs + job_idx * task->rhs_stride, diff_coeffs, task->fd_factors); return 0; } -int mg2di_residual_calc(ResidualCalcContext *ctx, size_t size[2], ptrdiff_t stride, - double *residual_max, - double *dst, const double *u, const double *rhs, +int mg2di_residual_calc(ResidualCalcContext *ctx, size_t size[2], + double *residual_max, + double *dst, ptrdiff_t dst_stride, + const double *u, ptrdiff_t u_stride, + const double *rhs, ptrdiff_t rhs_stride, const double * const diff_coeffs[MG2D_DIFF_COEFF_NB], + ptrdiff_t diff_coeffs_stride, const double *fd_factors) { ResidualCalcInternal *priv = ctx->priv; @@ -204,13 +214,16 @@ int mg2di_residual_calc(ResidualCalcContext *ctx, size_t size[2], ptrdiff_t stri memset(priv->residual_max, 0, sizeof(*priv->residual_max) * priv->residual_max_size); - task->line_size = size[0]; - task->stride = stride; - task->dst = dst; - task->u = u; - task->rhs = rhs; - task->diff_coeffs = diff_coeffs; - task->fd_factors = fd_factors; + task->line_size = size[0]; + task->dst = dst; + task->dst_stride = dst_stride; + task->u = u; + task->u_stride = u_stride; + task->rhs = rhs; + task->rhs_stride = rhs_stride; + task->diff_coeffs = diff_coeffs; + task->diff_coeffs_stride = diff_coeffs_stride; + task->fd_factors = fd_factors; tp_execute(ctx->tp, size[1], residual_calc_task, priv); diff --git a/residual_calc.h b/residual_calc.h index af1bf39..c466214 100644 --- a/residual_calc.h +++ b/residual_calc.h @@ -68,10 +68,13 @@ int mg2di_residual_calc_init(ResidualCalcContext *ctx); /** * Calculate the residual. */ -int mg2di_residual_calc(ResidualCalcContext *ctx, size_t size[2], ptrdiff_t stride, - double *res_max, - double *dst, const double *u, const double *rhs, +int mg2di_residual_calc(ResidualCalcContext *ctx, size_t size[2], + double *residual_max, + double *dst, ptrdiff_t dst_stride, + const double *u, ptrdiff_t u_stride, + const double *rhs, ptrdiff_t rhs_stride, const double * const diff_coeffs[MG2D_DIFF_COEFF_NB], + ptrdiff_t diff_coeffs_stride, const double *fd_factors); #endif // MG2D_RESIDUAL_CALC_H -- cgit v1.2.3