summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnton Khirnov <anton@khirnov.net>2019-03-25 19:33:12 +0100
committerAnton Khirnov <anton@khirnov.net>2019-03-25 19:33:12 +0100
commit7c18c16aeab0476dcc0a7e1d10a3f8ca0db753ef (patch)
tree2549d68c46625e61b9d7208d498263d959a34ef6
parentc31ff38461861d397580cf3f7aa397c5c1cfdfe7 (diff)
egs_exact: parallelize matrix construction
-rw-r--r--ell_grid_solve.c298
1 files changed, 165 insertions, 133 deletions
diff --git a/ell_grid_solve.c b/ell_grid_solve.c
index 12863d0..184c9e3 100644
--- a/ell_grid_solve.c
+++ b/ell_grid_solve.c
@@ -56,9 +56,11 @@ typedef struct EGSExactInternal {
double *mat_f;
double *rhs;
double *x;
- double *scratch_line;
int *ipiv;
+ double *scratch_lines;
+ ptrdiff_t scratch_lines_allocated;
+
BiCGStabContext *bicgstab;
} EGSExactInternal;
@@ -375,171 +377,187 @@ static void fill_mat_s2(double *mat_row, NDArray **diff_coeffs, double *fd_facto
mat_row[row_offset - 2 - 2 * row_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11];
}
-static int solve_exact(EGSContext *ctx)
+static void mat_fill_row(EGSContext *ctx, double *scratch_line,
+ ptrdiff_t idx1, ptrdiff_t idx0)
{
EGSInternal *priv = ctx->priv;
- EGSExactContext *ec = ctx->solver_data;
EGSExactInternal *e = &priv->e;
- int64_t start;
- int ret;
- start = gettime();
+ const ptrdiff_t idx_src = idx1 * priv->stride + idx0;
+ const ptrdiff_t mat_row_idx = idx1 * ctx->domain_size[0] + idx0;
- memset(e->mat, 0, SQR(e->N) * sizeof(*e->mat));
+ int is_bnd[4], boundary;
+ ptrdiff_t row_stride;
+ ptrdiff_t row_offset;
- for (ptrdiff_t idx1 = 0; idx1 < ctx->domain_size[1]; idx1++) {
- int is_bnd[4], boundary;
+ double *mat_row;
- is_bnd[MG2D_BOUNDARY_1L] = idx1 < ctx->fd_stencil;
- is_bnd[MG2D_BOUNDARY_1U] = idx1 >= ctx->domain_size[1] - ctx->fd_stencil;
+ is_bnd[MG2D_BOUNDARY_0L] = idx0 < ctx->fd_stencil;
+ is_bnd[MG2D_BOUNDARY_0U] = idx0 >= ctx->domain_size[0] - ctx->fd_stencil;
+ is_bnd[MG2D_BOUNDARY_1L] = idx1 < ctx->fd_stencil;
+ is_bnd[MG2D_BOUNDARY_1U] = idx1 >= ctx->domain_size[1] - ctx->fd_stencil;
- for (ptrdiff_t idx0 = 0; idx0 < ctx->domain_size[0]; idx0++) {
- const ptrdiff_t idx_src = idx1 * priv->stride + idx0;
- const ptrdiff_t mat_row_idx = idx1 * ctx->domain_size[0] + idx0;
+ boundary = is_bnd[0] || is_bnd[1] || is_bnd[2] || is_bnd[3];
- ptrdiff_t row_stride;
- ptrdiff_t row_offset;
+ if (boundary) {
+ memset(scratch_line, 0, e->N_ghosts * sizeof(*scratch_line));
+ row_stride = ctx->domain_size[0] + 2 * ctx->fd_stencil;
+ mat_row = scratch_line + row_stride * ctx->fd_stencil + ctx->fd_stencil;
+ } else {
+ mat_row = e->mat + e->N * mat_row_idx;
+ row_stride = ctx->domain_size[0];
+ }
- double *mat_row;
+ row_offset = idx1 * row_stride + idx0;
- is_bnd[MG2D_BOUNDARY_0L] = idx0 < ctx->fd_stencil;
- is_bnd[MG2D_BOUNDARY_0U] = idx0 >= ctx->domain_size[0] - ctx->fd_stencil;
+ e->fill_mat(mat_row, ctx->diff_coeffs, ctx->priv->fd_factors, idx_src, row_stride, row_offset);
- boundary = is_bnd[0] || is_bnd[1] || is_bnd[2] || is_bnd[3];
+ e->rhs[mat_row_idx] = ctx->rhs->data[idx_src];
- if (boundary) {
- memset(e->scratch_line, 0, e->N_ghosts * sizeof(*e->scratch_line));
- row_stride = ctx->domain_size[0] + 2 * ctx->fd_stencil;
- mat_row = e->scratch_line + row_stride * ctx->fd_stencil + ctx->fd_stencil;
- } else {
- mat_row = e->mat + e->N * mat_row_idx;
- row_stride = ctx->domain_size[0];
- }
+ if (boundary) {
+ double *mat_dst = e->mat + e->N * mat_row_idx;
- row_offset = idx1 * row_stride + idx0;
+ const MG2DBoundary *bnd_fixval = NULL;
- e->fill_mat(mat_row, ctx->diff_coeffs, ctx->priv->fd_factors, idx_src, row_stride, row_offset);
+ for (int bnd_loc = 0; bnd_loc < ARRAY_ELEMS(ctx->boundaries); bnd_loc++) {
+ const int ci = mg2d_bnd_coord_idx(bnd_loc);
+ const ptrdiff_t idx[] = { idx0, idx1 };
- e->rhs[mat_row_idx] = ctx->rhs->data[idx_src];
+ if (!(idx[ci] == (ctx->domain_size[ci] - 1) * mg2d_bnd_is_upper(bnd_loc) &&
+ ctx->boundaries[bnd_loc]->type == MG2D_BC_TYPE_FIXVAL))
+ continue;
+
+ bnd_fixval = ctx->boundaries[bnd_loc];
- if (boundary) {
- double *mat_dst = e->mat + e->N * mat_row_idx;
+ mat_dst[mat_row_idx] = 1.0;
+ e->rhs[mat_row_idx] = bnd_fixval->val[idx[!ci]];
+ memset(scratch_line, 0, e->N_ghosts * sizeof(*scratch_line));
+ break;
+ }
- const MG2DBoundary *bnd_fixval = NULL;
+ if (!bnd_fixval) {
+ /* apply the boundary conditions to eliminate the ghostpoint values */
+ // reflect
+ for (int bnd_loc = 0; bnd_loc < ARRAY_ELEMS(ctx->boundaries); bnd_loc++) {
+ MG2DBoundary *bnd = ctx->boundaries[bnd_loc];
- for (int bnd_loc = 0; bnd_loc < ARRAY_ELEMS(ctx->boundaries); bnd_loc++) {
- const int ci = mg2d_bnd_coord_idx(bnd_loc);
- const ptrdiff_t idx[] = { idx0, idx1 };
+ const int ci = mg2d_bnd_coord_idx(bnd_loc);
+ const int dir = mg2d_bnd_out_dir(bnd_loc);
- if (!(idx[ci] == (ctx->domain_size[ci] - 1) * mg2d_bnd_is_upper(bnd_loc) &&
- ctx->boundaries[bnd_loc]->type == MG2D_BC_TYPE_FIXVAL))
- continue;
+ const ptrdiff_t dst_stride[2] = { 1, row_stride };
+ const ptrdiff_t bnd_stride[2] = { dst_stride[!ci], dst_stride[ci] };
+ const ptrdiff_t bnd_len[2] = { ctx->domain_size[!ci], ctx->domain_size[ci] };
- bnd_fixval = ctx->boundaries[bnd_loc];
+ double *dst = mat_row + mg2d_bnd_is_upper(bnd_loc) * ((bnd_len[1] - 1) * bnd_stride[1]);
- mat_dst[mat_row_idx] = 1.0;
- e->rhs[mat_row_idx] = bnd_fixval->val[idx[!ci]];
- memset(e->scratch_line, 0, e->N_ghosts * sizeof(*e->scratch_line));
- break;
- }
+ if (!is_bnd[bnd_loc] || bnd->type != MG2D_BC_TYPE_REFLECT)
+ continue;
- if (!bnd_fixval) {
- /* apply the boundary conditions to eliminate the ghostpoint values */
- // reflect
- for (int bnd_loc = 0; bnd_loc < ARRAY_ELEMS(ctx->boundaries); bnd_loc++) {
- MG2DBoundary *bnd = ctx->boundaries[bnd_loc];
+ for (ptrdiff_t bnd_layer = 1; bnd_layer <= ctx->fd_stencil; bnd_layer++)
+ for (ptrdiff_t bnd_idx = -(ptrdiff_t)ctx->fd_stencil; bnd_idx < (ptrdiff_t)(bnd_len[0] + ctx->fd_stencil); bnd_idx++) {
+ dst[-dir * bnd_layer * bnd_stride[1] + bnd_idx * bnd_stride[0]] += dst[dir * bnd_layer * bnd_stride[1] + bnd_idx * bnd_stride[0]];
+ dst[dir * bnd_layer * bnd_stride[1] + bnd_idx * bnd_stride[0]] = 0.0;
+ }
+ }
- const int ci = mg2d_bnd_coord_idx(bnd_loc);
- const int dir = mg2d_bnd_out_dir(bnd_loc);
+ // fixval
+ for (int bnd_loc = 0; bnd_loc < ARRAY_ELEMS(ctx->boundaries); bnd_loc++) {
+ MG2DBoundary *bnd = ctx->boundaries[bnd_loc];
- const ptrdiff_t dst_stride[2] = { 1, row_stride };
- const ptrdiff_t bnd_stride[2] = { dst_stride[!ci], dst_stride[ci] };
- const ptrdiff_t bnd_len[2] = { ctx->domain_size[!ci], ctx->domain_size[ci] };
+ const int ci = mg2d_bnd_coord_idx(bnd_loc);
+ const int dir = mg2d_bnd_out_dir(bnd_loc);
- double *dst = mat_row + mg2d_bnd_is_upper(bnd_loc) * ((bnd_len[1] - 1) * bnd_stride[1]);
+ const ptrdiff_t dst_stride[2] = { 1, row_stride };
+ const ptrdiff_t bnd_stride[2] = { dst_stride[!ci], dst_stride[ci] };
+ const ptrdiff_t bnd_len[2] = { ctx->domain_size[!ci], ctx->domain_size[ci] };
- if (!is_bnd[bnd_loc] || bnd->type != MG2D_BC_TYPE_REFLECT)
- continue;
+ double *dst = mat_row + mg2d_bnd_is_upper(bnd_loc) * ((bnd_len[1] - 1) * bnd_stride[1]);
- for (ptrdiff_t bnd_layer = 1; bnd_layer <= ctx->fd_stencil; bnd_layer++)
- for (ptrdiff_t bnd_idx = -(ptrdiff_t)ctx->fd_stencil; bnd_idx < (ptrdiff_t)(bnd_len[0] + ctx->fd_stencil); bnd_idx++) {
- dst[-dir * bnd_layer * bnd_stride[1] + bnd_idx * bnd_stride[0]] += dst[dir * bnd_layer * bnd_stride[1] + bnd_idx * bnd_stride[0]];
- dst[dir * bnd_layer * bnd_stride[1] + bnd_idx * bnd_stride[0]] = 0.0;
- }
+ if (!is_bnd[bnd_loc] || bnd->type != MG2D_BC_TYPE_FIXVAL)
+ continue;
+
+ for (ptrdiff_t bnd_layer = 1; bnd_layer < ctx->fd_stencil; bnd_layer++)
+ for (ptrdiff_t bnd_idx = -((ptrdiff_t)ctx->fd_stencil - 1); bnd_idx < (ptrdiff_t)(bnd_len[0] + ctx->fd_stencil - 1); bnd_idx++) {
+ e->rhs[mat_row_idx] -= bnd->val[bnd_layer * bnd->val_stride + bnd_idx] * dst[dir * bnd_layer * bnd_stride[1] + bnd_idx * bnd_stride[0]];
+ dst[dir * bnd_layer * bnd_stride[1] + bnd_idx * bnd_stride[0]] = 0.0;
}
+ }
- // fixval
- for (int bnd_loc = 0; bnd_loc < ARRAY_ELEMS(ctx->boundaries); bnd_loc++) {
- MG2DBoundary *bnd = ctx->boundaries[bnd_loc];
+ // falloff
+ for (int bnd_loc = 0; bnd_loc < ARRAY_ELEMS(ctx->boundaries); bnd_loc++) {
+ MG2DBoundary *bnd = ctx->boundaries[bnd_loc];
- const int ci = mg2d_bnd_coord_idx(bnd_loc);
- const int dir = mg2d_bnd_out_dir(bnd_loc);
+ const int ci = mg2d_bnd_coord_idx(bnd_loc);
+ const int dir = mg2d_bnd_out_dir(bnd_loc);
- const ptrdiff_t dst_stride[2] = { 1, row_stride };
- const ptrdiff_t bnd_stride[2] = { dst_stride[!ci], dst_stride[ci] };
- const ptrdiff_t bnd_len[2] = { ctx->domain_size[!ci], ctx->domain_size[ci] };
+ const ptrdiff_t dst_stride[2] = { 1, row_stride };
+ const ptrdiff_t bnd_stride[2] = { dst_stride[!ci], dst_stride[ci] };
+ const ptrdiff_t bnd_len[2] = { ctx->domain_size[!ci], ctx->domain_size[ci] };
- double *dst = mat_row + mg2d_bnd_is_upper(bnd_loc) * ((bnd_len[1] - 1) * bnd_stride[1]);
+ double *dst = mat_row + mg2d_bnd_is_upper(bnd_loc) * ((bnd_len[1] - 1) * bnd_stride[1]);
- if (!is_bnd[bnd_loc] || bnd->type != MG2D_BC_TYPE_FIXVAL)
- continue;
+ if (!is_bnd[bnd_loc] || bnd->type != MG2D_BC_TYPE_FALLOFF)
+ continue;
- for (ptrdiff_t bnd_layer = 1; bnd_layer < ctx->fd_stencil; bnd_layer++)
- for (ptrdiff_t bnd_idx = -((ptrdiff_t)ctx->fd_stencil - 1); bnd_idx < (ptrdiff_t)(bnd_len[0] + ctx->fd_stencil - 1); bnd_idx++) {
- e->rhs[mat_row_idx] -= bnd->val[bnd_layer * bnd->val_stride + bnd_idx] * dst[dir * bnd_layer * bnd_stride[1] + bnd_idx * bnd_stride[0]];
- dst[dir * bnd_layer * bnd_stride[1] + bnd_idx * bnd_stride[0]] = 0.0;
- }
+ for (ptrdiff_t bnd_layer = ctx->fd_stencil; bnd_layer > 0; bnd_layer--)
+ for (ptrdiff_t bnd_idx = -(ptrdiff_t)ctx->fd_stencil; bnd_idx < (ptrdiff_t)(bnd_len[0] + ctx->fd_stencil); bnd_idx++) {
+ const double x = ctx->step[0] * (ctx->domain_size[0] - 1 + bnd_layer - ctx->fd_stencil);
+ const double y = ctx->step[0] * bnd_idx;
+ const double r = sqrt(SQR(x) + SQR(y));
+ double val = dst[dir * bnd_layer * bnd_stride[1] + bnd_idx * bnd_stride[0]];
+
+ for (int i = 1; i < ctx->fd_stencil * 2 + 1; i++) {
+ dst[dir * (bnd_layer - i) * bnd_stride[1] + bnd_idx * bnd_stride[0]] -=
+ val * falloff_bnd_fd_factors[ctx->fd_stencil - 1][i] / falloff_bnd_fd_factors[ctx->fd_stencil - 1][0];
+ }
+ dst[dir * (bnd_layer - (ptrdiff_t)ctx->fd_stencil) * bnd_stride[1] + bnd_idx * bnd_stride[0]] -= val * (x / SQR(r)) * (ctx->step[0] / falloff_bnd_fd_factors[ctx->fd_stencil - 1][0]);
+
+ dst[dir * bnd_layer * bnd_stride[1] + bnd_idx * bnd_stride[0]] = 0.0;
}
+ }
- // falloff
- for (int bnd_loc = 0; bnd_loc < ARRAY_ELEMS(ctx->boundaries); bnd_loc++) {
- MG2DBoundary *bnd = ctx->boundaries[bnd_loc];
+ /* copy the interior values */
+ for (ptrdiff_t idx1_col = 0; idx1_col < ctx->domain_size[1]; idx1_col++)
+ for (ptrdiff_t idx0_col = 0; idx0_col < ctx->domain_size[0]; idx0_col++) {
+ mat_dst[idx1_col * ctx->domain_size[0] + idx0_col] = mat_row[idx1_col * row_stride + idx0_col];
+ mat_row[idx1_col * row_stride + idx0_col] = 0.0;
+ }
- const int ci = mg2d_bnd_coord_idx(bnd_loc);
- const int dir = mg2d_bnd_out_dir(bnd_loc);
+ }
- const ptrdiff_t dst_stride[2] = { 1, row_stride };
- const ptrdiff_t bnd_stride[2] = { dst_stride[!ci], dst_stride[ci] };
- const ptrdiff_t bnd_len[2] = { ctx->domain_size[!ci], ctx->domain_size[ci] };
+ /* make sure all the values from the scratch line have been accounted for */
+ for (ptrdiff_t idx_scratch = 0; idx_scratch < e->N_ghosts; idx_scratch++)
+ if (scratch_line[idx_scratch] != 0.0)
+ abort();
+ }
+}
- double *dst = mat_row + mg2d_bnd_is_upper(bnd_loc) * ((bnd_len[1] - 1) * bnd_stride[1]);
+static int mat_fill_row_task(void *arg, unsigned int job_idx, unsigned int thread_idx)
+{
+ EGSContext *ctx = arg;
+ EGSExactInternal *e = &ctx->priv->e;
- if (!is_bnd[bnd_loc] || bnd->type != MG2D_BC_TYPE_FALLOFF)
- continue;
+ const ptrdiff_t idx1 = job_idx / ctx->domain_size[0];
+ const ptrdiff_t idx0 = job_idx % ctx->domain_size[0];
- for (ptrdiff_t bnd_layer = ctx->fd_stencil; bnd_layer > 0; bnd_layer--)
- for (ptrdiff_t bnd_idx = -(ptrdiff_t)ctx->fd_stencil; bnd_idx < (ptrdiff_t)(bnd_len[0] + ctx->fd_stencil); bnd_idx++) {
- const double x = ctx->step[0] * (ctx->domain_size[0] - 1 + bnd_layer - ctx->fd_stencil);
- const double y = ctx->step[0] * bnd_idx;
- const double r = sqrt(SQR(x) + SQR(y));
- double val = dst[dir * bnd_layer * bnd_stride[1] + bnd_idx * bnd_stride[0]];
+ mat_fill_row(ctx, e->scratch_lines + e->N_ghosts * thread_idx, idx1, idx0);
- for (int i = 1; i < ctx->fd_stencil * 2 + 1; i++) {
- dst[dir * (bnd_layer - i) * bnd_stride[1] + bnd_idx * bnd_stride[0]] -=
- val * falloff_bnd_fd_factors[ctx->fd_stencil - 1][i] / falloff_bnd_fd_factors[ctx->fd_stencil - 1][0];
- }
- dst[dir * (bnd_layer - (ptrdiff_t)ctx->fd_stencil) * bnd_stride[1] + bnd_idx * bnd_stride[0]] -= val * (x / SQR(r)) * (ctx->step[0] / falloff_bnd_fd_factors[ctx->fd_stencil - 1][0]);
+ return 0;
+}
- dst[dir * bnd_layer * bnd_stride[1] + bnd_idx * bnd_stride[0]] = 0.0;
- }
- }
+static int solve_exact(EGSContext *ctx)
+{
+ EGSInternal *priv = ctx->priv;
+ EGSExactContext *ec = ctx->solver_data;
+ EGSExactInternal *e = &priv->e;
+ int64_t start;
+ int ret;
- /* copy the interior values */
- for (ptrdiff_t idx1_col = 0; idx1_col < ctx->domain_size[1]; idx1_col++)
- for (ptrdiff_t idx0_col = 0; idx0_col < ctx->domain_size[0]; idx0_col++) {
- mat_dst[idx1_col * ctx->domain_size[0] + idx0_col] = mat_row[idx1_col * row_stride + idx0_col];
- mat_row[idx1_col * row_stride + idx0_col] = 0.0;
- }
+ start = gettime();
- }
+ memset(e->mat, 0, SQR(e->N) * sizeof(*e->mat));
+ tp_execute(ctx->tp, ctx->domain_size[1] * ctx->domain_size[0], mat_fill_row_task, ctx);
- /* make sure all the values from the scratch line have been accounted for */
- for (ptrdiff_t idx_scratch = 0; idx_scratch < e->N_ghosts; idx_scratch++)
- if (e->scratch_line[idx_scratch] != 0.0)
- abort();
- }
- }
- }
ec->time_mat_construct += gettime() - start;
ec->count_mat_construct++;
@@ -632,17 +650,6 @@ int mg2di_egs_init(EGSContext *ctx)
start_init = gettime();
- if (ctx->solver_type == EGS_SOLVER_EXACT) {
- switch (ctx->fd_stencil) {
- case 1: priv->e.fill_mat = fill_mat_s1; break;
- case 2: priv->e.fill_mat = fill_mat_s2; break;
- default:
- mg2di_log(&ctx->logger, 0, "Invalid finite difference stencil: %zd\n",
- ctx->fd_stencil);
- return -EINVAL;
- }
- }
-
if (ctx->step[0] <= DBL_EPSILON || ctx->step[1] <= DBL_EPSILON) {
mg2di_log(&ctx->logger, 0, "Spatial step size too small\n");
return -EINVAL;
@@ -674,6 +681,32 @@ int mg2di_egs_init(EGSContext *ctx)
ctx->tp = priv->tp_internal;
}
+ if (ctx->solver_type == EGS_SOLVER_EXACT) {
+ EGSExactInternal *e = &priv->e;
+ unsigned int nb_threads = tp_get_nb_threads(ctx->tp);
+
+ switch (ctx->fd_stencil) {
+ case 1: e->fill_mat = fill_mat_s1; break;
+ case 2: e->fill_mat = fill_mat_s2; break;
+ default:
+ mg2di_log(&ctx->logger, 0, "Invalid finite difference stencil: %zd\n",
+ ctx->fd_stencil);
+ return -EINVAL;
+ }
+
+ if (e->scratch_lines_allocated < nb_threads) {
+ free(e->scratch_lines);
+ e->scratch_lines = NULL;
+ e->scratch_lines_allocated = 0;
+
+ e->scratch_lines = calloc(e->N_ghosts * nb_threads,
+ sizeof(*e->scratch_lines));
+ if (!e->scratch_lines)
+ return -ENOMEM;
+ e->scratch_lines_allocated = nb_threads;
+ }
+ }
+
priv->residual_calc_offset = 0;
priv->residual_calc_size[0] = ctx->domain_size[0];
priv->residual_calc_size[1] = ctx->domain_size[1];
@@ -786,13 +819,12 @@ static int arrays_alloc(EGSContext *ctx, const size_t domain_size[2])
e->N = domain_size[0] * domain_size[1];
e->N_ghosts = (domain_size[0] + 2 * FD_STENCIL_MAX) * (domain_size[1] + 2 * FD_STENCIL_MAX);
- e->scratch_line = calloc(e->N_ghosts, sizeof(*e->scratch_line));
e->mat = calloc(SQR(e->N), sizeof(*e->mat));
e->mat_f = calloc(SQR(e->N), sizeof(*e->mat_f));
e->rhs = calloc(e->N, sizeof(*e->rhs));
e->x = calloc(e->N, sizeof(*e->x));
e->ipiv = calloc(e->N, sizeof(*e->ipiv));
- if (!e->scratch_line || !e->mat || !e->rhs || !e->ipiv)
+ if (!e->mat || !e->mat_f || !e->rhs || !e->x || !e->ipiv)
return -ENOMEM;
ret = mg2di_bicgstab_context_alloc(&e->bicgstab, e->N, 64);
@@ -873,7 +905,7 @@ void mg2di_egs_free(EGSContext **pctx)
if (ctx->solver_type == EGS_SOLVER_EXACT) {
EGSExactInternal *e = &ctx->priv->e;
- free(e->scratch_line);
+ free(e->scratch_lines);
free(e->mat);
free(e->mat_f);
free(e->rhs);