summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnton Khirnov <anton@khirnov.net>2019-06-23 09:51:53 +0200
committerAnton Khirnov <anton@khirnov.net>2019-06-23 09:51:53 +0200
commitb025c446f18d1c6d1e2b9782f6cc7b8079fd92fe (patch)
tree37432cfdb1c43af5a7bf85a577e83905c688faaf
parent95a2b7612306574d575a07babb5614aa2fda2566 (diff)
Parallelize the loops.
-rw-r--r--src/maximal_slicing_axi_mg.c21
1 files changed, 19 insertions, 2 deletions
diff --git a/src/maximal_slicing_axi_mg.c b/src/maximal_slicing_axi_mg.c
index 39cb1f3..11770d5 100644
--- a/src/maximal_slicing_axi_mg.c
+++ b/src/maximal_slicing_axi_mg.c
@@ -785,6 +785,7 @@ static void fill_eq_coeffs(MSMGContext *ctx, CoordPatch *cp, MG2DContext *solver
solver->diff_coeffs[MG2D_DIFF_COEFF_20]->boundaries[MG2D_BOUNDARY_0L].flags = MG2D_DC_FLAG_DISCONT;
solver->diff_coeffs[MG2D_DIFF_COEFF_10]->boundaries[MG2D_BOUNDARY_0L].flags = MG2D_DC_FLAG_POLE;
+#pragma omp parallel for
for (int idx_z = 0; idx_z < solver->local_size[1]; idx_z++)
for (int idx_x = 0; idx_x < solver->local_size[0]; idx_x++) {
const int idx_src = CCTK_GFINDEX3D(gh, idx_x + cp->offset_left[0], cp->y_idx, idx_z + cp->offset_left[1]);
@@ -875,6 +876,7 @@ static void fill_eq_coeffs(MSMGContext *ctx, CoordPatch *cp, MG2DContext *solver
static void solution_to_grid(CoordPatch *cp, MG2DContext *solver, double *dst)
{
+#pragma omp parallel for
for (int j = 0; j < solver->local_size[1]; j++)
for (int i = 0; i < solver->local_size[0]; i++) {
const ptrdiff_t idx_dst = CPINDEX(cp, i + cp->offset_left[0], cp->y_idx, j + cp->offset_left[1]);
@@ -885,6 +887,7 @@ static void solution_to_grid(CoordPatch *cp, MG2DContext *solver, double *dst)
if (cp->level == 0) {
/* on the coarsest level, extrapolate the outer boundary ghost points */
if (!cp->bnd_intercomp[0][1]) {
+#pragma omp parallel for
for (int idx_z = cp->offset_left[1]; idx_z < cp->offset_left[1] + solver->local_size[1]; idx_z++)
for (int idx_x = cp->offset_left[0] + solver->local_size[0]; idx_x < cp->grid_size[0]; idx_x++) {
const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z);
@@ -893,6 +896,7 @@ static void solution_to_grid(CoordPatch *cp, MG2DContext *solver, double *dst)
}
if (!cp->bnd_intercomp[1][1]) {
+#pragma omp parallel for
for (int idx_x = cp->offset_left[0]; idx_x < cp->grid_size[0]; idx_x++)
for (int idx_z = cp->offset_left[1] + solver->local_size[1]; idx_z < cp->grid_size[2]; idx_z++) {
const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z);
@@ -905,6 +909,7 @@ static void solution_to_grid(CoordPatch *cp, MG2DContext *solver, double *dst)
/* fill in the axial symmetry ghostpoints by mirroring */
if (!cp->bnd_intercomp[0][0]) {
+#pragma omp parallel for
for (int idx_z = cp->offset_left[1]; idx_z < cp->grid_size[2]; idx_z++)
for (int idx_x = 0; idx_x < cp->offset_left[0]; idx_x++) {
const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z);
@@ -913,6 +918,7 @@ static void solution_to_grid(CoordPatch *cp, MG2DContext *solver, double *dst)
}
}
if (!cp->bnd_intercomp[1][0]) {
+#pragma omp parallel for
for (int idx_z = 0; idx_z < cp->offset_left[1]; idx_z++)
for (int idx_x = 0; idx_x < cp->grid_size[0]; idx_x++) {
const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z);
@@ -960,6 +966,7 @@ void msa_mg_eval(CCTK_ARGUMENTS)
LOGDEBUG( "extrapolating from t1=%g (%g) and t0=%g (%g)\n", lapse_prev1_time[reflevel], fact1, lapse_prev0_time[reflevel], fact0);
+#pragma omp parallel for
for (size_t i = 0; i < grid_size; i++)
lapse_mg_eval[i] = fact0 * lapse_prev0[i] + fact1 * lapse_prev1[i];
@@ -996,25 +1003,27 @@ void msa_mg_eval(CCTK_ARGUMENTS)
if (solver_idx) {
MG2DContext *solver_src = cp->solver_eval[solver_idx - 1];
+#pragma omp parallel for
for (int line = 0; line < solver->local_size[1] ; line++) {
memcpy(solver->u + line * solver->u_stride, solver_src->u + line * solver_src->u_stride,
sizeof(*solver->u) * solver->local_size[0]);
}
} else {
+#pragma omp parallel for
for (int line = 0; line < solver->local_size[1]; line++)
for (int i = 0; i < solver->local_size[0]; i++)
solver->u[line * solver->u_stride + i] = lapse_mg[(cp->offset_left[1] + line) * cctk_lsh[0] + cp->offset_left[0] + i] - 1.0;
}
+ ms->time_init_guess_eval += gettime() - start;
}
- ms->time_init_guess_eval += gettime() - start;
-
LOGDEBUG( "extrapolating BCs from t1=%g (%g) and t0=%g (%g)\n", lapse_prev1_time[reflevel], fact1, lapse_prev0_time[reflevel], fact0);
start = gettime();
if (solver->local_start[1] + solver->local_size[1] == solver->domain_size) {
+#pragma omp parallel for
for (ptrdiff_t idx_z = cp->offset_left[1] + solver->local_size[1] - 1; idx_z < cctkGH->cctk_lsh[2]; idx_z++)
for (ptrdiff_t idx_x = 0; idx_x < cctkGH->cctk_lsh[0]; idx_x++) {
const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, idx_x, cp->y_idx, idx_z);
@@ -1022,6 +1031,7 @@ void msa_mg_eval(CCTK_ARGUMENTS)
}
}
if (solver->local_start[0] + solver->local_size[0] == solver->domain_size) {
+#pragma omp parallel for
for (ptrdiff_t idx_z = 0; idx_z < cctkGH->cctk_lsh[2]; idx_z++)
for (ptrdiff_t idx_x = cp->offset_left[0] + solver->local_size[0] - 1; idx_x < cctkGH->cctk_lsh[0]; idx_x++) {
const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, idx_x, cp->y_idx, idx_z);
@@ -1030,6 +1040,7 @@ void msa_mg_eval(CCTK_ARGUMENTS)
}
if (solver->local_start[1] + solver->local_size[1] == solver->domain_size) {
+#pragma omp parallel for
for (int j = 0; j < solver->fd_stencil; j++) {
MG2DBoundary *bnd = solver->boundaries[MG2D_BOUNDARY_1U];
double *dst = bnd->val + j * bnd->val_stride;
@@ -1044,6 +1055,7 @@ void msa_mg_eval(CCTK_ARGUMENTS)
}
}
if (solver->local_start[0] + solver->local_size[0] == solver->domain_size) {
+#pragma omp parallel for
for (int j = 0; j < solver->fd_stencil; j++) {
MG2DBoundary *bnd = solver->boundaries[MG2D_BOUNDARY_0U];
double *dst = bnd->val + j * bnd->val_stride;
@@ -1163,6 +1175,7 @@ void msa_mg_solve(CCTK_ARGUMENTS)
LOGDEBUG( "extrapolating BCs from t1=%g (%g) and t0=%g (%g)\n", lapse_prev1_time[reflevel], fact1, lapse_prev0_time[reflevel], fact0);
if (cp->solver->local_start[1] + cp->solver->local_size[1] == cp->solver->domain_size) {
+#pragma omp parallel for
for (int j = 0; j < cp->solver->fd_stencil; j++) {
for (ptrdiff_t i = -j; i < (ptrdiff_t)cp->solver->local_size[0] + j; i++) {
const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, i + cp->offset_left[0], cp->y_idx, cp->offset_left[1] + cp->solver->local_size[1] - 1 + j);
@@ -1171,6 +1184,7 @@ void msa_mg_solve(CCTK_ARGUMENTS)
}
}
if (cp->solver->local_start[0] + cp->solver->local_size[0] == cp->solver->domain_size) {
+#pragma omp parallel for
for (int j = 0; j < cp->solver->fd_stencil; j++) {
for (ptrdiff_t i = -j; i < (ptrdiff_t)cp->solver->local_size[1] + j; i++) {
const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, cp->offset_left[0] + cp->solver->local_size[0] - 1 + j, cp->y_idx, cp->offset_left[1] + i);
@@ -1195,6 +1209,7 @@ void msa_mg_solve(CCTK_ARGUMENTS)
/* fill the solver boundary conditions */
if (cp->solver->local_start[1] + cp->solver->local_size[1] == cp->solver->domain_size) {
+#pragma omp parallel for
for (int j = 0; j < cp->solver->fd_stencil; j++) {
MG2DBoundary *bnd = cp->solver->boundaries[MG2D_BOUNDARY_1U];
double *dst = bnd->val + j * bnd->val_stride;
@@ -1205,6 +1220,7 @@ void msa_mg_solve(CCTK_ARGUMENTS)
}
}
if (cp->solver->local_start[0] + cp->solver->local_size[0] == cp->solver->domain_size) {
+#pragma omp parallel for
for (int j = 0; j < cp->solver->fd_stencil; j++) {
MG2DBoundary *bnd = cp->solver->boundaries[MG2D_BOUNDARY_0U];
double *dst = bnd->val + j * bnd->val_stride;
@@ -1238,6 +1254,7 @@ skip_solve:
const double vel_fact = 1.0 / (1 << (reflevel - reflevel_top));
start = gettime();
+#pragma omp parallel for
for (size_t j = 0; j < grid_size; j++) {
const double sol_new = lapse_mg[j];
const double delta = sol_new - lapse_mg_eval[j];