From b025c446f18d1c6d1e2b9782f6cc7b8079fd92fe Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Sun, 23 Jun 2019 09:51:53 +0200 Subject: Parallelize the loops. --- src/maximal_slicing_axi_mg.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/maximal_slicing_axi_mg.c b/src/maximal_slicing_axi_mg.c index 39cb1f3..11770d5 100644 --- a/src/maximal_slicing_axi_mg.c +++ b/src/maximal_slicing_axi_mg.c @@ -785,6 +785,7 @@ static void fill_eq_coeffs(MSMGContext *ctx, CoordPatch *cp, MG2DContext *solver solver->diff_coeffs[MG2D_DIFF_COEFF_20]->boundaries[MG2D_BOUNDARY_0L].flags = MG2D_DC_FLAG_DISCONT; solver->diff_coeffs[MG2D_DIFF_COEFF_10]->boundaries[MG2D_BOUNDARY_0L].flags = MG2D_DC_FLAG_POLE; +#pragma omp parallel for for (int idx_z = 0; idx_z < solver->local_size[1]; idx_z++) for (int idx_x = 0; idx_x < solver->local_size[0]; idx_x++) { const int idx_src = CCTK_GFINDEX3D(gh, idx_x + cp->offset_left[0], cp->y_idx, idx_z + cp->offset_left[1]); @@ -875,6 +876,7 @@ static void fill_eq_coeffs(MSMGContext *ctx, CoordPatch *cp, MG2DContext *solver static void solution_to_grid(CoordPatch *cp, MG2DContext *solver, double *dst) { +#pragma omp parallel for for (int j = 0; j < solver->local_size[1]; j++) for (int i = 0; i < solver->local_size[0]; i++) { const ptrdiff_t idx_dst = CPINDEX(cp, i + cp->offset_left[0], cp->y_idx, j + cp->offset_left[1]); @@ -885,6 +887,7 @@ static void solution_to_grid(CoordPatch *cp, MG2DContext *solver, double *dst) if (cp->level == 0) { /* on the coarsest level, extrapolate the outer boundary ghost points */ if (!cp->bnd_intercomp[0][1]) { +#pragma omp parallel for for (int idx_z = cp->offset_left[1]; idx_z < cp->offset_left[1] + solver->local_size[1]; idx_z++) for (int idx_x = cp->offset_left[0] + solver->local_size[0]; idx_x < cp->grid_size[0]; idx_x++) { const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z); @@ -893,6 +896,7 @@ static void solution_to_grid(CoordPatch *cp, MG2DContext *solver, double *dst) } if (!cp->bnd_intercomp[1][1]) { +#pragma omp parallel for for (int idx_x = cp->offset_left[0]; idx_x < cp->grid_size[0]; idx_x++) for (int idx_z = cp->offset_left[1] + solver->local_size[1]; idx_z < cp->grid_size[2]; idx_z++) { const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z); @@ -905,6 +909,7 @@ static void solution_to_grid(CoordPatch *cp, MG2DContext *solver, double *dst) /* fill in the axial symmetry ghostpoints by mirroring */ if (!cp->bnd_intercomp[0][0]) { +#pragma omp parallel for for (int idx_z = cp->offset_left[1]; idx_z < cp->grid_size[2]; idx_z++) for (int idx_x = 0; idx_x < cp->offset_left[0]; idx_x++) { const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z); @@ -913,6 +918,7 @@ static void solution_to_grid(CoordPatch *cp, MG2DContext *solver, double *dst) } } if (!cp->bnd_intercomp[1][0]) { +#pragma omp parallel for for (int idx_z = 0; idx_z < cp->offset_left[1]; idx_z++) for (int idx_x = 0; idx_x < cp->grid_size[0]; idx_x++) { const ptrdiff_t idx_dst = CPINDEX(cp, idx_x, cp->y_idx, idx_z); @@ -960,6 +966,7 @@ void msa_mg_eval(CCTK_ARGUMENTS) LOGDEBUG( "extrapolating from t1=%g (%g) and t0=%g (%g)\n", lapse_prev1_time[reflevel], fact1, lapse_prev0_time[reflevel], fact0); +#pragma omp parallel for for (size_t i = 0; i < grid_size; i++) lapse_mg_eval[i] = fact0 * lapse_prev0[i] + fact1 * lapse_prev1[i]; @@ -996,25 +1003,27 @@ void msa_mg_eval(CCTK_ARGUMENTS) if (solver_idx) { MG2DContext *solver_src = cp->solver_eval[solver_idx - 1]; +#pragma omp parallel for for (int line = 0; line < solver->local_size[1] ; line++) { memcpy(solver->u + line * solver->u_stride, solver_src->u + line * solver_src->u_stride, sizeof(*solver->u) * solver->local_size[0]); } } else { +#pragma omp parallel for for (int line = 0; line < solver->local_size[1]; line++) for (int i = 0; i < solver->local_size[0]; i++) solver->u[line * solver->u_stride + i] = lapse_mg[(cp->offset_left[1] + line) * cctk_lsh[0] + cp->offset_left[0] + i] - 1.0; } + ms->time_init_guess_eval += gettime() - start; } - ms->time_init_guess_eval += gettime() - start; - LOGDEBUG( "extrapolating BCs from t1=%g (%g) and t0=%g (%g)\n", lapse_prev1_time[reflevel], fact1, lapse_prev0_time[reflevel], fact0); start = gettime(); if (solver->local_start[1] + solver->local_size[1] == solver->domain_size) { +#pragma omp parallel for for (ptrdiff_t idx_z = cp->offset_left[1] + solver->local_size[1] - 1; idx_z < cctkGH->cctk_lsh[2]; idx_z++) for (ptrdiff_t idx_x = 0; idx_x < cctkGH->cctk_lsh[0]; idx_x++) { const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, idx_x, cp->y_idx, idx_z); @@ -1022,6 +1031,7 @@ void msa_mg_eval(CCTK_ARGUMENTS) } } if (solver->local_start[0] + solver->local_size[0] == solver->domain_size) { +#pragma omp parallel for for (ptrdiff_t idx_z = 0; idx_z < cctkGH->cctk_lsh[2]; idx_z++) for (ptrdiff_t idx_x = cp->offset_left[0] + solver->local_size[0] - 1; idx_x < cctkGH->cctk_lsh[0]; idx_x++) { const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, idx_x, cp->y_idx, idx_z); @@ -1030,6 +1040,7 @@ void msa_mg_eval(CCTK_ARGUMENTS) } if (solver->local_start[1] + solver->local_size[1] == solver->domain_size) { +#pragma omp parallel for for (int j = 0; j < solver->fd_stencil; j++) { MG2DBoundary *bnd = solver->boundaries[MG2D_BOUNDARY_1U]; double *dst = bnd->val + j * bnd->val_stride; @@ -1044,6 +1055,7 @@ void msa_mg_eval(CCTK_ARGUMENTS) } } if (solver->local_start[0] + solver->local_size[0] == solver->domain_size) { +#pragma omp parallel for for (int j = 0; j < solver->fd_stencil; j++) { MG2DBoundary *bnd = solver->boundaries[MG2D_BOUNDARY_0U]; double *dst = bnd->val + j * bnd->val_stride; @@ -1163,6 +1175,7 @@ void msa_mg_solve(CCTK_ARGUMENTS) LOGDEBUG( "extrapolating BCs from t1=%g (%g) and t0=%g (%g)\n", lapse_prev1_time[reflevel], fact1, lapse_prev0_time[reflevel], fact0); if (cp->solver->local_start[1] + cp->solver->local_size[1] == cp->solver->domain_size) { +#pragma omp parallel for for (int j = 0; j < cp->solver->fd_stencil; j++) { for (ptrdiff_t i = -j; i < (ptrdiff_t)cp->solver->local_size[0] + j; i++) { const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, i + cp->offset_left[0], cp->y_idx, cp->offset_left[1] + cp->solver->local_size[1] - 1 + j); @@ -1171,6 +1184,7 @@ void msa_mg_solve(CCTK_ARGUMENTS) } } if (cp->solver->local_start[0] + cp->solver->local_size[0] == cp->solver->domain_size) { +#pragma omp parallel for for (int j = 0; j < cp->solver->fd_stencil; j++) { for (ptrdiff_t i = -j; i < (ptrdiff_t)cp->solver->local_size[1] + j; i++) { const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, cp->offset_left[0] + cp->solver->local_size[0] - 1 + j, cp->y_idx, cp->offset_left[1] + i); @@ -1195,6 +1209,7 @@ void msa_mg_solve(CCTK_ARGUMENTS) /* fill the solver boundary conditions */ if (cp->solver->local_start[1] + cp->solver->local_size[1] == cp->solver->domain_size) { +#pragma omp parallel for for (int j = 0; j < cp->solver->fd_stencil; j++) { MG2DBoundary *bnd = cp->solver->boundaries[MG2D_BOUNDARY_1U]; double *dst = bnd->val + j * bnd->val_stride; @@ -1205,6 +1220,7 @@ void msa_mg_solve(CCTK_ARGUMENTS) } } if (cp->solver->local_start[0] + cp->solver->local_size[0] == cp->solver->domain_size) { +#pragma omp parallel for for (int j = 0; j < cp->solver->fd_stencil; j++) { MG2DBoundary *bnd = cp->solver->boundaries[MG2D_BOUNDARY_0U]; double *dst = bnd->val + j * bnd->val_stride; @@ -1238,6 +1254,7 @@ skip_solve: const double vel_fact = 1.0 / (1 << (reflevel - reflevel_top)); start = gettime(); +#pragma omp parallel for for (size_t j = 0; j < grid_size; j++) { const double sol_new = lapse_mg[j]; const double delta = sol_new - lapse_mg_eval[j]; -- cgit v1.2.3