From 250f4e66a7af781750c7743e04332e5fe5abc859 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Tue, 6 Sep 2022 13:45:22 +0200 Subject: Parallelize on the level of variables rather than grid points. The latter has higher overhead. --- src/InitialCopy.c | 9 ++++++--- src/Operators.c | 12 ------------ src/RK4.c | 1 + 3 files changed, 7 insertions(+), 15 deletions(-) diff --git a/src/InitialCopy.c b/src/InitialCopy.c index 4c0ee87..176e2de 100644 --- a/src/InitialCopy.c +++ b/src/InitialCopy.c @@ -122,6 +122,7 @@ void MoL_InitialCopy(CCTK_ARGUMENTS) totalsize *= cctk_ash[arraydim]; } +#pragma omp parallel for for (var = 0; var < MoLNumEvolvedVariables; var++) { const int nsrc = 1; @@ -236,6 +237,7 @@ void MoL_InitialCopy(CCTK_ARGUMENTS) current level to the scratch space, then do the copy */ +#pragma omp parallel for for (var = 0; var < MoLNumSandRVariables; var++) { @@ -280,6 +282,7 @@ void MoL_InitialCopy(CCTK_ARGUMENTS) Now do the constrained variables. */ +#pragma omp parallel for for (var = 0; var < MoLNumConstrainedVariables; var++) { @@ -316,7 +319,6 @@ void MoL_InitialCopy(CCTK_ARGUMENTS) if (PreviousVar && CurrentVar) { -#pragma omp parallel for for (int k = 0; k < cctk_ash[2]; k++) { int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, k); memcpy(CurrentVar + offset, PreviousVar + offset, cctk_ash[0] * sizeof(double)); @@ -389,6 +391,7 @@ void MoL_InitRHS(CCTK_ARGUMENTS) totalsize *= cctk_ash[arraydim]; } +#pragma omp parallel for for (var = 0; var < MoLNumEvolvedVariables; var++) { StorageOn = CCTK_QueryGroupStorageI(cctkGH, @@ -415,6 +418,7 @@ void MoL_InitRHS(CCTK_ARGUMENTS) NULL, NULL, NULL, 0); } +#pragma omp parallel for for (var = 0; var < MoLNumEvolvedArrayVariables; var++) { RHSVar = (CCTK_REAL*)CCTK_VarDataPtrI(cctkGH, 0, @@ -440,7 +444,6 @@ void MoL_InitRHS(CCTK_ARGUMENTS) { if (RHSVar) { -#pragma omp parallel for for (index = 0; index < arraytotalsize; index++) { RHSVar[index] = 0; @@ -461,6 +464,7 @@ void MoL_InitRHS(CCTK_ARGUMENTS) #ifdef MOLDOESCOMPLEX +#pragma omp parallel for for (var = 0; var < MoLNumEvolvedComplexVariables; var++) { @@ -483,7 +487,6 @@ void MoL_InitRHS(CCTK_ARGUMENTS) RHSComplexVariableIndex[var]); if (RHSVar) { -#pragma omp parallel for for (index = 0; index < totalsize; index++) { RHSVar[index] = 0; diff --git a/src/Operators.c b/src/Operators.c index 2fed466..ed318f7 100644 --- a/src/Operators.c +++ b/src/Operators.c @@ -202,21 +202,18 @@ MoL_LinearCombination(cGH const *const cctkGH, // performance switch (nsrcs) { case 0: -#pragma omp parallel for for (i = 0; i < ash[2]; i++) { int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, i); op_real_set_0(varptr + offset, ash[0]); } break; case 1: -#pragma omp parallel for for (i = 0; i < ash[2]; i++) { int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, i); op_real_set_1(varptr + offset, srcptrs[0] + offset, facts[0], ash[0]); } break; case 2: -#pragma omp parallel for for (i = 0; i < ash[2]; i++) { int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, i); op_real_set_2(varptr + offset, srcptrs[0] + offset, facts[0], @@ -224,7 +221,6 @@ MoL_LinearCombination(cGH const *const cctkGH, } break; case 3: -#pragma omp parallel for for (i = 0; i < ash[2]; i++) { int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, i); op_real_set_3(varptr + offset, @@ -234,7 +230,6 @@ MoL_LinearCombination(cGH const *const cctkGH, break; default: // Loop over all grid points -#pragma omp parallel for for (i = 0; i < ash[2]; i++) { for (j = 0; j < ash[0]; j++) { int idx = CCTK_GFINDEX3D(cctkGH, j, y_idx, i); @@ -255,21 +250,18 @@ MoL_LinearCombination(cGH const *const cctkGH, // performance switch (nsrcs) { case 0: -#pragma omp parallel for for (i = 0; i < ash[2]; i++) { int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, i); op_real_update_0(varptr + offset, scale, ash[0]); } break; case 1: -#pragma omp parallel for for (i = 0; i < ash[2]; i++) { int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, i); op_real_update_1(varptr + offset, scale, srcptrs[0] + offset, facts[0], ash[0]); } break; case 2: -#pragma omp parallel for for (i = 0; i < ash[2]; i++) { int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, i); op_real_update_2(varptr + offset, scale, @@ -278,7 +270,6 @@ MoL_LinearCombination(cGH const *const cctkGH, } break; case 3: -#pragma omp parallel for for (i = 0; i < ash[2]; i++) { int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, i); op_real_update_3(varptr + offset, scale, @@ -288,7 +279,6 @@ MoL_LinearCombination(cGH const *const cctkGH, break; default: // Loop over all grid points -#pragma omp parallel for for (i = 0; i < ash[2]; i++) { for (j = 0; j < ash[0]; j++) { int idx = CCTK_GFINDEX3D(cctkGH, j, y_idx, i); @@ -320,7 +310,6 @@ MoL_LinearCombination(cGH const *const cctkGH, if (scale == 0.0) { // Set (overwrite) target variable // Loop over all grid points -#pragma omp parallel for for (ptrdiff_t i=0; i