aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnton Khirnov <anton@khirnov.net>2022-09-06 13:45:22 +0200
committerAnton Khirnov <anton@khirnov.net>2022-09-06 13:45:22 +0200
commit250f4e66a7af781750c7743e04332e5fe5abc859 (patch)
treee584e62a9f24ec9e64cf84fb1a27587879b15e34
parent827ccac987a0b297ae02c41acee95689af5a129b (diff)
Parallelize on the level of variables rather than grid points.HEADmaster
The latter has higher overhead.
-rw-r--r--src/InitialCopy.c9
-rw-r--r--src/Operators.c12
-rw-r--r--src/RK4.c1
3 files changed, 7 insertions, 15 deletions
diff --git a/src/InitialCopy.c b/src/InitialCopy.c
index 4c0ee87..176e2de 100644
--- a/src/InitialCopy.c
+++ b/src/InitialCopy.c
@@ -122,6 +122,7 @@ void MoL_InitialCopy(CCTK_ARGUMENTS)
totalsize *= cctk_ash[arraydim];
}
+#pragma omp parallel for
for (var = 0; var < MoLNumEvolvedVariables; var++)
{
const int nsrc = 1;
@@ -236,6 +237,7 @@ void MoL_InitialCopy(CCTK_ARGUMENTS)
current level to the scratch space, then do the copy
*/
+#pragma omp parallel for
for (var = 0; var < MoLNumSandRVariables; var++)
{
@@ -280,6 +282,7 @@ void MoL_InitialCopy(CCTK_ARGUMENTS)
Now do the constrained variables.
*/
+#pragma omp parallel for
for (var = 0; var < MoLNumConstrainedVariables; var++)
{
@@ -316,7 +319,6 @@ void MoL_InitialCopy(CCTK_ARGUMENTS)
if (PreviousVar && CurrentVar)
{
-#pragma omp parallel for
for (int k = 0; k < cctk_ash[2]; k++) {
int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, k);
memcpy(CurrentVar + offset, PreviousVar + offset, cctk_ash[0] * sizeof(double));
@@ -389,6 +391,7 @@ void MoL_InitRHS(CCTK_ARGUMENTS)
totalsize *= cctk_ash[arraydim];
}
+#pragma omp parallel for
for (var = 0; var < MoLNumEvolvedVariables; var++)
{
StorageOn = CCTK_QueryGroupStorageI(cctkGH,
@@ -415,6 +418,7 @@ void MoL_InitRHS(CCTK_ARGUMENTS)
NULL, NULL, NULL, 0);
}
+#pragma omp parallel for
for (var = 0; var < MoLNumEvolvedArrayVariables; var++)
{
RHSVar = (CCTK_REAL*)CCTK_VarDataPtrI(cctkGH, 0,
@@ -440,7 +444,6 @@ void MoL_InitRHS(CCTK_ARGUMENTS)
{
if (RHSVar)
{
-#pragma omp parallel for
for (index = 0; index < arraytotalsize; index++)
{
RHSVar[index] = 0;
@@ -461,6 +464,7 @@ void MoL_InitRHS(CCTK_ARGUMENTS)
#ifdef MOLDOESCOMPLEX
+#pragma omp parallel for
for (var = 0; var < MoLNumEvolvedComplexVariables; var++)
{
@@ -483,7 +487,6 @@ void MoL_InitRHS(CCTK_ARGUMENTS)
RHSComplexVariableIndex[var]);
if (RHSVar)
{
-#pragma omp parallel for
for (index = 0; index < totalsize; index++)
{
RHSVar[index] = 0;
diff --git a/src/Operators.c b/src/Operators.c
index 2fed466..ed318f7 100644
--- a/src/Operators.c
+++ b/src/Operators.c
@@ -202,21 +202,18 @@ MoL_LinearCombination(cGH const *const cctkGH,
// performance
switch (nsrcs) {
case 0:
-#pragma omp parallel for
for (i = 0; i < ash[2]; i++) {
int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, i);
op_real_set_0(varptr + offset, ash[0]);
}
break;
case 1:
-#pragma omp parallel for
for (i = 0; i < ash[2]; i++) {
int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, i);
op_real_set_1(varptr + offset, srcptrs[0] + offset, facts[0], ash[0]);
}
break;
case 2:
-#pragma omp parallel for
for (i = 0; i < ash[2]; i++) {
int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, i);
op_real_set_2(varptr + offset, srcptrs[0] + offset, facts[0],
@@ -224,7 +221,6 @@ MoL_LinearCombination(cGH const *const cctkGH,
}
break;
case 3:
-#pragma omp parallel for
for (i = 0; i < ash[2]; i++) {
int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, i);
op_real_set_3(varptr + offset,
@@ -234,7 +230,6 @@ MoL_LinearCombination(cGH const *const cctkGH,
break;
default:
// Loop over all grid points
-#pragma omp parallel for
for (i = 0; i < ash[2]; i++) {
for (j = 0; j < ash[0]; j++) {
int idx = CCTK_GFINDEX3D(cctkGH, j, y_idx, i);
@@ -255,21 +250,18 @@ MoL_LinearCombination(cGH const *const cctkGH,
// performance
switch (nsrcs) {
case 0:
-#pragma omp parallel for
for (i = 0; i < ash[2]; i++) {
int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, i);
op_real_update_0(varptr + offset, scale, ash[0]);
}
break;
case 1:
-#pragma omp parallel for
for (i = 0; i < ash[2]; i++) {
int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, i);
op_real_update_1(varptr + offset, scale, srcptrs[0] + offset, facts[0], ash[0]);
}
break;
case 2:
-#pragma omp parallel for
for (i = 0; i < ash[2]; i++) {
int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, i);
op_real_update_2(varptr + offset, scale,
@@ -278,7 +270,6 @@ MoL_LinearCombination(cGH const *const cctkGH,
}
break;
case 3:
-#pragma omp parallel for
for (i = 0; i < ash[2]; i++) {
int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, i);
op_real_update_3(varptr + offset, scale,
@@ -288,7 +279,6 @@ MoL_LinearCombination(cGH const *const cctkGH,
break;
default:
// Loop over all grid points
-#pragma omp parallel for
for (i = 0; i < ash[2]; i++) {
for (j = 0; j < ash[0]; j++) {
int idx = CCTK_GFINDEX3D(cctkGH, j, y_idx, i);
@@ -320,7 +310,6 @@ MoL_LinearCombination(cGH const *const cctkGH,
if (scale == 0.0) {
// Set (overwrite) target variable
// Loop over all grid points
-#pragma omp parallel for
for (ptrdiff_t i=0; i<npoints; ++i) {
CCTK_COMPLEX tmp = CCTK_Cmplx(0.0, 0.0);
for (int n=0; n<nsrcs; ++n) {
@@ -331,7 +320,6 @@ MoL_LinearCombination(cGH const *const cctkGH,
}
} else {
// Update (add to) target variable
-#pragma omp parallel for
for (ptrdiff_t i=0; i<npoints; ++i) {
CCTK_COMPLEX tmp = CCTK_CmplxMul(CCTK_Cmplx(scale, 0.0), varptr[i]);
for (int n=0; n<nsrcs; ++n) {
diff --git a/src/RK4.c b/src/RK4.c
index 4e72389..e019a4d 100644
--- a/src/RK4.c
+++ b/src/RK4.c
@@ -150,6 +150,7 @@ CCTK_WARN(0, "not implemented");
/* Real GFs */
+#pragma omp parallel for
for (var = 0; var < MoLNumEvolvedVariables; var++)
{
{