aboutsummaryrefslogtreecommitdiff
path: root/src/Operators.c
diff options
context:
space:
mode:
author: Anton Khirnov <anton@khirnov.net> 2022-09-06 13:45:22 +0200
committer: Anton Khirnov <anton@khirnov.net> 2022-09-06 13:45:22 +0200
commit: 250f4e66a7af781750c7743e04332e5fe5abc859 (patch)
tree: e584e62a9f24ec9e64cf84fb1a27587879b15e34 /src/Operators.c
parent: 827ccac987a0b297ae02c41acee95689af5a129b (diff)
Parallelize on the level of variables rather than grid points. [HEAD, master]
The latter has higher overhead.
Diffstat (limited to 'src/Operators.c')
-rw-r--r-- src/Operators.c 12
1 files changed, 0 insertions, 12 deletions
diff --git a/src/Operators.c b/src/Operators.c
index 2fed466..ed318f7 100644
--- a/src/Operators.c
+++ b/src/Operators.c
@@ -202,21 +202,18 @@ MoL_LinearCombination(cGH const *const cctkGH,
// performance
switch (nsrcs) {
case 0:
-#pragma omp parallel for
for (i = 0; i < ash[2]; i++) {
int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, i);
op_real_set_0(varptr + offset, ash[0]);
}
break;
case 1:
-#pragma omp parallel for
for (i = 0; i < ash[2]; i++) {
int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, i);
op_real_set_1(varptr + offset, srcptrs[0] + offset, facts[0], ash[0]);
}
break;
case 2:
-#pragma omp parallel for
for (i = 0; i < ash[2]; i++) {
int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, i);
op_real_set_2(varptr + offset, srcptrs[0] + offset, facts[0],
@@ -224,7 +221,6 @@ MoL_LinearCombination(cGH const *const cctkGH,
}
break;
case 3:
-#pragma omp parallel for
for (i = 0; i < ash[2]; i++) {
int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, i);
op_real_set_3(varptr + offset,
@@ -234,7 +230,6 @@ MoL_LinearCombination(cGH const *const cctkGH,
break;
default:
// Loop over all grid points
-#pragma omp parallel for
for (i = 0; i < ash[2]; i++) {
for (j = 0; j < ash[0]; j++) {
int idx = CCTK_GFINDEX3D(cctkGH, j, y_idx, i);
@@ -255,21 +250,18 @@ MoL_LinearCombination(cGH const *const cctkGH,
// performance
switch (nsrcs) {
case 0:
-#pragma omp parallel for
for (i = 0; i < ash[2]; i++) {
int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, i);
op_real_update_0(varptr + offset, scale, ash[0]);
}
break;
case 1:
-#pragma omp parallel for
for (i = 0; i < ash[2]; i++) {
int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, i);
op_real_update_1(varptr + offset, scale, srcptrs[0] + offset, facts[0], ash[0]);
}
break;
case 2:
-#pragma omp parallel for
for (i = 0; i < ash[2]; i++) {
int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, i);
op_real_update_2(varptr + offset, scale,
@@ -278,7 +270,6 @@ MoL_LinearCombination(cGH const *const cctkGH,
}
break;
case 3:
-#pragma omp parallel for
for (i = 0; i < ash[2]; i++) {
int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, i);
op_real_update_3(varptr + offset, scale,
@@ -288,7 +279,6 @@ MoL_LinearCombination(cGH const *const cctkGH,
break;
default:
// Loop over all grid points
-#pragma omp parallel for
for (i = 0; i < ash[2]; i++) {
for (j = 0; j < ash[0]; j++) {
int idx = CCTK_GFINDEX3D(cctkGH, j, y_idx, i);
@@ -320,7 +310,6 @@ MoL_LinearCombination(cGH const *const cctkGH,
if (scale == 0.0) {
// Set (overwrite) target variable
// Loop over all grid points
-#pragma omp parallel for
for (ptrdiff_t i=0; i<npoints; ++i) {
CCTK_COMPLEX tmp = CCTK_Cmplx(0.0, 0.0);
for (int n=0; n<nsrcs; ++n) {
@@ -331,7 +320,6 @@ MoL_LinearCombination(cGH const *const cctkGH,
}
} else {
// Update (add to) target variable
-#pragma omp parallel for
for (ptrdiff_t i=0; i<npoints; ++i) {
CCTK_COMPLEX tmp = CCTK_CmplxMul(CCTK_Cmplx(scale, 0.0), varptr[i]);
for (int n=0; n<nsrcs; ++n) {