aboutsummaryrefslogtreecommitdiff
path: root/Carpet/CarpetReduce
diff options
context:
space:
mode:
authorErik Schnetter <schnetter@cct.lsu.edu>2011-04-20 23:11:56 -0400
committerBarry Wardell <barry.wardell@gmail.com>2011-12-14 18:26:08 +0000
commit8ae7941013b2a9bdab54461ae0fcac4747d851e3 (patch)
treee1ab7d3e5919de5fa949b65008ed67e070102cf1 /Carpet/CarpetReduce
parentba0fae0a796d2dd0d23772b254827afaab15c191 (diff)
CarpetReduce: Rewrite OpenMP parallelisation for PGI compilers
Special case the OpenMP parallelisation of the main reduction loop for PGI compilers to circumvent a compiler bug.
Diffstat (limited to 'Carpet/CarpetReduce')
-rw-r--r--Carpet/CarpetReduce/src/reduce.cc70
1 files changed, 70 insertions, 0 deletions
diff --git a/Carpet/CarpetReduce/src/reduce.cc b/Carpet/CarpetReduce/src/reduce.cc
index 9d977a52c..26711fef6 100644
--- a/Carpet/CarpetReduce/src/reduce.cc
+++ b/Carpet/CarpetReduce/src/reduce.cc
@@ -613,6 +613,10 @@ namespace CarpetReduce {
imin[d] = (bbox[2*d ] ? 0 : nghostzones[d]);
imax[d] = lsh[d] - (bbox[2*d+1] ? 0 : nghostzones[d]);
}
+
+#if ! (defined(__PGI) && (__PGIC__>9))
+ // Regular case
+
#pragma omp parallel
{
T myoutval_local;
@@ -659,6 +663,72 @@ namespace CarpetReduce {
OP::combine (myoutval, mycnt, myoutval_local, mycnt_local);
}
} // end omp parallel
+
+#else
+ // Special case for PGI 10 and later
+
+ int const num_threads = dist::num_threads();
+ T myoutval_locals[num_threads];
+ T mycnt_locals [num_threads];
+ // This is probably not worthwhile:
+ // #pragma omp parallel
+ for (int n=0; n<num_threads; ++n) {
+ T& myoutval_local = myoutval_locals[n];
+ T& mycnt_local = mycnt_locals [n];
+ OP::initialise (myoutval_local, mycnt_local);
+ }
+#if CARPET_DIM == 3
+#pragma omp parallel for
+ for (int k=imin[2]; k<imax[2]; ++k) {
+ int const n = omp_get_thread_num();
+ T& myoutval_local = myoutval_locals[n];
+ T& mycnt_local = mycnt_locals [n];
+ for (int j=imin[1]; j<imax[1]; ++j) {
+ for (int i=imin[0]; i<imax[0]; ++i) {
+ const int index = i + lsh[0] * (j + lsh[1] * k);
+ CCTK_REAL const w = weight ? weight[index] * levfac : levfac;
+ T myinval = T(0);
+ for (size_t tl=0; tl<inarrays.size(); ++tl) {
+ myinval +=
+ static_cast<const T*>(inarrays.AT(tl))[index] * tfacs.AT(tl);
+ }
+ OP::reduce (myoutval_local, mycnt_local, myinval, w);
+ }
+ }
+ }
+#elif CARPET_DIM == 4
+#pragma omp parallel for
+ for (int l=imin[3]; l<imax[3]; ++l) {
+ int const n = omp_get_thread_num();
+ T& myoutval_local = myoutval_locals[n];
+ T& mycnt_local = mycnt_locals [n];
+ for (int k=imin[2]; k<imax[2]; ++k) {
+ for (int j=imin[1]; j<imax[1]; ++j) {
+ for (int i=imin[0]; i<imax[0]; ++i) {
+ const int index = i + lsh[0] * (j + lsh[1] * (k + lsh[2] * l));
+ CCTK_REAL const w = weight ? weight[index] * levfac : levfac;
+ T myinval = T(0);
+ for (size_t tl=0; tl<inarrays.size(); ++tl) {
+ myinval +=
+ static_cast<const T*>(inarrays.AT(tl))[index] * tfacs.AT(tl);
+ }
+ OP::reduce (myoutval_local, mycnt_local, myinval, w);
+ }
+ }
+ }
+ }
+#else
+# error "Value of CARPET_DIM is not supported"
+#endif
+ // This loop is not parallel
+ for (int n=0; n<num_threads; ++n) {
+ T const& myoutval_local = myoutval_locals[n];
+ T const& mycnt_local = mycnt_locals [n];
+ OP::combine (myoutval, mycnt, myoutval_local, mycnt_local);
+ }
+
+#endif
+
}
template<class T,class OP>