aboutsummaryrefslogtreecommitdiff
path: root/Carpet/Carpet/src/Restrict.cc
diff options
context:
space:
mode:
author tradke <schnetter@cct.lsu.edu> 2005-03-11 16:00:00 +0000
committer tradke <schnetter@cct.lsu.edu> 2005-03-11 16:00:00 +0000
commit e44239daa6d4116e89713a62379e33ccaa98f56f (patch)
tree c2ac2d23ef7ad4959f145b7880145727a71fd70a /Carpet/Carpet/src/Restrict.cc
parent 5739da7d68bf093893a59aa630472436d08aacec (diff)
Carpet/CarpetLib: minimise the number of outstanding communication requests
This patch greatly reduces the number of outstanding MPI_Isend/MPI_Irecv communication requests by moving the loop over comm_states (recv, send, wait) from the outermost to the innermost position. This resolves problems with certain MPI implementations (specifically LAM, MPICH-NCSA, and MVAPICH over InfiniBand) which potentially resulted in communication buffer overflows and caused the Cactus application to abort or hang forever. Preliminary benchmarks with BSSN_MoL show that the patch does not have a negative impact on Myrinet clusters (measured up to 64 processors). It even improves the Carpet performance on GigE clusters (measured up to 16 processors). The order of the communication loops is controlled by the boolean parameter CarpetRegrid::minimise_outstanding_communications which defaults to "no" (preserving the old behaviour). darcs-hash:20050311160040-3fd61-04d40ac79ef218252f9364a8d18796e9b270d295.gz
Diffstat (limited to 'Carpet/Carpet/src/Restrict.cc')
-rw-r--r-- Carpet/Carpet/src/Restrict.cc 78
1 files changed, 70 insertions, 8 deletions
diff --git a/Carpet/Carpet/src/Restrict.cc b/Carpet/Carpet/src/Restrict.cc
index 2716ecd56..76dba62ed 100644
--- a/Carpet/Carpet/src/Restrict.cc
+++ b/Carpet/Carpet/src/Restrict.cc
@@ -33,7 +33,10 @@ namespace Carpet {
// Restrict
if (reflevel < reflevels-1) {
- for (comm_state state; !state.done(); state.step()) {
+
+ // make the comm_state loop the innermost
+ // in order to minimise the number of outstanding communications
+ if (minimise_outstanding_communications) {
for (int group=0; group<CCTK_NumGroups(); ++group) {
if (CCTK_GroupTypeI(group) == CCTK_GF) {
if (CCTK_QueryGroupStorageI(cgh, group)) {
@@ -53,8 +56,10 @@ namespace Carpet {
for (int var=0; var<(int)arrdata.at(group).at(m).data.size(); ++var) {
for (int c=0; c<vhh.at(m)->components(reflevel); ++c) {
- arrdata.at(group).at(m).data.at(var)->ref_restrict
- (state, tl, reflevel, c, mglevel, time);
+ for (comm_state state; !state.done(); state.step()) {
+ arrdata.at(group).at(m).data.at(var)->ref_restrict
+ (state, tl, reflevel, c, mglevel, time);
+ }
}
}
}
@@ -62,14 +67,48 @@ namespace Carpet {
} // if group has storage
} // if grouptype == CCTK_GF
} // loop over groups
- } // for state
+ } else {
+ for (comm_state state; !state.done(); state.step()) {
+ for (int group=0; group<CCTK_NumGroups(); ++group) {
+ if (CCTK_GroupTypeI(group) == CCTK_GF) {
+ if (CCTK_QueryGroupStorageI(cgh, group)) {
+
+ const int tl = 0;
+
+ for (int m=0; m<(int)arrdata.at(group).size(); ++m) {
+
+ // use background time here (which may not be modified
+ // by the user)
+ const CCTK_REAL time = vtt.at(m)->time (tl, reflevel, mglevel);
+
+ const CCTK_REAL time1 = vtt.at(m)->time (0, reflevel, mglevel);
+ const CCTK_REAL time2
+ = (cgh->cctk_time - cctk_initial_time) / delta_time;
+ assert (fabs(time1 - time2) / (fabs(time1) + fabs(time2) + fabs(cgh->cctk_delta_time)) < 1e-12);
+
+ for (int var=0; var<(int)arrdata.at(group).at(m).data.size(); ++var) {
+ for (int c=0; c<vhh.at(m)->components(reflevel); ++c) {
+ arrdata.at(group).at(m).data.at(var)->ref_restrict
+ (state, tl, reflevel, c, mglevel, time);
+ }
+ }
+ }
+
+ } // if group has storage
+ } // if grouptype == CCTK_GF
+ } // loop over groups
+ } // for state
+ } // if minimise_outstanding_communications
} // if not finest refinement level
// Sync
if (reflevel < reflevels-1) {
- for (comm_state state; !state.done(); state.step()) {
+
+ // make the comm_state loop the innermost
+ // in order to minimise the number of outstanding communications
+ if (minimise_outstanding_communications) {
for (int group=0; group<CCTK_NumGroups(); ++group) {
if (CCTK_GroupTypeI(group) == CCTK_GF) {
if (CCTK_QueryGroupStorageI(cgh, group)) {
@@ -79,8 +118,10 @@ namespace Carpet {
for (int m=0; m<(int)arrdata.at(group).size(); ++m) {
for (int var=0; var<(int)arrdata.at(group).at(m).data.size(); ++var) {
for (int c=0; c<vhh.at(m)->components(reflevel); ++c) {
- arrdata.at(group).at(m).data.at(var)->sync
- (state, tl, reflevel, c, mglevel);
+ for (comm_state state; !state.done(); state.step()) {
+ arrdata.at(group).at(m).data.at(var)->sync
+ (state, tl, reflevel, c, mglevel);
+ }
}
}
}
@@ -88,7 +129,28 @@ namespace Carpet {
} // if group has storage
} // if grouptype == CCTK_GF
} // loop over groups
- } // for state
+ } else {
+ for (comm_state state; !state.done(); state.step()) {
+ for (int group=0; group<CCTK_NumGroups(); ++group) {
+ if (CCTK_GroupTypeI(group) == CCTK_GF) {
+ if (CCTK_QueryGroupStorageI(cgh, group)) {
+
+ const int tl = 0;
+
+ for (int m=0; m<(int)arrdata.at(group).size(); ++m) {
+ for (int var=0; var<(int)arrdata.at(group).at(m).data.size(); ++var) {
+ for (int c=0; c<vhh.at(m)->components(reflevel); ++c) {
+ arrdata.at(group).at(m).data.at(var)->sync
+ (state, tl, reflevel, c, mglevel);
+ }
+ }
+ }
+
+ } // if group has storage
+ } // if grouptype == CCTK_GF
+ } // loop over groups
+ } // for state
+ } // if minimise_outstanding_communications
} // if not finest refinement level
}