aboutsummaryrefslogtreecommitdiff
path: root/Carpet/CarpetLib/src/dh.cc
diff options
context:
space:
mode:
author: tradke <schnetter@cct.lsu.edu> 2005-03-11 16:00:00 +0000
committer: tradke <schnetter@cct.lsu.edu> 2005-03-11 16:00:00 +0000
commit: e44239daa6d4116e89713a62379e33ccaa98f56f (patch)
tree: c2ac2d23ef7ad4959f145b7880145727a71fd70a /Carpet/CarpetLib/src/dh.cc
parent: 5739da7d68bf093893a59aa630472436d08aacec (diff)
Carpet/CarpetLib: minimise the number of outstanding communication requests
This patch greatly reduces the number of outstanding MPI_Isend/MPI_Irecv communication requests by moving the loop over comm_states (recv, send, wait) from the outermost to the innermost position. This resolves problems with certain MPI implementations (specifically LAM, MPICH-NCSA, and MVAPICH over InfiniBand) which could result in communication buffer overflows and cause the Cactus application to abort or hang forever. Preliminary benchmarks with BSSN_MoL show that the patch does not have a negative impact on Myrinet clusters (measured up to 64 processors). It even improves the Carpet performance on GigE clusters (measured up to 16 processors). The order of the communication loops is controlled by the boolean parameter CarpetRegrid::minimise_outstanding_communications, which defaults to "no" (preserving the old behaviour).
darcs-hash:20050311160040-3fd61-04d40ac79ef218252f9364a8d18796e9b270d295.gz
Diffstat (limited to 'Carpet/CarpetLib/src/dh.cc')
-rw-r--r-- Carpet/CarpetLib/src/dh.cc | 47
1 files changed, 36 insertions, 11 deletions
diff --git a/Carpet/CarpetLib/src/dh.cc b/Carpet/CarpetLib/src/dh.cc
index 2078b88a3..b28f0ac7d 100644
--- a/Carpet/CarpetLib/src/dh.cc
+++ b/Carpet/CarpetLib/src/dh.cc
@@ -540,6 +540,8 @@ void dh::calculate_bases ()
void dh::save_time (bool do_prolongate)
{
+ DECLARE_CCTK_PARAMETERS;
+
for (list<ggf*>::reverse_iterator f=gfs.rbegin(); f!=gfs.rend(); ++f) {
(*f)->recompose_crop ();
}
@@ -547,22 +549,45 @@ void dh::save_time (bool do_prolongate)
for (list<ggf*>::iterator f=gfs.begin(); f!=gfs.end(); ++f) {
(*f)->recompose_allocate (rl);
}
- for (comm_state state; !state.done(); state.step()) {
+ // make the comm_state loop the innermost
+ // in order to minimise the number of outstanding communications
+ if (minimise_outstanding_communications) {
for (list<ggf*>::iterator f=gfs.begin(); f!=gfs.end(); ++f) {
- (*f)->recompose_fill (state, rl, do_prolongate);
+ for (comm_state state; !state.done(); state.step()) {
+ (*f)->recompose_fill (state, rl, do_prolongate);
+ }
+ }
+ for (list<ggf*>::reverse_iterator f=gfs.rbegin(); f!=gfs.rend(); ++f) {
+ (*f)->recompose_free (rl);
}
- }
- for (list<ggf*>::reverse_iterator f=gfs.rbegin(); f!=gfs.rend(); ++f) {
- (*f)->recompose_free (rl);
- }
- for (comm_state state; !state.done(); state.step()) {
for (list<ggf*>::iterator f=gfs.begin(); f!=gfs.end(); ++f) {
- (*f)->recompose_bnd_prolongate (state, rl, do_prolongate);
+ for (comm_state state; !state.done(); state.step()) {
+ (*f)->recompose_bnd_prolongate (state, rl, do_prolongate);
+ }
}
- }
- for (comm_state state; !state.done(); state.step()) {
for (list<ggf*>::iterator f=gfs.begin(); f!=gfs.end(); ++f) {
- (*f)->recompose_sync (state, rl, do_prolongate);
+ for (comm_state state; !state.done(); state.step()) {
+ (*f)->recompose_sync (state, rl, do_prolongate);
+ }
+ }
+ } else {
+ for (comm_state state; !state.done(); state.step()) {
+ for (list<ggf*>::iterator f=gfs.begin(); f!=gfs.end(); ++f) {
+ (*f)->recompose_fill (state, rl, do_prolongate);
+ }
+ }
+ for (list<ggf*>::reverse_iterator f=gfs.rbegin(); f!=gfs.rend(); ++f) {
+ (*f)->recompose_free (rl);
+ }
+ for (comm_state state; !state.done(); state.step()) {
+ for (list<ggf*>::iterator f=gfs.begin(); f!=gfs.end(); ++f) {
+ (*f)->recompose_bnd_prolongate (state, rl, do_prolongate);
+ }
+ }
+ for (comm_state state; !state.done(); state.step()) {
+ for (list<ggf*>::iterator f=gfs.begin(); f!=gfs.end(); ++f) {
+ (*f)->recompose_sync (state, rl, do_prolongate);
+ }
}
}
} // for rl