diff options
author | tradke <schnetter@cct.lsu.edu> | 2005-03-11 16:00:00 +0000 |
---|---|---|
committer | tradke <schnetter@cct.lsu.edu> | 2005-03-11 16:00:00 +0000 |
commit | e44239daa6d4116e89713a62379e33ccaa98f56f (patch) | |
tree | c2ac2d23ef7ad4959f145b7880145727a71fd70a /Carpet/CarpetLib/src/dh.cc | |
parent | 5739da7d68bf093893a59aa630472436d08aacec (diff) |
Carpet/CarpetLib: minimise the number of outstanding communication requests
This patch greatly reduces the number of outstanding MPI_Isend/MPI_Irecv
communication requests by moving the loop over comm_states (recv, send, wait)
from the outermost to the innermost loop level.
This resolves problems with certain MPI implementations (specifically LAM,
MPICH-NCSA, and MVAPICH over InfiniBand) in which the large number of
outstanding requests could overflow communication buffers and cause the
Cactus application to abort or hang forever.
Preliminary benchmarks with BSSN_MoL show that the patch does not have a
negative impact on Myrinet clusters (measured up to 64 processors).
It even improves Carpet's performance on GigE clusters (measured up to 16
processors).
The order of the communication loops is controlled by the boolean parameter
CarpetRegrid::minimise_outstanding_communications
which defaults to "no" (preserving the old behaviour).
darcs-hash:20050311160040-3fd61-04d40ac79ef218252f9364a8d18796e9b270d295.gz
Diffstat (limited to 'Carpet/CarpetLib/src/dh.cc')
-rw-r--r-- | Carpet/CarpetLib/src/dh.cc | 47 |
1 files changed, 36 insertions, 11 deletions
diff --git a/Carpet/CarpetLib/src/dh.cc b/Carpet/CarpetLib/src/dh.cc
index 2078b88a3..b28f0ac7d 100644
--- a/Carpet/CarpetLib/src/dh.cc
+++ b/Carpet/CarpetLib/src/dh.cc
@@ -540,6 +540,8 @@ void dh::calculate_bases ()
 
 void dh::save_time (bool do_prolongate)
 {
+  DECLARE_CCTK_PARAMETERS;
+
   for (list<ggf*>::reverse_iterator f=gfs.rbegin(); f!=gfs.rend(); ++f) {
     (*f)->recompose_crop ();
   }
@@ -547,22 +549,45 @@ void dh::save_time (bool do_prolongate)
     for (list<ggf*>::iterator f=gfs.begin(); f!=gfs.end(); ++f) {
       (*f)->recompose_allocate (rl);
     }
-    for (comm_state state; !state.done(); state.step()) {
+    // make the comm_state loop the innermost
+    // in order to minimise the number of outstanding communications
+    if (minimise_outstanding_communications) {
       for (list<ggf*>::iterator f=gfs.begin(); f!=gfs.end(); ++f) {
-        (*f)->recompose_fill (state, rl, do_prolongate);
+        for (comm_state state; !state.done(); state.step()) {
+          (*f)->recompose_fill (state, rl, do_prolongate);
+        }
+      }
+      for (list<ggf*>::reverse_iterator f=gfs.rbegin(); f!=gfs.rend(); ++f) {
+        (*f)->recompose_free (rl);
       }
-    }
-    for (list<ggf*>::reverse_iterator f=gfs.rbegin(); f!=gfs.rend(); ++f) {
-      (*f)->recompose_free (rl);
-    }
-    for (comm_state state; !state.done(); state.step()) {
       for (list<ggf*>::iterator f=gfs.begin(); f!=gfs.end(); ++f) {
-        (*f)->recompose_bnd_prolongate (state, rl, do_prolongate);
+        for (comm_state state; !state.done(); state.step()) {
+          (*f)->recompose_bnd_prolongate (state, rl, do_prolongate);
+        }
       }
-    }
-    for (comm_state state; !state.done(); state.step()) {
       for (list<ggf*>::iterator f=gfs.begin(); f!=gfs.end(); ++f) {
-        (*f)->recompose_sync (state, rl, do_prolongate);
+        for (comm_state state; !state.done(); state.step()) {
+          (*f)->recompose_sync (state, rl, do_prolongate);
+        }
+      }
+    } else {
+      for (comm_state state; !state.done(); state.step()) {
+        for (list<ggf*>::iterator f=gfs.begin(); f!=gfs.end(); ++f) {
+          (*f)->recompose_fill (state, rl, do_prolongate);
+        }
+      }
+      for (list<ggf*>::reverse_iterator f=gfs.rbegin(); f!=gfs.rend(); ++f) {
+        (*f)->recompose_free (rl);
+      }
+      for (comm_state state; !state.done(); state.step()) {
+        for (list<ggf*>::iterator f=gfs.begin(); f!=gfs.end(); ++f) {
+          (*f)->recompose_bnd_prolongate (state, rl, do_prolongate);
+        }
+      }
+      for (comm_state state; !state.done(); state.step()) {
+        for (list<ggf*>::iterator f=gfs.begin(); f!=gfs.end(); ++f) {
+          (*f)->recompose_sync (state, rl, do_prolongate);
+        }
       }
     }
   } // for rl