diff options
author | Erik Schnetter <schnetter@aei.mpg.de> | 2004-11-24 23:51:00 +0000 |
---|---|---|
committer | Erik Schnetter <schnetter@aei.mpg.de> | 2004-11-24 23:51:00 +0000 |
commit | 174ecc8b81ba24eb45795fbae18356c8773e582a (patch) | |
tree | 7b61d94a165cb267ac8914dbe3dc1697635c39e6 /Carpet | |
parent | 545d5b76a2ade3dd3e595492122da8cc454d0e23 (diff) |
CarpetLib: Implement using MPI_Waitall for communicating
Add a parameter CarpetLib::use_waitall that switches from using a
series of MPI_Wait statements to using a single MPI_Waitall
statement. This might improve performance on many processors.
darcs-hash:20041124235118-891bb-034efea054db236a187022b1858e4574da867fa3.gz
Diffstat (limited to 'Carpet')
-rw-r--r-- | Carpet/CarpetLib/param.ccl | 4 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/data.cc | 66 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/data.hh | 15 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/gdata.cc | 13 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/gdata.hh | 26 |
5 files changed, 90 insertions, 34 deletions
diff --git a/Carpet/CarpetLib/param.ccl b/Carpet/CarpetLib/param.ccl index 47255091f..eb5929c15 100644 --- a/Carpet/CarpetLib/param.ccl +++ b/Carpet/CarpetLib/param.ccl @@ -22,3 +22,7 @@ BOOLEAN output_bboxes "Output bounding box information to the screen" STEERABLE= BOOLEAN save_memory_during_regridding "Save some memory during regridding at the expense of speed" { } "no" + +BOOLEAN use_waitall "Use MPI_Waitall instead many MPI_Wait statements" +{ +} "no" diff --git a/Carpet/CarpetLib/src/data.cc b/Carpet/CarpetLib/src/data.cc index 0179408d8..d5d6d78f1 100644 --- a/Carpet/CarpetLib/src/data.cc +++ b/Carpet/CarpetLib/src/data.cc @@ -15,6 +15,7 @@ #include <mpi.h> #include "cctk.h" +#include "cctk_Parameters.h" #include "bbox.hh" #include "defs.hh" @@ -230,13 +231,13 @@ void data<T,D>::change_processor (comm_state<D>& state, { switch (state.thestate) { case state_recv: - change_processor_recv (newproc, mem); + change_processor_recv (state, newproc, mem); break; case state_send: - change_processor_send (newproc, mem); + change_processor_send (state, newproc, mem); break; case state_wait: - change_processor_wait (newproc, mem); + change_processor_wait (state, newproc, mem); break; default: assert(0); @@ -246,8 +247,12 @@ void data<T,D>::change_processor (comm_state<D>& state, template<class T, int D> -void data<T,D>::change_processor_recv (const int newproc, void* const mem) +void data<T,D>::change_processor_recv (comm_state<D>& state, + const int newproc, + void* const mem) { + DECLARE_CCTK_PARAMETERS; + assert (!this->comm_active); this->comm_active = true; @@ -276,6 +281,9 @@ void data<T,D>::change_processor_recv (const int newproc, void* const mem) this->tag, dist::comm, &this->request); const double wtime2 = MPI_Wtime(); this->wtime_irecv += wtime2 - wtime1; + if (use_waitall) { + state.requests.push_back (this->request); + } } else if (rank == this->_proc) { // copy to other processor @@ -290,8 +298,12 @@ void data<T,D>::change_processor_recv (const int newproc, void* const mem) template<class T, int D> -void data<T,D>::change_processor_send (const int newproc, void* const mem) +void data<T,D>::change_processor_send (comm_state<D>& state, + const int newproc, + void* const mem) { + DECLARE_CCTK_PARAMETERS; + assert (this->comm_active); if (newproc == this->_proc) { @@ -317,6 +329,9 @@ void data<T,D>::change_processor_send (const int newproc, void* const mem) this->tag, dist::comm, &this->request); const double wtime2 = MPI_Wtime(); this->wtime_isend += wtime2 - wtime1; + if (use_waitall) { + state.requests.push_back (this->request); + } } else { assert (!mem); @@ -328,8 +343,12 @@ void data<T,D>::change_processor_send (const int newproc, void* const mem) template<class T, int D> -void data<T,D>::change_processor_wait (const int newproc, void* const mem) +void data<T,D>::change_processor_wait (comm_state<D>& state, + const int newproc, + void* const mem) { + DECLARE_CCTK_PARAMETERS; + assert (this->comm_active); this->comm_active = false; @@ -337,6 +356,17 @@ void data<T,D>::change_processor_wait (const int newproc, void* const mem) assert (!mem); return; } + + if (use_waitall) { + if (! state.requests.empty()) { + const double wtime1 = MPI_Wtime(); + MPI_Waitall + (state.requests.size(), &state.requests.front(), MPI_STATUSES_IGNORE); + const double wtime2 = MPI_Wtime(); + this->wtime_irecvwait += wtime2 - wtime1; + state.requests.clear(); + } + } if (this->_has_storage) { int rank; @@ -344,11 +374,13 @@ void data<T,D>::change_processor_wait (const int newproc, void* const mem) if (rank == newproc) { // copy from other processor - const double wtime1 = MPI_Wtime(); - MPI_Status status; - MPI_Wait (&this->request, &status); - const double wtime2 = MPI_Wtime(); - this->wtime_irecvwait += wtime2 - wtime1; + if (! use_waitall) { + const double wtime1 = MPI_Wtime(); + MPI_Status status; + MPI_Wait (&this->request, &status); + const double wtime2 = MPI_Wtime(); + this->wtime_irecvwait += wtime2 - wtime1; + } } else if (rank == this->_proc) { // copy to other processor @@ -356,11 +388,13 @@ void data<T,D>::change_processor_wait (const int newproc, void* const mem) assert (!mem); assert (_storage); - const double wtime1 = MPI_Wtime(); - MPI_Status status; - MPI_Wait (&this->request, &status); - const double wtime2 = MPI_Wtime(); - this->wtime_isendwait += wtime2 - wtime1; + if (! use_waitall) { + const double wtime1 = MPI_Wtime(); + MPI_Status status; + MPI_Wait (&this->request, &status); + const double wtime2 = MPI_Wtime(); + this->wtime_isendwait += wtime2 - wtime1; + } if (this->_owns_storage) { freemem (); diff --git a/Carpet/CarpetLib/src/data.hh b/Carpet/CarpetLib/src/data.hh index f8f184050..524f77c4c 100644 --- a/Carpet/CarpetLib/src/data.hh +++ b/Carpet/CarpetLib/src/data.hh @@ -78,11 +78,18 @@ public: // Processor management virtual void change_processor (comm_state<D>& state, - const int newproc, void* const mem=0); + const int newproc, + void* const mem=0); private: - virtual void change_processor_recv (const int newproc, void* const mem=0); - virtual void change_processor_send (const int newproc, void* const mem=0); - virtual void change_processor_wait (const int newproc, void* const mem=0); + virtual void change_processor_recv (comm_state<D>& state, + const int newproc, + void* const mem=0); + virtual void change_processor_send (comm_state<D>& state, + const int newproc, + void* const mem=0); + virtual void change_processor_wait (comm_state<D>& state, + const int newproc, + void* const mem=0); public: // Accessors diff --git a/Carpet/CarpetLib/src/gdata.cc b/Carpet/CarpetLib/src/gdata.cc index af586120f..ea44d8043 100644 --- a/Carpet/CarpetLib/src/gdata.cc +++ b/Carpet/CarpetLib/src/gdata.cc @@ -53,6 +53,7 @@ comm_state<D>::~comm_state () for (size_t n=0; n<tmps.size(); ++n) { assert (tmps.at(n) == NULL); } + assert (requests.empty()); } @@ -172,7 +173,7 @@ void gdata<D>::copy_from_recv (comm_state<D>& state, state.tmps.push_back (tmp); ++state.current; tmp->allocate (box, src->proc()); - tmp->change_processor_recv (proc()); + tmp->change_processor_recv (state, proc()); } } @@ -207,7 +208,7 @@ void gdata<D>::copy_from_send (comm_state<D>& state, ++state.current; assert (tmp); tmp->copy_from_nocomm (src, box); - tmp->change_processor_send (proc()); + tmp->change_processor_send (state, proc()); } } @@ -238,7 +239,7 @@ void gdata<D>::copy_from_wait (comm_state<D>& state, // copy to different processor gdata<D>* const tmp = state.tmps.at(state.current); assert (tmp); - tmp->change_processor_wait (proc()); + tmp->change_processor_wait (state, proc()); copy_from_nocomm (tmp, box); delete tmp; state.tmps.at(state.current) = NULL; @@ -351,7 +352,7 @@ void gdata<D> state.tmps.push_back (tmp); ++state.current; tmp->allocate (box, srcs.at(0)->proc()); - tmp->change_processor_recv (proc()); + tmp->change_processor_recv (state, proc()); } } @@ -393,7 +394,7 @@ void gdata<D> gdata<D>* const tmp = state.tmps.at(state.current++); assert (tmp); tmp->interpolate_from_nocomm (srcs, times, box, time, order_space, order_time); - tmp->change_processor_send (proc()); + tmp->change_processor_send (state, proc()); } } @@ -432,7 +433,7 @@ void gdata<D> gdata<D>* const tmp = state.tmps.at(state.current); assert (tmp); - tmp->change_processor_wait (proc()); + tmp->change_processor_wait (state, proc()); copy_from_nocomm (tmp, box); delete tmp; state.tmps.at(state.current) = NULL; diff --git a/Carpet/CarpetLib/src/gdata.hh b/Carpet/CarpetLib/src/gdata.hh index 0fb1882d4..26dd33251 100644 --- a/Carpet/CarpetLib/src/gdata.hh +++ b/Carpet/CarpetLib/src/gdata.hh @@ -21,7 +21,6 @@ using namespace std; - template<int D> class gdata; @@ -40,12 +39,13 @@ struct comm_state { ~comm_state (); private: - // Forbit copying and passing by value + // Forbid copying and passing by value comm_state (comm_state const &); comm_state& operator= (comm_state const &); public: vector<gdata<D>*> tmps; + vector<MPI_Request> requests; // for use_waitall size_t current; }; @@ -80,8 +80,8 @@ protected: // should be readonly ibbox _extent; // bbox for all data - bool comm_active; - MPI_Request request; + bool comm_active; // a communication is going on + MPI_Request request; // outstanding MPI request int tag; // MPI tag for this object @@ -101,11 +101,21 @@ public: // Processor management virtual void change_processor (comm_state<D>& state, - const int newproc, void* const mem=0) = 0; + const int newproc, + void* const mem=0) = 0; protected: - virtual void change_processor_recv (const int newproc, void* const mem=0) = 0; - virtual void change_processor_send (const int newproc, void* const mem=0) = 0; - virtual void change_processor_wait (const int newproc, void* const mem=0) = 0; + virtual void change_processor_recv (comm_state<D>& state, + const int newproc, + void* const mem=0) + = 0; + virtual void change_processor_send (comm_state<D>& state, + const int newproc, + void* const mem=0) + = 0; + virtual void change_processor_wait (comm_state<D>& state, + const int newproc, + void* const mem=0) + = 0; public: // Storage management |