diff options
-rw-r--r-- | Carpet/CarpetLib/param.ccl | 4 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/data.cc | 23 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/data.hh | 3 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/gdata.cc | 144 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/gdata.hh | 10 |
5 files changed, 126 insertions, 58 deletions
diff --git a/Carpet/CarpetLib/param.ccl b/Carpet/CarpetLib/param.ccl index 8ba0207d2..30b205a2d 100644 --- a/Carpet/CarpetLib/param.ccl +++ b/Carpet/CarpetLib/param.ccl @@ -33,3 +33,7 @@ INT max_mpi_tags "Maximum number of MPI tags to use" BOOLEAN use_waitall "Use MPI_Waitall instead many MPI_Wait statements" { } "no" + +BOOLEAN combine_recv_send "Combine MPI_Irecv and MPI_Isend calls" +{ +} "no" diff --git a/Carpet/CarpetLib/src/data.cc b/Carpet/CarpetLib/src/data.cc index e4993a900..6c4c677d9 100644 --- a/Carpet/CarpetLib/src/data.cc +++ b/Carpet/CarpetLib/src/data.cc @@ -224,28 +224,6 @@ T* data<T,D>::vectordata (const int vectorindex) const // Processor management template<class T, int D> -void data<T,D>::change_processor (comm_state<D>& state, - const int newproc, - void* const mem) -{ - switch (state.thestate) { - case state_recv: - change_processor_recv (state, newproc, mem); - break; - case state_send: - change_processor_send (state, newproc, mem); - break; - case state_wait: - change_processor_wait (state, newproc, mem); - break; - default: - assert(0); - } -} - - - -template<class T, int D> void data<T,D>::change_processor_recv (comm_state<D>& state, const int newproc, void* const mem) @@ -354,6 +332,7 @@ void data<T,D>::change_processor_wait (comm_state<D>& state, if (use_waitall) { if (! state.requests.empty()) { + // wait for all requests at once const double wtime1 = MPI_Wtime(); MPI_Waitall (state.requests.size(), &state.requests.front(), MPI_STATUSES_IGNORE); diff --git a/Carpet/CarpetLib/src/data.hh b/Carpet/CarpetLib/src/data.hh index 61953b30b..9445bf165 100644 --- a/Carpet/CarpetLib/src/data.hh +++ b/Carpet/CarpetLib/src/data.hh @@ -77,9 +77,6 @@ private: public: // Processor management - virtual void change_processor (comm_state<D>& state, - const int newproc, - void* const mem=0); private: virtual void change_processor_recv (comm_state<D>& state, const int newproc, diff --git a/Carpet/CarpetLib/src/gdata.cc b/Carpet/CarpetLib/src/gdata.cc index bec027fce..e6881aa8f 100644 --- a/Carpet/CarpetLib/src/gdata.cc +++ b/Carpet/CarpetLib/src/gdata.cc @@ -25,18 +25,55 @@ using namespace std; // Communication state control template<int D> comm_state<D>::comm_state () - : thestate(state_recv), - current(0) + : thestate(state_recv) { } template<int D> void comm_state<D>::step () { + DECLARE_CCTK_PARAMETERS; + assert (thestate!=state_done); - assert (current==tmps.size()); - thestate=astate(size_t(thestate)+1); - current=0; + if (combine_recv_send) { + switch (thestate) { + case state_recv: + assert (tmps1.empty()); + thestate = state_wait; + break; + case state_send: + assert (0); + case state_wait: + assert (tmps1.empty()); + assert (tmps2.empty()); + thestate = state_done; + break; + case state_done: + assert (0); + default: + assert (0); + } + } else { + switch (thestate) { + case state_recv: + assert (tmps2.empty()); + thestate = state_send; + break; + case state_send: + assert (tmps1.empty()); + thestate = state_wait; + break; + case state_wait: + assert (tmps1.empty()); + assert (tmps2.empty()); + thestate = state_done; + break; + case state_done: + assert (0); + default: + assert (0); + } + } } template<int D> @@ -49,10 +86,8 @@ template<int D> comm_state<D>::~comm_state () { assert (thestate==state_recv || thestate==state_done); - assert (current == 0); - for (size_t n=0; n<tmps.size(); ++n) { - assert (tmps.at(n) == NULL); - } + assert (tmps1.empty()); + assert (tmps2.empty()); assert (requests.empty()); } @@ -100,17 +135,62 @@ gdata<D>::~gdata () +// Processor management +template<int D> +void gdata<D>::change_processor (comm_state<D>& state, + const int newproc, + void* const mem) +{ + DECLARE_CCTK_PARAMETERS; + + switch (state.thestate) { + case state_recv: + if (combine_recv_send) { + change_processor_recv (state, newproc, mem); + change_processor_send (state, newproc, mem); + } else { + change_processor_recv (state, newproc, mem); + } + break; + case state_send: + if (combine_recv_send) { + // do nothing + } else { + change_processor_send (state, newproc, mem); + } + break; + case state_wait: + change_processor_wait (state, newproc, mem); + break; + default: + assert(0); + } +} + + + // Data manipulators template<int D> void gdata<D>::copy_from (comm_state<D>& state, const gdata* src, const ibbox& box) { + DECLARE_CCTK_PARAMETERS; + switch (state.thestate) { case state_recv: - copy_from_recv (state, src, box); + if (combine_recv_send) { + copy_from_recv (state, src, box); + copy_from_send (state, src, box); + } else { + copy_from_recv (state, src, box); + } break; case state_send: - copy_from_send (state, src, box); + if (combine_recv_send) { + // do nothing + } else { + copy_from_send (state, src, box); + } break; case state_wait: copy_from_wait (state, src, box); @@ -171,9 +251,7 @@ void gdata<D>::copy_from_recv (comm_state<D>& state, // copy to different processor gdata<D>* const tmp = make_typed(varindex, transport_operator); - // TODO: is this efficient? - state.tmps.push_back (tmp); - ++state.current; + state.tmps1.push (tmp); tmp->allocate (box, src->proc()); tmp->change_processor_recv (state, proc()); @@ -206,8 +284,9 @@ void gdata<D>::copy_from_send (comm_state<D>& state, } else { // copy to different processor - gdata<D>* const tmp = state.tmps.at(state.current); - ++state.current; + gdata<D>* const tmp = state.tmps1.front(); + state.tmps1.pop(); + state.tmps2.push (tmp); assert (tmp); tmp->copy_from_nocomm (src, box); tmp->change_processor_send (state, proc()); @@ -239,13 +318,12 @@ void gdata<D>::copy_from_wait (comm_state<D>& state, } else { // copy to different processor - gdata<D>* const tmp = state.tmps.at(state.current); + gdata<D>* const tmp = state.tmps2.front(); + state.tmps2.pop(); assert (tmp); tmp->change_processor_wait (state, proc()); copy_from_nocomm (tmp, box); delete tmp; - state.tmps.at(state.current) = NULL; - ++state.current; } } @@ -261,14 +339,25 @@ void gdata<D> const int order_space, const int order_time) { + DECLARE_CCTK_PARAMETERS; + assert (transport_operator != op_error); if (transport_operator == op_none) return; switch (state.thestate) { case state_recv: - interpolate_from_recv (state, srcs, times, box, time, order_space, order_time); + if (combine_recv_send) { + interpolate_from_recv (state, srcs, times, box, time, order_space, order_time); + interpolate_from_send (state, srcs, times, box, time, order_space, order_time); + } else { + interpolate_from_recv (state, srcs, times, box, time, order_space, order_time); + } break; case state_send: - interpolate_from_send (state, srcs, times, box, time, order_space, order_time); + if (combine_recv_send) { + // do nothing + } else { + interpolate_from_send (state, srcs, times, box, time, order_space, order_time); + } break; case state_wait: interpolate_from_wait (state, srcs, times, box, time, order_space, order_time); @@ -348,9 +437,7 @@ void gdata<D> // interpolate from other processor gdata<D>* const tmp = make_typed(varindex, transport_operator); - // TODO: is this efficient? - state.tmps.push_back (tmp); - ++state.current; + state.tmps1.push (tmp); tmp->allocate (box, srcs.at(0)->proc()); tmp->change_processor_recv (state, proc()); @@ -391,7 +478,9 @@ void gdata<D> } else { // interpolate from other processor - gdata<D>* const tmp = state.tmps.at(state.current++); + gdata<D>* const tmp = state.tmps1.front(); + state.tmps1.pop(); + state.tmps2.push (tmp); assert (tmp); tmp->interpolate_from_nocomm (srcs, times, box, time, order_space, order_time); tmp->change_processor_send (state, proc()); @@ -431,13 +520,12 @@ void gdata<D> } else { // interpolate from other processor - gdata<D>* const tmp = state.tmps.at(state.current); + gdata<D>* const tmp = state.tmps2.front(); + state.tmps2.pop(); assert (tmp); tmp->change_processor_wait (state, proc()); copy_from_nocomm (tmp, box); delete tmp; - state.tmps.at(state.current) = NULL; - ++state.current; } } diff --git a/Carpet/CarpetLib/src/gdata.hh b/Carpet/CarpetLib/src/gdata.hh index 9099dcc6a..12ff7b4cc 100644 --- a/Carpet/CarpetLib/src/gdata.hh +++ b/Carpet/CarpetLib/src/gdata.hh @@ -6,6 +6,7 @@ #include <assert.h> #include <stdlib.h> +#include <queue> #include <iostream> #include <string> #include <vector> @@ -44,9 +45,8 @@ private: comm_state& operator= (comm_state const &); public: - vector<gdata<D>*> tmps; + queue<gdata<D>*> tmps1, tmps2; vector<MPI_Request> requests; // for use_waitall - size_t current; }; @@ -100,9 +100,9 @@ public: const operator_type transport_operator = op_error) const = 0; // Processor management - virtual void change_processor (comm_state<D>& state, - const int newproc, - void* const mem=0) = 0; + void change_processor (comm_state<D>& state, + const int newproc, + void* const mem=0); protected: virtual void change_processor_recv (comm_state<D>& state, const int newproc, |