diff options
author | Erik Schnetter <schnetter@aei.mpg.de> | 2005-01-02 17:35:00 +0000 |
---|---|---|
committer | Erik Schnetter <schnetter@aei.mpg.de> | 2005-01-02 17:35:00 +0000 |
commit | 027fbe005b8263b054cdcb51cde716a00183f6b9 (patch) | |
tree | 3d77bfcab9cb51f5de0911f3c8dba6940778264a /Carpet/CarpetLib | |
parent | 43d0a63ab79490aa39fc560b199f4956554869ca (diff) |
CarpetLib: Add lightweight communication buffers (untested)
Lightweight communication buffers use essentially only a vector<T>
instead of a data<T> to transfer data between processors. This should
reduce the computational overhead.
Set the parameter "use_lightweight_buffers" to "yes" to use this feature (it
defaults to "no"). This feature is completely untested.
darcs-hash:20050102173524-891bb-6a3999cbd63e367c8520c175c8078374d294eaa8.gz
Diffstat (limited to 'Carpet/CarpetLib')
-rw-r--r-- | Carpet/CarpetLib/param.ccl | 4 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/commstate.hh | 17 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/data.cc | 172 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/data.hh | 18 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/gdata.cc | 106 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/gdata.hh | 22 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/timestat.cc | 22 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/timestat.hh | 11 |
8 files changed, 339 insertions, 33 deletions
diff --git a/Carpet/CarpetLib/param.ccl b/Carpet/CarpetLib/param.ccl index 59c958183..55754ef0f 100644 --- a/Carpet/CarpetLib/param.ccl +++ b/Carpet/CarpetLib/param.ccl @@ -40,3 +40,7 @@ BOOLEAN use_waitall "Use MPI_Waitall instead many MPI_Wait statements" BOOLEAN combine_recv_send "Combine MPI_Irecv and MPI_Isend calls" { } "no" + +BOOLEAN use_lightweight_buffers "Use lightweight communication buffers instead of data objects" +{ +} "no" diff --git a/Carpet/CarpetLib/src/commstate.hh b/Carpet/CarpetLib/src/commstate.hh index 340d09ded..5a9570c7c 100644 --- a/Carpet/CarpetLib/src/commstate.hh +++ b/Carpet/CarpetLib/src/commstate.hh @@ -30,10 +30,25 @@ private: // Forbid copying and passing by value comm_state (comm_state const &); comm_state& operator= (comm_state const &); + public: + // Lists of temporary data objects queue<gdata*> tmps1, tmps2; - vector<MPI_Request> requests; // for use_waitall + + // List of MPI requests for use_waitall + vector<MPI_Request> requests; + + // Lists of communication buffers for use_lightweight_buffers + struct gcommbuf { + bool am_receiver, am_sender; + MPI_Request request; + }; + template<typename T> + struct commbuf : gcommbuf { + vector<T> data; + }; + queue<gcommbuf*> recvbufs, sendbufs; }; diff --git a/Carpet/CarpetLib/src/data.cc b/Carpet/CarpetLib/src/data.cc index 1fffe5f2f..76c93733b 100644 --- a/Carpet/CarpetLib/src/data.cc +++ b/Carpet/CarpetLib/src/data.cc @@ -398,6 +398,178 @@ void data<T>::change_processor_wait (comm_state& state, +template<typename T> +void +data<T>::copy_from_recv_inner (comm_state& state, + const gdata* gsrc, const ibbox& box) +{ + DECLARE_CCTK_PARAMETERS; + + wtime_copyfrom_recvinner_allocate.start(); + comm_state::commbuf<T> * b = new comm_state::commbuf<T>; + b->am_receiver = true; + b->am_sender = false; + b->data.resize (prod (box.shape() / box.stride())); + wtime_copyfrom_recvinner_allocate.stop(); + + wtime_copyfrom_recvinner_recv.start(); + assert (dist::rank() == proc()); + T dummy; + 
MPI_Irecv (&b->data.front(), b->data.size(), + dist::datatype(dummy), gsrc->proc(), + tag, dist::comm, &b->request); + wtime_copyfrom_recvinner_recv.stop(); + if (use_waitall) { + state.requests.push_back (b->request); + } + state.recvbufs.push (b); +} + + + +template<typename T> +void +data<T>::copy_from_send_inner (comm_state& state, + const gdata* gsrc, const ibbox& box) +{ + DECLARE_CCTK_PARAMETERS; + + wtime_copyfrom_sendinner_allocate.start(); + comm_state::commbuf<T> * b = new comm_state::commbuf<T>; + b->am_receiver = false; + b->am_sender = true; + b->data.resize (prod (box.shape() / box.stride())); + wtime_copyfrom_sendinner_allocate.stop(); + + wtime_copyfrom_sendinner_copy.start(); + const data<T> * src = dynamic_cast<const data<T> *> (gsrc); + assert (src->_has_storage); + assert (src->_owns_storage); + // copy src to b + { + T * restrict p = & b->data.front(); + T const * restrict const q = src->_storage; + ivect const imin = box.lower() / box.stride(); + ivect const imax = (box.upper() + box.stride()) / box.stride(); + ivect const lbnd = src->extent().lower() / src->extent().stride(); + ivect const lsh = src->extent().shape() / src->extent().stride(); + for (int k=imin[2]; k<imax[2]; ++k) { + for (int j=imin[1]; j<imax[1]; ++j) { + for (int i=imin[0]; i<imax[0]; ++i) { + * p ++ = q [i - lbnd[0] + lsh[0] * (j - lbnd[1] + lsh[1] * (k - lbnd[2]))]; + } + } + } + } + wtime_copyfrom_sendinner_copy.stop(); + + wtime_copyfrom_sendinner_send.start(); + assert (dist::rank() == src->proc()); + T dummy; + MPI_Isend (&b->data.front(), b->data.size(), dist::datatype(dummy), proc(), + tag, dist::comm, &b->request); + wtime_copyfrom_sendinner_send.stop(); + if (use_waitall) { + state.requests.push_back (b->request); + } + state.sendbufs.push (b); +} + + + +template<typename T> +void +data<T>::copy_from_recv_wait_inner (comm_state& state, + const gdata* gsrc, const ibbox& box) +{ + DECLARE_CCTK_PARAMETERS; + + comm_state::commbuf<T> * b + = (comm_state::commbuf<T> 
*) state.recvbufs.front(); + state.recvbufs.pop(); + assert (b->am_receiver); + assert (! b->am_sender); + + wtime_copyfrom_recvwaitinner_wait.start(); + if (use_waitall) { + if (! state.requests.empty()) { + // wait for all requests at once + MPI_Waitall + (state.requests.size(), &state.requests.front(), MPI_STATUSES_IGNORE); + state.requests.clear(); + } + } + + if (! use_waitall) { + MPI_Status status; + MPI_Wait (&b->request, &status); + } + wtime_copyfrom_recvwaitinner_wait.stop(); + + wtime_copyfrom_recvwaitinner_copy.start(); + assert (_has_storage); + assert (_owns_storage); + // copy b to this + { + T * restrict const p = _storage; + T const * restrict q = & b->data.front(); + ivect const imin = box.lower() / box.stride(); + ivect const imax = (box.upper() + box.stride()) / box.stride(); + ivect const lbnd = extent().lower() / extent().stride(); + ivect const lsh = extent().shape() / extent().stride(); + for (int k=imin[2]; k<imax[2]; ++k) { + for (int j=imin[1]; j<imax[1]; ++j) { + for (int i=imin[0]; i<imax[0]; ++i) { + p [i - lbnd[0] + lsh[0] * (j - lbnd[1] + lsh[1] * (k - lbnd[2]))] = * q ++; + } + } + } + } + wtime_copyfrom_recvwaitinner_copy.stop(); + + wtime_copyfrom_recvwaitinner_delete.start(); + delete b; + wtime_copyfrom_recvwaitinner_delete.stop(); +} + + + +template<typename T> +void +data<T>::copy_from_send_wait_inner (comm_state& state, + const gdata* gsrc, const ibbox& box) +{ + DECLARE_CCTK_PARAMETERS; + + comm_state::commbuf<T> * b + = (comm_state::commbuf<T> *) state.sendbufs.front(); + state.sendbufs.pop(); + assert (! b->am_receiver); + assert (b->am_sender); + + wtime_copyfrom_sendwaitinner_wait.start(); + if (use_waitall) { + if (! state.requests.empty()) { + // wait for all requests at once + MPI_Waitall + (state.requests.size(), &state.requests.front(), MPI_STATUSES_IGNORE); + state.requests.clear(); + } + } + + if (! 
use_waitall) { + MPI_Status status; + MPI_Wait (&b->request, &status); + } + wtime_copyfrom_sendwaitinner_wait.stop(); + + wtime_copyfrom_sendwaitinner_delete.start(); + delete b; + wtime_copyfrom_sendwaitinner_delete.stop(); +} + + + // Data manipulators template<typename T> void data<T> diff --git a/Carpet/CarpetLib/src/data.hh b/Carpet/CarpetLib/src/data.hh index 7eea2c867..27b91acfb 100644 --- a/Carpet/CarpetLib/src/data.hh +++ b/Carpet/CarpetLib/src/data.hh @@ -107,6 +107,24 @@ public: assert (_storage); return _storage[offset(index)]; } + +protected: + virtual void + copy_from_recv_inner (comm_state& state, + const gdata* src, + const ibbox& box); + virtual void + copy_from_send_inner (comm_state& state, + const gdata* src, + const ibbox& box); + virtual void + copy_from_recv_wait_inner (comm_state& state, + const gdata* src, + const ibbox& box); + virtual void + copy_from_send_wait_inner (comm_state& state, + const gdata* src, + const ibbox& box); // Data manipulators private: diff --git a/Carpet/CarpetLib/src/gdata.cc b/Carpet/CarpetLib/src/gdata.cc index 55f1ce197..afd56a5dd 100644 --- a/Carpet/CarpetLib/src/gdata.cc +++ b/Carpet/CarpetLib/src/gdata.cc @@ -157,6 +157,8 @@ void gdata::copy_from_nocomm (const gdata* src, const ibbox& box) void gdata::copy_from_recv (comm_state& state, const gdata* src, const ibbox& box) { + DECLARE_CCTK_PARAMETERS; + assert (has_storage() && src->has_storage()); assert (all(box.lower()>=extent().lower() && box.lower()>=src->extent().lower())); @@ -177,16 +179,27 @@ void gdata::copy_from_recv (comm_state& state, } else { // copy to different processor - wtime_copyfrom_recv_maketyped.start(); - gdata* const tmp = make_typed(varindex, transport_operator); - wtime_copyfrom_recv_maketyped.stop(); - state.tmps1.push (tmp); - wtime_copyfrom_recv_allocate.start(); - tmp->allocate (box, src->proc()); - wtime_copyfrom_recv_allocate.stop(); - wtime_copyfrom_recv_changeproc_recv.start(); - tmp->change_processor_recv (state, proc()); - 
wtime_copyfrom_recv_changeproc_recv.stop(); + if (! use_lightweight_buffers) { + + wtime_copyfrom_recv_maketyped.start(); + gdata* const tmp = make_typed(varindex, transport_operator); + wtime_copyfrom_recv_maketyped.stop(); + state.tmps1.push (tmp); + wtime_copyfrom_recv_allocate.start(); + tmp->allocate (box, src->proc()); + wtime_copyfrom_recv_allocate.stop(); + wtime_copyfrom_recv_changeproc_recv.start(); + tmp->change_processor_recv (state, proc()); + wtime_copyfrom_recv_changeproc_recv.stop(); + + } else { + + if (dist::rank() == proc()) { + // this processor receives data + copy_from_recv_inner (state, src, box); + } + + } } @@ -198,6 +211,8 @@ void gdata::copy_from_recv (comm_state& state, void gdata::copy_from_send (comm_state& state, const gdata* src, const ibbox& box) { + DECLARE_CCTK_PARAMETERS; + assert (has_storage() && src->has_storage()); assert (all(box.lower()>=extent().lower() && box.lower()>=src->extent().lower())); @@ -222,16 +237,26 @@ void gdata::copy_from_send (comm_state& state, } else { // copy to different processor - gdata* const tmp = state.tmps1.front(); - state.tmps1.pop(); - state.tmps2.push (tmp); - assert (tmp); - wtime_copyfrom_send_copyfrom_nocomm2.start(); - tmp->copy_from_nocomm (src, box); - wtime_copyfrom_send_copyfrom_nocomm2.stop(); - wtime_copyfrom_send_changeproc_send.start(); - tmp->change_processor_send (state, proc()); - wtime_copyfrom_send_changeproc_send.stop(); + if (! 
use_lightweight_buffers) { + + gdata* const tmp = state.tmps1.front(); + state.tmps1.pop(); + state.tmps2.push (tmp); + assert (tmp); + wtime_copyfrom_send_copyfrom_nocomm2.start(); + tmp->copy_from_nocomm (src, box); + wtime_copyfrom_send_copyfrom_nocomm2.stop(); + wtime_copyfrom_send_changeproc_send.start(); + tmp->change_processor_send (state, proc()); + wtime_copyfrom_send_changeproc_send.stop(); + + } else { + + if (dist::rank() == src->proc()) { + // this processor sends data + copy_from_send_inner (state, src, box); + } + } } @@ -243,6 +268,8 @@ void gdata::copy_from_send (comm_state& state, void gdata::copy_from_wait (comm_state& state, const gdata* src, const ibbox& box) { + DECLARE_CCTK_PARAMETERS; + assert (has_storage() && src->has_storage()); assert (all(box.lower()>=extent().lower() && box.lower()>=src->extent().lower())); @@ -263,18 +290,33 @@ void gdata::copy_from_wait (comm_state& state, } else { // copy to different processor - gdata* const tmp = state.tmps2.front(); - state.tmps2.pop(); - assert (tmp); - wtime_copyfrom_wait_changeproc_wait.start(); - tmp->change_processor_wait (state, proc()); - wtime_copyfrom_wait_changeproc_wait.stop(); - wtime_copyfrom_wait_copyfrom_nocomm.start(); - copy_from_nocomm (tmp, box); - wtime_copyfrom_wait_copyfrom_nocomm.stop(); - wtime_copyfrom_wait_delete.start(); - delete tmp; - wtime_copyfrom_wait_delete.stop(); + if (! 
use_lightweight_buffers) { + + gdata* const tmp = state.tmps2.front(); + state.tmps2.pop(); + assert (tmp); + wtime_copyfrom_wait_changeproc_wait.start(); + tmp->change_processor_wait (state, proc()); + wtime_copyfrom_wait_changeproc_wait.stop(); + wtime_copyfrom_wait_copyfrom_nocomm.start(); + copy_from_nocomm (tmp, box); + wtime_copyfrom_wait_copyfrom_nocomm.stop(); + wtime_copyfrom_wait_delete.start(); + delete tmp; + wtime_copyfrom_wait_delete.stop(); + + } else { + + if (dist::rank() == proc()) { + // this processor receives data + copy_from_recv_wait_inner (state, src, box); + } + if (dist::rank() == src->proc()) { + // this processor sends data + copy_from_send_wait_inner (state, src, box); + } + + } } diff --git a/Carpet/CarpetLib/src/gdata.hh b/Carpet/CarpetLib/src/gdata.hh index 72d136f5e..e6c9434f9 100644 --- a/Carpet/CarpetLib/src/gdata.hh +++ b/Carpet/CarpetLib/src/gdata.hh @@ -151,6 +151,28 @@ public: const gdata* src, const ibbox& box); void copy_from_wait (comm_state& state, const gdata* src, const ibbox& box); + protected: + virtual void + copy_from_recv_inner (comm_state& state, + const gdata* src, + const ibbox& box) + = 0; + virtual void + copy_from_send_inner (comm_state& state, + const gdata* src, + const ibbox& box) + = 0; + virtual void + copy_from_recv_wait_inner (comm_state& state, + const gdata* src, + const ibbox& box) + = 0; + virtual void + copy_from_send_wait_inner (comm_state& state, + const gdata* src, + const ibbox& box) + = 0; + public: void interpolate_from (comm_state& state, const vector<const gdata*> srcs, diff --git a/Carpet/CarpetLib/src/timestat.cc b/Carpet/CarpetLib/src/timestat.cc index 0c43c6ed3..8fd104304 100644 --- a/Carpet/CarpetLib/src/timestat.cc +++ b/Carpet/CarpetLib/src/timestat.cc @@ -74,6 +74,17 @@ timestat wtime_copyfrom_wait_changeproc_wait; timestat wtime_copyfrom_wait_copyfrom_nocomm; timestat wtime_copyfrom_wait_delete; +timestat wtime_copyfrom_recvinner_allocate; +timestat wtime_copyfrom_recvinner_recv; 
+timestat wtime_copyfrom_sendinner_allocate; +timestat wtime_copyfrom_sendinner_copy; +timestat wtime_copyfrom_sendinner_send; +timestat wtime_copyfrom_recvwaitinner_wait; +timestat wtime_copyfrom_recvwaitinner_copy; +timestat wtime_copyfrom_recvwaitinner_delete; +timestat wtime_copyfrom_sendwaitinner_wait; +timestat wtime_copyfrom_sendwaitinner_delete; + timestat wtime_changeproc_recv; timestat wtime_changeproc_send; timestat wtime_changeproc_wait; @@ -107,6 +118,17 @@ void CarpetLib_printtimestats (CCTK_ARGUMENTS) << " wtime_copyfrom_wait_copyfrom_nocomm2: " << wtime_copyfrom_wait_copyfrom_nocomm << endl << " wtime_copyfrom_wait_delete: " << wtime_copyfrom_wait_delete << endl << endl + << " wtime_copyfrom_recvinner_allocate: " << wtime_copyfrom_recvinner_allocate << endl + << " wtime_copyfrom_recvinner_recv: " << wtime_copyfrom_recvinner_recv << endl + << " wtime_copyfrom_sendinner_allocate: " << wtime_copyfrom_sendinner_allocate << endl + << " wtime_copyfrom_sendinner_copy: " << wtime_copyfrom_sendinner_copy << endl + << " wtime_copyfrom_sendinner_send: " << wtime_copyfrom_sendinner_send << endl + << " wtime_copyfrom_recvwaitinner_wait: " << wtime_copyfrom_recvwaitinner_wait << endl + << " wtime_copyfrom_recvwaitinner_copy: " << wtime_copyfrom_recvwaitinner_copy << endl + << " wtime_copyfrom_recvwaitinner_delete: " << wtime_copyfrom_recvwaitinner_delete << endl + << " wtime_copyfrom_sendwaitinner_wait: " << wtime_copyfrom_sendwaitinner_wait << endl + << " wtime_copyfrom_sendwaitinner_delete: " << wtime_copyfrom_sendwaitinner_delete << endl + << endl << " wtime_changeproc_recv: " << wtime_changeproc_recv << endl << " wtime_changeproc_send: " << wtime_changeproc_send << endl << " wtime_changeproc_wait: " << wtime_changeproc_wait << endl diff --git a/Carpet/CarpetLib/src/timestat.hh b/Carpet/CarpetLib/src/timestat.hh index d175d14a8..11a7a00aa 100644 --- a/Carpet/CarpetLib/src/timestat.hh +++ b/Carpet/CarpetLib/src/timestat.hh @@ -48,6 +48,17 @@ extern timestat 
wtime_copyfrom_wait_changeproc_wait; extern timestat wtime_copyfrom_wait_copyfrom_nocomm; extern timestat wtime_copyfrom_wait_delete; +extern timestat wtime_copyfrom_recvinner_allocate; +extern timestat wtime_copyfrom_recvinner_recv; +extern timestat wtime_copyfrom_sendinner_allocate; +extern timestat wtime_copyfrom_sendinner_copy; +extern timestat wtime_copyfrom_sendinner_send; +extern timestat wtime_copyfrom_recvwaitinner_wait; +extern timestat wtime_copyfrom_recvwaitinner_copy; +extern timestat wtime_copyfrom_recvwaitinner_delete; +extern timestat wtime_copyfrom_sendwaitinner_wait; +extern timestat wtime_copyfrom_sendwaitinner_delete; + extern timestat wtime_changeproc_recv; extern timestat wtime_changeproc_send; extern timestat wtime_changeproc_wait; |