about | summary | refs | log | tree | commit | diff
path: root/Carpet
diff options
context:
space:
mode:
author Erik Schnetter <schnetter@aei.mpg.de> 2004-11-24 23:51:00 +0000
committer Erik Schnetter <schnetter@aei.mpg.de> 2004-11-24 23:51:00 +0000
commit 174ecc8b81ba24eb45795fbae18356c8773e582a (patch)
tree 7b61d94a165cb267ac8914dbe3dc1697635c39e6 /Carpet
parent 545d5b76a2ade3dd3e595492122da8cc454d0e23 (diff)
CarpetLib: Implement using MPI_Waitall for communicating
Add a parameter CarpetLib::use_waitall that switches from using a series of MPI_Wait statements to using a single MPI_Waitall statement. This might improve performance on many processors.

darcs-hash:20041124235118-891bb-034efea054db236a187022b1858e4574da867fa3.gz
Diffstat (limited to 'Carpet')
-rw-r--r-- Carpet/CarpetLib/param.ccl 4
-rw-r--r-- Carpet/CarpetLib/src/data.cc 66
-rw-r--r-- Carpet/CarpetLib/src/data.hh 15
-rw-r--r-- Carpet/CarpetLib/src/gdata.cc 13
-rw-r--r-- Carpet/CarpetLib/src/gdata.hh 26
5 files changed, 90 insertions, 34 deletions
diff --git a/Carpet/CarpetLib/param.ccl b/Carpet/CarpetLib/param.ccl
index 47255091f..eb5929c15 100644
--- a/Carpet/CarpetLib/param.ccl
+++ b/Carpet/CarpetLib/param.ccl
@@ -22,3 +22,7 @@ BOOLEAN output_bboxes "Output bounding box information to the screen" STEERABLE=
BOOLEAN save_memory_during_regridding "Save some memory during regridding at the expense of speed"
{
} "no"
+
+BOOLEAN use_waitall "Use MPI_Waitall instead many MPI_Wait statements"
+{
+} "no"
diff --git a/Carpet/CarpetLib/src/data.cc b/Carpet/CarpetLib/src/data.cc
index 0179408d8..d5d6d78f1 100644
--- a/Carpet/CarpetLib/src/data.cc
+++ b/Carpet/CarpetLib/src/data.cc
@@ -15,6 +15,7 @@
#include <mpi.h>
#include "cctk.h"
+#include "cctk_Parameters.h"
#include "bbox.hh"
#include "defs.hh"
@@ -230,13 +231,13 @@ void data<T,D>::change_processor (comm_state<D>& state,
{
switch (state.thestate) {
case state_recv:
- change_processor_recv (newproc, mem);
+ change_processor_recv (state, newproc, mem);
break;
case state_send:
- change_processor_send (newproc, mem);
+ change_processor_send (state, newproc, mem);
break;
case state_wait:
- change_processor_wait (newproc, mem);
+ change_processor_wait (state, newproc, mem);
break;
default:
assert(0);
@@ -246,8 +247,12 @@ void data<T,D>::change_processor (comm_state<D>& state,
template<class T, int D>
-void data<T,D>::change_processor_recv (const int newproc, void* const mem)
+void data<T,D>::change_processor_recv (comm_state<D>& state,
+ const int newproc,
+ void* const mem)
{
+ DECLARE_CCTK_PARAMETERS;
+
assert (!this->comm_active);
this->comm_active = true;
@@ -276,6 +281,9 @@ void data<T,D>::change_processor_recv (const int newproc, void* const mem)
this->tag, dist::comm, &this->request);
const double wtime2 = MPI_Wtime();
this->wtime_irecv += wtime2 - wtime1;
+ if (use_waitall) {
+ state.requests.push_back (this->request);
+ }
} else if (rank == this->_proc) {
// copy to other processor
@@ -290,8 +298,12 @@ void data<T,D>::change_processor_recv (const int newproc, void* const mem)
template<class T, int D>
-void data<T,D>::change_processor_send (const int newproc, void* const mem)
+void data<T,D>::change_processor_send (comm_state<D>& state,
+ const int newproc,
+ void* const mem)
{
+ DECLARE_CCTK_PARAMETERS;
+
assert (this->comm_active);
if (newproc == this->_proc) {
@@ -317,6 +329,9 @@ void data<T,D>::change_processor_send (const int newproc, void* const mem)
this->tag, dist::comm, &this->request);
const double wtime2 = MPI_Wtime();
this->wtime_isend += wtime2 - wtime1;
+ if (use_waitall) {
+ state.requests.push_back (this->request);
+ }
} else {
assert (!mem);
@@ -328,8 +343,12 @@ void data<T,D>::change_processor_send (const int newproc, void* const mem)
template<class T, int D>
-void data<T,D>::change_processor_wait (const int newproc, void* const mem)
+void data<T,D>::change_processor_wait (comm_state<D>& state,
+ const int newproc,
+ void* const mem)
{
+ DECLARE_CCTK_PARAMETERS;
+
assert (this->comm_active);
this->comm_active = false;
@@ -337,6 +356,17 @@ void data<T,D>::change_processor_wait (const int newproc, void* const mem)
assert (!mem);
return;
}
+
+ if (use_waitall) {
+ if (! state.requests.empty()) {
+ const double wtime1 = MPI_Wtime();
+ MPI_Waitall
+ (state.requests.size(), &state.requests.front(), MPI_STATUSES_IGNORE);
+ const double wtime2 = MPI_Wtime();
+ this->wtime_irecvwait += wtime2 - wtime1;
+ state.requests.clear();
+ }
+ }
if (this->_has_storage) {
int rank;
@@ -344,11 +374,13 @@ void data<T,D>::change_processor_wait (const int newproc, void* const mem)
if (rank == newproc) {
// copy from other processor
- const double wtime1 = MPI_Wtime();
- MPI_Status status;
- MPI_Wait (&this->request, &status);
- const double wtime2 = MPI_Wtime();
- this->wtime_irecvwait += wtime2 - wtime1;
+ if (! use_waitall) {
+ const double wtime1 = MPI_Wtime();
+ MPI_Status status;
+ MPI_Wait (&this->request, &status);
+ const double wtime2 = MPI_Wtime();
+ this->wtime_irecvwait += wtime2 - wtime1;
+ }
} else if (rank == this->_proc) {
// copy to other processor
@@ -356,11 +388,13 @@ void data<T,D>::change_processor_wait (const int newproc, void* const mem)
assert (!mem);
assert (_storage);
- const double wtime1 = MPI_Wtime();
- MPI_Status status;
- MPI_Wait (&this->request, &status);
- const double wtime2 = MPI_Wtime();
- this->wtime_isendwait += wtime2 - wtime1;
+ if (! use_waitall) {
+ const double wtime1 = MPI_Wtime();
+ MPI_Status status;
+ MPI_Wait (&this->request, &status);
+ const double wtime2 = MPI_Wtime();
+ this->wtime_isendwait += wtime2 - wtime1;
+ }
if (this->_owns_storage) {
freemem ();
diff --git a/Carpet/CarpetLib/src/data.hh b/Carpet/CarpetLib/src/data.hh
index f8f184050..524f77c4c 100644
--- a/Carpet/CarpetLib/src/data.hh
+++ b/Carpet/CarpetLib/src/data.hh
@@ -78,11 +78,18 @@ public:
// Processor management
virtual void change_processor (comm_state<D>& state,
- const int newproc, void* const mem=0);
+ const int newproc,
+ void* const mem=0);
private:
- virtual void change_processor_recv (const int newproc, void* const mem=0);
- virtual void change_processor_send (const int newproc, void* const mem=0);
- virtual void change_processor_wait (const int newproc, void* const mem=0);
+ virtual void change_processor_recv (comm_state<D>& state,
+ const int newproc,
+ void* const mem=0);
+ virtual void change_processor_send (comm_state<D>& state,
+ const int newproc,
+ void* const mem=0);
+ virtual void change_processor_wait (comm_state<D>& state,
+ const int newproc,
+ void* const mem=0);
public:
// Accessors
diff --git a/Carpet/CarpetLib/src/gdata.cc b/Carpet/CarpetLib/src/gdata.cc
index af586120f..ea44d8043 100644
--- a/Carpet/CarpetLib/src/gdata.cc
+++ b/Carpet/CarpetLib/src/gdata.cc
@@ -53,6 +53,7 @@ comm_state<D>::~comm_state ()
for (size_t n=0; n<tmps.size(); ++n) {
assert (tmps.at(n) == NULL);
}
+ assert (requests.empty());
}
@@ -172,7 +173,7 @@ void gdata<D>::copy_from_recv (comm_state<D>& state,
state.tmps.push_back (tmp);
++state.current;
tmp->allocate (box, src->proc());
- tmp->change_processor_recv (proc());
+ tmp->change_processor_recv (state, proc());
}
}
@@ -207,7 +208,7 @@ void gdata<D>::copy_from_send (comm_state<D>& state,
++state.current;
assert (tmp);
tmp->copy_from_nocomm (src, box);
- tmp->change_processor_send (proc());
+ tmp->change_processor_send (state, proc());
}
}
@@ -238,7 +239,7 @@ void gdata<D>::copy_from_wait (comm_state<D>& state,
// copy to different processor
gdata<D>* const tmp = state.tmps.at(state.current);
assert (tmp);
- tmp->change_processor_wait (proc());
+ tmp->change_processor_wait (state, proc());
copy_from_nocomm (tmp, box);
delete tmp;
state.tmps.at(state.current) = NULL;
@@ -351,7 +352,7 @@ void gdata<D>
state.tmps.push_back (tmp);
++state.current;
tmp->allocate (box, srcs.at(0)->proc());
- tmp->change_processor_recv (proc());
+ tmp->change_processor_recv (state, proc());
}
}
@@ -393,7 +394,7 @@ void gdata<D>
gdata<D>* const tmp = state.tmps.at(state.current++);
assert (tmp);
tmp->interpolate_from_nocomm (srcs, times, box, time, order_space, order_time);
- tmp->change_processor_send (proc());
+ tmp->change_processor_send (state, proc());
}
}
@@ -432,7 +433,7 @@ void gdata<D>
gdata<D>* const tmp = state.tmps.at(state.current);
assert (tmp);
- tmp->change_processor_wait (proc());
+ tmp->change_processor_wait (state, proc());
copy_from_nocomm (tmp, box);
delete tmp;
state.tmps.at(state.current) = NULL;
diff --git a/Carpet/CarpetLib/src/gdata.hh b/Carpet/CarpetLib/src/gdata.hh
index 0fb1882d4..26dd33251 100644
--- a/Carpet/CarpetLib/src/gdata.hh
+++ b/Carpet/CarpetLib/src/gdata.hh
@@ -21,7 +21,6 @@
using namespace std;
-
template<int D>
class gdata;
@@ -40,12 +39,13 @@ struct comm_state {
~comm_state ();
private:
- // Forbit copying and passing by value
+ // Forbid copying and passing by value
comm_state (comm_state const &);
comm_state& operator= (comm_state const &);
public:
vector<gdata<D>*> tmps;
+ vector<MPI_Request> requests; // for use_waitall
size_t current;
};
@@ -80,8 +80,8 @@ protected: // should be readonly
ibbox _extent; // bbox for all data
- bool comm_active;
- MPI_Request request;
+ bool comm_active; // a communication is going on
+ MPI_Request request; // outstanding MPI request
int tag; // MPI tag for this object
@@ -101,11 +101,21 @@ public:
// Processor management
virtual void change_processor (comm_state<D>& state,
- const int newproc, void* const mem=0) = 0;
+ const int newproc,
+ void* const mem=0) = 0;
protected:
- virtual void change_processor_recv (const int newproc, void* const mem=0) = 0;
- virtual void change_processor_send (const int newproc, void* const mem=0) = 0;
- virtual void change_processor_wait (const int newproc, void* const mem=0) = 0;
+ virtual void change_processor_recv (comm_state<D>& state,
+ const int newproc,
+ void* const mem=0)
+ = 0;
+ virtual void change_processor_send (comm_state<D>& state,
+ const int newproc,
+ void* const mem=0)
+ = 0;
+ virtual void change_processor_wait (comm_state<D>& state,
+ const int newproc,
+ void* const mem=0)
+ = 0;
public:
// Storage management