CarpetLib: Implement communication using MPI_Waitall

Add a parameter CarpetLib::use_waitall that switches from using a series of MPI_Wait calls to using a single MPI_Waitall call. This might improve performance when running on many processors.

darcs-hash:20041124235118-891bb-034efea054db236a187022b1858e4574da867fa3.gz
author: Erik Schnetter <schnetter@aei.mpg.de> 2004-11-24 23:51:00 +0000
committer: Erik Schnetter <schnetter@aei.mpg.de> 2004-11-24 23:51:00 +0000
commit: 174ecc8b81ba24eb45795fbae18356c8773e582a (patch)
tree: 7b61d94a165cb267ac8914dbe3dc1697635c39e6 /Carpet
parent: 545d5b76a2ade3dd3e595492122da8cc454d0e23 (diff)
5 files changed, 90 insertions, 34 deletions
diff --git a/Carpet/CarpetLib/param.ccl b/Carpet/CarpetLib/param.ccl
index 47255091f..eb5929c15 100644
--- a/Carpet/CarpetLib/param.ccl
+++ b/Carpet/CarpetLib/param.ccl
@@ -22,3 +22,7 @@ BOOLEAN output_bboxes "Output bounding box information to the screen" STEERABLE=
 BOOLEAN save_memory_during_regridding "Save some memory during regridding at the expense of speed"
 {
 } "no"
+
+BOOLEAN use_waitall "Use MPI_Waitall instead many MPI_Wait statements"
+{
+} "no"
diff --git a/Carpet/CarpetLib/src/data.cc b/Carpet/CarpetLib/src/data.cc
index 0179408d8..d5d6d78f1 100644
--- a/Carpet/CarpetLib/src/data.cc
+++ b/Carpet/CarpetLib/src/data.cc
@@ -15,6 +15,7 @@
 #include <mpi.h>
 
 #include "cctk.h"
+#include "cctk_Parameters.h"
 
 #include "bbox.hh"
 #include "defs.hh"
@@ -230,13 +231,13 @@ void data<T,D>::change_processor (comm_state<D>& state,
 {
   switch (state.thestate) {
   case state_recv:
-    change_processor_recv (newproc, mem);
+    change_processor_recv (state, newproc, mem);
     break;
   case state_send:
-    change_processor_send (newproc, mem);
+    change_processor_send (state, newproc, mem);
     break;
   case state_wait:
-    change_processor_wait (newproc, mem);
+    change_processor_wait (state, newproc, mem);
     break;
   default:
     assert(0);
@@ -246,8 +247,12 @@ void data<T,D>::change_processor (comm_state<D>& state,
 
 
 template<class T, int D>
-void data<T,D>::change_processor_recv (const int newproc, void* const mem)
+void data<T,D>::change_processor_recv (comm_state<D>& state,
+                                       const int newproc,
+                                       void* const mem)
 {
+  DECLARE_CCTK_PARAMETERS;
+  
   assert (!this->comm_active);
   this->comm_active = true;
   
@@ -276,6 +281,9 @@ void data<T,D>::change_processor_recv (const int newproc, void* const mem)
                  this->tag, dist::comm, &this->request);
       const double wtime2 = MPI_Wtime();
       this->wtime_irecv += wtime2 - wtime1;
+      if (use_waitall) {
+        state.requests.push_back (this->request);
+      }
       
     } else if (rank == this->_proc) {
       // copy to other processor
@@ -290,8 +298,12 @@ void data<T,D>::change_processor_recv (const int newproc, void* const mem)
 
 
 template<class T, int D>
-void data<T,D>::change_processor_send (const int newproc, void* const mem)
+void data<T,D>::change_processor_send (comm_state<D>& state,
+                                       const int newproc,
+                                       void* const mem)
 {
+  DECLARE_CCTK_PARAMETERS;
+  
   assert (this->comm_active);
   
   if (newproc == this->_proc) {
@@ -317,6 +329,9 @@ void data<T,D>::change_processor_send (const int newproc, void* const mem)
                  this->tag, dist::comm, &this->request);
       const double wtime2 = MPI_Wtime();
       this->wtime_isend += wtime2 - wtime1;
+      if (use_waitall) {
+        state.requests.push_back (this->request);
+      }
       
     } else {
       assert (!mem);
@@ -328,8 +343,12 @@ void data<T,D>::change_processor_send (const int newproc, void* const mem)
 
 
 template<class T, int D>
-void data<T,D>::change_processor_wait (const int newproc, void* const mem)
+void data<T,D>::change_processor_wait (comm_state<D>& state,
+                                       const int newproc,
+                                       void* const mem)
 {
+  DECLARE_CCTK_PARAMETERS;
+  
   assert (this->comm_active);
   this->comm_active = false;
   
@@ -337,6 +356,17 @@ void data<T,D>::change_processor_wait (const int newproc, void* const mem)
     assert (!mem);
     return;
   }
+
+  if (use_waitall) {
+    if (! state.requests.empty()) {
+      const double wtime1 = MPI_Wtime();
+      MPI_Waitall
+        (state.requests.size(), &state.requests.front(), MPI_STATUSES_IGNORE);
+      const double wtime2 = MPI_Wtime();
+      this->wtime_irecvwait += wtime2 - wtime1;
+      state.requests.clear();
+    }
+  }
   
   if (this->_has_storage) {
     int rank;
@@ -344,11 +374,13 @@ void data<T,D>::change_processor_wait (const int newproc, void* const mem)
     if (rank == newproc) {
       // copy from other processor
       
-      const double wtime1 = MPI_Wtime();
-      MPI_Status status;
-      MPI_Wait (&this->request, &status);
-      const double wtime2 = MPI_Wtime();
-      this->wtime_irecvwait += wtime2 - wtime1;
+      if (! use_waitall) {
+        const double wtime1 = MPI_Wtime();
+        MPI_Status status;
+        MPI_Wait (&this->request, &status);
+        const double wtime2 = MPI_Wtime();
+        this->wtime_irecvwait += wtime2 - wtime1;
+      }
       
     } else if (rank == this->_proc) {
       // copy to other processor
@@ -356,11 +388,13 @@ void data<T,D>::change_processor_wait (const int newproc, void* const mem)
       assert (!mem);
       assert (_storage);
       
-      const double wtime1 = MPI_Wtime();
-      MPI_Status status;
-      MPI_Wait (&this->request, &status);
-      const double wtime2 = MPI_Wtime();
-      this->wtime_isendwait += wtime2 - wtime1;
+      if (! use_waitall) {
+        const double wtime1 = MPI_Wtime();
+        MPI_Status status;
+        MPI_Wait (&this->request, &status);
+        const double wtime2 = MPI_Wtime();
+        this->wtime_isendwait += wtime2 - wtime1;
+      }
       
       if (this->_owns_storage) {
 	freemem ();
diff --git a/Carpet/CarpetLib/src/data.hh b/Carpet/CarpetLib/src/data.hh
index f8f184050..524f77c4c 100644
--- a/Carpet/CarpetLib/src/data.hh
+++ b/Carpet/CarpetLib/src/data.hh
@@ -78,11 +78,18 @@ public:
 
   // Processor management
   virtual void change_processor (comm_state<D>& state,
-                                 const int newproc, void* const mem=0);
+                                 const int newproc,
+                                 void* const mem=0);
 private:
-  virtual void change_processor_recv (const int newproc, void* const mem=0);
-  virtual void change_processor_send (const int newproc, void* const mem=0);
-  virtual void change_processor_wait (const int newproc, void* const mem=0);
+  virtual void change_processor_recv (comm_state<D>& state,
+                                      const int newproc,
+                                      void* const mem=0);
+  virtual void change_processor_send (comm_state<D>& state,
+                                      const int newproc,
+                                      void* const mem=0);
+  virtual void change_processor_wait (comm_state<D>& state,
+                                      const int newproc,
+                                      void* const mem=0);
 public:
 
   // Accessors
diff --git a/Carpet/CarpetLib/src/gdata.cc b/Carpet/CarpetLib/src/gdata.cc
index af586120f..ea44d8043 100644
--- a/Carpet/CarpetLib/src/gdata.cc
+++ b/Carpet/CarpetLib/src/gdata.cc
@@ -53,6 +53,7 @@ comm_state<D>::~comm_state ()
   for (size_t n=0; n<tmps.size(); ++n) {
     assert (tmps.at(n) == NULL);
   }
+  assert (requests.empty());
 }
 
 
@@ -172,7 +173,7 @@ void gdata<D>::copy_from_recv (comm_state<D>& state,
     state.tmps.push_back (tmp);
     ++state.current;
     tmp->allocate (box, src->proc());
-    tmp->change_processor_recv (proc());
+    tmp->change_processor_recv (state, proc());
     
   }
 }
@@ -207,7 +208,7 @@ void gdata<D>::copy_from_send (comm_state<D>& state,
     ++state.current;
     assert (tmp);
     tmp->copy_from_nocomm (src, box);
-    tmp->change_processor_send (proc());
+    tmp->change_processor_send (state, proc());
     
   }
 }
@@ -238,7 +239,7 @@ void gdata<D>::copy_from_wait (comm_state<D>& state,
     // copy to different processor
     gdata<D>* const tmp = state.tmps.at(state.current);
     assert (tmp);
-    tmp->change_processor_wait (proc());
+    tmp->change_processor_wait (state, proc());
     copy_from_nocomm (tmp, box);
     delete tmp;
     state.tmps.at(state.current) = NULL;
@@ -351,7 +352,7 @@ void gdata<D>
     state.tmps.push_back (tmp);
     ++state.current;
     tmp->allocate (box, srcs.at(0)->proc());
-    tmp->change_processor_recv (proc());
+    tmp->change_processor_recv (state, proc());
     
   }
 }
@@ -393,7 +394,7 @@ void gdata<D>
     gdata<D>* const tmp = state.tmps.at(state.current++);
     assert (tmp);
     tmp->interpolate_from_nocomm (srcs, times, box, time, order_space, order_time);
-    tmp->change_processor_send (proc());
+    tmp->change_processor_send (state, proc());
     
   }
 }
@@ -432,7 +433,7 @@ void gdata<D>
     
     gdata<D>* const tmp = state.tmps.at(state.current);
     assert (tmp);
-    tmp->change_processor_wait (proc());
+    tmp->change_processor_wait (state, proc());
     copy_from_nocomm (tmp, box);
     delete tmp;
     state.tmps.at(state.current) = NULL;
diff --git a/Carpet/CarpetLib/src/gdata.hh b/Carpet/CarpetLib/src/gdata.hh
index 0fb1882d4..26dd33251 100644
--- a/Carpet/CarpetLib/src/gdata.hh
+++ b/Carpet/CarpetLib/src/gdata.hh
@@ -21,7 +21,6 @@
 using namespace std;
 
 
-  
 
 template<int D>
 class gdata;
@@ -40,12 +39,13 @@ struct comm_state {
   ~comm_state ();
   
 private:
-  // Forbit copying and passing by value
+  // Forbid copying and passing by value
   comm_state (comm_state const &);
   comm_state& operator= (comm_state const &);
 public:
   
   vector<gdata<D>*> tmps;
+  vector<MPI_Request> requests; // for use_waitall
   size_t current;
 };
 
@@ -80,8 +80,8 @@ protected:                      // should be readonly
   
   ibbox _extent;		// bbox for all data
   
-  bool comm_active;
-  MPI_Request request;
+  bool comm_active;             // a communication is going on
+  MPI_Request request;          // outstanding MPI request
   
   int tag;                      // MPI tag for this object
   
@@ -101,11 +101,21 @@ public:
   
   // Processor management
   virtual void change_processor (comm_state<D>& state,
-                                 const int newproc, void* const mem=0) = 0;
+                                 const int newproc,
+                                 void* const mem=0) = 0;
  protected:
-  virtual void change_processor_recv (const int newproc, void* const mem=0) = 0;
-  virtual void change_processor_send (const int newproc, void* const mem=0) = 0;
-  virtual void change_processor_wait (const int newproc, void* const mem=0) = 0;
+  virtual void change_processor_recv (comm_state<D>& state,
+                                      const int newproc,
+                                      void* const mem=0)
+    = 0;
+  virtual void change_processor_send (comm_state<D>& state,
+                                      const int newproc,
+                                      void* const mem=0)
+    = 0;
+  virtual void change_processor_wait (comm_state<D>& state,
+                                      const int newproc,
+                                      void* const mem=0)
+    = 0;
  public:
   
   // Storage management
author	Erik Schnetter <schnetter@aei.mpg.de>	2004-11-24 23:51:00 +0000
committer	Erik Schnetter <schnetter@aei.mpg.de>	2004-11-24 23:51:00 +0000
commit	174ecc8b81ba24eb45795fbae18356c8773e582a (patch)
tree	7b61d94a165cb267ac8914dbe3dc1697635c39e6 /Carpet
parent	545d5b76a2ade3dd3e595492122da8cc454d0e23 (diff)