CarpetLib: Introduce parameter to post MPI_Irecv and MPI_Isend at the same time

Introduce a parameter to post MPI_Irecv and MPI_Isend at the same time. Use two queues instead of one vector to store the MPI_Requests. darcs-hash:20041208222541-891bb-c7c8994a0c41b6cfb37f6dc023bc1172238f3619.gz
author: Erik Schnetter <schnetter@aei.mpg.de> 2004-12-08 22:25:00 +0000
committer: Erik Schnetter <schnetter@aei.mpg.de> 2004-12-08 22:25:00 +0000
commit: 841ec5b3a4246d1a626debc96b3d6f37ea413992 (patch)
tree: 5f854b9fc28c284e3a66c10ac4648437bced3867
parent: 6781c02ef3265d592dc407c03c53552943979c4b (diff)
5 files changed, 126 insertions, 58 deletions
diff --git a/Carpet/CarpetLib/param.ccl b/Carpet/CarpetLib/param.ccl
index 8ba0207d2..30b205a2d 100644
--- a/Carpet/CarpetLib/param.ccl
+++ b/Carpet/CarpetLib/param.ccl
@@ -33,3 +33,7 @@ INT max_mpi_tags "Maximum number of MPI tags to use"
 BOOLEAN use_waitall "Use MPI_Waitall instead many MPI_Wait statements"
 {
 } "no"
+
+BOOLEAN combine_recv_send "Combine MPI_Irecv and MPI_Isend calls"
+{
+} "no"
diff --git a/Carpet/CarpetLib/src/data.cc b/Carpet/CarpetLib/src/data.cc
index e4993a900..6c4c677d9 100644
--- a/Carpet/CarpetLib/src/data.cc
+++ b/Carpet/CarpetLib/src/data.cc
@@ -224,28 +224,6 @@ T* data<T,D>::vectordata (const int vectorindex) const
 
 // Processor management
 template<class T, int D>
-void data<T,D>::change_processor (comm_state<D>& state,
-                                  const int newproc,
-                                  void* const mem)
-{
-  switch (state.thestate) {
-  case state_recv:
-    change_processor_recv (state, newproc, mem);
-    break;
-  case state_send:
-    change_processor_send (state, newproc, mem);
-    break;
-  case state_wait:
-    change_processor_wait (state, newproc, mem);
-    break;
-  default:
-    assert(0);
-  }
-}
-
-
-
-template<class T, int D>
 void data<T,D>::change_processor_recv (comm_state<D>& state,
                                        const int newproc,
                                        void* const mem)
@@ -354,6 +332,7 @@ void data<T,D>::change_processor_wait (comm_state<D>& state,
 
   if (use_waitall) {
     if (! state.requests.empty()) {
+      // wait for all requests at once
       const double wtime1 = MPI_Wtime();
       MPI_Waitall
         (state.requests.size(), &state.requests.front(), MPI_STATUSES_IGNORE);
diff --git a/Carpet/CarpetLib/src/data.hh b/Carpet/CarpetLib/src/data.hh
index 61953b30b..9445bf165 100644
--- a/Carpet/CarpetLib/src/data.hh
+++ b/Carpet/CarpetLib/src/data.hh
@@ -77,9 +77,6 @@ private:
 public:
 
   // Processor management
-  virtual void change_processor (comm_state<D>& state,
-                                 const int newproc,
-                                 void* const mem=0);
 private:
   virtual void change_processor_recv (comm_state<D>& state,
                                       const int newproc,
diff --git a/Carpet/CarpetLib/src/gdata.cc b/Carpet/CarpetLib/src/gdata.cc
index bec027fce..e6881aa8f 100644
--- a/Carpet/CarpetLib/src/gdata.cc
+++ b/Carpet/CarpetLib/src/gdata.cc
@@ -25,18 +25,55 @@ using namespace std;
 // Communication state control
 template<int D>
 comm_state<D>::comm_state ()
-  : thestate(state_recv),
-    current(0)
+  : thestate(state_recv)
 {
 }
 
 template<int D>
 void comm_state<D>::step ()
 {
+  DECLARE_CCTK_PARAMETERS;
+  
   assert (thestate!=state_done);
-  assert (current==tmps.size());
-  thestate=astate(size_t(thestate)+1);
-  current=0;
+  if (combine_recv_send) {
+    switch (thestate) {
+    case state_recv:
+      assert (tmps1.empty());
+      thestate = state_wait;
+      break;
+    case state_send:
+      assert (0);
+    case state_wait:
+      assert (tmps1.empty());
+      assert (tmps2.empty());
+      thestate = state_done;
+      break;
+    case state_done:
+      assert (0);
+    default:
+      assert (0);
+    }
+  } else {
+    switch (thestate) {
+    case state_recv:
+      assert (tmps2.empty());
+      thestate = state_send;
+      break;
+    case state_send:
+      assert (tmps1.empty());
+      thestate = state_wait;
+      break;
+    case state_wait:
+      assert (tmps1.empty());
+      assert (tmps2.empty());
+      thestate = state_done;
+      break;
+    case state_done:
+      assert (0);
+    default:
+      assert (0);
+    }
+  }
 }
 
 template<int D>
@@ -49,10 +86,8 @@ template<int D>
 comm_state<D>::~comm_state ()
 {
   assert (thestate==state_recv || thestate==state_done);
-  assert (current == 0);
-  for (size_t n=0; n<tmps.size(); ++n) {
-    assert (tmps.at(n) == NULL);
-  }
+  assert (tmps1.empty());
+  assert (tmps2.empty());
   assert (requests.empty());
 }
 
@@ -100,17 +135,62 @@ gdata<D>::~gdata ()
 
 
 
+// Processor management
+template<int D>
+void gdata<D>::change_processor (comm_state<D>& state,
+                                 const int newproc,
+                                 void* const mem)
+{
+  DECLARE_CCTK_PARAMETERS;
+  
+  switch (state.thestate) {
+  case state_recv:
+    if (combine_recv_send) {
+      change_processor_recv (state, newproc, mem);
+      change_processor_send (state, newproc, mem);
+    } else {
+      change_processor_recv (state, newproc, mem);
+    }
+    break;
+  case state_send:
+    if (combine_recv_send) {
+      // do nothing
+    } else {
+      change_processor_send (state, newproc, mem);
+    }
+    break;
+  case state_wait:
+    change_processor_wait (state, newproc, mem);
+    break;
+  default:
+    assert(0);
+  }
+}
+
+
+
 // Data manipulators
 template<int D>
 void gdata<D>::copy_from (comm_state<D>& state,
                           const gdata* src, const ibbox& box)
 {
+  DECLARE_CCTK_PARAMETERS;
+  
   switch (state.thestate) {
   case state_recv:
-    copy_from_recv (state, src, box);
+    if (combine_recv_send) {
+      copy_from_recv (state, src, box);
+      copy_from_send (state, src, box);
+    } else {
+      copy_from_recv (state, src, box);
+    }
     break;
   case state_send:
-    copy_from_send (state, src, box);
+    if (combine_recv_send) {
+      // do nothing
+    } else {
+      copy_from_send (state, src, box);
+    }
     break;
   case state_wait:
     copy_from_wait (state, src, box);
@@ -171,9 +251,7 @@ void gdata<D>::copy_from_recv (comm_state<D>& state,
     
     // copy to different processor
     gdata<D>* const tmp = make_typed(varindex, transport_operator);
-    // TODO: is this efficient?
-    state.tmps.push_back (tmp);
-    ++state.current;
+    state.tmps1.push (tmp);
     tmp->allocate (box, src->proc());
     tmp->change_processor_recv (state, proc());
     
@@ -206,8 +284,9 @@ void gdata<D>::copy_from_send (comm_state<D>& state,
   } else {
     
     // copy to different processor
-    gdata<D>* const tmp = state.tmps.at(state.current);
-    ++state.current;
+    gdata<D>* const tmp = state.tmps1.front();
+    state.tmps1.pop();
+    state.tmps2.push (tmp);
     assert (tmp);
     tmp->copy_from_nocomm (src, box);
     tmp->change_processor_send (state, proc());
@@ -239,13 +318,12 @@ void gdata<D>::copy_from_wait (comm_state<D>& state,
   } else {
     
     // copy to different processor
-    gdata<D>* const tmp = state.tmps.at(state.current);
+    gdata<D>* const tmp = state.tmps2.front();
+    state.tmps2.pop();
     assert (tmp);
     tmp->change_processor_wait (state, proc());
     copy_from_nocomm (tmp, box);
     delete tmp;
-    state.tmps.at(state.current) = NULL;
-    ++state.current;
     
   }
 }
@@ -261,14 +339,25 @@ void gdata<D>
                     const int order_space,
                     const int order_time)
 {
+  DECLARE_CCTK_PARAMETERS;
+  
   assert (transport_operator != op_error);
   if (transport_operator == op_none) return;
   switch (state.thestate) {
   case state_recv:
-    interpolate_from_recv (state, srcs, times, box, time, order_space, order_time);
+    if (combine_recv_send) {
+      interpolate_from_recv (state, srcs, times, box, time, order_space, order_time);
+      interpolate_from_send (state, srcs, times, box, time, order_space, order_time);
+    } else {
+      interpolate_from_recv (state, srcs, times, box, time, order_space, order_time);
+    }
     break;
   case state_send:
-    interpolate_from_send (state, srcs, times, box, time, order_space, order_time);
+    if (combine_recv_send) {
+      // do nothing
+    } else {
+      interpolate_from_send (state, srcs, times, box, time, order_space, order_time);
+    }
     break;
   case state_wait:
     interpolate_from_wait (state, srcs, times, box, time, order_space, order_time);
@@ -348,9 +437,7 @@ void gdata<D>
     // interpolate from other processor
     
     gdata<D>* const tmp = make_typed(varindex, transport_operator);
-    // TODO: is this efficient?
-    state.tmps.push_back (tmp);
-    ++state.current;
+    state.tmps1.push (tmp);
     tmp->allocate (box, srcs.at(0)->proc());
     tmp->change_processor_recv (state, proc());
     
@@ -391,7 +478,9 @@ void gdata<D>
   } else {
     // interpolate from other processor
     
-    gdata<D>* const tmp = state.tmps.at(state.current++);
+    gdata<D>* const tmp = state.tmps1.front();
+    state.tmps1.pop();
+    state.tmps2.push (tmp);
     assert (tmp);
     tmp->interpolate_from_nocomm (srcs, times, box, time, order_space, order_time);
     tmp->change_processor_send (state, proc());
@@ -431,13 +520,12 @@ void gdata<D>
   } else {
     // interpolate from other processor
     
-    gdata<D>* const tmp = state.tmps.at(state.current);
+    gdata<D>* const tmp = state.tmps2.front();
+    state.tmps2.pop();
     assert (tmp);
     tmp->change_processor_wait (state, proc());
     copy_from_nocomm (tmp, box);
     delete tmp;
-    state.tmps.at(state.current) = NULL;
-    ++state.current;
     
   }
 }
diff --git a/Carpet/CarpetLib/src/gdata.hh b/Carpet/CarpetLib/src/gdata.hh
index 9099dcc6a..12ff7b4cc 100644
--- a/Carpet/CarpetLib/src/gdata.hh
+++ b/Carpet/CarpetLib/src/gdata.hh
@@ -6,6 +6,7 @@
 #include <assert.h>
 #include <stdlib.h>
 
+#include <queue>
 #include <iostream>
 #include <string>
 #include <vector>
@@ -44,9 +45,8 @@ private:
   comm_state& operator= (comm_state const &);
 public:
   
-  vector<gdata<D>*> tmps;
+  queue<gdata<D>*> tmps1, tmps2;
   vector<MPI_Request> requests; // for use_waitall
-  size_t current;
 };
 
 
@@ -100,9 +100,9 @@ public:
               const operator_type transport_operator = op_error) const = 0;
   
   // Processor management
-  virtual void change_processor (comm_state<D>& state,
-                                 const int newproc,
-                                 void* const mem=0) = 0;
+  void change_processor (comm_state<D>& state,
+                         const int newproc,
+                         void* const mem=0);
  protected:
   virtual void change_processor_recv (comm_state<D>& state,
                                       const int newproc,
author	Erik Schnetter <schnetter@aei.mpg.de>	2004-12-08 22:25:00 +0000
committer	Erik Schnetter <schnetter@aei.mpg.de>	2004-12-08 22:25:00 +0000
commit	841ec5b3a4246d1a626debc96b3d6f37ea413992 (patch)
tree	5f854b9fc28c284e3a66c10ac4648437bced3867
parent	6781c02ef3265d592dc407c03c53552943979c4b (diff)