CarpetLib, Carpet: implement and use collective communication buffers

Collective buffers are used to gather all components' data on a processor before it gets send off to other processors in one go. This minimizes the number of outstanding MPI communications down to O(N-1) and thus improves overall efficiency as benchmarks show. Each processor allocates a pair of single send/recv buffers to communicate with all other processors. For this the class (actually, the struct) comm_state was extended by 3 more states: state_get_buffer_sizes: accumulates the sizes for the send/recv buffers state_fill_send_buffers: gathers all the data into the send buffers state_empty_recv_buffers: copies the data from the recv buffer back into the processor's components Send/recv buffers are exchanged during state_fill_send_buffers and state_empty_recv_buffers. The constructor for a comm_state struct now takes an argument <datatype> which denotes the CCTK datatype to use for the attached collective buffers. If a negative value is passed here then it falls back to using the old send/recv/wait communication scheme. The datatype argument has a default value of -1 to maintain backwards compatibility to existing code (which therefore will keep using the old scheme). The new communication scheme is chosen by setting the parameter CarpetLib::use_collective_communication_buffers to "yes". It defaults to "no" meaning that the old send/recv/wait scheme is still used. So far all the comm_state objects in the higher-level routines in thorn Carpet (restriction/prolongation, regridding, synchronization) have been enabled to use collective communication buffers. Other thorns (CarpetInterp, CarpetIO*, CarpetSlab) will follow in separate commits. darcs-hash:20050330152811-776a0-51f426887fea099d1a67b42bd79e4f786979ba91.gz
author: Thomas Radke <tradke@aei.mpg.de> 2005-03-30 15:28:00 +0000
committer: Thomas Radke <tradke@aei.mpg.de> 2005-03-30 15:28:00 +0000
commit: 94daef6e66284e4bd9cd44cfe9f554932c7773c7 (patch)
tree: df80da062422e5363d871063b9154b3589d7f71b /Carpet/Carpet/src/Restrict.cc
parent: b6ed81fe31a0a5571ea7433f1e2e2e93a630de16 (diff)
1 files changed, 101 insertions, 78 deletions
diff --git a/Carpet/Carpet/src/Restrict.cc b/Carpet/Carpet/src/Restrict.cc
index ada404cd0..df1c1304b 100644
--- a/Carpet/Carpet/src/Restrict.cc
+++ b/Carpet/Carpet/src/Restrict.cc
@@ -16,6 +16,8 @@ namespace Carpet {
   
   using namespace std;
   
+  // restricts a set of groups which all have the same vartype
+  static void RestrictGroups (const cGH* cgh, group_set& groups);
   
   
   void Restrict (const cGH* cgh)
@@ -31,90 +33,111 @@ namespace Carpet {
     
     Checkpoint ("Restrict");
     
+    // all but the finest level are restricted
+    if (reflevel == reflevels-1) {
+      return;
+    }
+
+    // sort all grid functions into sets of the same vartype
+    vector<group_set> groups;
+
+    for (int group = 0; group < CCTK_NumGroups(); ++group) {
+      if (CCTK_GroupTypeI(group) == CCTK_GF
+          && CCTK_NumVarsInGroupI(group) > 0
+          && CCTK_QueryGroupStorageI(cgh, group)) {
+
+        group_set newset;
+        const int firstvar = CCTK_FirstVarIndexI (group);
+        newset.vartype = CCTK_VarTypeI (firstvar);
+        assert (newset.vartype >= 0);
+        int c;
+        for (c = 0; c < groups.size(); c++) {
+          if (newset.vartype == groups[c].vartype) {
+            break;
+          }
+        }
+        if (c == groups.size()) {
+          groups.push_back (newset);
+        }
+        groups[c].members.push_back (group);
+      }
+    }
+
     // Restrict
-    if (reflevel < reflevels-1) {
+    for (int c = 0; c < groups.size(); c++) {
+      RestrictGroups (cgh, groups[c]);
+    }
+
+    // Synchronise
+    for (int c = 0; c < groups.size(); c++) {
+      SyncGroups (cgh, groups[c]);
+    }
+  }
+  
+
+  // restricts a set of groups which all have the same vartype
+  static void RestrictGroups (const cGH* cgh, group_set& groups) {
+    DECLARE_CCTK_PARAMETERS;
 
+    const int tl = 0;
+
+    // Use collective or single-component buffers for communication ?
+    const int vartype =
+      use_collective_communication_buffers ? groups.vartype : -1;
+
+    if (use_collective_communication_buffers ||
+        ! minimise_outstanding_communications) {
+      for (comm_state state(vartype); ! state.done(); state.step()) {
+        for (int c = 0; c < groups.members.size(); ++c) {
+          const int group = groups.members[c];
+          for (int m=0; m<(int)arrdata.at(group).size(); ++m) {
+
+            // use background time here (which may not be modified
+            // by the user)
+            const CCTK_REAL time = vtt.at(m)->time (tl, reflevel, mglevel);
+
+            const CCTK_REAL time1 = vtt.at(m)->time (0, reflevel, mglevel);
+            const CCTK_REAL time2
+              = (cgh->cctk_time - cctk_initial_time) / delta_time;
+            assert (fabs(time1 - time2) / (fabs(time1) + fabs(time2) + fabs(cgh->cctk_delta_time)) < 1e-12);
+
+            for (int v = 0; v < arrdata.at(group).at(m).data.size(); ++v) {
+              ggf *const gv = arrdata.at(group).at(m).data.at(v);
+              for (int c = 0; c < vhh.at(m)->components(reflevel); ++c) {
+                gv->ref_restrict (state, tl, reflevel, c, mglevel, time);
+              }
+            }
+          }
+        } // loop over groups
+      } // for state
+    } else {
       // make the comm_state loop the innermost
       // in order to minimise the number of outstanding communications
-      if (minimise_outstanding_communications) {
-        for (int group=0; group<CCTK_NumGroups(); ++group) {
-          if (CCTK_GroupTypeI(group) == CCTK_GF) {
-            if (CCTK_QueryGroupStorageI(cgh, group)) {
-              
-              const int tl = 0;
-              
-              for (int m=0; m<(int)arrdata.at(group).size(); ++m) {
-                
-                // use background time here (which may not be modified
-                // by the user)
-                const CCTK_REAL time = vtt.at(m)->time (tl, reflevel, mglevel);
-                
-                const CCTK_REAL time1 = vtt.at(m)->time (0, reflevel, mglevel);
-                const CCTK_REAL time2
-                  = (cgh->cctk_time - cctk_initial_time) / delta_time;
-                assert (fabs(time1 - time2) / (fabs(time1) + fabs(time2) + fabs(cgh->cctk_delta_time)) < 1e-12);
-                
-                for (int var=0; var<(int)arrdata.at(group).at(m).data.size(); ++var) {
-                  for (int c=0; c<vhh.at(m)->components(reflevel); ++c) {
-                    for (comm_state state; !state.done(); state.step()) {
-                      arrdata.at(group).at(m).data.at(var)->ref_restrict
-                        (state, tl, reflevel, c, mglevel, time);
-                    }
-                  }
-                }
+      for (int c = 0; c < groups.members.size(); ++c) {
+        const int group = groups.members[c];
+        for (int m=0; m<(int)arrdata.at(group).size(); ++m) {
+     
+          // use background time here (which may not be modified
+          // by the user)
+          const CCTK_REAL time = vtt.at(m)->time (tl, reflevel, mglevel);
+
+          const CCTK_REAL time1 = vtt.at(m)->time (0, reflevel, mglevel);
+          const CCTK_REAL time2
+            = (cgh->cctk_time - cctk_initial_time) / delta_time;
+          assert (fabs(time1 - time2) / (fabs(time1) + fabs(time2) + fabs(cgh->cctk_delta_time)) < 1e-12);
+  
+          for (int v = 0; v < arrdata.at(group).at(m).data.size(); ++v) {
+            ggf *const gv = arrdata.at(group).at(m).data.at(v);
+            for (int c = 0; c < vhh.at(m)->components(reflevel); ++c) {
+              for (comm_state state(vartype); ! state.done(); state.step()) {
+                gv->ref_restrict (state, tl, reflevel, c, mglevel, time);
               }
-              
-            } // if group has storage
-          } // if grouptype == CCTK_GF
-        } // loop over groups
-      } else {
-        for (comm_state state; !state.done(); state.step()) {
-          for (int group=0; group<CCTK_NumGroups(); ++group) {
-            if (CCTK_GroupTypeI(group) == CCTK_GF) {
-              if (CCTK_QueryGroupStorageI(cgh, group)) {
-              
-                const int tl = 0;
-              
-                for (int m=0; m<(int)arrdata.at(group).size(); ++m) {
-                
-                  // use background time here (which may not be modified
-                  // by the user)
-                  const CCTK_REAL time = vtt.at(m)->time (tl, reflevel, mglevel);
-                
-                  const CCTK_REAL time1 = vtt.at(m)->time (0, reflevel, mglevel);
-                  const CCTK_REAL time2
-                    = (cgh->cctk_time - cctk_initial_time) / delta_time;
-                  assert (fabs(time1 - time2) / (fabs(time1) + fabs(time2) + fabs(cgh->cctk_delta_time)) < 1e-12);
-                
-                  for (int var=0; var<(int)arrdata.at(group).at(m).data.size(); ++var) {
-                    for (int c=0; c<vhh.at(m)->components(reflevel); ++c) {
-                      arrdata.at(group).at(m).data.at(var)->ref_restrict
-                        (state, tl, reflevel, c, mglevel, time);
-                    }
-                  }
-                }
-              
-              } // if group has storage
-            } // if grouptype == CCTK_GF
-          } // loop over groups
-        } // for state
-      } // if minimise_outstanding_communications
-    } // if not finest refinement level
-    
-    
-    
-    // Sync
-    if (reflevel < reflevels-1) {
-      for (int group=0; group<CCTK_NumGroups(); ++group) {
-        if (CCTK_GroupTypeI(group) == CCTK_GF
-            && CCTK_NumVarsInGroupI(group) > 0
-            && CCTK_QueryGroupStorageI(cgh, group)) {
-          SyncGVGroup(cgh, group);
+            }
+          }
         }
-      }
-    } // if not finest refinement level
-    
+      } // loop over groups
+    } // if use_collective_communication_buffers
   }
-  
+
 } // namespace Carpet
author	Thomas Radke <tradke@aei.mpg.de>	2005-03-30 15:28:00 +0000
committer	Thomas Radke <tradke@aei.mpg.de>	2005-03-30 15:28:00 +0000
commit	94daef6e66284e4bd9cd44cfe9f554932c7773c7 (patch)
tree	df80da062422e5363d871063b9154b3589d7f71b /Carpet/Carpet/src/Restrict.cc
parent	b6ed81fe31a0a5571ea7433f1e2e2e93a630de16 (diff)