3 files changed, 375 insertions, 361 deletions
diff --git a/Carpet/CarpetLib/src/mpi_string.cc b/Carpet/CarpetLib/src/mpi_string.cc
index 79a160f35..14131022e 100644
--- a/Carpet/CarpetLib/src/mpi_string.cc
+++ b/Carpet/CarpetLib/src/mpi_string.cc
@@ -262,362 +262,6 @@ namespace CarpetLib
   
   
   
-  template <typename T>
-  vector <vector <T> >
-  allgatherv (MPI_Comm comm,
-              vector <T> const & data)
-  {
-    // cerr << "QQQ: allgatherv[0]" << endl;
-    // Get the total number of processors
-    int num_procs;
-    MPI_Comm_size (comm, & num_procs);
-    
-    // Exchange the sizes of the data vectors
-    int const size_in = data.size();
-    assert (size_in >= 0);
-    vector <int> sizes_out (num_procs);
-    // cerr << "QQQ: allgatherv[1] size_in=" << size_in << endl;
-    MPI_Allgather (const_cast <int *> (& size_in), 1, MPI_INT,
-                   & sizes_out.front(), 1, MPI_INT,
-                   comm);
-    // cerr << "QQQ: allgatherv[2]" << endl;
-    
-    // Allocate space for all data vectors
-    vector <int> offsets_out (num_procs + 1);
-    offsets_out.AT(0) = 0;
-    for (int n = 0; n < num_procs; ++ n)
-    {
-      assert (sizes_out.AT(n) >= 0);
-      offsets_out.AT(n + 1) = offsets_out.AT(n) + sizes_out.AT(n);
-      assert (offsets_out.AT(n + 1) >= 0);
-    }
-    int const total_length_out = offsets_out.AT(num_procs);
-    vector <T> alldata_buffer_out (total_length_out);
-    
-    // Exchange all data vectors
-    T dummy;
-    MPI_Datatype const type = mpi_datatype (dummy);
-    int datatypesize;
-    MPI_Type_size (type, &datatypesize);
-    // cerr << "QQQ: allgatherv[3] total_length_out=" << total_length_out << " datatypesize=" << datatypesize << endl;
-#if 0
-    MPI_Allgatherv (const_cast <T *> (& data.front()),
-                    size_in, type,
-                    & alldata_buffer_out.front(),
-                    & sizes_out.front(), & offsets_out.front(), type,
-                    comm);
-#else
-    int const typesize = sizeof(T);
-    for (int n = 0; n < num_procs; ++ n)
-    {
-      sizes_out.AT(n) *= typesize;
-      offsets_out.AT(n) *= typesize;
-    }
-    MPI_Allgatherv (const_cast <T *> (& data.front()),
-                    size_in * typesize, MPI_CHAR,
-                    & alldata_buffer_out.front(),
-                    & sizes_out.front(), & offsets_out.front(), MPI_CHAR,
-                    comm);
-    for (int n = 0; n < num_procs; ++ n)
-    {
-      sizes_out.AT(n) /= typesize;
-      offsets_out.AT(n) /= typesize;
-    }
-#endif
-    // cerr << "QQQ: allgatherv[4]" << endl;
-    
-    // Convert data buffer to vectors
-    vector <vector <T> > alldata_out (num_procs);
-    {
-      typename vector <T>::const_iterator p = alldata_buffer_out.begin();
-      for (int n = 0; n < num_procs; ++ n)
-      {
-        typename vector <T>::const_iterator const pold = p;
-        advance (p, sizes_out.AT(n));
-        alldata_out.AT(n).assign (pold, p);
-      }
-      assert (p == alldata_buffer_out.end());
-    }
-    
-    // cerr << "QQQ: allgatherv[5]" << endl;
-    return alldata_out;
-  }
-  
-  
-  
-  template <typename T>
-  vector <T>
-  allgatherv1 (MPI_Comm comm,
-               vector <T> const & data)
-  {
-    // cerr << "QQQ: allgatherv[0]" << endl;
-    // Get the total number of processors
-    int num_procs;
-    MPI_Comm_size (comm, & num_procs);
-    
-    // Exchange the sizes of the data vectors
-    int const size_in = data.size();
-    assert (size_in >= 0);
-    vector <int> sizes_out (num_procs);
-    // cerr << "QQQ: allgatherv[1] size_in=" << size_in << endl;
-    MPI_Allgather (const_cast <int *> (& size_in), 1, MPI_INT,
-                   & sizes_out.front(), 1, MPI_INT,
-                   comm);
-    // cerr << "QQQ: allgatherv[2]" << endl;
-    
-    // Allocate space for all data vectors
-    vector <int> offsets_out (num_procs + 1);
-    offsets_out.AT(0) = 0;
-    for (int n = 0; n < num_procs; ++ n)
-    {
-      assert (sizes_out.AT(n) >= 0);
-      offsets_out.AT(n + 1) = offsets_out.AT(n) + sizes_out.AT(n);
-      assert (offsets_out.AT(n + 1) >= 0);
-    }
-    int const total_length_out = offsets_out.AT(num_procs);
-    vector <T> alldata_buffer_out (total_length_out);
-    
-    // Exchange all data vectors
-    T dummy;
-    MPI_Datatype const type = mpi_datatype (dummy);
-    int datatypesize;
-    MPI_Type_size (type, &datatypesize);
-    // cerr << "QQQ: allgatherv[3] total_length_out=" << total_length_out << " datatypesize=" << datatypesize << endl;
-#if 0
-    MPI_Allgatherv (const_cast <T *> (& data.front()),
-                    size_in, type,
-                    & alldata_buffer_out.front(),
-                    & sizes_out.front(), & offsets_out.front(), type,
-                    comm);
-#else
-    int const typesize = sizeof(T);
-    for (int n = 0; n < num_procs; ++ n)
-    {
-      sizes_out.AT(n) *= typesize;
-      offsets_out.AT(n) *= typesize;
-    }
-    MPI_Allgatherv (const_cast <T *> (& data.front()),
-                    size_in * typesize, MPI_CHAR,
-                    & alldata_buffer_out.front(),
-                    & sizes_out.front(), & offsets_out.front(), MPI_CHAR,
-                    comm);
-    for (int n = 0; n < num_procs; ++ n)
-    {
-      sizes_out.AT(n) /= typesize;
-      offsets_out.AT(n) /= typesize;
-    }
-#endif
-    // cerr << "QQQ: allgatherv[4]" << endl;
-    
-    // cerr << "QQQ: allgatherv[5]" << endl;
-    return alldata_buffer_out;
-  }
-  
-  
-  
-  template <typename T>
-  vector <T>
-  alltoall (MPI_Comm const comm,
-            vector <T> const & data)
-  {
-    // Get the total number of processors
-    int num_procs;
-    MPI_Comm_size (comm, & num_procs);
-    
-    // Allocate space for all data
-    vector <T> alldata (num_procs);
-    
-    // Exchange all data vectors
-    T const dummy;
-    MPI_Datatype const type = mpi_datatype (dummy);
-    MPI_Alltoall (& data.front(), 1, type,
-                  & alldata.front(), 1, type,
-                  comm);
-    
-    return alldata;
-  }
-  
-  
-  
-  template <typename T>
-  vector <vector <T> >
-  alltoallv (MPI_Comm const comm,
-             vector <vector <T> > const & data)
-  {
-    // Get the total number of processors
-    int num_procs;
-    MPI_Comm_size (comm, & num_procs);
-    
-    // Exchange the sizes of the data vectors
-    vector <int> sizes_in (num_procs);
-    for (int n = 0; n < num_procs; ++ n)
-    {
-      sizes_in.AT(n) = data.AT(n).size();
-    }
-    vector <int> sizes_out (num_procs);
-    MPI_Alltoall (& sizes_in.front(), 1, MPI_INT,
-                  & sizes_out.front(), 1, MPI_INT,
-                  comm);
-    
-    // Copy vectors to data buffer
-    vector <int> offsets_in (num_procs + 1);
-    offsets_in.AT(0) = 0;
-    for (int n = 0; n < num_procs; ++ n)
-    {
-      offsets_in.AT(n + 1) = offsets_in.AT(n) + sizes_in.AT(n);
-    }
-    int const total_length_in = offsets_in.AT(num_procs);
-    vector <T> alldata_buffer_in;
-    alldata_buffer_in.reserve (total_length_in);
-    for (int n = 0; n < num_procs; ++ n)
-    {
-      alldata_buffer_in.insert (alldata_buffer_in.end(),
-                                data.AT(n).begin(), data.AT(n).end());
-    }
-    
-    // Allocate space for all data vectors
-    vector <int> offsets_out (num_procs + 1);
-    offsets_out.AT(0) = 0;
-    for (int n = 0; n < num_procs; ++ n)
-    {
-      offsets_out.AT(n + 1) = offsets_out.AT(n) + sizes_out.AT(n);
-    }
-    int const total_length_out = offsets_out.AT(num_procs);
-    vector <T> alldata_buffer_out (total_length_out);
-    
-    // Exchange all data vectors
-    T const dummy;
-    MPI_Datatype const type = mpi_datatype (dummy);
-    MPI_Alltoallv (& alldata_buffer_in.front(),
-                   & sizes_in.front(), & offsets_in.front(), type,
-                   & alldata_buffer_out.front(),
-                   & sizes_out.front(), & offsets_out.front(), type,
-                   comm);
-    
-    // Convert data buffer to vectors
-    vector <vector <T> > alldata_out (num_procs);
-    {
-      typename vector <T>::const_iterator p = alldata_buffer_out.begin();
-      for (int n = 0; n < num_procs; ++ n)
-      {
-        typename vector <T>::const_iterator const pold = p;
-        advance (p, sizes_out.AT(n));
-        alldata_out.AT(n).assign (pold, p);
-      }
-    }
-    
-    return alldata_out;
-  }
-  
-  
-  
-  template <typename T>
-  vector <T>
-  alltoallv1 (MPI_Comm const comm,
-              vector <vector <T> > const & data)
-  {
-    // Get the total number of processors
-    int num_procs;
-    MPI_Comm_size (comm, & num_procs);
-    
-    // Exchange the sizes of the data vectors
-    vector <int> sizes_in (num_procs);
-    for (int n = 0; n < num_procs; ++ n)
-    {
-      sizes_in.AT(n) = data.AT(n).size();
-    }
-    vector <int> sizes_out (num_procs);
-    // cerr << "QQQ: alltoallv1[1]" << endl;
-    MPI_Alltoall (& sizes_in.front(), 1, MPI_INT,
-                  & sizes_out.front(), 1, MPI_INT,
-                  comm);
-    // cerr << "QQQ: alltoallv1[2]" << endl;
-    
-#if 0
-    // Copy vectors to data buffer
-    vector <int> offsets_in (num_procs + 1);
-    offsets_in.AT(0) = 0;
-    for (int n = 0; n < num_procs; ++ n)
-    {
-      offsets_in.AT(n + 1) = offsets_in.AT(n) + sizes_in.AT(n);
-    }
-    int const total_length_in = offsets_in.AT(num_procs);
-    vector <T> alldata_buffer_in;
-    alldata_buffer_in.reserve (total_length_in);
-    for (int n = 0; n < num_procs; ++ n)
-    {
-      alldata_buffer_in.insert (alldata_buffer_in.end(),
-                                data.AT(n).begin(), data.AT(n).end());
-    }
-    
-    // Allocate space for all data vectors
-    vector <int> offsets_out (num_procs + 1);
-    offsets_out.AT(0) = 0;
-    for (int n = 0; n < num_procs; ++ n)
-    {
-      offsets_out.AT(n + 1) = offsets_out.AT(n) + sizes_out.AT(n);
-    }
-    int const total_length_out = offsets_out.AT(num_procs);
-    vector <T> alldata_buffer_out (total_length_out);
-    
-    // Exchange all data vectors
-    T const dummy;
-    MPI_Datatype const type = mpi_datatype (dummy);
-    // cerr << "QQQ: alltoallv1[3]" << endl;
-    MPI_Alltoallv (& alldata_buffer_in.front(),
-                   & sizes_in.front(), & offsets_in.front(), type,
-                   & alldata_buffer_out.front(),
-                   & sizes_out.front(), & offsets_out.front(), type,
-                   comm);
-    // cerr << "QQQ: alltoallv1[4]" << endl;
-#endif
-    
-    // Allocate space for all data vectors
-    vector <int> offsets_out (num_procs + 1);
-    offsets_out.AT(0) = 0;
-    for (int n = 0; n < num_procs; ++ n)
-    {
-      offsets_out.AT(n + 1) = offsets_out.AT(n) + sizes_out.AT(n);
-    }
-    int const total_length_out = offsets_out.AT(num_procs);
-    vector <T> alldata_buffer_out (total_length_out);
-    
-    // Exchange all data vectors
-    T const dummy;
-    MPI_Datatype const type = mpi_datatype (dummy);
-    int const tag = 4711;
-    vector <MPI_Request> reqs (2 * num_procs);
-    int nreqs = 0;
-    // cerr << "QQQ: alltoallv1[5]" << endl;
-    for (int n = 0; n < num_procs; ++ n)
-    {
-      if (sizes_out.AT(n) > 0) {
-        MPI_Irecv (& alldata_buffer_out.AT(offsets_out.AT(n)),
-                   sizes_out.AT(n),
-                   type,
-                   n, tag, comm, & reqs.AT(nreqs));
-        ++ nreqs;
-      }
-    }
-    // cerr << "QQQ: alltoallv1[6]" << endl;
-    for (int n = 0; n < num_procs; ++ n)
-    {
-      if (sizes_in.AT(n) > 0) {
-        MPI_Isend (const_cast <T *> (& data.AT(n).front()),
-                   sizes_in.AT(n),
-                   type,
-                   n, tag, comm, & reqs.AT(nreqs));
-        ++ nreqs;
-      }
-    }
-    // cerr << "QQQ: alltoallv1[7]" << endl;
-    MPI_Waitall (nreqs, & reqs.front(), MPI_STATUSES_IGNORE);
-    // cerr << "QQQ: alltoallv1[8]" << endl;
-    
-    return alldata_buffer_out;
-  }
-  
-  
   template
   vector <vector <dh::light_dboxes> >
   allgatherv (MPI_Comm comm,
@@ -628,9 +272,4 @@ namespace CarpetLib
   allgatherv1 (MPI_Comm comm,
                vector <ivect> const & data);
   
-  template
-  vector <sendrecv_pseudoregion_t>
-  alltoallv1 (MPI_Comm comm,
-              vector <vector <sendrecv_pseudoregion_t> > const & data);
-  
 } // namespace CarpetLib
diff --git a/Carpet/CarpetLib/src/mpi_string.hh b/Carpet/CarpetLib/src/mpi_string.hh
index 84951773a..8222b9b2a 100644
--- a/Carpet/CarpetLib/src/mpi_string.hh
+++ b/Carpet/CarpetLib/src/mpi_string.hh
@@ -9,6 +9,8 @@
 #  include "nompi.h"
 #endif
 
+#include "defs.hh"
+
 
 
 namespace CarpetLib
@@ -65,4 +67,365 @@ namespace CarpetLib
   alltoallv1 (MPI_Comm comm,
               vector <vector <T> > const & data);
   
+  
+  
+  //////////////////////////////////////////////////////////////////////////////
+  
+  
+  
+  template <typename T>                 // allgatherv: every rank contributes one vector and receives the vectors of all ranks
+  vector <vector <T> >                  // returns num_procs vectors; entry n holds the elements gathered from rank n
+  allgatherv (MPI_Comm comm,            // communicator used for both collective calls below
+              vector <T> const & data)  // this rank's contribution; its length is exchanged first, so lengths may differ per rank
+  {
+    // cerr << "QQQ: allgatherv[0]" << endl;
+    // Get the total number of processors
+    int num_procs;
+    MPI_Comm_size (comm, & num_procs);
+    
+    // Exchange the sizes of the data vectors
+    int const size_in = data.size();  // size() is narrowed to int because the MPI calls below take int counts
+    assert (size_in >= 0);  // size() is unsigned, so a negative value can only come from the narrowing above
+    vector <int> sizes_out (num_procs);  // receives one element count per rank
+    // cerr << "QQQ: allgatherv[1] size_in=" << size_in << endl;
+    MPI_Allgather (const_cast <int *> (& size_in), 1, MPI_INT,  // NOTE(review): const_cast presumably caters to MPI headers with non-const send buffers
+                   & sizes_out.front(), 1, MPI_INT,
+                   comm);
+    // cerr << "QQQ: allgatherv[2]" << endl;
+    
+    // Allocate space for all data vectors
+    vector <int> offsets_out (num_procs + 1);  // running sum of sizes_out; the extra last entry is the grand total
+    offsets_out.AT(0) = 0;
+    for (int n = 0; n < num_procs; ++ n)
+    {
+      assert (sizes_out.AT(n) >= 0);
+      offsets_out.AT(n + 1) = offsets_out.AT(n) + sizes_out.AT(n);
+      assert (offsets_out.AT(n + 1) >= 0);  // NOTE(review): looks like a guard against int overflow of the running total
+    }
+    int const total_length_out = offsets_out.AT(num_procs);
+    vector <T> alldata_buffer_out (total_length_out);  // flat receive buffer, measured in elements of T
+    
+    // Exchange all data vectors
+    T dummy;  // only passed to mpi_datatype to look up the MPI type for T
+    MPI_Datatype const type = mpi_datatype (dummy);  // used by the disabled #if 0 call and by MPI_Type_size only
+    int datatypesize;  // queried here, but only referenced by the commented-out trace below
+    MPI_Type_size (type, &datatypesize);
+    // cerr << "QQQ: allgatherv[3] total_length_out=" << total_length_out << " datatypesize=" << datatypesize << endl;
+#if 0
+    MPI_Allgatherv (const_cast <T *> (& data.front()),
+                    size_in, type,
+                    & alldata_buffer_out.front(),
+                    & sizes_out.front(), & offsets_out.front(), type,
+                    comm);
+#else
+    int const typesize = sizeof(T);  // active path: elements travel as raw bytes (MPI_CHAR), so counts and offsets are scaled to bytes
+    for (int n = 0; n < num_procs; ++ n)
+    {
+      sizes_out.AT(n) *= typesize;  // NOTE(review): byte counts are held in int and are not checked for overflow here
+      offsets_out.AT(n) *= typesize;  // only entries 0..num_procs-1 are scaled; the total in the last entry stays in elements
+    }
+    MPI_Allgatherv (const_cast <T *> (& data.front()),  // NOTE(review): data may be empty (size_in == 0 passes the assert); front() on an empty vector is undefined
+                    size_in * typesize, MPI_CHAR,
+                    & alldata_buffer_out.front(),  // NOTE(review): elements are overwritten bytewise; assumes T tolerates a raw byte copy — confirm
+                    & sizes_out.front(), & offsets_out.front(), MPI_CHAR,
+                    comm);
+    for (int n = 0; n < num_procs; ++ n)
+    {
+      sizes_out.AT(n) /= typesize;  // back to element units for the unpacking below
+      offsets_out.AT(n) /= typesize;
+    }
+#endif
+    // cerr << "QQQ: allgatherv[4]" << endl;
+    
+    // Convert data buffer to vectors
+    vector <vector <T> > alldata_out (num_procs);
+    {
+      typename vector <T>::const_iterator p = alldata_buffer_out.begin();  // walks the flat buffer rank by rank
+      for (int n = 0; n < num_procs; ++ n)
+      {
+        typename vector <T>::const_iterator const pold = p;  // start of rank n's slice
+        advance (p, sizes_out.AT(n));
+        alldata_out.AT(n).assign (pold, p);  // copy the slice [pold, p) into rank n's vector
+      }
+      assert (p == alldata_buffer_out.end());  // every gathered element must have been handed out
+    }
+    
+    // cerr << "QQQ: allgatherv[5]" << endl;
+    return alldata_out;
+  }
+  
+  
+  
+  template <typename T>                  // allgatherv1: like allgatherv, but the result stays one flat vector
+  vector <T>                             // returns all gathered elements concatenated in rank order (rank n starts at the running sum of earlier sizes)
+  allgatherv1 (MPI_Comm comm,            // communicator used for both collective calls below
+               vector <T> const & data)  // this rank's contribution; its length is exchanged first, so lengths may differ per rank
+  {
+    // cerr << "QQQ: allgatherv1[0]" << endl;
+    // Get the total number of processors
+    int num_procs;
+    MPI_Comm_size (comm, & num_procs);
+    
+    // Exchange the sizes of the data vectors
+    int const size_in = data.size();  // size() is narrowed to int because the MPI calls below take int counts
+    assert (size_in >= 0);  // size() is unsigned, so a negative value can only come from the narrowing above
+    vector <int> sizes_out (num_procs);  // receives one element count per rank
+    // cerr << "QQQ: allgatherv1[1] size_in=" << size_in << endl;
+    MPI_Allgather (const_cast <int *> (& size_in), 1, MPI_INT,  // NOTE(review): const_cast presumably caters to MPI headers with non-const send buffers
+                   & sizes_out.front(), 1, MPI_INT,
+                   comm);
+    // cerr << "QQQ: allgatherv1[2]" << endl;
+    
+    // Allocate space for all data vectors
+    vector <int> offsets_out (num_procs + 1);  // running sum of sizes_out; the extra last entry is the grand total
+    offsets_out.AT(0) = 0;
+    for (int n = 0; n < num_procs; ++ n)
+    {
+      assert (sizes_out.AT(n) >= 0);
+      offsets_out.AT(n + 1) = offsets_out.AT(n) + sizes_out.AT(n);
+      assert (offsets_out.AT(n + 1) >= 0);  // NOTE(review): looks like a guard against int overflow of the running total
+    }
+    int const total_length_out = offsets_out.AT(num_procs);
+    vector <T> alldata_buffer_out (total_length_out);  // flat receive buffer, measured in elements of T; this is also the return value
+    
+    // Exchange all data vectors
+    T dummy;  // only passed to mpi_datatype to look up the MPI type for T
+    MPI_Datatype const type = mpi_datatype (dummy);  // used by the disabled #if 0 call and by MPI_Type_size only
+    int datatypesize;  // queried here, but only referenced by the commented-out trace below
+    MPI_Type_size (type, &datatypesize);
+    // cerr << "QQQ: allgatherv1[3] total_length_out=" << total_length_out << " datatypesize=" << datatypesize << endl;
+#if 0
+    MPI_Allgatherv (const_cast <T *> (& data.front()),
+                    size_in, type,
+                    & alldata_buffer_out.front(),
+                    & sizes_out.front(), & offsets_out.front(), type,
+                    comm);
+#else
+    int const typesize = sizeof(T);  // active path: elements travel as raw bytes (MPI_CHAR), so counts and offsets are scaled to bytes
+    for (int n = 0; n < num_procs; ++ n)
+    {
+      sizes_out.AT(n) *= typesize;  // NOTE(review): byte counts are held in int and are not checked for overflow here
+      offsets_out.AT(n) *= typesize;  // only entries 0..num_procs-1 are scaled; the total in the last entry stays in elements
+    }
+    MPI_Allgatherv (const_cast <T *> (& data.front()),  // NOTE(review): data may be empty (size_in == 0 passes the assert); front() on an empty vector is undefined
+                    size_in * typesize, MPI_CHAR,
+                    & alldata_buffer_out.front(),  // NOTE(review): elements are overwritten bytewise; assumes T tolerates a raw byte copy — confirm
+                    & sizes_out.front(), & offsets_out.front(), MPI_CHAR,
+                    comm);
+    for (int n = 0; n < num_procs; ++ n)
+    {
+      sizes_out.AT(n) /= typesize;  // restored to element units, although nothing below reads them again
+      offsets_out.AT(n) /= typesize;
+    }
+#endif
+    // cerr << "QQQ: allgatherv1[4]" << endl;
+    
+    // cerr << "QQQ: allgatherv1[5]" << endl;
+    return alldata_buffer_out;
+  }
+  
+  
+  
+  template <typename T>               // alltoall: exchange exactly one element of T with every rank
+  vector <T>                          // returns num_procs elements, one received from each rank
+  alltoall (MPI_Comm const comm,      // communicator used for the exchange
+            vector <T> const & data)  // one element per destination rank; NOTE(review): the size is not checked here, presumably it must be num_procs
+  {
+    // Get the total number of processors
+    int num_procs;
+    MPI_Comm_size (comm, & num_procs);
+    
+    // Allocate space for all data
+    vector <T> alldata (num_procs);  // receive buffer with one slot per rank
+    
+    // Exchange all data vectors
+    T const dummy;  // NOTE(review): const object without initializer; this only compiles when T has a user-provided default constructor
+    MPI_Datatype const type = mpi_datatype (dummy);  // dummy is only passed to mpi_datatype to look up the MPI type for T
+    MPI_Alltoall (& data.front(), 1, type,  // NOTE(review): send buffer is passed as T const * here, without the const_cast used in allgatherv — confirm against the MPI headers in use
+                  & alldata.front(), 1, type,
+                  comm);
+    
+    return alldata;
+  }
+  
+  
+  
+  template <typename T>                         // alltoallv: send a separate vector to every rank and receive one from every rank
+  vector <vector <T> >                          // returns num_procs vectors; entry n holds the elements received from rank n
+  alltoallv (MPI_Comm const comm,               // communicator used for both collective calls below
+             vector <vector <T> > const & data) // data[n] is the vector destined for rank n; entries 0..num_procs-1 are read
+  {
+    // Get the total number of processors
+    int num_procs;
+    MPI_Comm_size (comm, & num_procs);
+    
+    // Exchange the sizes of the data vectors
+    vector <int> sizes_in (num_procs);  // element counts this rank sends, one per destination
+    for (int n = 0; n < num_procs; ++ n)
+    {
+      sizes_in.AT(n) = data.AT(n).size();  // size() is narrowed to int because the MPI calls below take int counts
+    }
+    vector <int> sizes_out (num_procs);  // element counts this rank receives, one per source
+    MPI_Alltoall (& sizes_in.front(), 1, MPI_INT,
+                  & sizes_out.front(), 1, MPI_INT,
+                  comm);
+    
+    // Copy vectors to data buffer
+    vector <int> offsets_in (num_procs + 1);  // running sum of sizes_in; the extra last entry is the grand total
+    offsets_in.AT(0) = 0;
+    for (int n = 0; n < num_procs; ++ n)
+    {
+      offsets_in.AT(n + 1) = offsets_in.AT(n) + sizes_in.AT(n);
+    }
+    int const total_length_in = offsets_in.AT(num_procs);
+    vector <T> alldata_buffer_in;  // contiguous send buffer built by concatenating data[0], data[1], ...
+    alldata_buffer_in.reserve (total_length_in);  // reserve the full length up front for the inserts below
+    for (int n = 0; n < num_procs; ++ n)
+    {
+      alldata_buffer_in.insert (alldata_buffer_in.end(),
+                                data.AT(n).begin(), data.AT(n).end());
+    }
+    
+    // Allocate space for all data vectors
+    vector <int> offsets_out (num_procs + 1);  // running sum of sizes_out; the extra last entry is the grand total
+    offsets_out.AT(0) = 0;
+    for (int n = 0; n < num_procs; ++ n)
+    {
+      offsets_out.AT(n + 1) = offsets_out.AT(n) + sizes_out.AT(n);
+    }
+    int const total_length_out = offsets_out.AT(num_procs);
+    vector <T> alldata_buffer_out (total_length_out);  // flat receive buffer, measured in elements of T
+    
+    // Exchange all data vectors
+    T const dummy;  // NOTE(review): const object without initializer; this only compiles when T has a user-provided default constructor
+    MPI_Datatype const type = mpi_datatype (dummy);  // counts and offsets below are in units of this type, not bytes
+    MPI_Alltoallv (& alldata_buffer_in.front(),  // NOTE(review): front() is undefined if the buffer is empty (nothing to send) — confirm callers
+                   & sizes_in.front(), & offsets_in.front(), type,
+                   & alldata_buffer_out.front(),  // NOTE(review): same concern when nothing is received
+                   & sizes_out.front(), & offsets_out.front(), type,
+                   comm);
+    
+    // Convert data buffer to vectors
+    vector <vector <T> > alldata_out (num_procs);
+    {
+      typename vector <T>::const_iterator p = alldata_buffer_out.begin();  // walks the flat buffer rank by rank
+      for (int n = 0; n < num_procs; ++ n)
+      {
+        typename vector <T>::const_iterator const pold = p;  // start of rank n's slice
+        advance (p, sizes_out.AT(n));
+        alldata_out.AT(n).assign (pold, p);  // copy the slice [pold, p) into rank n's vector
+      }
+    }
+    
+    return alldata_out;
+  }
+  
+  
+  
+  template <typename T>                          // alltoallv1: like alltoallv, but the result stays one flat vector
+  vector <T>                                     // returns all received elements concatenated in source-rank order
+  alltoallv1 (MPI_Comm const comm,               // communicator used for the size exchange and the point-to-point messages
+              vector <vector <T> > const & data) // data[n] is the vector destined for rank n; entries 0..num_procs-1 are read
+  {
+    // Get the total number of processors
+    int num_procs;
+    MPI_Comm_size (comm, & num_procs);
+    
+    // Exchange the sizes of the data vectors
+    vector <int> sizes_in (num_procs);  // element counts this rank sends, one per destination
+    for (int n = 0; n < num_procs; ++ n)
+    {
+      sizes_in.AT(n) = data.AT(n).size();  // size() is narrowed to int because the MPI calls below take int counts
+    }
+    vector <int> sizes_out (num_procs);  // element counts this rank receives, one per source
+    // cerr << "QQQ: alltoallv1[1]" << endl;
+    MPI_Alltoall (& sizes_in.front(), 1, MPI_INT,
+                  & sizes_out.front(), 1, MPI_INT,
+                  comm);
+    // cerr << "QQQ: alltoallv1[2]" << endl;
+    
+#if 0
+    // Copy vectors to data buffer (this #if 0 section is a disabled variant that packs the data and calls MPI_Alltoallv)
+    vector <int> offsets_in (num_procs + 1);
+    offsets_in.AT(0) = 0;
+    for (int n = 0; n < num_procs; ++ n)
+    {
+      offsets_in.AT(n + 1) = offsets_in.AT(n) + sizes_in.AT(n);
+    }
+    int const total_length_in = offsets_in.AT(num_procs);
+    vector <T> alldata_buffer_in;
+    alldata_buffer_in.reserve (total_length_in);
+    for (int n = 0; n < num_procs; ++ n)
+    {
+      alldata_buffer_in.insert (alldata_buffer_in.end(),
+                                data.AT(n).begin(), data.AT(n).end());
+    }
+    
+    // Allocate space for all data vectors
+    vector <int> offsets_out (num_procs + 1);
+    offsets_out.AT(0) = 0;
+    for (int n = 0; n < num_procs; ++ n)
+    {
+      offsets_out.AT(n + 1) = offsets_out.AT(n) + sizes_out.AT(n);
+    }
+    int const total_length_out = offsets_out.AT(num_procs);
+    vector <T> alldata_buffer_out (total_length_out);
+    
+    // Exchange all data vectors
+    T const dummy;
+    MPI_Datatype const type = mpi_datatype (dummy);
+    // cerr << "QQQ: alltoallv1[3]" << endl;
+    MPI_Alltoallv (& alldata_buffer_in.front(),
+                   & sizes_in.front(), & offsets_in.front(), type,
+                   & alldata_buffer_out.front(),
+                   & sizes_out.front(), & offsets_out.front(), type,
+                   comm);
+    // cerr << "QQQ: alltoallv1[4]" << endl;
+#endif
+    
+    // Allocate space for all data vectors
+    vector <int> offsets_out (num_procs + 1);  // running sum of sizes_out; the extra last entry is the grand total
+    offsets_out.AT(0) = 0;
+    for (int n = 0; n < num_procs; ++ n)
+    {
+      offsets_out.AT(n + 1) = offsets_out.AT(n) + sizes_out.AT(n);
+    }
+    int const total_length_out = offsets_out.AT(num_procs);
+    vector <T> alldata_buffer_out (total_length_out);  // flat receive buffer, measured in elements of T; this is also the return value
+    
+    // Exchange all data vectors
+    T const dummy;  // NOTE(review): const object without initializer; this only compiles when T has a user-provided default constructor
+    MPI_Datatype const type = mpi_datatype (dummy);  // counts below are in units of this type, not bytes
+    int const tag = 4711;  // message tag shared by every send and receive of this exchange
+    vector <MPI_Request> reqs (2 * num_procs);  // room for at most one receive plus one send per rank
+    int nreqs = 0;  // number of requests actually posted
+    // cerr << "QQQ: alltoallv1[5]" << endl;
+    for (int n = 0; n < num_procs; ++ n)  // all receives are posted before any send
+    {
+      if (sizes_out.AT(n) > 0) {  // nothing is posted for ranks that send no elements
+        MPI_Irecv (& alldata_buffer_out.AT(offsets_out.AT(n)),  // rank n's elements land at their running-sum offset
+                   sizes_out.AT(n),
+                   type,
+                   n, tag, comm, & reqs.AT(nreqs));
+        ++ nreqs;
+      }
+    }
+    // cerr << "QQQ: alltoallv1[6]" << endl;
+    for (int n = 0; n < num_procs; ++ n)
+    {
+      if (sizes_in.AT(n) > 0) {  // empty vectors are skipped, so front() below is only taken on non-empty vectors
+        MPI_Isend (const_cast <T *> (& data.AT(n).front()),  // sent straight from the caller's vector, without the packing copy of the disabled variant
+                   sizes_in.AT(n),
+                   type,
+                   n, tag, comm, & reqs.AT(nreqs));
+        ++ nreqs;
+      }
+    }
+    // cerr << "QQQ: alltoallv1[7]" << endl;
+    MPI_Waitall (nreqs, & reqs.front(), MPI_STATUSES_IGNORE);  // block until every posted send and receive has completed
+    // cerr << "QQQ: alltoallv1[8]" << endl;
+    
+    return alldata_buffer_out;
+  }
+  
 } // namespace CarpetLib
diff --git a/Carpet/CarpetLib/src/region.cc b/Carpet/CarpetLib/src/region.cc
index e1904d316..ea8572b1f 100644
--- a/Carpet/CarpetLib/src/region.cc
+++ b/Carpet/CarpetLib/src/region.cc
@@ -4,6 +4,7 @@
 
 #include "bboxset.hh"
 #include "defs.hh"
+#include "mpi_string.hh"
 #include "region.hh"
 
 using namespace std;
@@ -418,3 +419,14 @@ ostream & operator<< (ostream & os, sendrecv_pseudoregion_t const & srp)
 {
   return os << "(send:" << srp.send << ",recv:" << srp.recv << ")";
 }
+
+
+  
+namespace CarpetLib {
+  
+  template  // explicit instantiation of alltoallv1 for T = sendrecv_pseudoregion_t
+  vector <sendrecv_pseudoregion_t>  // the template definition is taken from mpi_string.hh, included at the top of this file
+  alltoallv1 (MPI_Comm comm,
+              vector <vector <sendrecv_pseudoregion_t> > const & data);
+  
+}  // namespace CarpetLib