Merge.

author: Christian Reisswig <reisswig@tapir.caltech.edu> 2010-08-27 23:05:53 -0700
committer: Barry Wardell <barry.wardell@gmail.com> 2011-12-14 18:25:17 +0000
commit: a54aeac2cd92d6742efee10bb8f4c08affe25e24 (patch)
tree: c111593ac52d19a1b9e4a6170781a937f1f20ab5
parent: 2205c4de243d010b29f651a4ecd51c32e7b161bb (diff)
parent: bed54e5df6d3f29d0d2ad473018fa35b0f994a2a (diff)
46 files changed, 1245 insertions, 510 deletions
diff --git a/Carpet/Carpet/README b/Carpet/Carpet/README
index 3d0ca4eb1..75caae781 100644
--- a/Carpet/Carpet/README
+++ b/Carpet/Carpet/README
@@ -4,7 +4,7 @@ Maintainer(s): Erik Schnetter <schnetter@cct.lsu.edu>
 Licence      : GPLv2+
 --------------------------------------------------------------------------
 
-
 1. Purpose
+
 This thorn provides a parallel AMR (adaptive mesh refinement) driver
 with MPI.
diff --git a/Carpet/Carpet/interface.ccl b/Carpet/Carpet/interface.ccl
index 023b65ff7..3dad22724 100644
--- a/Carpet/Carpet/interface.ccl
+++ b/Carpet/Carpet/interface.ccl
@@ -329,3 +329,5 @@ CCTK_REAL timing_levels TYPE=array DIM=1 SIZE=max_refinement_levels DISTRIB=cons
 {
   time_level time_level_count
 } "Per-level timing information"
+
+CCTK_INT carpet_unusedpoints_mask TYPE=GF tags='Prolongation="none"' "mask which is set !=0 for points which do not influence future evolution, assuming appropriate regridding"
diff --git a/Carpet/Carpet/param.ccl b/Carpet/Carpet/param.ccl
index ffeeaf63d..9be76541e 100644
--- a/Carpet/Carpet/param.ccl
+++ b/Carpet/Carpet/param.ccl
@@ -508,3 +508,9 @@ BOOLEAN init_3_timelevels "Set up 3 timelevels of initial data" STEERABLE=always
 BOOLEAN adaptive_stepsize "Allow adaptive timestep sizes"
 {
 } "no"
+
+BOOLEAN use_unusedpoints_mask "Turn on of off (default) storage and usage of 'unusedpoints_mask'"
+{
+  0:* :: "Anything else than 0 turns unusedpoints_mask on"
+} 0
+
diff --git a/Carpet/Carpet/schedule.ccl b/Carpet/Carpet/schedule.ccl
index 75d4097a5..b4f543e20 100644
--- a/Carpet/Carpet/schedule.ccl
+++ b/Carpet/Carpet/schedule.ccl
@@ -31,3 +31,18 @@ if (refine_timestep)
     OPTIONS: singlemap
   } "Correct time step size for spacing on finer grids"
 }
+
+if (use_unusedpoints_mask)
+{
+  storage: carpet_unusedpoints_mask
+
+  schedule CarpetUnusedMask AT BASEGRID
+  {
+    LANG: C
+  } "Set mask of unused points"
+
+  schedule CarpetUnusedMask AT POSTREGRID
+  {
+    LANG: C
+  } "Set mask of unused points"
+}
diff --git a/Carpet/Carpet/src/Comm.cc b/Carpet/Carpet/src/Comm.cc
index c01a59fa3..e335765d8 100644
--- a/Carpet/Carpet/src/Comm.cc
+++ b/Carpet/Carpet/src/Comm.cc
@@ -208,8 +208,7 @@ namespace Carpet {
         const int grouptype = CCTK_GroupTypeI (g);
         const int ml = grouptype == CCTK_GF ? mglevel : 0;
         const int rl = grouptype == CCTK_GF ? reflevel : 0;
-        const int active_tl =
-          groupdata.AT(g).activetimelevels.AT(mglevel).AT(reflevel);
+        const int active_tl = groupdata.AT(g).activetimelevels.AT(ml).AT(rl);
         assert (active_tl>=0);
         const int tl = active_tl > 1 ? timelevel : 0;
         for (int m = 0; m < (int)arrdata.AT(g).size(); ++m) {
diff --git a/Carpet/Carpet/src/Initialise.cc b/Carpet/Carpet/src/Initialise.cc
index 1ff189dd7..785f3cfdc 100644
--- a/Carpet/Carpet/src/Initialise.cc
+++ b/Carpet/Carpet/src/Initialise.cc
@@ -1053,12 +1053,23 @@ namespace Carpet {
     
     Waypoint ("Initialising three timelevels:");
     
+#if 0
     initialise_3tl_flip_timelevels (cctkGH);
     initialise_3tl_evolve (cctkGH);
     // TODO: May want to restrict here if possible (i.e. if the time
     // refinement factor is one)
     initialise_3tl_recycle (cctkGH);
     initialise_3tl_flip_timelevels (cctkGH);
+#endif
+    
+    initialise_3tl_flip_timelevels (cctkGH);
+    initialise_3tl_evolve (cctkGH);
+    initialise_3tl_evolve (cctkGH);
+    // TODO: May want to restrict where possible (i.e. if the time
+    // refinement factor is one)
+    initialise_3tl_recycle (cctkGH);
+    initialise_3tl_recycle (cctkGH);
+    initialise_3tl_flip_timelevels (cctkGH);
     
     Waypoint ("Finished initialising three timelevels");
   }
diff --git a/Carpet/Carpet/src/Recompose.cc b/Carpet/Carpet/src/Recompose.cc
index bd71f3945..257a92e8d 100644
--- a/Carpet/Carpet/src/Recompose.cc
+++ b/Carpet/Carpet/src/Recompose.cc
@@ -1047,7 +1047,7 @@ namespace Carpet {
     
     // Output
     CCTK_VInfo (CCTK_THORNSTRING,
-                "Grid structure statistics:");
+                "Global grid structure statistics:");
     CCTK_VInfo (CCTK_THORNSTRING,
                 "GF: rhs: %.0fk active, %.0fk owned (+%.0f%%), %.0fk total (+%.0f%%), %.3g steps/time",
                 double (num_active_cpu_points / 1e+3),
diff --git a/Carpet/Carpet/src/UnusedMask.cc b/Carpet/Carpet/src/UnusedMask.cc
new file mode 100644
index 000000000..6cf71caea
--- /dev/null
+++ b/Carpet/Carpet/src/UnusedMask.cc
@@ -0,0 +1,46 @@
+#include <cctk.h>
+#include <cctk_Arguments.h>
+#include <cctk_Parameters.h>
+
+#include <carpet.hh>
+
+#include <loopcontrol.h>
+
+namespace Carpet {
+  
+  using namespace std;
+  
+  void
+  CarpetUnusedMask (CCTK_ARGUMENTS)
+  {
+    DECLARE_CCTK_ARGUMENTS;
+    DECLARE_CCTK_PARAMETERS;
+
+    dh const & dd = *vdd.AT(map);
+    ibbox const & ext = dd.light_boxes.AT(mglevel).AT(reflevel).AT(component).exterior;
+    ibset const & unused_region = dd.local_boxes.AT(mglevel).AT(reflevel).AT(local_component).unused_region;
+
+    assert(dim == 3);
+    // Zero out
+    LOOP_OVER_BSET (cctkGH, ext, box, imin, imax) {
+#pragma omp parallel
+      LC_LOOP3(unused_mask_zero, i, j, k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2],
+               cctk_lsh[0],cctk_lsh[1],cctk_lsh[2]) {
+        CCTK_INT i3D = CCTK_GFINDEX3D(cctkGH, i, j, k);
+        carpet_unusedpoints_mask[i3D] = 0;
+      } LC_ENDLOOP3(unused_mask_zero);
+    } END_LOOP_OVER_BSET;
+
+    // Set it where unused
+    LOOP_OVER_BSET (cctkGH, unused_region, box, imin, imax) {
+#pragma omp parallel
+      LC_LOOP3(unused_mask_set, i, j, k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2],
+               cctk_lsh[0],cctk_lsh[1],cctk_lsh[2]) {
+        CCTK_INT i3D = CCTK_GFINDEX3D(cctkGH, i, j, k);
+        carpet_unusedpoints_mask[i3D] = 1;
+      } LC_ENDLOOP3(unused_mask_set);
+    } END_LOOP_OVER_BSET;
+
+  } // CarpetUnusedMask
+
+}
diff --git a/Carpet/Carpet/src/carpet.hh b/Carpet/Carpet/src/carpet.hh
index 3ca8535fe..37003acd3 100644
--- a/Carpet/Carpet/src/carpet.hh
+++ b/Carpet/Carpet/src/carpet.hh
@@ -23,6 +23,7 @@ namespace Carpet {
     int CarpetMultiModelStartup (void);
     void CarpetParamCheck (CCTK_ARGUMENTS);
     void CarpetRefineTimeStep (CCTK_ARGUMENTS);
+    void CarpetUnusedMask (CCTK_ARGUMENTS);
   }
   
   // Registered functions
diff --git a/Carpet/Carpet/src/functions.hh b/Carpet/Carpet/src/functions.hh
index 5f1539a36..676abea57 100644
--- a/Carpet/Carpet/src/functions.hh
+++ b/Carpet/Carpet/src/functions.hh
@@ -190,7 +190,7 @@ namespace Carpet {
                                CCTK_REAL messages, CCTK_REAL bytes);
   void UpdateTimingStats (cGH const * cctkGH);
   void PrintTimingStats (cGH const * cctkGH);
-  
+
 } // namespace Carpet
 
 #endif // !defined(FUNCTIONS_HH)
diff --git a/Carpet/Carpet/src/helpers.cc b/Carpet/Carpet/src/helpers.cc
index c3999377f..c184c8c3f 100644
--- a/Carpet/Carpet/src/helpers.cc
+++ b/Carpet/Carpet/src/helpers.cc
@@ -110,7 +110,10 @@ namespace Carpet {
     if (tl < activetimelevels) {
       int const var = varindex - CCTK_FirstVarIndexI (groupindex);
       ggf * const ff = arrdata.AT(groupindex).AT(m).data.AT(var);
-      gdata * const data = (*ff) (tl, rl, c, mglevel);
+      int const lc = arrdata.AT(groupindex).AT(m).hh->get_local_component(rl, c);
+      assert (lc >= 0 and
+              lc < arrdata.AT(groupindex).AT(m).hh->local_components(rl));
+      gdata * const data = (*ff) (tl, rl, lc, mglevel);
       return data->storage();
     } else {
       return NULL;
diff --git a/Carpet/Carpet/src/make.code.defn b/Carpet/Carpet/src/make.code.defn
index 8edcaac69..c62b631a7 100644
--- a/Carpet/Carpet/src/make.code.defn
+++ b/Carpet/Carpet/src/make.code.defn
@@ -22,6 +22,7 @@ SRCS =	CallFunction.cc				\
 	Storage.cc				\
 	Timers.cc				\
 	Timing.cc				\
+  UnusedMask.cc   \
 	helpers.cc				\
 	modes.cc				\
 	variables.cc
diff --git a/Carpet/CarpetIOHDF5/src/OutputSlice.cc b/Carpet/CarpetIOHDF5/src/OutputSlice.cc
index d6459c094..ec5cdcd2f 100644
--- a/Carpet/CarpetIOHDF5/src/OutputSlice.cc
+++ b/Carpet/CarpetIOHDF5/src/OutputSlice.cc
@@ -1319,6 +1319,7 @@ namespace CarpetIOHDF5 {
           delta[d] = (coord_upper[dirs[d]] - coord_lower[dirs[d]]) /
             (gfext.upper()[dirs[d]] - gfext.lower()[dirs[d]]) * gfext.stride()[dirs[d]];
           origin[d] += (org1[dirs[d]] - gfext.lower()[dirs[d]]) * delta[d];
+          iorigin[d] /= gfext.stride()[dirs[d]];
         }
       }
 
diff --git a/Carpet/CarpetInterp/src/interp.cc b/Carpet/CarpetInterp/src/interp.cc
index 0f809926e..065dd87cd 100644
--- a/Carpet/CarpetInterp/src/interp.cc
+++ b/Carpet/CarpetInterp/src/interp.cc
@@ -142,6 +142,7 @@ namespace CarpetInterp {
                                 vector<CCTK_INT> const & interp_num_time_levels,
                                 CCTK_INT const local_interp_handle,
                                 CCTK_INT const param_table_handle,
+                                int rl, int m, int lc,
                                 vector<int> const & num_tl,
                                 vector<bool> const & need_time_interp,
                                 CCTK_REAL const current_time,
@@ -463,10 +464,12 @@ namespace CarpetInterp {
         assert (it != homecntsmap.end());
         int const idx = it->second;
         assert (idx < (int)totalhomecnts.size());
+        int mytmpcnt;
 #pragma omp critical
         {
-          indices.AT(n) = totalhomecnts.AT(idx) + tmpcnts.AT(idx)++;
+          mytmpcnt = tmpcnts.AT(idx)++;
         }
+        indices.AT(n) = totalhomecnts.AT(idx) + mytmpcnt;
       }
       assert (tmpcnts == allhomecnts);
     }
@@ -1224,6 +1227,7 @@ namespace CarpetInterp {
       if (not (rl >= minrl and rl < maxrl) or
           not (c >= 0 and c < hh->components(rl)))
       {
+#pragma omp critical
         cout << "CI: m=" << m << " rl=" << rl << " c=" << c << " ext=" << hh->extent(ml,rl,c) << endl;
       }
       assert (rl >= minrl and rl < maxrl);
@@ -1240,13 +1244,12 @@ namespace CarpetInterp {
       rlev.AT(n) = rl;
       home.AT(n) = c;
       int const cidx = component_idx (procs.AT(n), m, rl, c);
-// only scalars of intrinsic type can be atomically updated
-//#pragma omp atomic
 #pragma omp critical
       {
+        // Increase counter, creating a new hash element if necessary
         homecntsmap[cidx]++;
       }
-
+      
     } // for n
 
     // allocate and fill the (dense) homecnts vector from the hash map
@@ -1334,71 +1337,63 @@ namespace CarpetInterp {
       }
     }
 #endif
-
-    // TODO: Don't use explicit mode switching; code the loops
-    // manually
-    BEGIN_GLOBAL_MODE(cctkGH) {
-      for (int rl=minrl; rl<maxrl; ++rl) {
-        ENTER_LEVEL_MODE (cctkGH, rl) {
-
-          // Number of neccessary time levels
-          CCTK_REAL const level_time = cctkGH->cctk_time;
-          vector<int> num_tl (N_input_arrays, 0);
-          vector<bool> need_time_interp (N_output_arrays);
-          for (int m=0; m<N_output_arrays; ++m) {
-            need_time_interp.AT(m)
-              = (num_time_derivs.AT(m) > 0
-                 or (fabs(current_time - level_time)
-                     > 1e-12 * (fabs(level_time) + fabs(current_time)
-                                + fabs(cctkGH->cctk_delta_time))));
-            assert (not (not want_global_mode
-                         and num_time_derivs.AT(m) == 0
-                         and need_time_interp.AT(m)));
-            int const n = operand_indices.AT(m);
-            num_tl.AT(n)
-              = max (num_tl.AT(n),
-                     (need_time_interp.AT(m)
-                      ? max (num_time_derivs.AT(m) + 1,
-                             prolongation_order_time + 1)
-                      : 1));
-          }
-
-          BEGIN_LOCAL_MAP_LOOP(cctkGH, CCTK_GF) {
-            BEGIN_LOCAL_COMPONENT_LOOP(cctkGH, CCTK_GF) {
-
-              for (int p = 0; p < dist::size(); p++) {
-                // short cut if there's nothing to interpolate for processor p
-                if (recvcnt[p] <= 0) continue;
-
-                int const cidx =
-                  component_idx (p, Carpet::map, reflevel, component);
-                std::map<int,int>::const_iterator it = homecntsmap.find(cidx);
-                if (it != homecntsmap.end()) {
-                  //cout << "CarpetInterp: interpolate_single_component: rl=" << reflevel << " map=" << (Carpet::map) << " component=" << component << " p=" << p << endl;
-                  int const idx = it->second;
-                  assert (idx < (int)homecnts.size());
-                  interpolate_single_component
-                    (cctkGH, coord_system_handle, coord_group,
-                     N_dims,
-                     homecnts.AT(idx), coords.AT(idx), outputs.AT(idx),
-                     per_proc_statuses[p], per_proc_retvals[p],
-                     operand_indices, time_deriv_order, interp_num_time_levels,
-                     local_interp_handle, param_table_handle,
-                     num_tl, need_time_interp, current_time, delta_time,
-                     N_input_arrays, N_output_arrays,
-                     output_array_type_codes,
-                     input_array_variable_indices);
-                }
-              } // for p
-
-            } END_LOCAL_COMPONENT_LOOP;
-          } END_LOCAL_MAP_LOOP;
-
-        } LEAVE_LEVEL_MODE;
-      } // for rl
-
-    } END_GLOBAL_MODE;
-
+    
+    int const tl = 0;
+    for (int rl=minrl; rl<maxrl; ++rl) {
+      
+      // Number of neccessary time levels
+      // CCTK_REAL const level_time = cctkGH->cctk_time;
+      CCTK_REAL const level_time = tt->get_time(mglevel, rl, tl);
+      vector<int> num_tl (N_input_arrays, 0);
+      vector<bool> need_time_interp (N_output_arrays);
+      for (int m=0; m<N_output_arrays; ++m) {
+        need_time_interp.AT(m) =
+          num_time_derivs.AT(m) > 0 or
+          (fabs(current_time - level_time) >
+           1e-12 * (fabs(level_time) + fabs(current_time) +
+                    fabs(cctkGH->cctk_delta_time)));
+        assert (not (not want_global_mode and
+                     num_time_derivs.AT(m) == 0 and
+                     need_time_interp.AT(m)));
+        int const n = operand_indices.AT(m);
+        num_tl.AT(n) =
+          max (num_tl.AT(n),
+               (need_time_interp.AT(m) ?
+                max (num_time_derivs.AT(m) + 1, prolongation_order_time + 1) :
+                1));
+      }
+      
+#warning "TODO: Loop only over those maps and components that exist for this variable group"
+      for (int m=0; m<maps; ++m) {
+        for (int lc=0; lc<vhh.AT(m)->local_components(rl); ++lc) {
+          int const c = vhh.AT(m)->get_component(rl,lc);
+          for (int p=0; p<dist::size(); ++p) {
+            // short cut if there's nothing to interpolate for processor p
+            if (recvcnt[p] <= 0) continue;
+            
+            int const cidx = component_idx (p, m, rl, c);
+            std::map<int,int>::const_iterator it = homecntsmap.find(cidx);
+            if (it != homecntsmap.end()) {
+              int const idx = it->second;
+              assert (idx < (int)homecnts.size());
+              interpolate_single_component
+                (cctkGH, coord_system_handle, coord_group,
+                 N_dims,
+                 homecnts.AT(idx), coords.AT(idx), outputs.AT(idx),
+                 per_proc_statuses[p], per_proc_retvals[p],
+                 operand_indices, time_deriv_order, interp_num_time_levels,
+                 local_interp_handle, param_table_handle,
+                 rl, m, lc,
+                 num_tl, need_time_interp, current_time, delta_time,
+                 N_input_arrays, N_output_arrays,
+                 output_array_type_codes,
+                 input_array_variable_indices);
+            }
+          } // for p
+        }   // for lc
+      }     // for m
+    }       // for rl
+    
     timer.stop (0);
   }
 
@@ -1413,11 +1408,11 @@ namespace CarpetInterp {
   class InterpolationTimes : private vector<CCTK_REAL>
   {
   public:
-    InterpolationTimes (CCTK_INT num_timelevels_ )
+    InterpolationTimes (int const rl, int const num_timelevels_ )
       : vector<CCTK_REAL> (num_timelevels_ )
     {
       for (int tl=0; tl<num_timelevels_; ++tl) {
-        at(tl) = tt->get_time (mglevel, reflevel, tl);
+        at(tl) = tt->get_time (mglevel, rl, tl);
       }
     }
 
@@ -1546,8 +1541,9 @@ namespace CarpetInterp {
       at(2) = 2 / (dt * dt * (t[2] - t[0]) * (t[2] - t[1]));
     }
   };
-
-
+  
+  
+  
   static void
   interpolate_single_component (cGH const* const cctkGH,
                                 int const coord_system_handle,
@@ -1563,6 +1559,7 @@ namespace CarpetInterp {
                                 vector<CCTK_INT> const & interp_num_time_levels,
                                 CCTK_INT const local_interp_handle,
                                 CCTK_INT const param_table_handle,
+                                int const rl, int const m, int const lc,
                                 vector<int> const & num_tl,
                                 vector<bool> const & need_time_interp,
                                 CCTK_REAL const current_time,
@@ -1574,7 +1571,7 @@ namespace CarpetInterp {
   {
     static Timer timer ("CarpetInterp::interpolate_single_component");
     timer.start();
-
+    
     // Find out the datatypes of all input grid arrays
     vector<CCTK_INT> input_array_type_codes(N_input_arrays, -1);
     vector<const void *> input_arrays(N_input_arrays);
@@ -1588,20 +1585,19 @@ namespace CarpetInterp {
     // Do the processor-local interpolation
     // Find out about the local geometry
     rvect lower, upper, delta;
-
+    
     // Get global origin and spacing of the underlying coordinate system
     int const grouptype = CCTK_GroupTypeI (coord_group);
     switch (grouptype) {
-
+      
     case CCTK_GF: {
       jvect gsh;
-      GetCoordRange (cctkGH, Carpet::map, mglevel, dim,
+      GetCoordRange (cctkGH, m, mglevel, dim,
                      & gsh[0], & lower[0], & upper[0], & delta[0]);
-      //cout << "CarpetInterp single: m=" << (Carpet::map) << " gsh=" << gsh << " lower=" << lower << " upper=" << upper << " delta=" << delta << endl;
       delta /= maxspacereflevelfact;
       break;
     }
-
+      
     case CCTK_SCALAR:
     case CCTK_ARRAY: {
 #ifdef NEW_COORD_API
@@ -1614,14 +1610,15 @@ namespace CarpetInterp {
       char const * const coord_system_name =
         CCTK_CoordSystemName (coord_system_handle);
       assert (CCTK_CoordSystemDim (coord_system_name) >= N_dims);
-
+      
       for (int d = 0; d < N_dims; ++ d) {
         int const iret = CCTK_CoordRange (cctkGH, &lower[d], &upper[d], d+1,
                                           NULL, coord_system_name);
         assert (iret == 0);
       }
-
-      int const m = 0;
+      
+      // int const m = 0;
+      assert (m == 0);          // We may be looping over too many maps
       // delta for the Carpet grid indices
       ibbox const & baseextent =
         arrdata.AT(coord_group).AT(m).hh->baseextents.AT(mglevel).AT(0);
@@ -1629,80 +1626,92 @@ namespace CarpetInterp {
 #endif
       break;
     }
-
+      
     default:
       assert (0);
     }
-
+    
     // Get processor-local origin and spacing
-    cGroupDynamicData coord_group_data;
-    CCTK_GroupDynamicData (cctkGH, coord_group, &coord_group_data);
+    // cGroupDynamicData coord_group_data;
+    // CCTK_GroupDynamicData (cctkGH, coord_group, &coord_group_data);
     // To do: do this via hh->bases instead
+    const ibbox& coarseext = vhh.AT(m)->baseextents.AT(mglevel).AT(0);
+    const ibbox& baseext = vhh.AT(m)->baseextents.AT(mglevel).AT(rl);
+    int const c = vhh.AT(m)->get_component(rl,lc);
+    const ibbox& ext = vdd.AT(m)->light_boxes.AT(mglevel).AT(rl).AT(c).exterior;
+    ivect const lsh = gdata::allocated_memory_shape (ext);
     for (int d = 0; d < N_dims; ++d) {
+      // if (grouptype == CCTK_GF) {
+      //   assert (maxspacereflevelfact[d] % cctkGH->cctk_levfac[d] == 0);
+      //   delta[d] *= maxspacereflevelfact[d] / cctkGH->cctk_levfac[d];
+      //   lower[d] += (delta[d] *
+      //                (cctkGH->cctk_lbnd[d] +
+      //                 1.0 * cctkGH->cctk_levoff[d] /
+      //                 cctkGH->cctk_levoffdenom[d]));
+      // } else {
+      //   lower[d] += delta[d] * cctkGH->cctk_lbnd[d];
+      // }
       if (grouptype == CCTK_GF) {
-        assert (maxspacereflevelfact[d] % cctkGH->cctk_levfac[d] == 0);
-        delta[d] *= maxspacereflevelfact[d] / cctkGH->cctk_levfac[d];
-        lower[d] += (delta[d] *
-                     (cctkGH->cctk_lbnd[d] +
-                      1.0 * cctkGH->cctk_levoff[d] /
-                      cctkGH->cctk_levoffdenom[d]));
+        assert (maxspacereflevelfact[d] % spacereffacts.AT(rl)[d] == 0);
+        delta[d] *= maxspacereflevelfact[d] / spacereffacts.AT(rl)[d];
+        ivect const lbnd = (ext.lower() - baseext.lower()) / ext.stride();
+        ivect const levoff = baseext.lower() - coarseext.lower();
+        ivect const levoffdenom = baseext.stride();
+        lower[d] += delta[d] * (lbnd[d] + 1.0 * levoff[d] / levoffdenom[d]);
       } else {
-        lower[d] += delta[d] * cctkGH->cctk_lbnd[d];
+        ivect const lbnd = (ext.lower() - baseext.lower()) / ext.stride();
+        lower[d] += delta[d] * lbnd[d];
       }
     }
-
+    
     void const* tmp_coords[dim];
     for (int d = 0; d < N_dims; ++d) {
       tmp_coords[d] = coords + d*npoints;
     }
-
+    
     int max_num_tl = 0;
-    for (int m=0; m<N_input_arrays; ++m) {
-      max_num_tl = max(max_num_tl, num_tl.AT(m));
+    for (int n=0; n<N_input_arrays; ++n) {
+      max_num_tl = max(max_num_tl, num_tl.AT(n));
     }
     vector<vector<void *> > tmp_output_arrays (max_num_tl);
-
+    
     for (int tl=0; tl<max_num_tl; ++tl) {
-
+      
       for (int n=0; n<N_input_arrays; ++n) {
-
+        
         int const vi = input_array_variable_indices[n];
         assert (vi == -1 or (vi >= 0 and vi < CCTK_NumVars()));
-
+        
         if (vi == -1) {
           input_arrays[n] = NULL;
         } else {
           int const interp_num_tl =
             interp_num_time_levels.AT(n) > 0 ?
             interp_num_time_levels.AT(n) : num_tl.AT(n);
-
+          
           // Do a dummy interpolation from a later timelevel
           // if the desired timelevel does not exist
           int const my_tl = tl >= interp_num_tl ? 0 : tl;
           assert (my_tl < num_tl.AT(n));
-
+          
           // Are there enough time levels?
           int const active_tl = CCTK_ActiveTimeLevelsVI (cctkGH, vi);
           if (active_tl <= my_tl) {
             char * const fullname = CCTK_FullName(vi);
             CCTK_VWarn (0, __LINE__, __FILE__, CCTK_THORNSTRING,
                         "Grid function \"%s\" has only %d active time levels on refinement level %d; this is not enough for time interpolation",
-                        fullname, active_tl, reflevel);
+                        fullname, active_tl, rl);
             free (fullname);
           }
-
-#if 0
-          input_arrays[n] = CCTK_VarDataPtrI (cctkGH, my_tl, vi);
-#else
+          
           int const gi = CCTK_GroupIndexFromVarI (vi);
           int const vi0 = CCTK_FirstVarIndexI (gi);
-          input_arrays[n]
-            = ((*arrdata.AT(gi).AT(Carpet::map).data.AT(vi-vi0))
-               (my_tl, reflevel, local_component, mglevel)->storage());
-#endif
+          input_arrays[n] =
+            (*arrdata.AT(gi).AT(m).data.AT(vi-vi0))(my_tl, rl, lc, mglevel)->
+            storage();
         }
       } // for input arrays
-
+      
       tmp_output_arrays[tl].resize (N_output_arrays);
       for (int j=0; j<N_output_arrays; ++j) {
         if (need_time_interp.AT(j)) {
@@ -1717,22 +1726,22 @@ namespace CarpetInterp {
           tmp_output_arrays[tl][j] = outputs + j*npoints*vartypesize;
         }
       }
-
+      
       vector<CCTK_INT> per_point_status (npoints);
       int ierr = Util_TableSetPointer
         (param_table_handle, &per_point_status.front(), "per_point_status");
       assert (ierr >= 0);
-
-      vector<CCTK_INT> lsh(N_dims);
-      for (int d=0; d<N_dims; ++d) {
-        lsh.AT(d) = coord_group_data.lsh[d];
-      }
+      
+      // vector<CCTK_INT> lsh(N_dims);
+      // for (int d=0; d<N_dims; ++d) {
+      //   lsh.AT(d) = coord_group_data.lsh[d];
+      // }
       const int retval = CCTK_InterpLocalUniform
         (N_dims, local_interp_handle, param_table_handle,
          &lower[0], &delta[0],
          npoints,
          CCTK_VARIABLE_REAL, tmp_coords,
-         N_input_arrays, & lsh.front(),
+         N_input_arrays, & lsh[0],
          &input_array_type_codes.front(), &input_arrays.front(),
          N_output_arrays,
          output_array_type_codes, &tmp_output_arrays[tl].front());
@@ -1740,31 +1749,31 @@ namespace CarpetInterp {
         CCTK_VWarn (CCTK_WARN_DEBUG, __LINE__, __FILE__, CCTK_THORNSTRING,
                     "The local interpolator returned the error code %d",retval);
       }
-
+      
       overall_retval = min (overall_retval, (CCTK_INT)retval);
       for (int n = 0; n < (int)per_point_status.size(); n++) {
         overall_status = min (overall_status, per_point_status[n]);
       }
       ierr = Util_TableDeleteKey (param_table_handle, "per_point_status");
-      assert (! ierr);
-
+      assert (not ierr);
+      
     } // for tl
-
+    
     // Interpolate in time, if neccessary
     for (int j=0; j<N_output_arrays; ++j) {
       if (need_time_interp.AT(j)) {
-
+        
         // Find input array for this output array
         int const n = operand_indices.AT(j);
         CCTK_INT const deriv_order = time_deriv_order.AT(j);
-
+        
         int const interp_num_tl =
           interp_num_time_levels.AT(n) > 0 ?
           interp_num_time_levels.AT(n) : num_tl.AT(n);
-        const InterpolationTimes times (interp_num_tl);
+        const InterpolationTimes times (rl, interp_num_tl);
         const InterpolationWeights tfacs (deriv_order, times, current_time,
                                           delta_time);
-
+        
         // Interpolate
         assert (output_array_type_codes[j] == CCTK_VARIABLE_REAL);
         for (int k=0; k<npoints; ++k) {
@@ -1777,14 +1786,14 @@ namespace CarpetInterp {
             dest += tfacs[tl] * src;
           }
         }
-
+        
         for (int tl=0; tl<max_num_tl; ++tl) {
           delete [] (CCTK_REAL *)tmp_output_arrays[tl][j];
         }
-
+        
       } // if need_time_interp
     } // for j
-
+    
     timer.stop (0);
   }
 
diff --git a/Carpet/CarpetInterp/test/waveinterp-1p.par b/Carpet/CarpetInterp/test/waveinterp-1p.par
index 8a07bf51d..aa1265902 100644
--- a/Carpet/CarpetInterp/test/waveinterp-1p.par
+++ b/Carpet/CarpetInterp/test/waveinterp-1p.par
@@ -14,7 +14,9 @@ IO::parfile_write = "no"
 
 
 
-ActiveThorns = "LocalInterp AEILocalInterp LocalReduce"
+ActiveThorns = "InitBase LocalInterp AEILocalInterp LocalReduce"
+
+#InitBase::initial_data_setup_method = "init_single_level"
 
 
 
@@ -31,6 +33,7 @@ Carpet::prolongation_order_time  = 2
 Carpet::convergence_level = 0
 
 Carpet::init_3_timelevels = yes
+#Carpet::init_each_timelevel = yes
 
 
 
@@ -85,6 +88,10 @@ MoL::ODE_Method = RK3
 
 ActiveThorns = "WaveMoL"
 
+WaveMoL::num_timelevels = 3
+
+WaveMoL::bound = "flat"
+
 
 
 ActiveThorns = "IDWaveMoL"
@@ -136,6 +143,8 @@ IOScalar::outScalar_vars       = "
 
 ActiveThorns = "CarpetIOASCII"
 
+IOASCII::output_all_timelevels = yes
+
 IOASCII::out0D_every = 1
 IOASCII::out0D_vars  = "
         WaveMoL::scalarevolvemol_scalar
diff --git a/Carpet/CarpetInterp/test/waveinterp-2p.par b/Carpet/CarpetInterp/test/waveinterp-2p.par
index 8a07bf51d..d25c12b5c 100644
--- a/Carpet/CarpetInterp/test/waveinterp-2p.par
+++ b/Carpet/CarpetInterp/test/waveinterp-2p.par
@@ -14,7 +14,7 @@ IO::parfile_write = "no"
 
 
 
-ActiveThorns = "LocalInterp AEILocalInterp LocalReduce"
+ActiveThorns = "InitBase LocalInterp AEILocalInterp LocalReduce"
 
 
 
@@ -85,6 +85,8 @@ MoL::ODE_Method = RK3
 
 ActiveThorns = "WaveMoL"
 
+WaveMoL::num_timelevels = 3
+
 
 
 ActiveThorns = "IDWaveMoL"
diff --git a/Carpet/CarpetInterp2/src/fasterp.cc b/Carpet/CarpetInterp2/src/fasterp.cc
index c59330556..6f42fe2b3 100644
--- a/Carpet/CarpetInterp2/src/fasterp.cc
+++ b/Carpet/CarpetInterp2/src/fasterp.cc
@@ -1060,6 +1060,9 @@ namespace CarpetInterp2 {
     if (verbose) CCTK_VInfo (CCTK_THORNSTRING,
                              "Interpolating %d variables", int(nvars));
     assert (values.size() == nvars);
+    
+    if (nvars == 0) return;
+    
     for (size_t v=0; v<values.size(); ++v) {
       int const vi = varinds.AT(v);
       assert (vi >= 0 and vi <= CCTK_NumVars());
diff --git a/Carpet/CarpetLib/src/balance.cc b/Carpet/CarpetLib/src/balance.cc
index e9f5cad6a..304e1e9ef 100644
--- a/Carpet/CarpetLib/src/balance.cc
+++ b/Carpet/CarpetLib/src/balance.cc
@@ -23,7 +23,7 @@ namespace CarpetLib {
     item_ifc split (CCTK_REAL ratio_new_over_old);
   };
   
-  // Declaration of template functions implementing an equivalen
+  // Declaration of template functions implementing an equivalent
   // interface
   template <typename item_t>
   CCTK_REAL
diff --git a/Carpet/CarpetLib/src/bbox.cc b/Carpet/CarpetLib/src/bbox.cc
index e0b72f1a6..c5a429d5e 100644
--- a/Carpet/CarpetLib/src/bbox.cc
+++ b/Carpet/CarpetLib/src/bbox.cc
@@ -174,6 +174,22 @@ bbox<T,D> bbox<T,D>::expand (const vect<T,D>& lo, const vect<T,D>& hi) const {
   return bbox(lb,ub,str);
 }
 
+// Expand the bbox a little by multiples of a fraction of the stride
+template<typename T, int D>
+bbox<T,D> bbox<T,D>::expand (const vect<T,D>& lo, const vect<T,D>& hi,
+                             const vect<T,D>& denom) const
+{
+  // Allow expansion only into directions where the extent is not negative
+  // ASSERT_BBOX (all(lower()<=upper() or (lo==T(0) and hi==T(0))));
+  ASSERT_BBOX (all(shape()>=vect<T,D>(0) or (lo==T(0) and hi==T(0))));
+  ASSERT_BBOX (all(denom>vect<T,D>(0)));
+  const vect<T,D> str = stride();
+  ASSERT_BBOX (all (str%denom==vect<T,D>(0)));
+  const vect<T,D> lb = lower() - lo * str / denom;
+  const vect<T,D> ub = upper() + hi * str / denom;
+  return bbox(lb,ub,str);
+}
+
 // Find the smallest b-compatible box around *this
 template<typename T, int D>
 bbox<T,D> bbox<T,D>::expanded_for (const bbox& b) const {
diff --git a/Carpet/CarpetLib/src/bbox.hh b/Carpet/CarpetLib/src/bbox.hh
index d2dac83cc..1c3d5d407 100644
--- a/Carpet/CarpetLib/src/bbox.hh
+++ b/Carpet/CarpetLib/src/bbox.hh
@@ -101,6 +101,9 @@ public:
   /** Get stride.  */
   vect<T,D> stride () const { return _stride; }
   
+  /** Get offset.  */
+  vect<T,D> offset () const { return (_lower % _stride + _stride) % _stride; }
+  
   /** Get the shape (or extent).  */
   vect<T,D> shape () const { return _upper - _lower + _stride; }
   
@@ -153,6 +156,21 @@ public:
   bbox expand (const vect<vect<T,D>,2>& lohi) const
   { return expand (lohi[0], lohi[1]); }
   
+  /** Shift the bbox by multiples of the stride.  */
+  bbox shift (const vect<T,D>& v) const
+  { return expand (-v, v); }
+  
+  /** Expand (enlarge) the bbox by multiples of a fraction of the
+      stride.  */
+  bbox expand (const vect<T,D>& lo, const vect<T,D>& hi,
+               const vect<T,D>& denom) const;
+  bbox expand (const vect<vect<T,D>,2>& lohi, const vect<T,D>& denom) const
+  { return expand (lohi[0], lohi[1], denom); }
+  
+  /** Shift the bbox by multiples of a fraction of the stride.  */
+  bbox shift (const vect<T,D>& v, const vect<T,D>& denom) const
+  { return expand (-v, v, denom); }
+  
   /** Find the smallest b-compatible box around this bbox.
       ("compatible" means having the same stride.)  */
   bbox expanded_for (const bbox& b) const;
diff --git a/Carpet/CarpetLib/src/bboxset.cc b/Carpet/CarpetLib/src/bboxset.cc
index 7ffd61699..a830c0b73 100644
--- a/Carpet/CarpetLib/src/bboxset.cc
+++ b/Carpet/CarpetLib/src/bboxset.cc
@@ -464,7 +464,9 @@ bboxset<T,D> bboxset<T,D>::pseudo_inverse (const int n) const {
 }
 
 template<typename T, int D>
-bboxset<T,D> bboxset<T,D>::expand (const vect<T,D>& lo, const vect<T,D>& hi) const {
+bboxset<T,D> bboxset<T,D>::expand (const vect<T,D>& lo, const vect<T,D>& hi)
+  const
+{
   // We don't know (yet?) how to shrink a set
   assert (all (lo>=0 and hi>=0));
   bboxset res;
@@ -475,6 +477,27 @@ bboxset<T,D> bboxset<T,D>::expand (const vect<T,D>& lo, const vect<T,D>& hi) con
 }
 
 template<typename T, int D>
+bboxset<T,D> bboxset<T,D>::expand (const vect<T,D>& lo, const vect<T,D>& hi,
+                                   const vect<T,D>& denom) const
+{
+  assert (all(denom > vect<T,D>(0)));
+  bboxset res;
+  if (all (lo == -hi)) {
+    // Special case for shifting, since this is faster
+    for (const_iterator bi=begin(); bi!=end(); ++bi) {
+      res += (*bi).expand(lo,hi,denom);
+    }
+  } else {
+    // We don't know (yet?) how to shrink a set
+    assert (all ((lo>=0 and hi>=0) or (lo == hi)));
+    for (const_iterator bi=begin(); bi!=end(); ++bi) {
+      res |= (*bi).expand(lo,hi,denom);
+    }
+  }
+  return res;
+}
+
+template<typename T, int D>
 bboxset<T,D> bboxset<T,D>::expanded_for (const box& b) const {
   bboxset res;
   for (const_iterator bi=begin(); bi!=end(); ++bi) {
diff --git a/Carpet/CarpetLib/src/bboxset.hh b/Carpet/CarpetLib/src/bboxset.hh
index 5624ef4b1..23b2dc18c 100644
--- a/Carpet/CarpetLib/src/bboxset.hh
+++ b/Carpet/CarpetLib/src/bboxset.hh
@@ -142,11 +142,26 @@ public:
   /** Find the pseudo-inverse.  */
   bboxset pseudo_inverse (const int n) const;
   
-  /** Expand (enlarge) the bbox by multiples of the stride.  */
+  /** Expand (enlarge) the bboxset by multiples of the stride.  */
   bboxset expand (const vect<T,D>& lo, const vect<T,D>& hi) const;
   bboxset expand (const vect<vect<T,D>,2>& lohi) const
   { return expand (lohi[0], lohi[1]); }
   
+  /** Shift the bboxset by multiples of the stride.  */
+  bboxset shift (const vect<T,D>& v) const
+  { return expand (-v, v); }
+  
+  /** Expand (enlarge) the bboxset by multiples of a fraction of the
+      stride.  */
+  bboxset expand (const vect<T,D>& lo, const vect<T,D>& hi,
+                  const vect<T,D>& denom) const;
+  bboxset expand (const vect<vect<T,D>,2>& lohi, const vect<T,D>& denom) const
+  { return expand (lohi[0], lohi[1], denom); }
+  
+  /** Shift the bboxset by multiples of a fraction of the stride.  */
+  bboxset shift (const vect<T,D>& v, const vect<T,D>& denom) const
+  { return expand (-v, v, denom); }
+  
   /** Find the smallest b-compatible box around this bbox.
       ("compatible" means having the same stride.)  */
   bboxset expanded_for (const box& b) const;
diff --git a/Carpet/CarpetLib/src/commstate.cc b/Carpet/CarpetLib/src/commstate.cc
index a340c0896..f3a6d7ebf 100644
--- a/Carpet/CarpetLib/src/commstate.cc
+++ b/Carpet/CarpetLib/src/commstate.cc
@@ -223,9 +223,7 @@ void comm_state::step ()
                 size_t const nbytes =
                   procbuf.sendbufsize * datatypesize *
                   (message_size_multiplier - 1);
-#warning "TODO"
-                // memset (procbuf.sendbuf, poison_value, nbytes);
-                memset (procbuf.sendbuf, 0, nbytes);
+                memset (procbuf.sendbuf, poison_value, nbytes);
               }
               
               int const tag = type;
diff --git a/Carpet/CarpetLib/src/data.cc b/Carpet/CarpetLib/src/data.cc
index 484582073..2c76c6a3b 100644
--- a/Carpet/CarpetLib/src/data.cc
+++ b/Carpet/CarpetLib/src/data.cc
@@ -975,16 +975,52 @@ transfer_restrict (data const * const src,
                         this->extent(),
                         box);
       break;
-    case cell_centered:
-      call_operator<T> (& restrict_3d_cc_rf2,
-                        static_cast <T const *> (src->storage()),
-                        src->shape(),
-                        static_cast <T *> (this->storage()),
-                        this->shape(),
-                        src->extent(),
-                        this->extent(),
-                        box);
+    case cell_centered: {
+      assert (all (box.stride() == this->extent().stride()));
+      ivect const izero (0);
+      ivect const ioff = box.lower() - this->extent().lower();
+      ivect const is_centered = ioff % this->extent().stride() == izero;
+      if (all(is_centered == ivect(1,1,1))) {
+        call_operator<T> (& restrict_3d_cc_rf2,
+                          static_cast <T const *> (src->storage()),
+                          src->shape(),
+                          static_cast <T *> (this->storage()),
+                          this->shape(),
+                          src->extent(),
+                          this->extent(),
+                          box);
+      } else if (all(is_centered == ivect(0,1,1))) {
+        call_operator<T> (& restrict_3d_vc_rf2<T,0,1,1>,
+                          static_cast <T const *> (src->storage()),
+                          src->shape(),
+                          static_cast <T *> (this->storage()),
+                          this->shape(),
+                          src->extent(),
+                          this->extent(),
+                          box);
+      } else if (all(is_centered == ivect(1,0,1))) {
+        call_operator<T> (& restrict_3d_vc_rf2<T,1,0,1>,
+                          static_cast <T const *> (src->storage()),
+                          src->shape(),
+                          static_cast <T *> (this->storage()),
+                          this->shape(),
+                          src->extent(),
+                          this->extent(),
+                          box);
+      } else if (all(is_centered == ivect(1,1,0))) {
+        call_operator<T> (& restrict_3d_vc_rf2<T,1,1,0>,
+                          static_cast <T const *> (src->storage()),
+                          src->shape(),
+                          static_cast <T *> (this->storage()),
+                          this->shape(),
+                          src->extent(),
+                          this->extent(),
+                          box);
+      } else {
+        assert (0);
+      }
       break;
+    }
     default:
       assert (0);
     }
diff --git a/Carpet/CarpetLib/src/dh.cc b/Carpet/CarpetLib/src/dh.cc
index 9679b3fd7..7ecba6785 100644
--- a/Carpet/CarpetLib/src/dh.cc
+++ b/Carpet/CarpetLib/src/dh.cc
@@ -28,6 +28,27 @@ list<dh*> dh::alldh;
 
 
 
+vect<vect<dh::srpvect dh::fast_dboxes::*,2>,dim>
+dh::fast_dboxes::
+fast_ref_refl_sendrecv;
+
+void
+dh::fast_dboxes::
+init_fast_ref_refl_sendrecv ()
+{
+  static bool initialised = false;
+  if (initialised) return;
+  initialised = true;
+  fast_ref_refl_sendrecv[0][0] = & fast_dboxes::fast_ref_refl_sendrecv_0_0;
+  fast_ref_refl_sendrecv[0][1] = & fast_dboxes::fast_ref_refl_sendrecv_0_1;
+  fast_ref_refl_sendrecv[1][0] = & fast_dboxes::fast_ref_refl_sendrecv_1_0;
+  fast_ref_refl_sendrecv[1][1] = & fast_dboxes::fast_ref_refl_sendrecv_1_1;
+  fast_ref_refl_sendrecv[2][0] = & fast_dboxes::fast_ref_refl_sendrecv_2_0;
+  fast_ref_refl_sendrecv[2][1] = & fast_dboxes::fast_ref_refl_sendrecv_2_1;
+}
+
+
+
 // Constructors
 dh::
 dh (gh & h_,
@@ -37,6 +58,7 @@ dh (gh & h_,
     ghost_widths(ghost_widths_), buffer_widths(buffer_widths_),
     prolongation_orders_space(prolongation_orders_space_)
 {
+  fast_dboxes::init_fast_ref_refl_sendrecv();
   size_t const maxreflevels = h.reffacts.size();
   assert (ghost_widths.size() >= maxreflevels);
   assert (buffer_widths.size() >= maxreflevels);
@@ -923,8 +945,8 @@ regrid (bool const do_init)
             full_dboxes const & box = full_level.AT(c);
             full_dboxes const & obox0 = full_boxes.AT(ml).AT(orl).AT(0);
             
-            // Refinement restriction may fill all active points, and
-            // must use all active points
+            // Refinement restriction may fill all coarse active
+            // points, and must use all fine active points
             
             ibset needrecv (box.active.contracted_for (obox0.interior));
             
@@ -932,7 +954,7 @@ regrid (bool const do_init)
               full_dboxes & obox = full_boxes.AT(ml).AT(orl).AT(cc);
               
               ibset const contracted_active
-                (box.active.contracted_for (obox0.interior));
+                (box.active.contracted_for (obox.interior));
               ibset const ovlp = obox.active & contracted_active;
               
               for (ibset::const_iterator
@@ -975,6 +997,10 @@ regrid (bool const do_init)
       static Carpet::Timer timer_mask ("CarpetLib::dh::regrid::mask");
       timer_mask.start();
       
+      // Declare this here to save it for later use. Contains all the boxes
+      // which are active minus the boundary
+      ibset all_refined;
+
       if (rl > 0) {
         int const orl = rl - 1;
         full_cboxes const& full_olevel = full_boxes.AT(ml).AT(orl);
@@ -985,97 +1011,143 @@ regrid (bool const do_init)
         ivect const reffact = h.reffacts.AT(rl) / h.reffacts.AT(orl);
         
         // This works only when the refinement factor is 2
-        if (all (reffact == 2)) {
-          
-          ibbox const& base = domain_exterior;
-          ibbox const& obase = h.baseextent(ml,orl);
-          
-          // Calculate the union of all coarse regions
-          ibset const allointr (full_olevel, & full_dboxes::interior);
-          
-          // Project to current level
-          ivect const rf(reffact);
-          // TODO: Why expand by rf-1?  This expansion shouldn't
-          // matter, since the coarse grid is larger than the fine
-          // grid anyway, except at the outer boundary
-          // ibset const parent (allointr.expand(rf-1,rf-1).contracted_for(base));
-          ibset const parent (allointr.expanded_for(base));
-          
-          // Subtract the active region
-          ibset const notrefined = parent - allactive;
-          
-          // Enlarge this set
-          vect<ibset,dim> enlarged;
-          for (int d=0; d<dim; ++d) {
-            switch (h.refcent) {
-            case vertex_centered: {
-              ivect const dir = ivect::dir(d);
-              enlarged[d] = ibset (notrefined.expand(dir, dir));
-              break;
-            }
-            case cell_centered: {
-              enlarged[d] = notrefined;
+        assert (all (reffact == 2));
+        
+        ibbox const& base = domain_exterior;
+        ibbox const& obase = h.baseextent(ml,orl);
+        
+        // Calculate the union of all coarse regions
+        ibset const allointr (full_olevel, & full_dboxes::interior);
+        
+        // Project to current level
+        ivect const rf(reffact);
+        ibset const parent (allointr.expanded_for(base));
+        
+        // Subtract the active region
+        ibset const notrefined = parent - allactive;
+        
+        // Enlarge this set
+        vect<ibset,dim> enlarged;
+        for (int d=0; d<dim; ++d) {
+          switch (h.refcent) {
+          case vertex_centered: {
+            ivect const dir = ivect::dir(d);
+            enlarged[d] = ibset (notrefined.expand(dir, dir));
+            break;
+          }
+          case cell_centered: {
+            enlarged[d] = notrefined;
 #warning "TODO: restriction boundaries are wrong (they are empty, but should not be) with cell centring when fine cell cut coarse cells"
-              bool const old_there_was_an_error = there_was_an_error;
-              ASSERT_rl (notrefined.contracted_for(obase).expanded_for(base) ==
-                         notrefined,
-                         "Refinement mask: Fine grid boundaries must be aligned with coarse grid cells");
-              // Do not abort because of this problem
-              there_was_an_error = old_there_was_an_error;
-              break;
-            }
-            default:
-              assert (0);
-            }
+            bool const old_there_was_an_error = there_was_an_error;
+            ASSERT_rl (notrefined.contracted_for(obase).expanded_for(base) ==
+                       notrefined,
+                       "Refinement mask: Fine grid boundaries must be aligned with coarse grid cells");
+            // Do not abort because of this problem
+            there_was_an_error = old_there_was_an_error;
+            break;
+          }
+          default:
+            assert (0);
           }
+        }
+        
+        // Intersect with the active region
+        vect<ibset,dim> all_boundaries;
+        for (int d=0; d<dim; ++d) {
+          all_boundaries[d] = allactive & enlarged[d];
+        }
+        
+#warning "TODO: Ensure that the prolongation boundaries all_boundaries are contained in the boundary prolongated region"
+        
+#warning "TODO: Ensure that the restriction boundaries and the restricted region are contained in the restricted region"
+        
+        // Subtract the boundaries from the refined region
+        all_refined = allactive;
+        for (int d=0; d<dim; ++d) {
+          all_refined -= all_boundaries[d];
+        }
+        
+        for (int lc = 0; lc < h.local_components(orl); ++ lc) {
+          int const c = h.get_component(orl, lc);
+          //full_dboxes & box = full_level.AT(c);
+          full_dboxes const& obox = full_olevel.AT(c);
+          //local_dboxes & local_box = local_level.AT(lc);
+          // Local boxes are not communicated or post-processed, and
+          // thus can be modified even for coarser levels
+          local_dboxes & local_obox = local_olevel.AT(lc);
+          
+          // Set restriction information for next coarser level
+          local_obox.restricted_region =
+            all_refined.contracted_for(obox.exterior) & obox.owned;
           
-          // Intersect with the active region
-          vect<ibset,dim> all_boundaries;
+          // Set prolongation information for current level
           for (int d=0; d<dim; ++d) {
-            all_boundaries[d] = allactive & enlarged[d];
+            local_obox.restriction_boundaries[d] =
+              all_boundaries[d].contracted_for(obox.exterior) & obox.owned;
           }
           
-#warning "TODO: Ensure that the prolongation boundaries all_boundaries are contained in the boundary prolongated region"
-          
-#warning "TODO: Ensure that the restriction boundaries and the restricted region are contained in the restricted region"
+        } // for lc
+        
+        for (int lc = 0; lc < h.local_components(rl); ++ lc) {
+          int const c = h.get_component(rl, lc);
+          full_dboxes const& box = full_level.AT(c);
+          local_dboxes & local_box = local_level.AT(lc);
           
-          // Subtract the boundaries from the refined region
-          ibset all_refined = allactive;
+          // Set prolongation information for current level
           for (int d=0; d<dim; ++d) {
-            all_refined -= all_boundaries[d];
+            local_box.prolongation_boundaries[d] =
+              all_boundaries[d] & box.owned;
           }
           
-          for (int lc = 0; lc < h.local_components(rl); ++ lc) {
-            int const c = h.get_component(rl, lc);
-            full_dboxes & box = full_level.AT(c);
+        } // for lc
+        
+      } // if not coarsest level
+      
+      timer_mask.stop();
+      
+      
+      
+      // Mask for unused points on coarser level (which do not influence the future
+      // evolution provided regridding is done at the right times):
+      static Carpet::Timer timer_overwrittenmask ("CarpetLib::dh::regrid::unusedpoints_mask");
+      timer_mask.start();
+      
+      if (rl > 0) {
+        int const orl = rl - 1;
+        full_cboxes const& full_olevel = full_boxes.AT(ml).AT(orl);
+        // Local boxes are not communicated or post-processed, and
+        // thus can be modified even for coarser levels
+        local_cboxes& local_olevel = local_boxes.AT(ml).AT(orl);
+
+        // This works only when the refinement factor is 2
+        ivect const reffact = h.reffacts.AT(rl) / h.reffacts.AT(orl);
+        if (all (reffact == 2)) {
+          // use the already computed 'all_refined' to get region from where
+          // no information will be used later (overwritten)
+          // First: get the region which will get restricted, on the coarse level
+          ibset restricted_region = all_refined.contracted_for(h.baseextent(ml,orl));
+          // This is too big - during MoL-substeps information within this
+          // region will be used to update points outside -> need to
+          // shrink it by some points
+          // The way we shrink it is to invert it, expand that, and invert
+          // again. To invert we define an enclosing box and subtract it from that.
+          i2vect to_shrink = buffer_widths[orl] + ghost_widths[orl];
+          ibbox enclosing = restricted_region.container().expand(ivect(1)+to_shrink);
+          ibset unused_region = enclosing - (enclosing - restricted_region).expand(to_shrink);
+          // Now we have the interesting region in 'unused_region' and need to store
+          // the union of this and the local regions
+          for (int lc = 0; lc < h.local_components(orl); ++ lc) {
+            int const c = h.get_component(orl, lc);
             full_dboxes const& obox = full_olevel.AT(c);
-            local_dboxes & local_box = local_level.AT(lc);
             // Local boxes are not communicated or post-processed, and
             // thus can be modified even for coarser levels
             local_dboxes & local_obox = local_olevel.AT(lc);
-            
-            // Set restriction information for next coarser level
-            local_obox.restricted_region =
-              all_refined.contracted_for(obox.exterior) & obox.owned;
-            
-            // Set prolongation information for current level
-            for (int d=0; d<dim; ++d) {
-              local_obox.restriction_boundaries[d] =
-                all_boundaries[d].contracted_for(obox.exterior) & obox.owned;
-            }
-            
-            // Set prolongation information for current level
-            for (int d=0; d<dim; ++d) {
-              local_box.prolongation_boundaries[d] =
-                all_boundaries[d] & box.owned;
-            }
-            
+            // Set unused information for next coarser level
+            local_obox.unused_region = unused_region & obox.owned;
           } // for lc
-          
         } // if reffact != 2
-        
       } // if not coarsest level
-      
+
       timer_mask.stop();
       
       
@@ -1087,8 +1159,10 @@ regrid (bool const do_init)
       // If there is no coarser level, do nothing
       if (rl > 0) {
         int const orl = rl - 1;
-        full_cboxes const& full_olevel = full_boxes.AT(ml).AT(orl);
+        light_cboxes & light_olevel = light_boxes.AT(ml).AT(orl);
         local_cboxes & local_olevel = local_boxes.AT(ml).AT(orl);
+        full_cboxes const& full_olevel = full_boxes.AT(ml).AT(orl);
+        fast_dboxes & fast_olevel = fast_boxes.AT(ml).AT(orl);
         
         // This works only with cell centred grids
         if (h.refcent == cell_centered) {
@@ -1099,39 +1173,22 @@ regrid (bool const do_init)
           ivect const izero = ivect (0);
           ivect const ione  = ivect (1);
           
+          assert (all (h.reffacts.AT(rl) % h.reffacts.AT(orl) == 0));
+          ivect const reffact = h.reffacts.AT(rl) / h.reffacts.AT(orl);
+          
           vect<vect<ibset,2>,dim> all_fine_boundary;
           
           for (int dir=0; dir<dim; ++dir) {
             // Unit vector
             ivect const idir = ivect::dir(dir);
             
-            // Fine grids with boundary
-            ibset fine_level_plus;
-            for (ibset::const_iterator fine_level_i = fine_level.begin();
-                 fine_level_i != fine_level.end();
-                 ++ fine_level_i)
-            {
-              // Expand region to the left (but not to the right),
-              // since the fluxes are staggered by one half to the
-              // right
-              fine_level_plus |= fine_level_i->expand (idir, izero);
-            }
+            // Left and right faces
+            ibset const fine_level_minus = fine_level.shift (-idir, 2);
+            ibset const fine_level_plus  = fine_level.shift (+idir, 2);
             
-            // Fine grids without boundary
-            ibset fine_level_minus;
-            for (ibset::const_iterator fine_level_i = fine_level.begin();
-                 fine_level_i != fine_level.end();
-                 ++ fine_level_i)
-            {
-              // Shrink region at the left boundary (but not at the
-              // right boundary), since the fluxes are staggered by
-              // one half to the right
-              fine_level_minus |= fine_level_i->expand (izero, -idir);
-            }
-            
-            // Fine boundary only
-            all_fine_boundary[dir][0] = fine_level_plus - fine_level      ;
-            all_fine_boundary[dir][1] = fine_level      - fine_level_minus;
+            // Fine boundaries
+            all_fine_boundary[dir][0] = fine_level_minus - fine_level_plus ;
+            all_fine_boundary[dir][1] = fine_level_plus  - fine_level_minus;
           } // for dir
           
           
@@ -1145,37 +1202,15 @@ regrid (bool const do_init)
           for (int dir=0; dir<3; ++dir) {
             // Unit vector
             ivect const idir = ivect::dir(dir);
+            ibbox const coarse_faces = coarse_ext.shift(idir,2);
             for (int face=0; face<2; ++face) {
-              for (ibset::const_iterator
-                     all_fine_boundary_i = all_fine_boundary[dir][face].begin();
-                   all_fine_boundary_i != all_fine_boundary[dir][face].end();
-                   ++ all_fine_boundary_i)
-              {
-                ibbox const& fb = *all_fine_boundary_i;
-                
-                ivect const cstr = coarse_ext.stride();
-                
-                ivect const flo  = fb.lower();
-                ivect const fup  = fb.upper();
-                ivect const fstr = fb.stride();
-                
-                assert (all (h.reffacts.AT(rl) % h.reffacts.AT(orl) == 0));
-                ivect const reffact = h.reffacts.AT(rl) / h.reffacts.AT(orl);
-                assert (all (reffact % 2 == 0));
-                assert (all (fstr % 2 == 0));
-                assert (all (cstr % 2 == 0));
-                
-                ibbox const ftmp (flo + idir*(fstr/2-cstr/2),
-                                  fup + idir*(fstr/2-cstr/2),
-                                  fstr);
-                ibbox const ctmp = ftmp.contracted_for (coarse_ext);
-                
-                ivect const clo = ctmp.lower();
-                ivect const cup = ctmp.upper();
-                ibbox const cb (clo, cup, cstr);
-                
-                all_coarse_boundary[dir][face] += cb;
-              }
+              cout << "REFREF rl=" << rl << " dir=" << dir << " face=" << face << "\n"
+                   << "   all_fine_boundary=" << all_fine_boundary[dir][face] << "\n"
+                   << "   coarse_ext=" << coarse_ext << "\n"
+                   << "   coarse_faces=" << coarse_faces << "\n";
+              all_coarse_boundary[dir][face] =
+                all_fine_boundary[dir][face].contracted_for (coarse_faces);
+              cout << "   all_coarse_boundary=" << all_coarse_boundary[dir][face] << "\n";
             } // for face
           }   // for dir
           
@@ -1192,6 +1227,8 @@ regrid (bool const do_init)
                 // This is not really used (only for debugging)
                 local_box.fine_boundary[dir][face] =
                   box.exterior & all_fine_boundary[dir][face];
+                cout << "REFREF rl=" << rl << " lc=" << lc << " dir=" << dir << " face=" << face << "\n"
+                     << "   local.fine_boundary=" << local_box.fine_boundary[dir][face] << "\n";
                 
               } // for face
             }   // for dir
@@ -1206,16 +1243,178 @@ regrid (bool const do_init)
             for (int dir = 0; dir < dim; ++ dir) {
               for (int face = 0; face < 2; ++ face) {
                 
-#warning "TODO: Set up communication schedule for refluxing"
-                
+                // This is used for post-processing the fluxes
+                // (calculating the difference between coarse and fine
+                // fluxes, adjusting the state)
                 local_obox.coarse_boundary[dir][face] =
                   obox.exterior & all_coarse_boundary[dir][face];
+                cout << "REFREF orl=" << orl << " lc=" << lc << " dir=" << dir << " face=" << face << "\n"
+                     << "   local.coarse_boundary=" << local_obox.coarse_boundary[dir][face] << "\n";
+                
+              } // for face
+            }   // for dir
+            
+          } // for lc
+          
+          for (int lc = 0; lc < h.local_components(orl); ++ lc) {
+            int const oc = h.get_component(orl, lc);
+            light_dboxes & light_obox = light_olevel.AT(oc);
+            local_dboxes & local_obox = local_olevel.AT(lc);
+            cout << "REF ref_refl ml=" << ml << " rl=" << rl << " lc=" << lc << " oc=" << oc << "\n";
+            
+            for (int dir = 0; dir < dim; ++ dir) {
+              for (int face = 0; face < 2; ++ face) {
+                // Unit vector
+                ivect const idir = ivect::dir(dir);
+                
+                srpvect fast_dboxes::* const fast_ref_refl_sendrecv =
+                  fast_dboxes::fast_ref_refl_sendrecv[dir][face];
+                
+                // Refluxing must fill all coarse refluxing boundary
+                // points, and may use all fine points
+                
+                ibset needrecv (local_obox.coarse_boundary[dir][face]);
+                
+                for (int c = 0; c < h.components(rl); ++ c) {
+                  full_dboxes const & box = full_level.AT(c);
+                  cout << "REF ref_refl ml=" << ml << " rl=" << rl << " lc=" << lc << " oc=" << oc << " c=" << c << " dir=" << dir << " face=" << face << "\n";
+                  
+                  ibbox const contracted_exterior =
+                    box.exterior.contracted_for (light_obox.exterior);
+                  cout << "   exterior=" << box.exterior << "\n"
+                       << "   contracted=" << contracted_exterior << "\n";
+                  ibset const ovlp = needrecv & contracted_exterior;
+                  cout << "   ovlp=" << ovlp << "\n";
+                  
+                  for (ibset::const_iterator
+                         ri = ovlp.begin(); ri != ovlp.end(); ++ ri)
+                  {
+                    ibbox const & recv = * ri;
+                    ibbox const send = recv.expanded_for (box.exterior);
+                    ASSERT_c (send <= box.exterior,
+                              "Refinement restriction: Send region must be contained in exterior");
+                    
+                    sendrecv_pseudoregion_t const preg (send, c, recv, oc);
+                    cout << "REF ref_refl ml=" << ml << " rl=" << rl << " lc=" << lc << " c=" << c << " oc=" << oc << " dir=" << dir << " face=" << face << "\n"
+                         << "   preg=" << preg << "\n";
+                    (fast_olevel.*fast_ref_refl_sendrecv).push_back (preg);
+                    if (not on_this_proc (orl, oc)) {
+                      fast_dboxes & fast_level_otherproc =
+                        fast_level_otherprocs.AT(this_proc(orl, oc));
+                      (fast_level_otherproc.*fast_ref_refl_sendrecv).
+                        push_back (preg);
+                    }
+                  }
+                  
+                  needrecv -= ovlp;
+                  
+                } // for c
+                
+                // All points must have been received
+                if (not needrecv.empty()) {
+                  cout << "needrecv=" << needrecv << endl;
+                }
+                ASSERT_rl (needrecv.empty(),
+                           "Refinement refluxing: All points must have been received");
                 
               } // for face
             }   // for dir
             
           } // for lc
           
+#if 0
+          for (int lc = 0; lc < h.local_components(rl); ++ lc) {
+            int const c = h.get_component(rl, lc);
+            full_dboxes const & box = full_level.AT(c);
+            local_dboxes const & local_box = local_level.AT(lc);
+            full_dboxes const & obox0 = full_boxes.AT(ml).AT(orl).AT(0);
+            cout << "REF ref_refl ml=" << ml << " rl=" << rl << " lc=" << lc << " c=" << c << "\n";
+            
+            for (int dir = 0; dir < dim; ++ dir) {
+              for (int face = 0; face < 2; ++ face) {
+                // Unit vector
+                ivect const idir = ivect::dir(dir);
+                
+                srpvect fast_dboxes::* const fast_ref_refl_sendrecv =
+                  fast_dboxes::fast_ref_refl_sendrecv[dir][face];
+                
+                // Refluxing may fill all coarse active points, and
+                // must use all fine refluxing boundary points
+                
+                // ibset needrecv (box.active.contracted_for (obox0.interior));
+                // ibset needrecv
+                //   (local_box.coarse_boundary[dir][face].
+                //    contracted_for (obox0.interior));
+#error "should this be local_obox instead of local_box?"
+                ibset needrecv
+                  (local_box.coarse_boundary[dir][face].
+                   shift (idir*(1-reffact), 2).
+                   contracted_for (obox0.interior));
+                cout << "REF ref_refl ml=" << ml << " rl=" << rl << " lc=" << lc << " c=" << c << " dir=" << dir << " face=" << face << "\n"
+                     << "   coarse_boundary=" << local_box.coarse_boundary[dir][face] << "\n"
+                     << "   shifted=" << local_box.coarse_boundary[dir][face].shift (idir*(1-reffact), 2) << "\n"
+                     << "   needrecv=" << needrecv << "\n";
+                
+                for (int oc = 0; oc < h.components(orl); ++ oc) {
+                  full_dboxes & obox = full_boxes.AT(ml).AT(orl).AT(oc);
+                  cout << "REF ref_refl ml=" << ml << " rl=" << rl << " lc=" << lc << " c=" << c << " oc=" << oc << " dir=" << dir << " face=" << face << "\n";
+                  
+                  ibset const contracted_boundary
+                    (local_box.fine_boundary[dir][face].
+                     contracted_for (obox.interior));
+                  cout << "   fine_boundary=" << local_box.fine_boundary[dir][face] << "\n";
+                  cout << "   contracted_boundary=" << contracted_boundary << "\n";
+                  ibset const ovlp = obox.active & contracted_boundary;
+                  cout << "   ovlp=" << ovlp << "\n";
+                  
+                  for (ibset::const_iterator
+                         ri = ovlp.begin(); ri != ovlp.end(); ++ ri)
+                  {
+                    ibbox const & recv = * ri;
+#warning "TODO: need to use correct coarse/fine transition"
+                    ibbox const send = recv.expanded_for (box.interior);
+                    ASSERT_c (send <= box.active,
+                              "Refinement restriction: Send region must be contained in active part");
+                    
+                    // Modify the extents since the flux grid
+                    // functions are staggered
+                    assert (all (send.stride() % 2 == 0));
+                    ibbox const msend (send.lower() + idir * send.stride()/2,
+                                       send.upper() + idir * send.stride()/2,
+                                       send.stride());
+                    assert (all (recv.stride() % 2 == 0));
+                    ibbox const mrecv (recv.lower() + idir * recv.stride()/2,
+                                       recv.upper() + idir * recv.stride()/2,
+                                       recv.stride());
+                    
+                    sendrecv_pseudoregion_t const preg (msend, c, mrecv, oc);
+                    (fast_olevel.*fast_ref_refl_sendrecv).push_back (preg);
+                    cout << "REF ref_refl ml=" << ml << " rl=" << rl << " lc=" << lc << " c=" << c << " oc=" << oc << " dir=" << dir << " face=" << face << " preg=" << preg << "\n";
+                    if (not on_this_proc (orl, oc)) {
+                      fast_dboxes & fast_level_otherproc =
+                        fast_level_otherprocs.AT(this_proc(orl, oc));
+                      (fast_level_otherproc.*fast_ref_refl_sendrecv).
+                        push_back (preg);
+                    }
+                  }
+                  
+                  needrecv -= ovlp;
+                  
+                } // for oc
+            
+                // All points must have been received
+                if (not needrecv.empty()) {
+                  cout << "needrecv=" << needrecv << endl;
+                }
+                ASSERT_rl (needrecv.empty(),
+                           "Refinement refluxing: All points must have been received");
+                
+              } // for face
+            }   // for dir
+            
+          } // for lc
+#endif
+          
         } // if cell centered
         
       } // if rl > 0
@@ -1452,7 +1651,25 @@ regrid (bool const do_init)
           timer_bcast_comm_ref_rest.start();
           broadcast_schedule (fast_level_otherprocs, fast_olevel,
                               & fast_dboxes::fast_ref_rest_sendrecv);
-        timer_bcast_comm_ref_rest.stop();
+          timer_bcast_comm_ref_rest.stop();
+        }
+        
+        if (rl > 0) {
+          int const orl = rl - 1;
+          fast_dboxes & fast_olevel = fast_boxes.AT(ml).AT(orl);
+          static Carpet::Timer timer_bcast_comm_ref_refl
+            ("CarpetLib::dh::regrid::bcast_comm::ref_refl");
+          timer_bcast_comm_ref_refl.start();
+          for (int dir = 0; dir < dim; ++ dir) {
+            for (int face = 0; face < 2; ++ face) {
+              srpvect fast_dboxes::* const fast_ref_refl_sendrecv =
+                fast_dboxes::fast_ref_refl_sendrecv[dir][face];
+              
+              broadcast_schedule (fast_level_otherprocs, fast_olevel,
+                                  fast_ref_refl_sendrecv);
+            }
+          }
+          timer_bcast_comm_ref_refl.stop();
         }
         
         // TODO: Maybe broadcast old2new schedule only if do_init is
@@ -1499,25 +1716,7 @@ regrid (bool const do_init)
           cout << box;
         } // for c
         
-        for (int c = 0; c < h.components(rl); ++ c) {
-          light_dboxes const & box = light_boxes.AT(ml).AT(rl).AT(c);
-          cout << eol;
-          cout << "ml=" << ml << " rl=" << rl << " c=" << c << eol;
-          cout << box;
-        } // for c
-        
-        for (int lc = 0; lc < h.local_components(rl); ++ lc) {
-          int const c = h.get_component (rl, lc);
-          local_dboxes const & box = local_boxes.AT(ml).AT(rl).AT(lc);
-          cout << eol;
-          cout << "ml=" << ml << " rl=" << rl << " lc=" << lc << " c=" << c << eol;
-          cout << box;
-        } // for lc
-        
-        fast_dboxes const & fast_box = fast_boxes.AT(ml).AT(rl);
-        cout << eol;
-        cout << "ml=" << ml << " rl=" << rl << eol;
-        cout << fast_box;
+        // light, local, and fast boxes are output later
         
       } // if output_bboxes
       
@@ -1563,6 +1762,46 @@ regrid (bool const do_init)
   // Output:
   if (output_bboxes or there_was_an_error) {
     
+    for (int ml = 0; ml < h.mglevels(); ++ ml) {
+      for (int rl = 0; rl < h.reflevels(); ++ rl) {
+        
+        cout << eol;
+        cout << "ml=" << ml << " rl=" << rl << eol;
+        cout << "baseextent=" << h.baseextent(ml,rl) << eol;
+        
+        for (int c = 0; c < h.components(rl); ++ c) {
+          cout << eol;
+          cout << "ml=" << ml << " rl=" << rl << " c=" << c << eol;
+          cout << "extent=" << h.extent(ml,rl,c) << eol;
+          cout << "outer_boundaries=" << h.outer_boundaries(rl,c) << eol;
+          cout << "processor=" << h.outer_boundaries(rl,c) << eol;
+        } // for c
+        
+        // full boxes have already been output (and deallocated)
+    
+        for (int c = 0; c < h.components(rl); ++ c) {
+          light_dboxes const & box = light_boxes.AT(ml).AT(rl).AT(c);
+          cout << eol;
+          cout << "ml=" << ml << " rl=" << rl << " c=" << c << eol;
+          cout << box;
+        } // for c
+        
+        for (int lc = 0; lc < h.local_components(rl); ++ lc) {
+          int const c = h.get_component (rl, lc);
+          local_dboxes const & box = local_boxes.AT(ml).AT(rl).AT(lc);
+          cout << eol;
+          cout << "ml=" << ml << " rl=" << rl << " lc=" << lc << " c=" << c << eol;
+          cout << box;
+        } // for lc
+        
+        fast_dboxes const & fast_box = fast_boxes.AT(ml).AT(rl);
+        cout << eol;
+        cout << "ml=" << ml << " rl=" << rl << eol;
+        cout << fast_box;
+        
+      } // for rl
+    }   // for ml
+    
     cout << eol;
     cout << "memoryof(gh)=" << memoryof(h) << eol;
     cout << "memoryof(dh)=" << memoryof(*this) << eol;
@@ -1823,6 +2062,12 @@ mpi_datatype (dh::fast_dboxes const &)
       ENTRY (dh::srpvect, fast_ref_bnd_prol_sendrecv),
       ENTRY (dh::srpvect, fast_old2new_sync_sendrecv),
       ENTRY (dh::srpvect, fast_old2new_ref_prol_sendrecv),
+      ENTRY (dh::srpvect, fast_ref_refl_sendrecv_0_0),
+      ENTRY (dh::srpvect, fast_ref_refl_sendrecv_0_1),
+      ENTRY (dh::srpvect, fast_ref_refl_sendrecv_1_0),
+      ENTRY (dh::srpvect, fast_ref_refl_sendrecv_1_1),
+      ENTRY (dh::srpvect, fast_ref_refl_sendrecv_2_0),
+      ENTRY (dh::srpvect, fast_ref_refl_sendrecv_2_1),
       {1, sizeof s, MPI_UB, "MPI_UB", "MPI_UB"}
     };
 #undef ENTRY
@@ -1960,6 +2205,13 @@ memory ()
     memoryof (fast_ref_rest_sendrecv) +
     memoryof (fast_sync_sendrecv) +
     memoryof (fast_ref_bnd_prol_sendrecv) +
+    memoryof (fast_ref_refl_sendrecv_0_0) +
+    memoryof (fast_ref_refl_sendrecv_0_1) +
+    memoryof (fast_ref_refl_sendrecv_1_0) +
+    memoryof (fast_ref_refl_sendrecv_1_1) +
+    memoryof (fast_ref_refl_sendrecv_2_0) +
+    memoryof (fast_ref_refl_sendrecv_2_1) +
+    memoryof (do_init) +
     memoryof (fast_old2new_sync_sendrecv) +
     memoryof (fast_old2new_ref_prol_sendrecv);
 }
@@ -2233,6 +2485,13 @@ output (ostream & os)
      << "   fast_ref_rest_sendrecv: " << fast_ref_rest_sendrecv << eol
      << "   fast_sync_sendrecv: " << fast_sync_sendrecv << eol
      << "   fast_ref_bnd_prol_sendrecv: " << fast_ref_bnd_prol_sendrecv << eol
+     << "   fast_ref_refl_sendrecv_0_0: " << fast_ref_refl_sendrecv_0_0 << eol
+     << "   fast_ref_refl_sendrecv_0_1: " << fast_ref_refl_sendrecv_0_1 << eol
+     << "   fast_ref_refl_sendrecv_1_0: " << fast_ref_refl_sendrecv_1_0 << eol
+     << "   fast_ref_refl_sendrecv_1_1: " << fast_ref_refl_sendrecv_1_1 << eol
+     << "   fast_ref_refl_sendrecv_2_0: " << fast_ref_refl_sendrecv_2_0 << eol
+     << "   fast_ref_refl_sendrecv_2_1: " << fast_ref_refl_sendrecv_2_1 << eol
+     << "   do_init: " << do_init << eol
      << "   fast_old2new_sync_sendrecv: " << fast_old2new_sync_sendrecv << eol
      << "   fast_old2new_ref_prol_sendrecv: " << fast_old2new_ref_prol_sendrecv << eol
      << "}" << eol;
diff --git a/Carpet/CarpetLib/src/dh.hh b/Carpet/CarpetLib/src/dh.hh
index 83e44e0f0..c25001c31 100644
--- a/Carpet/CarpetLib/src/dh.hh
+++ b/Carpet/CarpetLib/src/dh.hh
@@ -81,6 +81,7 @@ public:
     
     // Mask
     ibset restricted_region;                // filled by restriction
+    ibset unused_region;                    // not used (overwritten later) region
     vect<ibset,dim> restriction_boundaries; // partly filled by restriction
     vect<ibset,dim> prolongation_boundaries; // partly used by prolongation
     
@@ -139,6 +140,22 @@ public:
     srpvect fast_sync_sendrecv;
     srpvect fast_ref_bnd_prol_sendrecv;
     
+    // refluxing
+    srpvect fast_ref_refl_sendrecv_0_0;
+    srpvect fast_ref_refl_sendrecv_0_1;
+    srpvect fast_ref_refl_sendrecv_1_0;
+    srpvect fast_ref_refl_sendrecv_1_1;
+    srpvect fast_ref_refl_sendrecv_2_0;
+    srpvect fast_ref_refl_sendrecv_2_1;
+    // Note: Unfortunately we can't use an array of srpvects since
+    // this doesn't work with C++ member pointers.  We instead define
+    // them explicitly above (bah!), and maintain a static array with
+    // member pointers for easier access.
+    static
+    vect<vect<srpvect fast_dboxes::*,2>,dim> fast_ref_refl_sendrecv;
+    static
+    void init_fast_ref_refl_sendrecv ();
+    
     // Regridding schedule:
     
     bool do_init;               // the srpvects below are only defined
diff --git a/Carpet/CarpetLib/src/fulltree.hh b/Carpet/CarpetLib/src/fulltree.hh
index dacbe82f1..aa01b1ba9 100644
--- a/Carpet/CarpetLib/src/fulltree.hh
+++ b/Carpet/CarpetLib/src/fulltree.hh
@@ -26,6 +26,7 @@ using namespace std;
 // increased by the stride to arrive at such intervals.)
 
 
+
 // Generic arithmetic search
 template <typename T>
 static
diff --git a/Carpet/CarpetLib/src/gdata.cc b/Carpet/CarpetLib/src/gdata.cc
index 4219b80c3..0d8980a39 100644
--- a/Carpet/CarpetLib/src/gdata.cc
+++ b/Carpet/CarpetLib/src/gdata.cc
@@ -137,7 +137,8 @@ transfer_from (comm_state & state,
     assert (all(dstbox.lower() >= extent().lower()));
     assert (all(dstbox.upper() <= extent().upper()));
     assert (all(dstbox.stride() == extent().stride()));
-    assert (all((dstbox.lower() - extent().lower()) % dstbox.stride() == 0));
+    // This is not satisfied for refluxing
+    // assert (all((dstbox.lower() - extent().lower()) % dstbox.stride() == 0));
   }
   
   if (is_src) {
diff --git a/Carpet/CarpetLib/src/ggf.cc b/Carpet/CarpetLib/src/ggf.cc
index 02b90bc38..32723571d 100644
--- a/Carpet/CarpetLib/src/ggf.cc
+++ b/Carpet/CarpetLib/src/ggf.cc
@@ -443,13 +443,13 @@ ggf::
 ref_restrict_all (comm_state & state,
                   int const tl, int const rl, int const ml)
 {
+  if (transport_operator == op_none or transport_operator == op_sync) return;
+  static Timer timer ("ref_restrict_all");
+  timer.start ();
   // Require same times
   static_assert (abs(0.1) > 0, "Function CarpetLib::abs has wrong signature");
   assert (abs(t.get_time(ml,rl,tl) - t.get_time(ml,rl+1,tl)) <=
           1.0e-8 * (1.0 + abs(t.get_time(ml,rl,tl))));
-  if (transport_operator == op_none or transport_operator == op_sync) return;
-  static Timer timer ("ref_restrict_all");
-  timer.start ();
   transfer_from_all (state,
                      tl,rl  ,ml,
                      & dh::fast_dboxes::fast_ref_rest_sendrecv,
@@ -487,6 +487,29 @@ ref_prolongate_all (comm_state & state,
 
 
 
+// Reflux a refinement level
+void
+ggf::
+ref_reflux_all (comm_state & state,
+                int const tl, int const rl, int const ml,
+                int const dir, int const face)
+{
+  // Ignore the transport operator
+  static Timer timer ("ref_reflux_all");
+  timer.start ();
+  // Require same times
+  static_assert (abs(0.1) > 0, "Function CarpetLib::abs has wrong signature");
+  assert (abs(t.get_time(ml,rl,tl) - t.get_time(ml,rl+1,tl)) <=
+          1.0e-8 * (1.0 + abs(t.get_time(ml,rl,tl))));
+  transfer_from_all (state,
+                     tl,rl,ml,
+                     dh::fast_dboxes::fast_ref_refl_sendrecv[dir][face],
+                     tl,rl+1,ml);
+  timer.stop (0);
+}
+
+
+
 // Transfer regions of all components
 void
 ggf::
@@ -549,6 +572,8 @@ transfer_from_all (comm_state & state,
     pseudoregion_t const & precv = (* ipsendrecv).recv;
     ibbox const & send = psend.extent;
     ibbox const & recv = precv.extent;
+    assert (all (send.stride() == h.baseextent(ml2,rl2).stride()));
+    assert (all (recv.stride() == h.baseextent(ml1,rl1).stride()));
     int const c2 = psend.component;
     int const c1 = precv.component;
     int const lc2 = h.get_local_component(rl2,c2);
diff --git a/Carpet/CarpetLib/src/ggf.hh b/Carpet/CarpetLib/src/ggf.hh
index 3badcff14..d9412f822 100644
--- a/Carpet/CarpetLib/src/ggf.hh
+++ b/Carpet/CarpetLib/src/ggf.hh
@@ -143,6 +143,10 @@ public:
   // Prolongate a refinement level
   void ref_prolongate_all (comm_state& state,
                            int tl, int rl, int ml, CCTK_REAL time);
+
+  // Reflux a refinement level
+  void ref_reflux_all (comm_state& state,
+                       int tl, int rl, int ml, int dir, int face);
   
   
   
diff --git a/Carpet/CarpetLib/src/gh.cc b/Carpet/CarpetLib/src/gh.cc
index b281031a4..23e823257 100644
--- a/Carpet/CarpetLib/src/gh.cc
+++ b/Carpet/CarpetLib/src/gh.cc
@@ -405,7 +405,7 @@ rpos2ipos1 (rvect const & rpos,
 // Find the refinement level and component to which a grid point
 // belongs.  This uses a tree search over the superregions in the grid
 // struction, which should scale reasonably (O(n log n)) better with
-// the number of componets components.
+// the number of components.
 void
 gh::
 locate_position (rvect const & rpos,
diff --git a/Carpet/CarpetLib/src/mem.cc b/Carpet/CarpetLib/src/mem.cc
index bb66c3b8f..a7c447ec3 100644
--- a/Carpet/CarpetLib/src/mem.cc
+++ b/Carpet/CarpetLib/src/mem.cc
@@ -97,7 +97,8 @@ mem<T>::
   assert (not has_clients());
   if (owns_storage_) {
     delete [] storage_;
-    total_allocated_bytes -= vectorlength_ * nelems_ * sizeof (T);
+    const double nbytes = vectorlength_ * nelems_ * sizeof (T);
+    total_allocated_bytes -= nbytes;
   }
   -- total_allocated_objects;
 }
diff --git a/Carpet/CarpetLib/src/mpi_string.cc b/Carpet/CarpetLib/src/mpi_string.cc
index e0022e3e0..b7e1f9b0e 100644
--- a/Carpet/CarpetLib/src/mpi_string.cc
+++ b/Carpet/CarpetLib/src/mpi_string.cc
@@ -22,6 +22,81 @@ namespace CarpetLib
   
   
   vector <string>
+  gather_string (MPI_Comm const comm,
+                 int const root,
+                 string const & data)
+  {
+    // Get my rank
+    int rank;
+    MPI_Comm_rank (comm, & rank);
+    
+    if (rank == root) {
+      
+      // Get the total number of processors
+      int num_procs;
+      MPI_Comm_size (comm, & num_procs);
+      
+      // Gather the lengths of the data strings
+      int const length = data.length();
+      vector <int> lengths (num_procs);
+      
+      MPI_Gather (const_cast <int *> (& length), 1, MPI_INT,
+                  & lengths.front(), 1, MPI_INT,
+                  root, comm);
+      
+      // Allocate space for all data strings
+      vector <int> offsets (num_procs + 1);
+      offsets.AT(0) = 0;
+      for (int n = 0; n < num_procs; ++ n)
+      {
+        offsets.AT(n + 1) = offsets.AT(n) + lengths.AT(n);
+      }
+      int const total_length = offsets.AT(num_procs);
+      vector <char> alldata_buffer (total_length);
+      
+      // Gather all data strings
+      MPI_Gatherv (const_cast <char *> (data.c_str()), length, MPI_CHAR,
+                   & alldata_buffer.front(),
+                   const_cast <int *> (& lengths.front()),
+                   const_cast <int *> (& offsets.front()),
+                   MPI_CHAR,
+                   root, comm);
+      
+      // Convert data buffer with C strings to C++ strings
+      vector <string> alldata  (num_procs);
+      for (int n = 0; n < num_procs; ++ n)
+      {
+        alldata.AT(n) =
+          string (& alldata_buffer.AT (offsets.AT(n)), lengths.AT(n));
+      }
+      
+      return alldata;
+      
+    } else {
+      
+      // Gather the lengths of the data strings
+      int const length = data.length();
+      
+      MPI_Gather (const_cast <int *> (& length), 1, MPI_INT,
+                  NULL, 1, MPI_INT,
+                  root, comm);
+      
+      // Gather all data strings
+      MPI_Gatherv (const_cast <char *> (data.c_str()), length, MPI_CHAR,
+                   NULL, NULL, NULL, MPI_CHAR,
+                   root, comm);
+      
+      // Convert data buffer with C strings to C++ strings
+      vector <string> alldata;
+      
+      return alldata;
+      
+    }
+  }
+  
+  
+  
+  vector <string>
   allgather_string (MPI_Comm const comm,
                     string const & data)
   {
diff --git a/Carpet/CarpetLib/src/mpi_string.hh b/Carpet/CarpetLib/src/mpi_string.hh
index 6ed8d447d..f05910139 100644
--- a/Carpet/CarpetLib/src/mpi_string.hh
+++ b/Carpet/CarpetLib/src/mpi_string.hh
@@ -17,6 +17,10 @@ namespace CarpetLib
   // String communication
   
   vector <string>
+  gather_string (MPI_Comm comm, int root,
+                 string const & data);
+  
+  vector <string>
   allgather_string (MPI_Comm comm,
                     string const & data);
   
diff --git a/Carpet/CarpetLib/src/prolongate_3d_o7_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_o7_rf2.cc
index 8393d4af5..ac15bf8f5 100644
--- a/Carpet/CarpetLib/src/prolongate_3d_o7_rf2.cc
+++ b/Carpet/CarpetLib/src/prolongate_3d_o7_rf2.cc
@@ -206,9 +206,9 @@ namespace CarpetLib {
     
     
     
-    size_t const srcdi = 1;
-    size_t const srcdj = srcdi * srciext;
-    size_t const srcdk = srcdj * srcjext;
+    size_t const srcdi = SRCIND3(1,0,0) - SRCIND3(0,0,0);
+    size_t const srcdj = SRCIND3(0,1,0) - SRCIND3(0,0,0);
+    size_t const srcdk = SRCIND3(0,0,1) - SRCIND3(0,0,0);
     
     
     
@@ -334,7 +334,7 @@ namespace CarpetLib {
     // kernel
   l8101:
     dst[DSTIND3(id,jd,kd)] =
-      interp2<T> (& src[SRCIND3(is,js,ks)], srcdi, srcdj);
+      interp2<T> (& src[SRCIND3(is,js,ks)], srcdi, srcdk);
     i = i+1;
     id = id+1;
     is = is+1;
diff --git a/Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc
index 17401c727..17ed66218 100644
--- a/Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc
+++ b/Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc
@@ -132,8 +132,8 @@ namespace CarpetLib {
                       ivect3 const & restrict srcext,
                       T * restrict const dst,
                       ivect3 const & restrict dstext,
-                      ibbox3 const & restrict srcbbox,
-                      ibbox3 const & restrict dstbbox,
+                      ibbox3 const & restrict srcbbox0,
+                      ibbox3 const & restrict dstbbox0,
                       ibbox3 const & restrict regbbox)
   {
     DECLARE_CCTK_PARAMETERS;
@@ -147,6 +147,13 @@ namespace CarpetLib {
     
     
     
+    // Correct bboxes, since the fluxes are stored as if they were
+    // cell-centered
+    ibbox const srcbbox = srcbbox0.shift(icent-1,2);
+    ibbox const dstbbox = dstbbox0.shift(icent-1,2);
+    
+    
+    
     if (any (srcbbox.stride() >= regbbox.stride() or
              dstbbox.stride() != regbbox.stride()))
     {
@@ -178,7 +185,7 @@ namespace CarpetLib {
     
     
     ivect3 const regext = regbbox.shape() / regbbox.stride();
-    assert (all (either (cent, srcbbox.stride() % 2 == 0, true)));
+    assert (all (srcbbox.stride() % either (cent, 2, 1) == 0));
     if (not (all ((regbbox.lower() - srcbbox.lower() -
                    either (cent, srcbbox.stride() / 2, 0)) %
                   srcbbox.stride() == 0)))
diff --git a/Carpet/CarpetLib/src/vect.cc b/Carpet/CarpetLib/src/vect.cc
index b02a46979..58def3ca1 100644
--- a/Carpet/CarpetLib/src/vect.cc
+++ b/Carpet/CarpetLib/src/vect.cc
@@ -71,6 +71,7 @@ template class vect<int,2>;
 template class vect<int,3>;
 template class vect<int,4>;
 
+template void vect<unsigned long long,dim>::input (istream& is);
 template void vect<CCTK_REAL,dim>::input (istream& is);
 template void vect<vect<bool,2>,dim>::input (istream& is);
 template void vect<vect<bool,dim>,2>::input (istream& is);
@@ -78,6 +79,7 @@ template void vect<vect<int,dim>,2>::input (istream& is);
 
 template void vect<bool,2>::output (ostream& os) const;
 template void vect<bool,dim>::output (ostream& os) const;
+template void vect<unsigned long long,dim>::output (ostream& os) const;
 template void vect<CCTK_REAL,2>::output (ostream& os) const;
 template void vect<CCTK_REAL,dim>::output (ostream& os) const;
 template void vect<vect<bool,2>,dim>::output (ostream& os) const;
diff --git a/Carpet/CarpetReduce/src/reduce.cc b/Carpet/CarpetReduce/src/reduce.cc
index 79b9540a9..a7efe2c4c 100644
--- a/Carpet/CarpetReduce/src/reduce.cc
+++ b/Carpet/CarpetReduce/src/reduce.cc
@@ -36,39 +36,39 @@ namespace CarpetReduce {
   // Helper functions and types
   
   // Whether a value is nan
-  template<typename T> inline int
+  template<typename T> static inline int
   myisnan (const T x)
   {
     return isnan(x);
   }
   
   // The minimum of two values
-  template<typename T> inline T
+  template<typename T> static inline T
   mymin (const T x, const T y)
   {
     return min(x, y);
   }
   
   // The maximum of two values
-  template<typename T> inline T
+  template<typename T> static inline T
   mymax (const T x, const T y)
   {
     return max(x, y);
   }
   
-  template<typename T> inline T
+  template<typename T> static inline T
   mysqr (const T x)
   {
     return x*x;
   }
   
-  template<typename T> inline T
+  template<typename T> static inline T
   mysqrabs (const T x)
   {
     return x*x;
   }
   
-  template<typename T> inline T
+  template<typename T> static inline T
   mysqrt (const T x)
   {
     return sqrt(x);
@@ -91,7 +91,7 @@ namespace CarpetReduce {
       // For two's complement integers, it is min < - max, and for
       // floating point numbers, it is min = - max.  The expression
       // below does the right thing in both cases.
-      return mymin (numeric_limits<T>::min(), -numeric_limits<T>::max());
+      return CarpetReduce::mymin (numeric_limits<T>::min(), -numeric_limits<T>::max());
     }
     
     // The largest possible value
@@ -120,7 +120,7 @@ namespace CarpetReduce {
   // The C++ compiler should supply these, but some old ones do not,
   // e.g. our beloved workhorse Intel 7.1.  Self is the man.
 #ifdef HAVE_CCTK_BYTE
-//   template<> inline int myisnan (CCTK_BYTE const x)
+//   template<> static inline int myisnan (CCTK_BYTE const x)
 //   {
 //     return 0;
 //   }
@@ -145,7 +145,7 @@ namespace CarpetReduce {
 #endif
   
 #ifdef HAVE_CCTK_INT1
-//   template<> inline int myisnan (CCTK_INT1 const x)
+//   template<> static inline int myisnan (CCTK_INT1 const x)
 //   {
 //     return 0;
 //   }
@@ -170,7 +170,7 @@ namespace CarpetReduce {
 #endif
   
 #ifdef HAVE_CCTK_INT2
-//   template<> inline int myisnan (CCTK_INT2 const x)
+//   template<> static inline int myisnan (CCTK_INT2 const x)
 //   {
 //     return 0;
 //   }
@@ -258,15 +258,15 @@ namespace CarpetReduce {
   template<> inline complex<CCTK_REAL4>
   mymin (const complex<CCTK_REAL4> x, const complex<CCTK_REAL4> y)
   {
-    return complex<CCTK_REAL4> (mymin(x.real(), y.real()),
-                                mymin(x.imag(), y.imag()));
+    return complex<CCTK_REAL4> (CarpetReduce::mymin(x.real(), y.real()),
+                                CarpetReduce::mymin(x.imag(), y.imag()));
   }
   
   template<> inline complex<CCTK_REAL4>
   mymax (const complex<CCTK_REAL4> x, const complex<CCTK_REAL4> y)
   {
-    return complex<CCTK_REAL4> (mymax(x.real(), y.real()),
-                                mymax(x.imag(), y.imag()));
+    return complex<CCTK_REAL4> (CarpetReduce::mymax(x.real(), y.real()),
+                                CarpetReduce::mymax(x.imag(), y.imag()));
   }
   
   template<> inline complex<CCTK_REAL4>
@@ -307,15 +307,15 @@ namespace CarpetReduce {
   template<> inline complex<CCTK_REAL8>
   mymin (const complex<CCTK_REAL8> x, const complex<CCTK_REAL8> y)
   {
-    return complex<CCTK_REAL8> (mymin(x.real(), y.real()),
-                                mymin(x.imag(), y.imag()));
+    return complex<CCTK_REAL8> (CarpetReduce::mymin(x.real(), y.real()),
+                                CarpetReduce::mymin(x.imag(), y.imag()));
   }
   
   template<> inline complex<CCTK_REAL8>
   mymax (const complex<CCTK_REAL8> x, const complex<CCTK_REAL8> y)
   {
-    return complex<CCTK_REAL8> (mymax(x.real(), y.real()),
-                                mymax(x.imag(), y.imag()));
+    return complex<CCTK_REAL8> (CarpetReduce::mymax(x.real(), y.real()),
+                                CarpetReduce::mymax(x.imag(), y.imag()));
   }
   
   template<> inline complex<CCTK_REAL8>
@@ -356,15 +356,15 @@ namespace CarpetReduce {
   template<> inline complex<CCTK_REAL16>
   mymin (const complex<CCTK_REAL16> x, const complex<CCTK_REAL16> y)
   {
-    return complex<CCTK_REAL16> (mymin(x.real(), y.real()),
-                                 mymin(x.imag(), y.imag()));
+    return complex<CCTK_REAL16> (CarpetReduce::mymin(x.real(), y.real()),
+                                 CarpetReduce::mymin(x.imag(), y.imag()));
   }
   
   template<> inline complex<CCTK_REAL16>
   mymax (const complex<CCTK_REAL16> x, const complex<CCTK_REAL16> y)
   {
-    return complex<CCTK_REAL16> (mymax(x.real(), y.real()),
-                                 mymax(x.imag(), y.imag()));
+    return complex<CCTK_REAL16> (CarpetReduce::mymax(x.real(), y.real()),
+                                 CarpetReduce::mymax(x.imag(), y.imag()));
   }
   
   template<> inline complex<CCTK_REAL16>
@@ -435,8 +435,8 @@ namespace CarpetReduce {
     template<class T>
     struct op {
       static inline void initialise (T& accum, T& cnt) { accum = my_numeric_limits<T>::max(); cnt = T(0); }
-      static inline void reduce (T& accum, T& cnt, const T& val, const CCTK_REAL weight) { if (weight!=0) accum = mymin(accum, val); cnt += T(weight); }
-      static inline void combine (T& accum, T& cnt, const T& accum2, const T& cnt2) { accum = mymin(accum,accum2); cnt += cnt2; }
+      static inline void reduce (T& accum, T& cnt, const T& val, const CCTK_REAL weight) { if (weight!=0) accum = CarpetReduce::mymin(accum, val); cnt += T(weight); }
+      static inline void combine (T& accum, T& cnt, const T& accum2, const T& cnt2) { accum = CarpetReduce::mymin(accum,accum2); cnt += cnt2; }
       static inline void finalise (T& accum, const T& cnt) { }
     };
     MPI_Op mpi_op () const { return MPI_MIN; }
@@ -449,8 +449,8 @@ namespace CarpetReduce {
     template<class T>
     struct op {
       static inline void initialise (T& accum, T& cnt) { accum = my_numeric_limits<T>::min(); cnt = T(0); }
-      static inline void reduce (T& accum, T& cnt, const T& val, const CCTK_REAL weight) { if (weight!=0) accum = mymax(accum, val); cnt += T(weight); }
-      static inline void combine (T& accum, T& cnt, const T& accum2, const T& cnt2) { accum = mymax(accum,accum2); cnt += cnt2; }
+      static inline void reduce (T& accum, T& cnt, const T& val, const CCTK_REAL weight) { if (weight!=0) accum = CarpetReduce::mymax(accum, val); cnt += T(weight); }
+      static inline void combine (T& accum, T& cnt, const T& accum2, const T& cnt2) { accum = CarpetReduce::mymax(accum,accum2); cnt += cnt2; }
       static inline void finalise (T& accum, const T& cnt) { }
     };
     MPI_Op mpi_op () const { return MPI_MAX; }
@@ -505,7 +505,7 @@ namespace CarpetReduce {
     template<class T>
     struct op {
       static inline void initialise (T& accum, T& cnt) { accum = T(0); cnt = T(0); }
-      static inline void reduce (T& accum, T& cnt, const T& val, const CCTK_REAL weight) { if (weight!=0) accum += weight*mysqr(val); cnt += T(weight); }
+      static inline void reduce (T& accum, T& cnt, const T& val, const CCTK_REAL weight) { if (weight!=0) accum += weight*CarpetReduce::mysqr(val); cnt += T(weight); }
       static inline void combine (T& accum, T& cnt, const T& accum2, const T& cnt2) { accum += accum2; cnt += cnt2; }
       static inline void finalise (T& accum, const T& cnt) { }
     };
@@ -519,7 +519,7 @@ namespace CarpetReduce {
     template<class T>
     struct op {
       static inline void initialise (T& accum, T& cnt) { accum = T(0); cnt = T(0); }
-      static inline void reduce (T& accum, T& cnt, const T& val, const CCTK_REAL weight) { if (weight!=0) accum += weight*mysqrabs(val); cnt += T(weight); }
+      static inline void reduce (T& accum, T& cnt, const T& val, const CCTK_REAL weight) { if (weight!=0) accum += weight*CarpetReduce::mysqrabs(val); cnt += T(weight); }
       static inline void combine (T& accum, T& cnt, const T& accum2, const T& cnt2) { accum += accum2; cnt += cnt2; }
       static inline void finalise (T& accum, const T& cnt) { }
     };
@@ -561,9 +561,9 @@ namespace CarpetReduce {
     template<class T>
     struct op {
       static inline void initialise (T& accum, T& cnt) { accum = T(0); cnt = T(0); }
-      static inline void reduce (T& accum, T& cnt, const T& val, const CCTK_REAL weight) { if (weight!=0) accum += weight*mysqrabs(val); cnt += T(weight); }
+      static inline void reduce (T& accum, T& cnt, const T& val, const CCTK_REAL weight) { if (weight!=0) accum += weight*CarpetReduce::mysqrabs(val); cnt += T(weight); }
       static inline void combine (T& accum, T& cnt, const T& accum2, const T& cnt2) { accum += accum2; cnt += cnt2; }
-      static inline void finalise (T& accum, const T& cnt) { accum = mysqrt(accum / cnt); }
+      static inline void finalise (T& accum, const T& cnt) { accum = CarpetReduce::mysqrt(accum / cnt); }
     };
     MPI_Op mpi_op () const { return MPI_SUM; }
   };
@@ -575,8 +575,8 @@ namespace CarpetReduce {
     template<class T>
     struct op {
       static inline void initialise (T& accum, T& cnt) { accum = T(0); cnt = T(0); }
-      static inline void reduce (T& accum, T& cnt, const T& val, const CCTK_REAL weight) { if (weight!=0) accum = mymax(accum, T(abs(val))); cnt += T(weight); }
-      static inline void combine (T& accum, T& cnt, const T& accum2, const T& cnt2) { accum = mymax(accum,accum2); cnt += cnt2; }
+      static inline void reduce (T& accum, T& cnt, const T& val, const CCTK_REAL weight) { if (weight!=0) accum = CarpetReduce::mymax(accum, T(abs(val))); cnt += T(weight); }
+      static inline void combine (T& accum, T& cnt, const T& accum2, const T& cnt2) { accum = CarpetReduce::mymax(accum,accum2); cnt += cnt2; }
       static inline void finalise (T& accum, const T& cnt) { }
     };
     MPI_Op mpi_op () const { return MPI_MAX; }
diff --git a/Carpet/CarpetRegrid2/param.ccl b/Carpet/CarpetRegrid2/param.ccl
index 46bd9f6c2..5f87e9a9a 100644
--- a/Carpet/CarpetRegrid2/param.ccl
+++ b/Carpet/CarpetRegrid2/param.ccl
@@ -132,9 +132,9 @@ CCTK_REAL movement_threshold_1 "Minimum movement to trigger a regridding" STEERA
   0:* :: ""
 } 0.0
 
-CCTK_REAL radius_change_threshold_1 "Minimum change in radius to trigger a regridding" STEERABLE=always
+CCTK_REAL radius_rel_change_threshold_1 "Minimum RELATIVE change in radius to trigger a regridding" STEERABLE=always
 {
-  0:* :: ""
+  0.0:* :: ""
 } 0.0
 
 
@@ -200,9 +200,9 @@ CCTK_REAL movement_threshold_2 "Minimum movement to trigger a regridding" STEERA
   0:* :: ""
 } 0.0
 
-CCTK_REAL radius_change_threshold_2 "Minimum change in radius to trigger a regridding" STEERABLE=always
+CCTK_REAL radius_rel_change_threshold_2 "Minimum change in radius to trigger a regridding" STEERABLE=always
 {
-  0:* :: ""
+  0.0:* :: ""
 } 0.0
 
 
@@ -268,9 +268,9 @@ CCTK_REAL movement_threshold_3 "Minimum movement to trigger a regridding" STEERA
   0:* :: ""
 } 0.0
 
-CCTK_REAL radius_change_threshold_3 "Minimum change in radius to trigger a regridding" STEERABLE=always
+CCTK_REAL radius_rel_change_threshold_3 "Minimum change in radius to trigger a regridding" STEERABLE=always
 {
-  0:* :: ""
+  0.0:* :: ""
 } 0.0
 
 
@@ -336,9 +336,9 @@ CCTK_REAL movement_threshold_4 "Minimum movement to trigger a regridding" STEERA
   0:* :: ""
 } 0.0
 
-CCTK_REAL radius_change_threshold_4 "Minimum change in radius to trigger a regridding" STEERABLE=always
+CCTK_REAL radius_rel_change_threshold_4 "Minimum change in radius to trigger a regridding" STEERABLE=always
 {
-  0:* :: ""
+  0.0:* :: ""
 } 0.0
 
 
@@ -404,9 +404,9 @@ CCTK_REAL movement_threshold_5 "Minimum movement to trigger a regridding" STEERA
   0:* :: ""
 } 0.0
 
-CCTK_REAL radius_change_threshold_5 "Minimum change in radius to trigger a regridding" STEERABLE=always
+CCTK_REAL radius_rel_change_threshold_5 "Minimum change in radius to trigger a regridding" STEERABLE=always
 {
-  0:* :: ""
+  0.0:* :: ""
 } 0.0
 
 
@@ -472,9 +472,9 @@ CCTK_REAL movement_threshold_6 "Minimum movement to trigger a regridding" STEERA
   0:* :: ""
 } 0.0
 
-CCTK_REAL radius_change_threshold_6 "Minimum change in radius to trigger a regridding" STEERABLE=always
+CCTK_REAL radius_rel_change_threshold_6 "Minimum change in radius to trigger a regridding" STEERABLE=always
 {
-  0:* :: ""
+  0.0:* :: ""
 } 0.0
 
 
@@ -540,9 +540,9 @@ CCTK_REAL movement_threshold_7 "Minimum movement to trigger a regridding" STEERA
   0:* :: ""
 } 0.0
 
-CCTK_REAL radius_change_threshold_7 "Minimum change in radius to trigger a regridding" STEERABLE=always
+CCTK_REAL radius_rel_change_threshold_7 "Minimum change in radius to trigger a regridding" STEERABLE=always
 {
-  0:* :: ""
+  0.0:* :: ""
 } 0.0
 
 
@@ -608,9 +608,9 @@ CCTK_REAL movement_threshold_8 "Minimum movement to trigger a regridding" STEERA
   0:* :: ""
 } 0.0
 
-CCTK_REAL radius_change_threshold_8 "Minimum change in radius to trigger a regridding" STEERABLE=always
+CCTK_REAL radius_rel_change_threshold_8 "Minimum change in radius to trigger a regridding" STEERABLE=always
 {
-  0:* :: ""
+  0.0:* :: ""
 } 0.0
 
 
@@ -676,9 +676,9 @@ CCTK_REAL movement_threshold_9 "Minimum movement to trigger a regridding" STEERA
   0:* :: ""
 } 0.0
 
-CCTK_REAL radius_change_threshold_9 "Minimum change in radius to trigger a regridding" STEERABLE=always
+CCTK_REAL radius_rel_change_threshold_9 "Minimum change in radius to trigger a regridding" STEERABLE=always
 {
-  0:* :: ""
+  0.0:* :: ""
 } 0.0
 
 
@@ -744,7 +744,7 @@ CCTK_REAL movement_threshold_10 "Minimum movement to trigger a regridding" STEER
   0:* :: ""
 } 0.0
 
-CCTK_REAL radius_change_threshold_10 "Minimum change in radius to trigger a regridding" STEERABLE=always
+CCTK_REAL radius_rel_change_threshold_10 "Minimum change in radius to trigger a regridding" STEERABLE=always
 {
-  0:* :: ""
+  0.0:* :: ""
 } 0.0
diff --git a/Carpet/CarpetRegrid2/src/regrid.cc b/Carpet/CarpetRegrid2/src/regrid.cc
index 918bfeee9..95561d8c1 100644
--- a/Carpet/CarpetRegrid2/src/regrid.cc
+++ b/Carpet/CarpetRegrid2/src/regrid.cc
@@ -32,18 +32,11 @@ namespace CarpetRegrid2 {
   
   
   
-  typedef bboxset <int, dim> ibboxset;
-  typedef vect <vect <CCTK_INT, 2>, dim> jjvect;
-  typedef vect <CCTK_REAL, dim> rvect;
-  typedef bbox <CCTK_REAL, dim> rbbox;
-  
-  
-  
   struct centre_description {
-    int            num_levels;
-    int            active;
-    rvect          position;
-    vector <rvect> radius;
+    int            _num_levels;
+    int            _active;
+    rvect          _position;
+    vector <rvect> _radius;
     
     centre_description (cGH const * cctkGH, int n);
   };
@@ -63,21 +56,21 @@ namespace CarpetRegrid2 {
     int lsh[2];
     getvectorindex2 (cctkGH, "CarpetRegrid2::radii", lsh);
     
-    this->num_levels = num_levels[n];
-    this->active = active[n];
-    this->position = rvect (position_x[n], position_y[n], position_z[n]);
-    if (any (not isfinite(this->position))) {
+    this->_num_levels = num_levels[n];
+    this->_active = active[n];
+    this->_position = rvect (position_x[n], position_y[n], position_z[n]);
+    if (any (not isfinite(this->_position))) {
       CCTK_VWarn (CCTK_WARN_ABORT, __LINE__, __FILE__, CCTK_THORNSTRING,
                   "The position of region %d is [%g,%g,%g], which is not finite",
                   n + 1,
-                  double(this->position[0]),
-                  double(this->position[1]),
-                  double(this->position[2]));
+                  double(this->_position[0]),
+                  double(this->_position[1]),
+                  double(this->_position[2]));
       found_error = true;
     }
-    this->radius.resize (this->num_levels);
-    this->radius.at(0) = rvect(-1.0, -1.0, -1.0); // unused
-    for (int rl = 1; rl < this->num_levels; ++ rl) {
+    this->_radius.resize (this->_num_levels);
+    this->_radius.at(0) = rvect(-1.0, -1.0, -1.0); // unused
+    for (int rl = 1; rl < this->_num_levels; ++ rl) {
       int const ind = index2 (lsh, rl, n);
       CCTK_REAL const rx = radius_x[ind] < 0.0 ? radius[ind] : radius_x[ind];
       CCTK_REAL const ry = radius_y[ind] < 0.0 ? radius[ind] : radius_y[ind];
@@ -87,27 +80,27 @@ namespace CarpetRegrid2 {
         CCTK_VWarn (CCTK_WARN_ABORT, __LINE__, __FILE__, CCTK_THORNSTRING,
                     "The radius of refinement level %d of region %d is [%g,%g,%g], which is not finite",
                     rl, n + 1,
-                    double(this->radius.at(rl)[0]),
-                    double(this->radius.at(rl)[1]),
-                    double(this->radius.at(rl)[2]));
+                    double(this->_radius.at(rl)[0]),
+                    double(this->_radius.at(rl)[1]),
+                    double(this->_radius.at(rl)[2]));
         found_error = true;
       }
       if (any (rad < 0.0)) {
         CCTK_VWarn (CCTK_WARN_ABORT, __LINE__, __FILE__, CCTK_THORNSTRING,
                     "The radius of refinement level %d of region %d is [%g,%g,%g], which is non-negative",
                     rl, n + 1,
-                    double(this->radius.at(rl)[0]),
-                    double(this->radius.at(rl)[1]),
-                    double(this->radius.at(rl)[2]));
+                    double(this->_radius.at(rl)[0]),
+                    double(this->_radius.at(rl)[1]),
+                    double(this->_radius.at(rl)[2]));
         found_error = true;
       }
-      this->radius.at(rl) = rad;
+      this->_radius.at(rl) = rad;
     }
     
-    if (this->num_levels > maxreflevels) {
+    if (this->_num_levels > maxreflevels) {
       CCTK_VWarn (CCTK_WARN_ABORT, __LINE__, __FILE__, CCTK_THORNSTRING,
                   "Region %d has %d levels active, which is larger than the maximum number of refinement levels %d",
-                  n + 1, this->num_levels, maxreflevels);
+                  n + 1, this->_num_levels, maxreflevels);
       found_error = true;
     }
     
@@ -426,7 +419,7 @@ namespace CarpetRegrid2 {
       rpos2ipos1 (physical_upper, origin, scale, hh, 0);
     
     // The set of refined regions
-    vector <ibboxset> regions (min_rl);
+    vector <ibset> regions (min_rl);
     
     // Set up coarse levels
     for (int rl = 0; rl < min_rl; ++ rl) {
@@ -446,18 +439,18 @@ namespace CarpetRegrid2 {
       // Loop over all centres
       for (int n = 0; n < num_centres; ++ n) {
         centre_description centre (cctkGH, n);
-        if (centre.active) {
+        if (centre._active) {
           
           // Loop over all levels for this centre
-          for (int rl = min_rl; rl < centre.num_levels; ++ rl) {
+          for (int rl = min_rl; rl < centre._num_levels; ++ rl) {
             
             if (veryverbose) {
               cout << "Refinement level " << rl << ": importing refined region settings..." << endl;
             }
             
             // Calculate a bbox for this region
-            rvect const rmin = centre.position - centre.radius.at(rl);
-            rvect const rmax = centre.position + centre.radius.at(rl);
+            rvect const rmin = centre._position - centre._radius.at(rl);
+            rvect const rmax = centre._position + centre._radius.at(rl);
             
             // Convert to an integer bbox
             ivect const istride = hh.baseextents.at(0).at(rl).stride();
@@ -524,7 +517,7 @@ namespace CarpetRegrid2 {
           i2vect const cdistance =
             i2vect (min_distance + dd.prolongation_stencil_size(rl));
           
-          for (ibboxset::const_iterator ibb = regions.at(rl+1).begin();
+          for (ibset::const_iterator ibb = regions.at(rl+1).begin();
                ibb != regions.at(rl+1).end();
                ++ ibb)
           {
@@ -559,8 +552,8 @@ namespace CarpetRegrid2 {
           cout << "Refinement level " << rl << ": adding buffer zones..." << endl;
         }
 
-        ibboxset buffered;
-        for (ibboxset::const_iterator ibb = regions.at(rl).begin();
+        ibset buffered;
+        for (ibset::const_iterator ibb = regions.at(rl).begin();
              ibb != regions.at(rl).end();
              ++ ibb)
         {
@@ -592,7 +585,7 @@ namespace CarpetRegrid2 {
         }
 
         ibbox single;
-        for (ibboxset::const_iterator ibb = regions.at(rl).begin();
+        for (ibset::const_iterator ibb = regions.at(rl).begin();
              ibb != regions.at(rl).end();
              ++ ibb)
         {
@@ -608,7 +601,7 @@ namespace CarpetRegrid2 {
         // Would a single bbox be efficient enough?
         if (regions_size >= min_fraction * single_size) {
           // Combine the boxes
-          regions.at(rl) = ibboxset (single);
+          regions.at(rl) = ibset (single);
           if (veryverbose) {
             cout << "Refinement level " << rl << ": combining regions to " << regions.at(rl) << endl;
           }
@@ -709,8 +702,8 @@ namespace CarpetRegrid2 {
           cout << "Refinement level " << rl << ": making regions rotating-90 symmetric" << endl;
         }
         
-        ibboxset added;
-        for (ibboxset::const_iterator ibb = regions.at(rl).begin();
+        ibset added;
+        for (ibset::const_iterator ibb = regions.at(rl).begin();
              ibb != regions.at(rl).end();
              ++ ibb)
         {
@@ -796,8 +789,8 @@ namespace CarpetRegrid2 {
           cout << "Refinement level " << rl << ": making regions rotating-180 symmetric" << endl;
         }
         
-        ibboxset added;
-        for (ibboxset::const_iterator ibb = regions.at(rl).begin();
+        ibset added;
+        for (ibset::const_iterator ibb = regions.at(rl).begin();
              ibb != regions.at(rl).end();
              ++ ibb)
         {
@@ -881,8 +874,8 @@ namespace CarpetRegrid2 {
           cout << "Refinement level " << rl << ": clipping at outer boundary..." << endl;
         }
         
-        ibboxset clipped;
-        for (ibboxset::const_iterator ibb = regions.at(rl).begin();
+        ibset clipped;
+        for (ibset::const_iterator ibb = regions.at(rl).begin();
              ibb != regions.at(rl).end();
              ++ ibb)
         {
@@ -1001,7 +994,7 @@ namespace CarpetRegrid2 {
       {
         gh::cregs regs;
         regs.reserve (regions.at(rl).setsize());
-        for (ibboxset::const_iterator ibb = regions.at(rl).begin();
+        for (ibset::const_iterator ibb = regions.at(rl).begin();
              ibb != regions.at(rl).end();
              ++ ibb)
         {
@@ -1046,7 +1039,7 @@ namespace CarpetRegrid2 {
       
       bool is_properly_nested = true;
       
-      for (ibboxset::const_iterator ibb = regions.at(rl).begin();
+      for (ibset::const_iterator ibb = regions.at(rl).begin();
            ibb != regions.at(rl).end();
            ++ ibb)
       {
@@ -1128,7 +1121,10 @@ namespace CarpetRegrid2 {
       }
     }
     
-    if (do_recompose and * last_iteration != -1) {
+    if (not force and do_recompose and * last_iteration != -1) {
+      
+      int lsh[2];
+      getvectorindex2 (cctkGH, "CarpetRegrid2::radii", lsh);
       
       // Regrid only if the regions have changed sufficiently
       do_recompose = false;
@@ -1164,31 +1160,37 @@ namespace CarpetRegrid2 {
         case 9: mindist = movement_threshold_10; break;
         default: assert (0);
         }
-        do_recompose = dist2 >= pow (mindist, 2);
+        do_recompose = dist2 > pow (mindist, 2);
         if (do_recompose) break;
         
         // Regrid if the radii have changed sufficiently
-        CCTK_REAL const rx = radius_x[n] < 0 ? radius[n] : radius_x[n];
-        CCTK_REAL const ry = radius_y[n] < 0 ? radius[n] : radius_y[n];
-        CCTK_REAL const rz = radius_z[n] < 0 ? radius[n] : radius_z[n];
-        rvect const rad (rx, ry, rz);
-        rvect const oldrad (old_radius_x[n], old_radius_y[n], old_radius_z[n]);
-        CCTK_REAL const dr = sqrt (sum (ipow (rad - oldrad, 2)));
-        CCTK_REAL mindr;
-        switch (n) {
-        case 0: mindr = radius_change_threshold_1; break;
-        case 1: mindr = radius_change_threshold_2; break;
-        case 2: mindr = radius_change_threshold_3; break;
-        case 3: mindr = radius_change_threshold_4; break;
-        case 4: mindr = radius_change_threshold_5; break;
-        case 5: mindr = radius_change_threshold_6; break;
-        case 6: mindr = radius_change_threshold_7; break;
-        case 7: mindr = radius_change_threshold_8; break;
-        case 8: mindr = radius_change_threshold_9; break;
-        case 9: mindr = radius_change_threshold_10; break;
-        default: assert (0);
-        }
-        do_recompose = dr >= mindr;
+        for (int rl = 1; rl < num_levels[n]; ++ rl) {
+          int const ind = index2 (lsh, rl, n);
+          CCTK_REAL const rx = radius_x[ind] < 0 ? radius[ind] : radius_x[ind];
+          CCTK_REAL const ry = radius_y[ind] < 0 ? radius[ind] : radius_y[ind];
+          CCTK_REAL const rz = radius_z[ind] < 0 ? radius[ind] : radius_z[ind];
+          rvect const rad (rx, ry, rz);
+          rvect const oldrad
+            (old_radius_x[ind], old_radius_y[ind], old_radius_z[ind]);
+          CCTK_REAL const drfac = 
+	    (sqrt (sum (ipow (rad - oldrad, 2))))/(sqrt (sum (ipow (oldrad, 2))));
+          CCTK_REAL mindrfac;
+          switch (n) {
+          case 0: mindrfac = radius_rel_change_threshold_1; break;
+          case 1: mindrfac = radius_rel_change_threshold_2; break;
+          case 2: mindrfac = radius_rel_change_threshold_3; break;
+          case 3: mindrfac = radius_rel_change_threshold_4; break;
+          case 4: mindrfac = radius_rel_change_threshold_5; break;
+          case 5: mindrfac = radius_rel_change_threshold_6; break;
+          case 6: mindrfac = radius_rel_change_threshold_7; break;
+          case 7: mindrfac = radius_rel_change_threshold_8; break;
+          case 8: mindrfac = radius_rel_change_threshold_9; break;
+          case 9: mindrfac = radius_rel_change_threshold_10; break;
+          default: assert (0);
+          }
+          do_recompose = drfac > mindrfac;
+          if (do_recompose) break;
+        } // for rl
         if (do_recompose) break;
         
       } // for n
@@ -1300,7 +1302,10 @@ namespace CarpetRegrid2 {
       }
     }
     
-    if (do_recompose and * last_iteration != -1) {
+    if (not force and do_recompose and * last_iteration != -1) {
+      
+      int lsh[2];
+      getvectorindex2 (cctkGH, "CarpetRegrid2::radii", lsh);
       
       // Regrid only if the regions have changed sufficiently
       do_recompose = false;
@@ -1336,31 +1341,37 @@ namespace CarpetRegrid2 {
         case 9: mindist = movement_threshold_10; break;
         default: assert (0);
         }
-        do_recompose = dist2 >= pow (mindist, 2);
+        do_recompose = dist2 > pow (mindist, 2);
         if (do_recompose) break;
         
         // Regrid if the radii have changed sufficiently
-        CCTK_REAL const rx = radius_x[n] < 0 ? radius[n] : radius_x[n];
-        CCTK_REAL const ry = radius_y[n] < 0 ? radius[n] : radius_y[n];
-        CCTK_REAL const rz = radius_z[n] < 0 ? radius[n] : radius_z[n];
-        rvect const rad (rx, ry, rz);
-        rvect const oldrad (old_radius_x[n], old_radius_y[n], old_radius_z[n]);
-        CCTK_REAL const dr = sqrt (sum (ipow (rad - oldrad, 2)));
-        CCTK_REAL mindr;
-        switch (n) {
-        case 0: mindr = radius_change_threshold_1; break;
-        case 1: mindr = radius_change_threshold_2; break;
-        case 2: mindr = radius_change_threshold_3; break;
-        case 3: mindr = radius_change_threshold_4; break;
-        case 4: mindr = radius_change_threshold_5; break;
-        case 5: mindr = radius_change_threshold_6; break;
-        case 6: mindr = radius_change_threshold_7; break;
-        case 7: mindr = radius_change_threshold_8; break;
-        case 8: mindr = radius_change_threshold_9; break;
-        case 9: mindr = radius_change_threshold_10; break;
-        default: assert (0);
-        }
-        do_recompose = dr >= mindr;
+        for (int rl = 1; rl < num_levels[n]; ++ rl) {
+          int const ind = index2 (lsh, rl, n);
+          CCTK_REAL const rx = radius_x[ind] < 0 ? radius[ind] : radius_x[ind];
+          CCTK_REAL const ry = radius_y[ind] < 0 ? radius[ind] : radius_y[ind];
+          CCTK_REAL const rz = radius_z[ind] < 0 ? radius[ind] : radius_z[ind];
+          rvect const rad (rx, ry, rz);
+          rvect const oldrad
+            (old_radius_x[ind], old_radius_y[ind], old_radius_z[ind]);
+          CCTK_REAL const drfac = 
+	    (sqrt (sum (ipow (rad - oldrad, 2))))/(sqrt (sum (ipow (oldrad, 2))));
+          CCTK_REAL mindrfac;
+          switch (n) {
+          case 0: mindrfac = radius_rel_change_threshold_1; break;
+          case 1: mindrfac = radius_rel_change_threshold_2; break;
+          case 2: mindrfac = radius_rel_change_threshold_3; break;
+          case 3: mindrfac = radius_rel_change_threshold_4; break;
+          case 4: mindrfac = radius_rel_change_threshold_5; break;
+          case 5: mindrfac = radius_rel_change_threshold_6; break;
+          case 6: mindrfac = radius_rel_change_threshold_7; break;
+          case 7: mindrfac = radius_rel_change_threshold_8; break;
+          case 8: mindrfac = radius_rel_change_threshold_9; break;
+          case 9: mindrfac = radius_rel_change_threshold_10; break;
+          default: assert (0);
+          }
+          do_recompose = drfac > mindrfac;
+          if (do_recompose) break;
+        } // for rl
         if (do_recompose) break;
         
       } // for n
@@ -1422,6 +1433,9 @@ namespace CarpetRegrid2 {
       // Make multigrid aware
       MakeMultigridBoxesMaps (cctkGH, regsss, regssss);
       
+      int lsh[2];
+      getvectorindex2 (cctkGH, "CarpetRegrid2::radii", lsh);
+      
       // Remember current positions
       for (int n = 0; n < num_centres; ++ n) {
         old_active[n] = active[n];
@@ -1432,9 +1446,12 @@ namespace CarpetRegrid2 {
         old_position_y[n] = position_y[n];
         old_position_z[n] = position_z[n];
         
-        old_radius_x[n] = radius_x[n] < 0 ? radius[n] : radius_x[n];
-        old_radius_y[n] = radius_y[n] < 0 ? radius[n] : radius_y[n];
-        old_radius_z[n] = radius_z[n] < 0 ? radius[n] : radius_z[n];
+	for (int rl = 1; rl < num_levels[n]; ++ rl) {
+	  int const ind = index2 (lsh, rl, n);
+	  old_radius_x[ind] = radius_x[ind] < 0 ? radius[ind] : radius_x[ind];
+	  old_radius_y[ind] = radius_y[ind] < 0 ? radius[ind] : radius_y[ind];
+	  old_radius_z[ind] = radius_z[ind] < 0 ? radius[ind] : radius_z[ind];
+	}
       }
       
     } // if do_recompose
diff --git a/Carpet/CarpetWeb/publications/carpet-publications.bib b/Carpet/CarpetWeb/publications/carpet-publications.bib
index 30b7f0f40..31412c418 100644
--- a/Carpet/CarpetWeb/publications/carpet-publications.bib
+++ b/Carpet/CarpetWeb/publications/carpet-publications.bib
@@ -1833,9 +1833,11 @@ THESIS:
                   Approach to Numerical Relativity},
   school =       {Louisiana State University},
   year =         2007,
-  url =          {http://www.cct.lsu.edu/~dorband/thesis.pdf},
+  url =
+                  {http://etd.lsu.edu/docs/available/etd-03202007-163153/},
   receiveddate = 2007,
 }
+@Comment also available at http://www.cct.lsu.edu/~dorband/thesis.pdf
 
 @PhdThesis{Carpet-Kastaun2007a,
   status =       {thesis},
@@ -1872,7 +1874,7 @@ THESIS:
   receiveddate = 2007,
 }
 
-@PhdThesis{Carpet-2007a,
+@PhdThesis{Carpet-Zenginoglu2007a,
   status =       {thesis},
   author =       {An{\i}l Zengino{\u{g}}lu},
   title =        {A conformal approach to numerical calculations of
diff --git a/Carpet/LoopControl/src/loopcontrol.c b/Carpet/LoopControl/src/loopcontrol.c
index e928e91e2..a0bbe564b 100644
--- a/Carpet/LoopControl/src/loopcontrol.c
+++ b/Carpet/LoopControl/src/loopcontrol.c
@@ -71,11 +71,11 @@ lc_statmap_t * lc_statmap_list = NULL;
 /* Find all possible thread topologies */
 /* This finds all possible thread topologies which can be expressed as
    NIxNJxNK x NIIxNJJxNKK.  More complex topologies, e.g. based on a
-   recursive subdiviston, are not considered (and cannot be expressed
-   with the data structures used in LoopControl).  I expect that more
-   complex topologies are not necessary, since the number of treads is
-   usually quite small and contains many small factors in its prime
-   decomposition.  */
+   recursive subdivision, are not considered (and cannot be expressed
+   with the data structures currently used in LoopControl).  I expect
+   that more complex topologies are not necessary, since the number of
+   threads is usually quite small and contains many small factors in
+   its prime decomposition.  */
 static
 void
 find_thread_topologies (lc_topology_t * restrict const topologies,
@@ -122,6 +122,7 @@ find_thread_topologies (lc_topology_t * restrict const topologies,
 }
 
 
+#if 0
 
 /* Find "good" tiling specifications */
 /* This calculates a subset of all possible thread specifications.
@@ -130,11 +131,10 @@ find_thread_topologies (lc_topology_t * restrict const topologies,
    "equally", so that one does not have to spend much effort
    investigating tiling specifications with very similar properties.
    For example, if there are 200 grid points, then half of the
-   possible tiling specifications consists of splitting the domain
-   into two subdomains with [100+N, 100-N] points.  This is avoided by
+   possible tiling specifications consist of splitting the domain into
+   two subdomains with [100+N, 100-N] points.  This is avoided by
    covering all possible tiling specifications in exponentially
    growing step sizes.  */
-#if 0
 static
 int tiling_compare (const void * const a, const void * const b)
 {
@@ -372,7 +372,7 @@ lc_stattime_find_create (lc_statset_t * restrict const ls,
   
   if (! lt) {
     lt = malloc (sizeof * lt);
-    assert(lt);
+    assert (lt);
     lc_stattime_init (lt, ls, state);
   }
   
@@ -406,23 +406,33 @@ lc_statset_init (lc_statset_t * restrict const ls,
   /*** Threads ****************************************************************/
   
   static int saved_maxthreads = -1;
-  static lc_topology_t * restrict * saved_topologies = NULL;
+  static lc_topology_t * * saved_topologies = NULL;
   static int * restrict saved_ntopologies = NULL;
-  
+
+  // Allocate memory the first time this function is called
   if (saved_maxthreads < 0) {
     saved_maxthreads = omp_get_max_threads();
     saved_topologies  = malloc (saved_maxthreads * sizeof * saved_topologies );
-    assert(saved_topologies);
     saved_ntopologies = malloc (saved_maxthreads * sizeof * saved_ntopologies);
-    assert(saved_ntopologies);
+    assert (saved_topologies );
+    assert (saved_ntopologies);
     for (int n=0; n<saved_maxthreads; ++n) {
       saved_topologies [n] = NULL;
       saved_ntopologies[n] = -1;
     }
   }
-  
+  // Reallocate memory in case we need more
   if (num_threads > saved_maxthreads) {
-    CCTK_WARN (CCTK_WARN_ABORT, "Thread count inconsistency");
+    int old_saved_maxthreads = saved_maxthreads;
+    saved_maxthreads = num_threads;
+    saved_topologies  = realloc (saved_topologies,  saved_maxthreads * sizeof * saved_topologies );
+    saved_ntopologies = realloc (saved_ntopologies, saved_maxthreads * sizeof * saved_ntopologies);
+    assert (saved_topologies );
+    assert (saved_ntopologies);
+    for (int n=old_saved_maxthreads; n<saved_maxthreads; ++n) {
+      saved_topologies [n] = NULL;
+      saved_ntopologies[n] = -1;
+    }
   }
   
   if (! saved_topologies[num_threads-1]) {
@@ -436,7 +446,7 @@ lc_statset_init (lc_statset_t * restrict const ls,
     
     saved_topologies[num_threads-1] =
       malloc (maxntopologies * sizeof * saved_topologies[num_threads-1]);
-    assert(saved_topologies[num_threads-1]);
+    assert (saved_topologies[num_threads-1]);
     find_thread_topologies
       (saved_topologies[num_threads-1],
        maxntopologies, & saved_ntopologies[num_threads-1],
@@ -445,6 +455,7 @@ lc_statset_init (lc_statset_t * restrict const ls,
       realloc (saved_topologies[num_threads-1],
                (saved_ntopologies[num_threads-1] *
                 sizeof * saved_topologies[num_threads-1]));
+    assert (saved_topologies[num_threads-1]);
     
     if (debug) {
       printf ("Found %d possible thread topologies\n",
@@ -481,12 +492,12 @@ lc_statset_init (lc_statset_t * restrict const ls,
       printf ("Dimension %d: %d points\n", d, ls->npoints[d]);
     }
     ls->tilings[d] = malloc (maxntilings * sizeof * ls->tilings[d]);
-    assert(ls->tilings[d]);
+    assert (ls->tilings[d]);
     find_tiling_specifications
       (ls->tilings[d], maxntilings, & ls->ntilings[d], ls->npoints[d]);
     ls->topology_ntilings[d] =
       malloc (ls->ntopologies * sizeof * ls->topology_ntilings[d]);
-    assert(ls->topology_ntilings[d]);
+    assert (ls->topology_ntilings[d]);
     for (int n = 0; n < ls->ntopologies; ++n) {
       int tiling;
       for (tiling = 1; tiling < ls->ntilings[d]; ++tiling) {
@@ -582,7 +593,7 @@ lc_statset_find_create (lc_statmap_t * restrict const lm,
   
   if (! ls) {
     ls = malloc (sizeof * ls);
-    assert(ls);
+    assert (ls);
     lc_statset_init (ls, lm, num_threads, npoints);
   }
   
@@ -631,7 +642,8 @@ lc_control_init (lc_control_t * restrict const lc,
                  lc_statmap_t * restrict const lm,
                  int const imin, int const jmin, int const kmin,
                  int const imax, int const jmax, int const kmax,
-                 int const ilsh, int const jlsh, int const klsh)
+                 int const ilsh, int const jlsh, int const klsh,
+                 int const di)
 {
   DECLARE_CCTK_PARAMETERS;
   
@@ -645,6 +657,7 @@ lc_control_init (lc_control_t * restrict const lc,
   assert (imin >= 0 && imax <= ilsh && ilsh >= 0);
   assert (jmin >= 0 && jmax <= jlsh && jlsh >= 0);
   assert (kmin >= 0 && kmax <= klsh && klsh >= 0);
+  assert (di > 0);
   
   /* Copy arguments */
   lc->imin = imin;
@@ -656,6 +669,7 @@ lc_control_init (lc_control_t * restrict const lc,
   lc->ilsh = ilsh;
   lc->jlsh = jlsh;
   lc->klsh = klsh;
+  lc->di   = di;                /* vector size */
   
   
   
@@ -663,11 +677,8 @@ lc_control_init (lc_control_t * restrict const lc,
   lc_statset_t * restrict ls;
 #pragma omp single copyprivate (ls)
   {
-    /* Get number of threads */
-    /* int const num_threads = omp_get_num_threads(); */
-    /* int const num_threads = omp_get_max_threads(); */
-    
     /* Calculate number of points */
+    /* TODO: Take vector size into account */
     int npoints[3];
     npoints[0] = lc_max (imax - imin, 0);
     npoints[1] = lc_max (jmax - jmin, 0);
@@ -1110,7 +1121,8 @@ CCTK_FNAME (lc_control_init) (lc_control_t * restrict const lc,
   lc_control_init (lc, lm,
                    * imin - 1, * jmin - 1, * kmin - 1,
                    * imax, * jmax, * kmax,
-                   * ilsh, * jlsh, * klsh);
+                   * ilsh, * jlsh, * klsh,
+                   1);
 }
 
 CCTK_FCALL
diff --git a/Carpet/LoopControl/src/loopcontrol.h b/Carpet/LoopControl/src/loopcontrol.h
index 0f3b6d80d..e23806fcb 100644
--- a/Carpet/LoopControl/src/loopcontrol.h
+++ b/Carpet/LoopControl/src/loopcontrol.h
@@ -242,6 +242,7 @@ typedef struct lc_control_t {
   int imin, jmin, kmin;
   int imax, jmax, kmax;
   int ilsh, jlsh, klsh;
+  int di;
   
   /* Control settings for thread parallelism (useful for debugging) */
   int iiimin, jjjmin, kkkmin;
@@ -303,7 +304,8 @@ lc_control_init (lc_control_t * restrict lc,
                  lc_statmap_t * restrict lm,
                  int imin, int jmin, int kmin,
                  int imax, int jmax, int kmax,
-                 int ilsh, int jlsh, int klsh);
+                 int ilsh, int jlsh, int klsh,
+                 int di);
 
 void
 lc_control_finish (lc_control_t * restrict lc);
@@ -311,15 +313,24 @@ lc_control_finish (lc_control_t * restrict lc);
 
 
 #define LC_LOOP3(name, i,j,k, imin,jmin,kmin, imax,jmax,kmax, ilsh,jlsh,klsh) \
+  LC_LOOP3VEC(name, i,j,k, imin,jmin,kmin, imax,jmax,kmax, ilsh,jlsh,klsh, 1)
+#define LC_ENDLOOP3(name)                       \
+  LC_ENDLOOP3VEC(name)
+
+#define LC_LOOP3VEC(name, i,j,k, imin_,jmin_,kmin_, imax_,jmax_,kmax_, ilsh_,jlsh_,klsh_, di_) \
   do {                                                                  \
     static int lc_initialised = 0;                                      \
     static lc_statmap_t lc_lm;                                          \
     if (! lc_initialised) {                                             \
       lc_statmap_init (& lc_initialised, & lc_lm, #name);               \
     }                                                                   \
+    int const lc_di = (di_);                                            \
     lc_control_t lc_lc;                                                 \
     lc_control_init (& lc_lc, & lc_lm,                                  \
-                     imin,jmin,kmin, imax,jmax,kmax, ilsh,jlsh,klsh);   \
+                     (imin_), (jmin_), (kmin_),                         \
+                     (imax_), (jmax_), (kmax_),                         \
+                     (ilsh_), (jlsh_), (klsh_),                         \
+                     lc_di);                                            \
                                                                         \
     /* Coarse loop */                                                   \
     for (int lc_kk = lc_lc.kkmin;                                       \
@@ -327,33 +338,33 @@ lc_control_finish (lc_control_t * restrict lc);
          lc_kk += lc_lc.kkstep)                                         \
     {                                                                   \
       int const lc_kmin = lc_kk + lc_lc.kkkkmin;                        \
-      int const lc_kmax =                                               \
-        lc_min (lc_kk + lc_lc.kkkkmax, lc_lc.kkmax);                    \
+      int const lc_kmax = lc_min (lc_kk + lc_lc.kkkkmax, lc_lc.kkmax);  \
                                                                         \
       for (int lc_jj = lc_lc.jjmin;                                     \
            lc_jj < lc_lc.jjmax;                                         \
            lc_jj += lc_lc.jjstep)                                       \
       {                                                                 \
         int const lc_jmin = lc_jj + lc_lc.jjjjmin;                      \
-        int const lc_jmax =                                             \
-          lc_min (lc_jj + lc_lc.jjjjmax, lc_lc.jjmax);                  \
+        int const lc_jmax = lc_min (lc_jj + lc_lc.jjjjmax, lc_lc.jjmax); \
                                                                         \
         for (int lc_ii = lc_lc.iimin;                                   \
              lc_ii < lc_lc.iimax;                                       \
              lc_ii += lc_lc.iistep)                                     \
         {                                                               \
           int const lc_imin = lc_ii + lc_lc.iiiimin;                    \
-          int const lc_imax =                                           \
-            lc_min (lc_ii + lc_lc.iiiimax, lc_lc.iimax);                \
+          int const lc_imax = lc_min (lc_ii + lc_lc.iiiimax, lc_lc.iimax); \
                                                                         \
           /* Fine loop */                                               \
           for (int k = lc_kmin; k < lc_kmax; ++k) {                     \
             for (int j = lc_jmin; j < lc_jmax; ++j) {                   \
               LC_PRELOOP_STATEMENTS                                     \
               {                                                         \
-                for (int i = lc_imin; i < lc_imax; ++i) {
+                int const lc_ipos =                                     \
+                  lc_imin + lc_lc.ilsh * (j + lc_lc.jlsh * k);          \
+                int const lc_ioffset = (lc_ipos & - lc_di) - lc_ipos;   \
+                for (int i = lc_imin + lc_ioffset; i < lc_imax; i += lc_di) {
 
-#define LC_ENDLOOP3(name)                       \
+#define LC_ENDLOOP3VEC(name)                    \
                 }                               \
               }                                 \
               LC_POSTLOOP_STATEMENTS            \
diff --git a/Carpet/doc/commit-messages.txt b/Carpet/doc/commit-messages.txt
new file mode 100644
index 000000000..f62291274
--- /dev/null
+++ b/Carpet/doc/commit-messages.txt
@@ -0,0 +1,81 @@
+HOW TO WRITE A COMMIT MESSAGE
+
+When one looks at a set of changes, then it can be really useful to
+see at a glance what a particular commit was about, and good commit
+messages are thus important.
+
+It can be frustrating to have to type a few lines of text after
+finding and correcting a particular obnoxious error, which then seems
+to serve only as delay before others can benefit from this correction.
+However, good commit messages are important.
+
+A commit message consists of a subject (the first line) and a body
+(the other lines, separated by an empty line). Many tools show only
+the subject when displaying many commits, so the subject has to give a
+high-level overview. The body can give more detail, but probably
+shouldn't contain more than a few lines.
+
+
+
+The subject should not be longer than 80 characters, so that lines
+don't wrap when many subjects are displayed.
+
+The subject will usually be read with the following questions in mind:
+- Do I need this patch?
+- Will this patch interfere with my current work?
+- Is it just a small change that I can safely apply?
+People will likely not be interested in details about what exactly was
+changed, as these questions can also be answered by looking at details
+of the patch.
+
+Therefore the subject should:
+- State roughly where the change occured (which thorn). In Carpet,
+  please prefix the subject with "ThornName:".
+- State what kind of change it was (new functionality, important error
+  correction, small bug fix). It should also say if it modifies many
+  lines of code ("reindent", "change variable name") since this can
+  lead to conflicts.
+- State whether the commit modifies/corrects/influences the major
+  functionality ("physics") of the thorn, and if it does so by
+  default;
+- Be as high level as possible: if you correct an indexing error that
+  influences regridding, then say "Don't regrid too often" rather than
+  "Correct indexing error".
+
+Keep in mind that people will read the subject without knowing any
+context about the patch. Providing this context is more important than
+describing the change itself. If 80 characters are not enough, then
+you are probably providing too much detail.
+
+
+
+The body of the commit message should give more detail, if necessary.
+The body will probably only be read (maybe with grep?) when tracking
+down a particular problem. The body of the message does not need to
+state things that the version control system already knows, such as
+e.g. which files are changed, nor anything else that can be seen in
+the diff.
+
+If the commit corrects and error, then describing the error in the
+commit message is a good idea. If an important class or function is
+modified, introduced, or deleted, then listing the class or function
+name may be helpful. (You don't need to document the class or function
+there; you should document it in the code instead.)
+
+Remember that the body of the commit message will not be read by
+people who look at the new code. Some people accidentally document new
+algorithms or new code in the commit message -- this is a bad idea,
+since documentation should go near the code where people will see it.
+It may be a nice idea to try and tell other people about the change in
+the commit message, but keep in mind that most users of a piece of
+code won't read the commit message anyway, so that these instructions
+need to go as comments into the code or official documentation.
+
+
+
+Overall, a commit message should describe the change, not the new
+state. It helps to write the message as a command, as in "Correct typo
+in comment", instead of a description ("This patch corrects a typo in
+a comment"), which is longer but doesn't convey any more information.
+Also, please use your full name and email address when creating a
+commit, so that people can ask you when they have questions.
diff --git a/Carpet/doc/domain-decomposition.tex b/Carpet/doc/domain-decomposition.tex
index ba4c5c645..2ce6758b3 100644
--- a/Carpet/doc/domain-decomposition.tex
+++ b/Carpet/doc/domain-decomposition.tex
@@ -5,7 +5,7 @@
 \usepackage{color}
 \usepackage{graphicx}
 \usepackage[latin9]{inputenc}
-\usepackage{mathpazo}
+% \usepackage{mathpazo}
 \usepackage[hyphens]{url}
 
 % Put this package last
diff --git a/CarpetDev/CarpetIOF5/src/output.cc b/CarpetDev/CarpetIOF5/src/output.cc
index 0966e5dfb..423d55222 100644
--- a/CarpetDev/CarpetIOF5/src/output.cc
+++ b/CarpetDev/CarpetIOF5/src/output.cc
@@ -286,6 +286,8 @@ namespace CarpetIOF5 {
         if (not output_ghost_points) {
           imin[d] += lghosts[d] / 2;
           imax[d] -= ughosts[d] / 2;
+          lghosts[d] = 0;
+          ughosts[d] = 0;
         }
         ioff[d] = lbnd[d] + imin[d];
         ilen[d] = imax[d] - imin[d];
author	Christian Reisswig <reisswig@tapir.caltech.edu>	2010-08-27 23:05:53 -0700
committer	Barry Wardell <barry.wardell@gmail.com>	2011-12-14 18:25:17 +0000
commit	a54aeac2cd92d6742efee10bb8f4c08affe25e24 (patch)
tree	c111593ac52d19a1b9e4a6170781a937f1f20ab5
parent	2205c4de243d010b29f651a4ecd51c32e7b161bb (diff)
parent	bed54e5df6d3f29d0d2ad473018fa35b0f994a2a (diff)