Merge branch 'master' of carpetgit@carpetcode.dyndns.org:carpet

author: Thomas Radke <tradke@damiana.damiana.admin> 2008-09-09 13:36:22 +0200
committer: Thomas Radke <tradke@damiana.damiana.admin> 2008-09-09 13:36:22 +0200
commit: 4317fcbec3eca5d859adf9d41c3b6d45af0e0734 (patch)
tree: cb76316eb2e78d929996bb81648f409d4ad27a7e /Carpet
parent: e73ecc295bc6be5b8c4d409b0b3beb8e6a8e3401 (diff)
parent: 28a33e7fe56c3a7c249e6c17160225f69df9119e (diff)
4 files changed, 462 insertions, 139 deletions
diff --git a/Carpet/Carpet/src/Restrict.cc b/Carpet/Carpet/src/Restrict.cc
index 6df4666fd..a0a94ddd4 100644
--- a/Carpet/Carpet/src/Restrict.cc
+++ b/Carpet/Carpet/src/Restrict.cc
@@ -83,11 +83,6 @@ namespace Carpet {
 
           for (int v = 0; v < (int)arrdata.at(g).at(m).data.size(); ++v) {
             ggf *const gv = arrdata.at(g).at(m).data.at(v);
-#if 0
-            for (int c = 0; c < vhh.at(m)->components(reflevel); ++c) {
-              gv->ref_restrict (state, tl, reflevel, c, mglevel, time);
-            }
-#endif
             gv->ref_restrict_all (state, tl, reflevel, mglevel, time);
           }
         }
diff --git a/Carpet/Carpet/src/modes.cc b/Carpet/Carpet/src/modes.cc
index 841710e16..1b092aab9 100644
--- a/Carpet/Carpet/src/modes.cc
+++ b/Carpet/Carpet/src/modes.cc
@@ -97,12 +97,6 @@ namespace Carpet {
         const int m = 0;
         const int c = CCTK_MyProc(cctkGH);
         
-#if 0
-        const ibbox& base = arrdata.at(group).at(m).hh->bases().at(ml).at(rl);
-#endif
-#if 0                           // dh::dbases
-        const ibbox& baseext = arrdata.at(group).at(m).dd->bases.at(ml).at(rl).exterior;
-#endif
         const ibbox& baseext = arrdata.at(group).at(m).hh->baseextents.at(ml).at(rl);
 	const ibbox& ext = arrdata.at(group).at(m).dd->boxes.at(ml).at(rl).at(c).exterior;
         const b2vect& obnds = arrdata.at(group).at(m).hh->outer_boundaries(rl,c);
@@ -352,10 +346,6 @@ namespace Carpet {
       }
       
       // Set grid shape
-#if 0                           // dh::dbases
-      const ibbox& coarseext = vdd.at(map)->bases.at(mglevel).at(0       ).exterior;
-      const ibbox& baseext   = vdd.at(map)->bases.at(mglevel).at(reflevel).exterior;
-#endif
       const ibbox& coarseext = vhh.at(map)->baseextents.at(mglevel).at(0       );
       const ibbox& baseext   = vhh.at(map)->baseextents.at(mglevel).at(reflevel);
 //       assert (all (baseext.lower() % baseext.stride() == 0));
@@ -452,9 +442,6 @@ namespace Carpet {
     if (mc_grouptype == CCTK_GF) {
       
       // Set cGH fields
-#if 0                           // dh::dbases
-      const ibbox& baseext = vdd.at(map)->bases.at(mglevel).at(reflevel).exterior;
-#endif
       const ibbox& baseext = vhh.at(map)->baseextents.at(mglevel).at(reflevel);
       const ibbox& ext = vdd.at(map)->boxes.at(mglevel).at(reflevel).at(component).exterior;
       const b2vect& obnds = vhh.at(map)->outer_boundaries(reflevel,component);
diff --git a/Carpet/CarpetInterp2/src/fasterp.cc b/Carpet/CarpetInterp2/src/fasterp.cc
index 66c9e9fd1..fcc874264 100644
--- a/Carpet/CarpetInterp2/src/fasterp.cc
+++ b/Carpet/CarpetInterp2/src/fasterp.cc
@@ -4,6 +4,7 @@
 #include <cstddef>
 #include <cstdlib>
 #include <iostream>
+#include <sstream>
 
 #include <cctk.h>
 #include <cctk_Parameters.h>
@@ -21,17 +22,6 @@ namespace CarpetInterp2 {
   
   
   
-  // Erik's gdb cannot print local variables in functions where an
-  // assert fails.  Hence the calls to assert are temporarily moved
-  // into a function of their own.
-  void ASSERT (bool const cond)
-  {
-    if (cond) return;
-    abort();
-  }
-  
-  
-  
   int const ipoison = -1234567890;
   CCTK_REAL const poison = -1.0e+12;
   
@@ -39,21 +29,34 @@ namespace CarpetInterp2 {
   template<> int get_poison () { return ipoison; }
   template<> CCTK_REAL get_poison () { return poison; }
   
-  // template <typename T>
-  // void fill (vector<T> & v, T const & val)
-  // {
-  //   fill (v.begin(), v.end(), val);
-  // }
+  template <typename T>
+  void fill (vector<T> & v, T const & val)
+  {
+    // fill (v.begin(), v.end(), val);
+#pragma omp parallel for
+    for (int n=0; n<int(v.size()); ++n) {
+      v.AT(n) = val;
+    }
+  }
   
   template <typename T>
   void fill_with_poison (vector<T> & v)
   {
 #ifndef NDEBUG
-    // fill (v, get_poison<T>());
-    fill (v.begin(), v.end(), get_poison<T>());
+    fill (v, get_poison<T>());
+    // fill (v.begin(), v.end(), get_poison<T>());
 #endif
   }
   
+  template <typename T>
+  void check_for_poison (vector<T> const & v)
+  {
+#pragma omp parallel for
+    for (int n=0; n<int(v.size()); ++n) {
+      assert (v.AT(n) != get_poison<T>());
+    }
+  }
+  
   
   
   // Create an MPI datatype for location information
@@ -64,18 +67,24 @@ namespace CarpetInterp2 {
     static MPI_Datatype newtype;
     if (not initialised) {
       static fasterp_iloc_t s;
-#define ARRSIZE(m) (sizeof(s.m) / sizeof(s.m[0]))
-#define OFFSET(m)  ((char*)&(s.m) - (char*)&(s)) // offsetof doesn't work (why?)
-#define SIZE       (sizeof(s))
+#define ENTRY(type, name)                                               \
+      {                                                                 \
+        sizeof s.name / sizeof(type), /* count elements */              \
+          (char*)&s.name - (char*)&s, /* offsetof doesn't work (why?) */ \
+          dist::datatype<type>()      /* find MPI datatype */           \
+          }
       dist::mpi_struct_descr_t const descr[] = {
-        {              3, OFFSET(mrc   ), MPI_INT                    },
-        {              1, OFFSET(ind3d ), MPI_INT                    },
-        {ARRSIZE(offset), OFFSET(offset), dist::datatype<CCTK_REAL>()},
-        {              1, SIZE          , MPI_UB                     }
+        ENTRY(int, mrc),
+#ifdef CARPET_DEBUG
+        ENTRY(int, pn),
+        ENTRY(int, ipos),
+        ENTRY(int, ind),
+#endif
+        ENTRY(int, ind3d),
+        ENTRY(CCTK_REAL, offset),
+        {1, sizeof(s), MPI_UB}
       };
-#undef ARRSIZE
-#undef OFFSET
-#undef SIZE
+#undef ENTRY
       dist::create_mpi_datatype
         (sizeof(descr) / sizeof(descr[0]), descr, newtype);
       initialised = true;
@@ -90,6 +99,11 @@ namespace CarpetInterp2 {
   {
     os << "fasterp_iloc_t{"
        << "mrc=" << mrc << ","
+#ifdef CARPET_DEBUG
+       << "pn=" << pn << ","
+       << "ipos=" << ipos << ","
+       << "ind=" << ind << ","
+#endif
        << "ind3d=" << ind3d << ","
        << "offset=" << offset << "}";
   }
@@ -124,7 +138,7 @@ namespace CarpetInterp2 {
   mrc_t::
   mrc_t (int ind)
   {
-    ASSERT (ind >= 0);
+    assert (ind >= 0);
     int const ind0 = ind;
     c = ind % components;
     ind /= components;
@@ -132,8 +146,8 @@ namespace CarpetInterp2 {
     ind /= reflevels;
     m = ind % maps;
     ind /= maps;
-    ASSERT (ind == 0);
-    ASSERT (get_ind() == ind0);
+    assert (ind == 0);
+    assert (get_ind() == ind0);
   }
   
   void
@@ -146,6 +160,16 @@ namespace CarpetInterp2 {
   
   
   
+  void
+  pn_t::
+  output (ostream& os)
+    const
+  {
+    os << "pn{p=" << p << ",n=" << n << "}";
+  }
+  
+  
+  
   //////////////////////////////////////////////////////////////////////////////
   //////////////////////////////////////////////////////////////////////////////
   //////////////////////////////////////////////////////////////////////////////
@@ -161,9 +185,18 @@ namespace CarpetInterp2 {
                 ivect const & lsh,
                 int const order)
   {
-    ASSERT (order <= max_order);
+    assert (order <= max_order);
     CCTK_REAL const eps = 1.0e-12;
     
+#ifdef CARPET_DEBUG
+    mrc  = iloc.mrc;
+    pn   = iloc.pn;
+    ipos = iloc.ipos;
+    
+    // Save lsh
+    saved_lsh = lsh;
+#endif
+    
 #ifndef NDEBUG
     // Poison all coefficients
     for (int d=0; d<dim; ++d) {
@@ -174,9 +207,14 @@ namespace CarpetInterp2 {
 #endif
     
     if (order == 0) {
+      // Special case
       for (int d=0; d<dim; ++d) {
         exact[d] = true;
       }
+      
+#ifdef CARPET_DEBUG
+      ind   = iloc.ind;
+#endif
       ind3d = iloc.ind3d;
       return;
     }
@@ -193,7 +231,7 @@ namespace CarpetInterp2 {
     }
     rvect const offset = iloc.offset - rvect(iorigin);
     // Ensure that the stencil is centred
-    ASSERT (all (offset >= 0.5*(order-1) and offset < 0.5*(order+1)));
+    assert (all (offset >= 0.5*(order-1) and offset < 0.5*(order+1)));
     
     for (int d=0; d<dim; ++d) {
       // C_n = PRODUCT_m,m!=n [(x - x_m) / (x_n - x_m)]
@@ -224,12 +262,22 @@ namespace CarpetInterp2 {
         for (int n=0; n<=order; ++n) {
           s += coeffs[d][n];
         }
-        ASSERT (abs(s - 1.0) <= eps);
+        assert (abs(s - 1.0) <= eps);
       }
     }
     
     // Set 3D location of stencil anchor
+#ifdef CARPET_DEBUG
+    ind = iloc.ind + iorigin;
+    if (not (all (ind>=0 and ind+either(exact,0,order)<lsh))) {
+      cout << "*this=" << *this << " order=" << order << " lsh=" << lsh << endl;
+    }
+    assert (all (ind>=0 and ind+either(exact,0,order)<lsh));
+#endif
     ind3d = iloc.ind3d + index(lsh, iorigin);
+#ifdef CARPET_DEBUG
+    assert (index(lsh,ind) == ind3d);
+#endif
   }
   
   
@@ -250,26 +298,35 @@ namespace CarpetInterp2 {
                CCTK_REAL * restrict const vals)
     const
   {
-    ASSERT (O0 == 0 or not exact[0]);
-    ASSERT (O1 == 0 or not exact[1]);
-    ASSERT (O2 == 0 or not exact[2]);
+#ifdef CARPET_DEBUG
+    assert (all (lsh == saved_lsh));
+#endif
+    assert (O0 == 0 or not exact[0]);
+    assert (O1 == 0 or not exact[1]);
+    assert (O2 == 0 or not exact[2]);
     
     size_t const di = 1;
     size_t const dj = di * lsh[0];
     size_t const dk = dj * lsh[1];
     
+#ifdef CARPET_DEBUG
+    assert (all (ind>=0 and ind+ivect(O0,O1,O2)<lsh));
+    assert (ind3d == index(lsh,ind));
+    assert (int(di*ind[0] + dj*ind[1] + dk*ind[2]) == index(lsh,ind));
+#endif
+    
     for (size_t v=0; v<varptrs.size(); ++v) {
       vals[v] = 0.0;
     }
     
     for (size_t k=0; k<=O2; ++k) {
-      ASSERT (O2 == 0 or coeffs[2][k] != poison);
+      assert (O2 == 0 or coeffs[2][k] != poison);
       CCTK_REAL const coeff_k = (O2==0 ? 1.0 : coeffs[2][k]);
       for (size_t j=0; j<=O1; ++j) {
-        ASSERT (O1 == 0 or coeffs[1][j] != poison);
+        assert (O1 == 0 or coeffs[1][j] != poison);
         CCTK_REAL const coeff_jk = coeff_k * (O1==0 ? 1.0 : coeffs[1][j]);
         for (size_t i=0; i<=O0; ++i) {
-          ASSERT (O0 == 0 or coeffs[0][i] != poison);
+          assert (O0 == 0 or coeffs[0][i] != poison);
           CCTK_REAL const coeff_ijk = coeff_jk * (O0==0 ? 1.0 : coeffs[0][i]);
           
           for (size_t v=0; v<varptrs.size(); ++v) {
@@ -279,6 +336,18 @@ namespace CarpetInterp2 {
         }
       }
     }
+#ifdef CARPET_DEBUG
+#  if 0
+    for (size_t v=0; v<varptrs.size(); ++v) {
+      vals[v] = ind[0] + 1000 * (ind[1] + 1000 * ind[2]);
+    } // for v
+#  endif
+#  if 0
+    for (size_t v=0; v<varptrs.size(); ++v) {
+      vals[v] = pn.p * 1000000 + pn.n;
+    } // for v
+#  endif
+#endif
   }
   
   
@@ -357,8 +426,41 @@ namespace CarpetInterp2 {
     default:
       // Add higher orders here as desired
       CCTK_WARN (CCTK_WARN_ABORT, "Interpolation orders larger than 11 are not yet implemented");
-      ASSERT (0);
+      assert (0);
+    }
+  }
+  
+  
+  
+  void
+  fasterp_src_loc_t::
+  output (ostream& os)
+    const
+  {
+    os << "fasterp_src_loc_t{";
+    os << "coeffs=[";
+    for (int d=0; d<dim; ++d) {
+      if (d>0) os << ",";
+      os << "[";
+      for (int n=0; n<=max_order; ++n) {
+        if (n>0) os << ",";
+        os << coeffs[d][n];
+      }
+      os << "]";
     }
+    os << "],";
+    os << "exact=" << exact << ",";
+#ifdef CARPET_DEBUG
+    os << "pn=" << pn << ",";
+    os << "mrc=" << mrc << ",";
+    os << "ipos=" << ipos << ",";
+    os << "ind=" << ind << ",";
+#endif
+    os << "ind3d=" << ind3d;
+#ifdef CARPET_DEBUG
+    os << "," << "saved_lsh=" << saved_lsh;
+#endif
+    os << "}";
   }
   
   
@@ -399,7 +501,7 @@ namespace CarpetInterp2 {
         (cctkGH, dim, npoints,
          coords,
          &local_locations.maps.front(), local_coords, NULL, NULL);
-      ASSERT (not ierr);
+      assert (not ierr);
       
     } else {
       // This is a single-patch simulation
@@ -461,14 +563,21 @@ namespace CarpetInterp2 {
     vector<rvect> lower (Carpet::maps);
     vector<rvect> upper (Carpet::maps);
     vector<rvect> delta (Carpet::maps); // spacing on finest possible grid
+    vector<rvect> idelta (Carpet::maps); // inverse spacing
     for (int m=0; m<Carpet::maps; ++m) {
       jvect gsh;
       int const ierr =
         GetCoordRange (cctkGH, m, Carpet::mglevel, dim,
                        & gsh[0],
                        & lower.AT(m)[0], & upper.AT(m)[0], & delta.AT(m)[0]);
-      ASSERT (not ierr);
+      assert (not ierr);
       delta.AT(m) /= Carpet::maxspacereflevelfact;
+      idelta.AT(m) = 1.0 / delta.AT(m);
+      if (veryverbose) {
+        cout << "GetCoordRange[" << m << "]: lower=" << lower.AT(m) << " upper=" << upper.AT(m) << " delta=" << delta.AT(m) << endl;
+      }
+
+
     }
     
     // Calculate refinement levels, components, and integer grid point
@@ -478,8 +587,8 @@ namespace CarpetInterp2 {
     vector<int> proc (npoints);
     fill_with_poison (proc);
     vector<int> nlocs (nprocs, 0);
-    int n_nz_nlocs = 0;
-    ASSERT (Carpet::is_level_mode());
+    assert (Carpet::is_level_mode());
+#pragma omp parallel for
     for (int n=0; n<npoints; ++n) {
       int const m = locations.maps.AT(n);
       rvect const pos (locations.coords[0].AT(n),
@@ -487,10 +596,11 @@ namespace CarpetInterp2 {
                        locations.coords[2].AT(n));
       
       gh const * const hh = Carpet::vhh.AT(m);
-      ibbox const & baseext = hh->baseextent(Carpet::mglevel, 0);
-      rvect const rpos =
-        (pos - lower.AT(m)) / (upper.AT(m) - lower.AT(m)) *
-        rvect (baseext.upper() - baseext.lower());
+      // ibbox const & baseext = hh->baseextent(Carpet::mglevel, 0);
+      // rvect const rpos =
+      //   (pos - lower.AT(m)) / (upper.AT(m) - lower.AT(m)) *
+      //   rvect (baseext.upper() - baseext.lower());
+      rvect const rpos = (pos - lower.AT(m)) * idelta.AT(m);
       
       // Find refinement level and component
       int rl, c;
@@ -499,38 +609,69 @@ namespace CarpetInterp2 {
                            Carpet::mglevel,
                            Carpet::reflevel, Carpet::reflevel+1,
                            rl, c, ipos);
-      ASSERT (rl>=0 and c>=0);
+      if (not (rl>=0 and c>=0)) {
+#pragma omp critical
+        {
+          ostringstream msg;
+          msg << "Interpolation point " << n << " on map " << m << " "
+              << "at " << pos << " is outside of the grid hierarchy";
+          CCTK_WARN (CCTK_WARN_ABORT, msg.str().c_str());
+        }
+      }
+      assert (rl>=0 and c>=0);
       
       ibbox const & ext =
         Carpet::vdd.AT(m)->boxes.AT(Carpet::mglevel).AT(rl).AT(c).exterior;
       rvect dpos = rpos - rvect(ipos);
       
       // Convert from Carpet indexing to grid point indexing
-      ASSERT (all (ipos % ext.stride() == ivect(0)));
+      assert (all (ipos % ext.stride() == ivect(0)));
       ipos /= ext.stride();
       dpos /= rvect(ext.stride());
-      ASSERT (all (abs(dpos) <= rvect(0.5)));
+      assert (all (abs(dpos) <= rvect(0.5)));
       
       ivect const ind = ipos - ext.lower() / ext.stride();
       ivect const lsh = ext.shape() / ext.stride();
-      int const ind3d = index (lsh, ind);
-      // TODO: ASSERT that there are enough ghost zones
+      int const ind3d = index(lsh, ind);
+#if 0
+      ENTER_SINGLEMAP_MODE (cctkGH, m, CCTK_GF) {
+        ENTER_LOCAL_MODE (cctkGH, c, CCTK_GF) {
+          CCTK_REAL const * restrict const xptr = (CCTK_REAL const *) CCTK_VarDataPtr (cctkGH, 0, "grid::x");
+          CCTK_REAL const * restrict const yptr = (CCTK_REAL const *) CCTK_VarDataPtr (cctkGH, 0, "grid::y");
+          CCTK_REAL const * restrict const zptr = (CCTK_REAL const *) CCTK_VarDataPtr (cctkGH, 0, "grid::z");
+          assert (xptr);
+          assert (yptr);
+          assert (zptr);
+          cout << "CI2 map=" << m << " pos=" << pos << " ind=" << ind << " x=" << xptr[ind3d] << " y=" << yptr[ind3d] << " z=" << zptr[ind3d] << endl;
+        } LEAVE_LOCAL_MODE;
+      } LEAVE_SINGLEMAP_MODE;
+#endif
+      // TODO: assert that there are enough ghost zones
       
       // Store result
       fasterp_iloc_t & iloc = ilocs.AT(n);
       iloc.mrc    = mrc_t(m, rl, c);
+#ifdef CARPET_DEBUG
+      iloc.pn.p   = dist::rank();
+      iloc.pn.n   = n;
+      iloc.ipos   = ipos * ext.stride();
+      iloc.ind    = ind;
+#endif
       iloc.ind3d  = ind3d;
       iloc.offset = dpos;
       
       // Find source processor
       int const p = Carpet::vhh.AT(m)->processor(rl,c);
       proc.AT(n) = p;
-      if (nlocs.AT(p) == 0) ++n_nz_nlocs;
+#pragma omp atomic
       ++ nlocs.AT(p);
       
       // Output
       if (veryverbose) {
-        cout << "Point #" << n << " at " << pos << ": iloc " << iloc << endl;
+#pragma omp critical
+        {
+          cout << "Point #" << n << " at " << pos << ": iloc " << iloc << endl;
+        }
       }
     }
     
@@ -542,6 +683,12 @@ namespace CarpetInterp2 {
     // communicate.
     if (verbose) CCTK_INFO ("Determine set of communicating processors");
     {
+      int n_nz_nlocs = 0;
+      for (int p=0; p<nprocs; ++p) {
+        if (nlocs.AT(p) > 0) {
+          ++ n_nz_nlocs;
+        }
+      }
       recv_descr.procs.resize (n_nz_nlocs);
       recv_descr.procinds.resize (nprocs, -1);
       int pp = 0;
@@ -556,8 +703,8 @@ namespace CarpetInterp2 {
           offset += nlocs.AT(p);
         }
       }
-      ASSERT (pp == n_nz_nlocs);
-      ASSERT (offset == npoints);
+      assert (pp == n_nz_nlocs);
+      assert (offset == npoints);
       recv_descr.npoints = npoints;
     }
     
@@ -574,25 +721,31 @@ namespace CarpetInterp2 {
         index.AT(pp) = recv_descr.procs.AT(pp).offset;
       }
       recv_descr.index.resize (npoints);
+      // TODO: parallelise with OpenMP
       for (int n=0; n<npoints; ++n) {
         int const p = proc.AT(n);
         int const pp = recv_descr.procinds.AT(p);
-        ASSERT (pp >= 0);
+        assert (pp >= 0);
         recv_descr.index.AT(n) = index.AT(pp);
         ++index.AT(pp);
       }
       for (int pp=0; pp<int(recv_descr.procs.size()); ++pp) {
         int const recv_npoints = index.AT(pp) - recv_descr.procs.AT(pp).offset;
-        ASSERT (recv_npoints == recv_descr.procs.AT(pp).npoints);
+        assert (recv_npoints == recv_descr.procs.AT(pp).npoints);
       }
-      vector<bool> received (npoints, false);
+#ifndef NDEBUG
+      vector<bool> received (npoints);
+      fill (received, false);
+#pragma omp parallel for
       for (int n=0; n<npoints; ++n) {
         assert (not received.AT(n));
         received.AT(n) = true;
       }
+#pragma omp parallel for
       for (int n=0; n<npoints; ++n) {
         assert (received.AT(n));
       }
+#endif
     }
     
     
@@ -602,7 +755,8 @@ namespace CarpetInterp2 {
     if (verbose) {
       CCTK_INFO ("Count and exchange number of communicated grid points");
     }
-    vector<int> recv_npoints (nprocs, 0), recv_offsets (nprocs);
+    vector<int> recv_npoints (nprocs), recv_offsets (nprocs);
+    fill (recv_npoints, 0);
     fill_with_poison (recv_offsets);
     {
       int offset = 0;
@@ -612,7 +766,7 @@ namespace CarpetInterp2 {
         recv_offsets.AT(p) = offset;
         offset += recv_npoints.AT(p);
       }
-      ASSERT (offset == npoints);
+      assert (offset == npoints);
     }
     vector<int> send_npoints (nprocs), send_offsets (nprocs);
     fill_with_poison (send_npoints);
@@ -629,6 +783,7 @@ namespace CarpetInterp2 {
     // Scatter the location information into a send-array, and
     // exchange it with MPI
     vector<fasterp_iloc_t> scattered_ilocs(npoints);
+#pragma omp parallel for
     for (int n=0; n<npoints; ++n) {
       scattered_ilocs.AT(recv_descr.index.AT(n)) = ilocs.AT(n);
     }
@@ -640,20 +795,35 @@ namespace CarpetInterp2 {
        fasterp_iloc_t::mpi_datatype(),
        comm_world);
     
+#ifdef CARPET_DEBUG
+    // Ensure that the ilocs were sent to the right processor
+    for (int p=0; p<nprocs; ++p) {
+#pragma omp parallel for
+      for (int n=0; n<send_npoints.at(p); ++n) {
+        fasterp_iloc_t const & iloc = gathered_ilocs.at(send_offsets.at(p) + n);
+        mrc_t const & mrc = iloc.mrc;
+        int const m  = mrc.m;
+        int const rl = mrc.rl;
+        int const c  = mrc.c;
+        assert (Carpet::vhh.AT(m)->is_local(rl,c));
+      }
+    }
+#endif
+    
     
     
     // Fill in send descriptors
     send_descr.npoints = npoints_send;
     
     {
-      int n_nz_recv_npoints = 0;
+      int n_nz_send_npoints = 0;
       for (int p=0; p<nprocs; ++p) {
-        if (recv_npoints.AT(p) > 0) ++n_nz_recv_npoints;
+        if (send_npoints.AT(p) > 0) ++n_nz_send_npoints;
       }
-      send_descr.procs.resize (n_nz_recv_npoints);
+      send_descr.procs.resize (n_nz_send_npoints);
       int pp = 0;
       for (int p=0; p<nprocs; ++p) {
-        if (recv_npoints.AT(p) > 0) {
+        if (send_npoints.AT(p) > 0) {
           send_proc_t & send_proc = send_descr.procs.AT(pp);
           send_proc.p       = p;
           send_proc.offset  = send_offsets.AT(p);
@@ -661,7 +831,7 @@ namespace CarpetInterp2 {
           ++pp;
         }
       }
-      ASSERT (pp == n_nz_recv_npoints);
+      assert (pp == n_nz_send_npoints);
     }
     
     
@@ -675,24 +845,27 @@ namespace CarpetInterp2 {
       vector<int> mrc2comp (maxmrc, -1);
       vector<int> comp2mrc (maxmrc);
       fill_with_poison (comp2mrc);
-      int comps = 0;
+      int ncomps = 0;
       
-      vector<int> npoints_comp (maxmrc, 0);
+      vector<int> npoints_comp (maxmrc);
+      fill (npoints_comp, 0);
       
+      // TODO: parallelise with OpenMP
       for (int n=0; n<send_proc.npoints; ++n) {
         fasterp_iloc_t const & iloc = gathered_ilocs.AT(send_proc.offset + n);
         int const mrc = iloc.mrc.get_ind();
         if (mrc2comp.AT(mrc) == -1) {
-          mrc2comp.AT(mrc) = comps;
-          comp2mrc.AT(comps) = mrc;
-          ++ comps;
+          mrc2comp.AT(mrc) = ncomps;
+          comp2mrc.AT(ncomps) = mrc;
+          ++ ncomps;
         }
         ++ npoints_comp.AT(mrc);
       }
-      send_proc.comps.resize(comps);
+      assert (ncomps <= maxmrc);
+      send_proc.comps.resize(ncomps);
       
       int offset = 0;
-      for (int comp=0; comp<comps; ++comp) {
+      for (int comp=0; comp<ncomps; ++comp) {
         send_comp_t & send_comp = send_proc.comps.AT(comp);
         int const mrc = comp2mrc.AT(comp);
         send_comp.mrc = mrc;
@@ -702,6 +875,7 @@ namespace CarpetInterp2 {
         int const m  = themrc.m;
         int const rl = themrc.rl;
         int const c  = themrc.c;
+        assert (Carpet::vhh.AT(m)->is_local(rl,c));
         ibbox const & ext =
           Carpet::vdd.AT(m)->boxes.AT(Carpet::mglevel).AT(rl).AT(c).exterior;
         send_comp.lsh = ext.shape() / ext.stride();
@@ -710,10 +884,12 @@ namespace CarpetInterp2 {
         send_comp.npoints = npoints_comp.AT(mrc);
         offset += send_comp.npoints;
       }
-      ASSERT (offset == send_proc.npoints);
+      assert (offset == send_proc.npoints);
       
       send_proc.index.resize (send_proc.npoints);
       fill_with_poison (send_proc.index);
+      // TODO: This is not parallel!  Loop over comps instead?
+      // #pragma omp parallel for
       for (int n=0; n<send_proc.npoints; ++n) {
         fasterp_iloc_t const & iloc = gathered_ilocs.AT(send_proc.offset + n);
         int const mrc = iloc.mrc.get_ind();
@@ -727,13 +903,84 @@ namespace CarpetInterp2 {
         send_comp.locs.push_back (sloc);
       }
       
-      for (int comp=0; comp<comps; ++comp) {
+      for (int comp=0; comp<ncomps; ++comp) {
         send_comp_t & send_comp = send_proc.comps.AT(comp);
         assert (int(send_comp.locs.size()) == send_comp.npoints);
       }
       
+#ifndef NDEBUG
+      {
+        vector<bool> used(send_proc.npoints, false);
+        for (int n=0; n<send_proc.npoints; ++n) {
+          assert (not used.AT(send_proc.index.AT(n)));
+          used.AT(send_proc.index.AT(n)) = true;
+        }
+        for (int n=0; n<send_proc.npoints; ++n) {
+          assert (used.AT(send_proc.index.AT(n)));
+        }
+      }
+#endif
+      
+#ifdef CARPET_DEBUG
+      for (int comp=0; comp<ncomps; ++comp) {
+        send_comp_t const & send_comp = send_proc.comps.at(comp);
+        assert (int(send_comp.locs.size()) == send_comp.npoints);
+        for (int n=0; n<send_comp.npoints; ++n) {
+          fasterp_src_loc_t const & sloc = send_comp.locs.at(n);
+          assert (sloc.mrc == send_comp.mrc);
+          assert (all (sloc.saved_lsh == send_comp.lsh));
+        }
+      }
+#endif
+      
     } // for pp
     
+#ifdef CARPET_DEBUG
+    {
+      if (verbose) CCTK_INFO ("Compare send and receive counts");
+      
+      vector<int> recv_count (dist::size());
+      fill (recv_count, 0);
+      for (size_t pp=0; pp<recv_descr.procs.size(); ++pp) {
+        recv_proc_t const & recv_proc = recv_descr.procs.AT(pp);
+        assert (recv_count.AT(recv_proc.p) == 0);
+        assert (recv_proc.npoints > 0);
+        recv_count.AT(recv_proc.p) = recv_proc.npoints;
+      }
+      
+      vector<int> send_count (dist::size());
+      fill (send_count, 0);
+      for (size_t pp=0; pp<send_descr.procs.size(); ++pp) {
+        send_proc_t const & send_proc = send_descr.procs.AT(pp);
+        assert (send_count.AT(send_proc.p) == 0);
+        assert (send_proc.npoints > 0);
+        send_count.AT(send_proc.p) = send_proc.npoints;
+      }
+      
+      {
+        vector<int> tmp_count (dist::size());
+        MPI_Alltoall (&send_count.front(), 1, MPI_INT,
+                      &tmp_count .front(), 1, MPI_INT,
+                      dist::comm());
+        swap (send_count, tmp_count);
+      }
+      bool error = false;
+      for (int p=0; p<dist::size(); ++p) {
+        if (not (send_count.AT(p) == recv_count.AT(p))) {
+          ostringstream buf;
+          buf << "Error: processor " << p << " "
+              << "sends me " << send_count.AT(p) << " points, "
+              << "while I receive " << recv_count.AT(p) << " from it";
+          CCTK_WARN (CCTK_WARN_ALERT, buf.str().c_str());
+          error = true;
+        }
+      }
+      if (error) {
+        CCTK_WARN (CCTK_WARN_ABORT, "Internal error in interpolation setup");
+      }
+    }
+#endif
+    
     if (verbose) CCTK_INFO ("Done.");
   }
   
@@ -758,33 +1005,36 @@ namespace CarpetInterp2 {
   {
     DECLARE_CCTK_PARAMETERS;
     
+    // Desired time level
+    int const tl = 0;           // current time
+    
     size_t const nvars = varinds.size();
     if (verbose) CCTK_VInfo (CCTK_THORNSTRING,
                              "Interpolating %d variables", int(nvars));
-    ASSERT (values.size() == nvars);
+    assert (values.size() == nvars);
     for (size_t v=0; v<values.size(); ++v) {
       int const vi = varinds.AT(v);
-      ASSERT (vi >= 0 and vi <= CCTK_NumVars());
+      assert (vi >= 0 and vi <= CCTK_NumVars());
       int const gi = CCTK_GroupIndexFromVarI (vi);
-      ASSERT (gi >= 0);
+      assert (gi >= 0);
       cGroup group;
       {
         int const ierr = CCTK_GroupData (gi, &group);
-        ASSERT (not ierr);
+        assert (not ierr);
       }
-      ASSERT (group.grouptype == CCTK_GF);
-      ASSERT (group.vartype == CCTK_VARIABLE_REAL);
-      ASSERT (group.dim == dim);
+      assert (group.grouptype == CCTK_GF);
+      assert (group.vartype == CCTK_VARIABLE_REAL);
+      assert (group.dim == dim);
 #if 0
       // Cannot be called in level mode
       cGroupDynamicData dyndata;
       {
         int const ierr = CCTK_GroupDynamicData (cctkGH, gi, &dyndata);
-        ASSERT (not ierr);
+        assert (not ierr);
       }
-      ASSERT (dyndata.activetimelevels >= 1);
+      assert (dyndata.activetimelevels > tl);
 #endif
-      ASSERT (values.AT(v) != NULL);
+      assert (values.AT(v) != NULL);
     }
     
     MPI_Comm & comm_world = * (MPI_Comm *) GetMPICommWorld (cctkGH);
@@ -795,6 +1045,10 @@ namespace CarpetInterp2 {
     vector<CCTK_REAL> recv_points (recv_descr.npoints * nvars);
     fill_with_poison (recv_points);
     vector<MPI_Request> recv_reqs (recv_descr.procs.size());
+#ifdef CARPET_DEBUG
+    vector<pn_t> recv_pn (recv_descr.npoints);
+    vector<MPI_Request> recv_reqs_pn (recv_descr.procs.size());
+#endif
     for (size_t pp=0; pp<recv_descr.procs.size(); ++pp) {
       recv_proc_t const & recv_proc = recv_descr.procs.AT(pp);
       
@@ -802,83 +1056,113 @@ namespace CarpetInterp2 {
                  recv_proc.npoints * nvars,
                  dist::datatype<CCTK_REAL>(), recv_proc.p, mpi_tag,
                  comm_world, & recv_reqs.AT(pp));
+#ifdef CARPET_DEBUG
+      MPI_Irecv (& recv_pn.AT(recv_proc.offset),
+                 recv_proc.npoints * 2,
+                 dist::datatype<int>(), recv_proc.p, mpi_tag,
+                 comm_world, & recv_reqs_pn.AT(pp));
+#endif
     }
     
     // Interpolate data and post Isends
     if (verbose) CCTK_INFO ("Interpolating and posting MPI_Isends");
+    // TODO: Use one arrays per processor?
     vector<CCTK_REAL> send_points (send_descr.npoints * nvars);
     fill_with_poison (send_points);
     vector<MPI_Request> send_reqs (send_descr.procs.size());
+#ifdef CARPET_DEBUG
+    vector<pn_t> send_pn (send_descr.npoints);
+    vector<MPI_Request> send_reqs_pn (send_descr.procs.size());
+#endif
     for (size_t pp=0; pp<send_descr.procs.size(); ++pp) {
       send_proc_t const & send_proc = send_descr.procs.AT(pp);
       
-      vector<CCTK_REAL>::iterator
-        destit = send_points.begin() + send_proc.offset * nvars;
-      vector<CCTK_REAL>::iterator const
-        endit = destit + send_proc.npoints * nvars;
-      
+      vector<CCTK_REAL> computed_points (send_proc.npoints * nvars);
+      fill_with_poison (computed_points);
+#ifdef CARPET_DEBUG
+      vector<pn_t> computed_pn (send_descr.npoints);
+#endif
       for (size_t comp=0; comp<send_proc.comps.size(); ++comp) {
         send_comp_t const & send_comp = send_proc.comps.AT(comp);
         
         int const m  = send_comp.mrc.m;
         int const rl = send_comp.mrc.rl;
         int const c  = send_comp.mrc.c;
-        int const tl = 0;
         
         // Get pointers to 3D data
         vector<CCTK_REAL const *> varptrs (nvars);
         for (size_t v=0; v<nvars; ++v) {
           int const gi = CCTK_GroupIndexFromVarI (varinds.AT(v));
-          ASSERT (gi >= 0);
+          assert (gi >= 0);
           int const vi = varinds.AT(v) - CCTK_FirstVarIndexI (gi);
-          ASSERT (vi >= 0);
+          assert (vi >= 0);
           varptrs.AT(v) =
             (CCTK_REAL const *)
             (* Carpet::arrdata.AT(gi).AT(m).data.AT(vi))
             (tl, rl, c, Carpet::mglevel)->storage();
-          ASSERT (varptrs.AT(v));
+          assert (varptrs.AT(v));
         }
         
-        for (size_t n=0; n<send_comp.locs.size(); ++n) {
-          ASSERT (destit + nvars <= endit);
+#pragma omp parallel for
+        for (int n=0; n<int(send_comp.locs.size()); ++n) {
+          size_t const ind = (send_comp.offset + n) * nvars;
           send_comp.locs.AT(n).interpolate
-            (send_comp.lsh, order, varptrs, & * destit);
-          destit += nvars;
+            (send_comp.lsh, order, varptrs, &computed_points.AT(ind));
+#ifdef CARPET_DEBUG
+          computed_pn.AT(send_comp.offset + n) = send_comp.locs.AT(n).pn;
+#endif
         }
         
-      } // for comps
-      
-      ASSERT (destit == endit);
+      } // for comp
       
       // Gather send points
-      vector<CCTK_REAL> gathered_send_points (send_proc.npoints * nvars);
-      fill_with_poison (gathered_send_points);
+#pragma omp parallel for
       for (int n=0; n<send_proc.npoints; ++n) {
-        int const nn = send_proc.offset + send_proc.index.AT(n);
+        size_t const nn = send_proc.index.AT(n);
         for (size_t v=0; v<nvars; ++v) {
-          gathered_send_points.AT(n * nvars + v) =
-            send_points.AT(nn * nvars + v);
+          send_points.AT((send_proc.offset + n) * nvars + v) =
+            computed_points.AT(nn * nvars + v);
         }
-      }
-      for (int n=0; n<int(send_proc.npoints * nvars); ++n) {
-        int const nn = send_proc.offset * nvars + n;
-        send_points.AT(nn) = gathered_send_points.AT(n);
+#ifdef CARPET_DEBUG
+        send_pn.AT(send_proc.offset + n) = computed_pn.AT(nn);
+#endif
       }
       // TODO: Use a scatter index list instead of a gather index
       // list, and combine the scattering with the calculation
       
+#ifdef CARPET_DEBUG
+#pragma omp parallel for
+      for (int n=0; n<send_proc.npoints; ++n) {
+        assert (send_pn.AT(send_proc.offset + n).p == send_proc.p);
+        if (n>0) {
+          assert (send_pn.AT(send_proc.offset + n  ).n >
+                  send_pn.AT(send_proc.offset + n-1).n);
+        }
+      }
+#endif
+      
       MPI_Isend (& send_points.AT(send_proc.offset * nvars),
                  send_proc.npoints * nvars,
                  dist::datatype<CCTK_REAL>(), send_proc.p, mpi_tag,
                  comm_world, & send_reqs.AT(pp));
+#ifdef CARPET_DEBUG
+      MPI_Isend (& send_pn.AT(send_proc.offset),
+                 send_proc.npoints * 2,
+                 dist::datatype<int>(), send_proc.p, mpi_tag,
+                 comm_world, & send_reqs_pn.AT(pp));
+#endif
     } // for pp
     
     // Wait for Irecvs to complete
     if (verbose) CCTK_INFO ("Waiting for MPI_Irevcs to complete");
     MPI_Waitall (recv_reqs.size(), & recv_reqs.front(), MPI_STATUSES_IGNORE);
+#ifdef CARPET_DEBUG
+    MPI_Waitall (recv_reqs.size(), & recv_reqs_pn.front(), MPI_STATUSES_IGNORE);
+#endif
     
     // Gather data
     if (verbose) CCTK_INFO ("Gathering data");
+#pragma omp parallel for
     for (int n=0; n<recv_descr.npoints; ++n) {
       size_t const nn = recv_descr.index.AT(n);
       for (size_t v=0; v<nvars; ++v) {
@@ -887,11 +1171,18 @@ namespace CarpetInterp2 {
         recv_points.AT(nn * nvars + v) = poison;
 #endif
       }
+#ifdef CARPET_DEBUG
+      assert (recv_pn.AT(nn).p == dist::rank());
+      assert (recv_pn.AT(nn).n == n);
+#endif
     }
     
     // Wait for Isends to complete
     if (verbose) CCTK_INFO ("Waiting for MPI_Isends to complete");
     MPI_Waitall (send_reqs.size(), & send_reqs.front(), MPI_STATUSES_IGNORE);
+#ifdef CARPET_DEBUG
+    MPI_Waitall (send_reqs.size(), & send_reqs_pn.front(), MPI_STATUSES_IGNORE);
+#endif
     
     if (verbose) CCTK_INFO ("Done.");
   }
diff --git a/Carpet/CarpetInterp2/src/fasterp.hh b/Carpet/CarpetInterp2/src/fasterp.hh
index d136ff900..3981f0a67 100644
--- a/Carpet/CarpetInterp2/src/fasterp.hh
+++ b/Carpet/CarpetInterp2/src/fasterp.hh
@@ -23,13 +23,15 @@ namespace CarpetInterp2 {
   
   int const dim = 3;
   
-  // An interpolation point descriptor requires (3 * (max_order+1) +
-  // 1) double precision values of memory
+  // Each interpolation point descriptor requires
+  //    (dim * (max_order+1) + 1)
+  // CCTK_REAL values of memory
   int const max_order = 5;
   
   
   
-  // Keep a map, refinement level, and component index together
+  // Keep a map, refinement level, and component index together, and
+  // map the triple to small integers
   struct mrc_t {
     static int maps;
     static int reflevels;
@@ -66,6 +68,11 @@ namespace CarpetInterp2 {
       return c + components * (rl + reflevels * m);
     }
     
+    bool operator== (mrc_t const & a) const
+    {
+      return m == a.m and rl == a.rl and c == a.c;
+    }
+    
     void output (ostream& os) const;
   };
   
@@ -73,8 +80,33 @@ namespace CarpetInterp2 {
   ostream& operator<< (ostream& os, mrc_t const & mrc)
   { mrc.output(os); return os; }
   
+  
+  
+  // Keep a processor and an integer index together
+  struct pn_t {
+    int p;
+    int n;
     
+    pn_t ()
+    {
+    }
     
+    pn_t (int const p_, int const n_)
+      : p(p_), n(n_)
+    {
+      assert (p>=0 and p<dist::size());
+      assert (n>=0);
+    }
+    
+    void output (ostream& os) const;
+  };
+  
+  inline
+  ostream& operator<< (ostream& os, pn_t const & pn)
+  { pn.output(os); return os; }
+  
+  
+  
   // A global location, given by its global coordinates
   struct fasterp_glocs_t {
     vector<CCTK_REAL> coords[dim];
@@ -106,8 +138,12 @@ namespace CarpetInterp2 {
   struct fasterp_iloc_t {
     mrc_t mrc;                  // map, refinement level, component
     
-    // ivect idx;
-    int ind3d;
+#ifdef CARPET_DEBUG
+    pn_t pn;                    // origin of this point
+    ivect ipos;                 // closest grid point (Carpet indexing)
+    ivect ind;                  // closest grid point (local indexing)
+#endif
+    int ind3d;                  // closest grid point
     rvect offset;               // in terms of grid points
     
     static MPI_Datatype mpi_datatype ();
@@ -130,9 +166,22 @@ namespace CarpetInterp2 {
     CCTK_REAL coeffs[dim][max_order+1]; // interpolation coefficients
     bvect exact;
     
-    // ivect idx;
+#ifdef CARPET_DEBUG
+  public:
+    pn_t pn;                    // origin of this point
+    mrc_t mrc;                  // map, refinement level, component
+    ivect ipos;                 // closest grid point (Carpet indexing)
+    ivect ind;                  // source grid point offset
+  private:
+#endif
     int ind3d;                  // source grid point offset
     
+#ifdef CARPET_DEBUG
+  public:
+    ivect saved_lsh;            // copy of lsh
+  private:
+#endif
+    
   public:
     void
     calc_stencil (fasterp_iloc_t const & iloc,
@@ -211,8 +260,8 @@ namespace CarpetInterp2 {
     int offset;
     int npoints;                // total number of sent points
     
-    vector<int> index;   // gather index list: index[mpi-location] =
-                         // calculation-location
+    // gather index list: index[mpi-location] = calculation-location
+    vector<int> index;
   };
   
   struct send_descr_t {
@@ -249,7 +298,8 @@ namespace CarpetInterp2 {
                  vector<CCTK_REAL *> & values)
       const;
     
-    size_t get_npoints ()
+    size_t
+    get_npoints ()
       const
     {
       return recv_descr.npoints;
author	Thomas Radke <tradke@damiana.damiana.admin>	2008-09-09 13:36:22 +0200
committer	Thomas Radke <tradke@damiana.damiana.admin>	2008-09-09 13:36:22 +0200
commit	4317fcbec3eca5d859adf9d41c3b6d45af0e0734 (patch)
tree	cb76316eb2e78d929996bb81648f409d4ad27a7e /Carpet
parent	e73ecc295bc6be5b8c4d409b0b3beb8e6a8e3401 (diff)
parent	28a33e7fe56c3a7c249e6c17160225f69df9119e (diff)