diff options
author | Thomas Radke <tradke@damiana.damiana.admin> | 2008-09-09 13:36:22 +0200 |
---|---|---|
committer | Thomas Radke <tradke@damiana.damiana.admin> | 2008-09-09 13:36:22 +0200 |
commit | 4317fcbec3eca5d859adf9d41c3b6d45af0e0734 (patch) | |
tree | cb76316eb2e78d929996bb81648f409d4ad27a7e /Carpet | |
parent | e73ecc295bc6be5b8c4d409b0b3beb8e6a8e3401 (diff) | |
parent | 28a33e7fe56c3a7c249e6c17160225f69df9119e (diff) |
Merge branch 'master' of carpetgit@carpetcode.dyndns.org:carpet
Diffstat (limited to 'Carpet')
-rw-r--r-- | Carpet/Carpet/src/Restrict.cc | 5 | ||||
-rw-r--r-- | Carpet/Carpet/src/modes.cc | 13 | ||||
-rw-r--r-- | Carpet/CarpetInterp2/src/fasterp.cc | 515 | ||||
-rw-r--r-- | Carpet/CarpetInterp2/src/fasterp.hh | 68 |
4 files changed, 462 insertions, 139 deletions
diff --git a/Carpet/Carpet/src/Restrict.cc b/Carpet/Carpet/src/Restrict.cc index 6df4666fd..a0a94ddd4 100644 --- a/Carpet/Carpet/src/Restrict.cc +++ b/Carpet/Carpet/src/Restrict.cc @@ -83,11 +83,6 @@ namespace Carpet { for (int v = 0; v < (int)arrdata.at(g).at(m).data.size(); ++v) { ggf *const gv = arrdata.at(g).at(m).data.at(v); -#if 0 - for (int c = 0; c < vhh.at(m)->components(reflevel); ++c) { - gv->ref_restrict (state, tl, reflevel, c, mglevel, time); - } -#endif gv->ref_restrict_all (state, tl, reflevel, mglevel, time); } } diff --git a/Carpet/Carpet/src/modes.cc b/Carpet/Carpet/src/modes.cc index 841710e16..1b092aab9 100644 --- a/Carpet/Carpet/src/modes.cc +++ b/Carpet/Carpet/src/modes.cc @@ -97,12 +97,6 @@ namespace Carpet { const int m = 0; const int c = CCTK_MyProc(cctkGH); -#if 0 - const ibbox& base = arrdata.at(group).at(m).hh->bases().at(ml).at(rl); -#endif -#if 0 // dh::dbases - const ibbox& baseext = arrdata.at(group).at(m).dd->bases.at(ml).at(rl).exterior; -#endif const ibbox& baseext = arrdata.at(group).at(m).hh->baseextents.at(ml).at(rl); const ibbox& ext = arrdata.at(group).at(m).dd->boxes.at(ml).at(rl).at(c).exterior; const b2vect& obnds = arrdata.at(group).at(m).hh->outer_boundaries(rl,c); @@ -352,10 +346,6 @@ namespace Carpet { } // Set grid shape -#if 0 // dh::dbases - const ibbox& coarseext = vdd.at(map)->bases.at(mglevel).at(0 ).exterior; - const ibbox& baseext = vdd.at(map)->bases.at(mglevel).at(reflevel).exterior; -#endif const ibbox& coarseext = vhh.at(map)->baseextents.at(mglevel).at(0 ); const ibbox& baseext = vhh.at(map)->baseextents.at(mglevel).at(reflevel); // assert (all (baseext.lower() % baseext.stride() == 0)); @@ -452,9 +442,6 @@ namespace Carpet { if (mc_grouptype == CCTK_GF) { // Set cGH fields -#if 0 // dh::dbases - const ibbox& baseext = vdd.at(map)->bases.at(mglevel).at(reflevel).exterior; -#endif const ibbox& baseext = vhh.at(map)->baseextents.at(mglevel).at(reflevel); const ibbox& ext = vdd.at(map)->boxes.at(mglevel).at(reflevel).at(component).exterior; const b2vect& obnds = vhh.at(map)->outer_boundaries(reflevel,component); diff --git a/Carpet/CarpetInterp2/src/fasterp.cc b/Carpet/CarpetInterp2/src/fasterp.cc index 66c9e9fd1..fcc874264 100644 --- a/Carpet/CarpetInterp2/src/fasterp.cc +++ b/Carpet/CarpetInterp2/src/fasterp.cc @@ -4,6 +4,7 @@ #include <cstddef> #include <cstdlib> #include <iostream> +#include <sstream> #include <cctk.h> #include <cctk_Parameters.h> @@ -21,17 +22,6 @@ namespace CarpetInterp2 { - // Erik's gdb cannot print local variables in functions where an - // assert fails. Hence the calls to assert are temporarily moved - // into a function of their own. - void ASSERT (bool const cond) - { - if (cond) return; - abort(); - } - - - int const ipoison = -1234567890; CCTK_REAL const poison = -1.0e+12; @@ -39,21 +29,34 @@ namespace CarpetInterp2 { template<> int get_poison () { return ipoison; } template<> CCTK_REAL get_poison () { return poison; } - // template <typename T> - // void fill (vector<T> & v, T const & val) - // { - // fill (v.begin(), v.end(), val); - // } + template <typename T> + void fill (vector<T> & v, T const & val) + { + // fill (v.begin(), v.end(), val); +#pragma omp parallel for + for (int n=0; n<int(v.size()); ++n) { + v.AT(n) = val; + } + } template <typename T> void fill_with_poison (vector<T> & v) { #ifndef NDEBUG - // fill (v, get_poison<T>()); - fill (v.begin(), v.end(), get_poison<T>()); + fill (v, get_poison<T>()); + // fill (v.begin(), v.end(), get_poison<T>()); #endif } + template <typename T> + void check_for_poison (vector<T> const & v) + { +#pragma omp parallel for + for (int n=0; n<int(v.size()); ++n) { + assert (v.AT(n) != get_poison<T>()); + } + } + // Create an MPI datatype for location information @@ -64,18 +67,24 @@ namespace CarpetInterp2 { static MPI_Datatype newtype; if (not initialised) { static fasterp_iloc_t s; -#define ARRSIZE(m) (sizeof(s.m) / sizeof(s.m[0])) -#define OFFSET(m) ((char*)&(s.m) - (char*)&(s)) // offsetof doesn't work (why?) -#define SIZE (sizeof(s)) +#define ENTRY(type, name) \ + { \ + sizeof s.name / sizeof(type), /* count elements */ \ + (char*)&s.name - (char*)&s, /* offsetof doesn't work (why?) */ \ + dist::datatype<type>() /* find MPI datatype */ \ + } dist::mpi_struct_descr_t const descr[] = { - { 3, OFFSET(mrc ), MPI_INT }, - { 1, OFFSET(ind3d ), MPI_INT }, - {ARRSIZE(offset), OFFSET(offset), dist::datatype<CCTK_REAL>()}, - { 1, SIZE , MPI_UB } + ENTRY(int, mrc), +#ifdef CARPET_DEBUG + ENTRY(int, pn), + ENTRY(int, ipos), + ENTRY(int, ind), +#endif + ENTRY(int, ind3d), + ENTRY(CCTK_REAL, offset), + {1, sizeof(s), MPI_UB} }; -#undef ARRSIZE -#undef OFFSET -#undef SIZE +#undef ENTRY dist::create_mpi_datatype (sizeof(descr) / sizeof(descr[0]), descr, newtype); initialised = true; @@ -90,6 +99,11 @@ namespace CarpetInterp2 { { os << "fasterp_iloc_t{" << "mrc=" << mrc << "," +#ifdef CARPET_DEBUG + << "pn=" << pn << "," + << "ipos=" << ipos << "," + << "ind=" << ind << "," +#endif << "ind3d=" << ind3d << "," << "offset=" << offset << "}"; } @@ -124,7 +138,7 @@ namespace CarpetInterp2 { mrc_t:: mrc_t (int ind) { - ASSERT (ind >= 0); + assert (ind >= 0); int const ind0 = ind; c = ind % components; ind /= components; @@ -132,8 +146,8 @@ namespace CarpetInterp2 { ind /= reflevels; m = ind % maps; ind /= maps; - ASSERT (ind == 0); - ASSERT (get_ind() == ind0); + assert (ind == 0); + assert (get_ind() == ind0); } void @@ -146,6 +160,16 @@ namespace CarpetInterp2 { + void + pn_t:: + output (ostream& os) + const + { + os << "pn{p=" << p << ",n=" << n << "}"; + } + + + ////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// @@ -161,9 +185,18 @@ namespace CarpetInterp2 { ivect const & lsh, int const order) { - ASSERT (order <= max_order); + assert (order <= max_order); CCTK_REAL const eps = 1.0e-12; +#ifdef CARPET_DEBUG + mrc = iloc.mrc; + pn = iloc.pn; + ipos = iloc.ipos; + + // Save lsh + saved_lsh = lsh; +#endif + #ifndef NDEBUG // Poison all coefficients for (int d=0; d<dim; ++d) { @@ -174,9 +207,14 @@ namespace CarpetInterp2 { #endif if (order == 0) { + // Special case for (int d=0; d<dim; ++d) { exact[d] = true; } + +#ifdef CARPET_DEBUG + ind = iloc.ind; +#endif ind3d = iloc.ind3d; return; } @@ -193,7 +231,7 @@ namespace CarpetInterp2 { } rvect const offset = iloc.offset - rvect(iorigin); // Ensure that the stencil is centred - ASSERT (all (offset >= 0.5*(order-1) and offset < 0.5*(order+1))); + assert (all (offset >= 0.5*(order-1) and offset < 0.5*(order+1))); for (int d=0; d<dim; ++d) { // C_n = PRODUCT_m,m!=n [(x - x_m) / (x_n - x_m)] @@ -224,12 +262,22 @@ namespace CarpetInterp2 { for (int n=0; n<=order; ++n) { s += coeffs[d][n]; } - ASSERT (abs(s - 1.0) <= eps); + assert (abs(s - 1.0) <= eps); } } // Set 3D location of stencil anchor +#ifdef CARPET_DEBUG + ind = iloc.ind + iorigin; + if (not (all (ind>=0 and ind+either(exact,0,order)<lsh))) { + cout << "*this=" << *this << " order=" << order << " lsh=" << lsh << endl; + } + assert (all (ind>=0 and ind+either(exact,0,order)<lsh)); +#endif ind3d = iloc.ind3d + index(lsh, iorigin); +#ifdef CARPET_DEBUG + assert (index(lsh,ind) == ind3d); +#endif } @@ -250,26 +298,35 @@ namespace CarpetInterp2 { CCTK_REAL * restrict const vals) const { - ASSERT (O0 == 0 or not exact[0]); - ASSERT (O1 == 0 or not exact[1]); - ASSERT (O2 == 0 or not exact[2]); +#ifdef CARPET_DEBUG + assert (all (lsh == saved_lsh)); +#endif + assert (O0 == 0 or not exact[0]); + assert (O1 == 0 or not exact[1]); + assert (O2 == 0 or not exact[2]); size_t const di = 1; size_t const dj = di * lsh[0]; size_t const dk = dj * lsh[1]; +#ifdef CARPET_DEBUG + assert (all (ind>=0 and ind+ivect(O0,O1,O2)<lsh)); + assert (ind3d == index(lsh,ind)); + assert (int(di*ind[0] + dj*ind[1] + dk*ind[2]) == index(lsh,ind)); +#endif + for (size_t v=0; v<varptrs.size(); ++v) { vals[v] = 0.0; } for (size_t k=0; k<=O2; ++k) { - ASSERT (O2 == 0 or coeffs[2][k] != poison); + assert (O2 == 0 or coeffs[2][k] != poison); CCTK_REAL const coeff_k = (O2==0 ? 1.0 : coeffs[2][k]); for (size_t j=0; j<=O1; ++j) { - ASSERT (O1 == 0 or coeffs[1][j] != poison); + assert (O1 == 0 or coeffs[1][j] != poison); CCTK_REAL const coeff_jk = coeff_k * (O1==0 ? 1.0 : coeffs[1][j]); for (size_t i=0; i<=O0; ++i) { - ASSERT (O0 == 0 or coeffs[0][i] != poison); + assert (O0 == 0 or coeffs[0][i] != poison); CCTK_REAL const coeff_ijk = coeff_jk * (O0==0 ? 1.0 : coeffs[0][i]); for (size_t v=0; v<varptrs.size(); ++v) { @@ -279,6 +336,18 @@ namespace CarpetInterp2 { } } } +#ifdef CARPET_DEBUG +# if 0 + for (size_t v=0; v<varptrs.size(); ++v) { + vals[v] = ind[0] + 1000 * (ind[1] + 1000 * ind[2]); + } // for v +# endif +# if 0 + for (size_t v=0; v<varptrs.size(); ++v) { + vals[v] = pn.p * 1000000 + pn.n; + } // for v +# endif +#endif } @@ -357,8 +426,41 @@ namespace CarpetInterp2 { default: // Add higher orders here as desired CCTK_WARN (CCTK_WARN_ABORT, "Interpolation orders larger than 11 are not yet implemented"); - ASSERT (0); + assert (0); + } + } + + + + void + fasterp_src_loc_t:: + output (ostream& os) + const + { + os << "fasterp_src_loc_t{"; + os << "coeffs=["; + for (int d=0; d<dim; ++d) { + if (d>0) os << ","; + os << "["; + for (int n=0; n<=max_order; ++n) { + if (n>0) os << ","; + os << coeffs[d][n]; + } + os << "]"; } + os << "],"; + os << "exact=" << exact << ","; +#ifdef CARPET_DEBUG + os << "pn=" << pn << ","; + os << "mrc=" << mrc << ","; + os << "ipos=" << ipos << ","; + os << "ind=" << ind << ","; +#endif + os << "ind3d=" << ind3d; +#ifdef CARPET_DEBUG + os << "," << "saved_lsh=" << saved_lsh; +#endif + os << "}"; } @@ -399,7 +501,7 @@ namespace CarpetInterp2 { (cctkGH, dim, npoints, coords, &local_locations.maps.front(), local_coords, NULL, NULL); - ASSERT (not ierr); + assert (not ierr); } else { // This is a single-patch simulation @@ -461,14 +563,21 @@ namespace CarpetInterp2 { vector<rvect> lower (Carpet::maps); vector<rvect> upper (Carpet::maps); vector<rvect> delta (Carpet::maps); // spacing on finest possible grid + vector<rvect> idelta (Carpet::maps); // inverse spacing for (int m=0; m<Carpet::maps; ++m) { jvect gsh; int const ierr = GetCoordRange (cctkGH, m, Carpet::mglevel, dim, & gsh[0], & lower.AT(m)[0], & upper.AT(m)[0], & delta.AT(m)[0]); - ASSERT (not ierr); + assert (not ierr); delta.AT(m) /= Carpet::maxspacereflevelfact; + idelta.AT(m) = 1.0 / delta.AT(m); + if (veryverbose) { + cout << "GetCoordRange[" << m << "]: lower=" << lower.AT(m) << " upper=" << upper.AT(m) << " delta=" << delta.AT(m) << endl; + } + + } // Calculate refinement levels, components, and integer grid point @@ -478,8 +587,8 @@ namespace CarpetInterp2 { vector<int> proc (npoints); fill_with_poison (proc); vector<int> nlocs (nprocs, 0); - int n_nz_nlocs = 0; - ASSERT (Carpet::is_level_mode()); + assert (Carpet::is_level_mode()); +#pragma omp parallel for for (int n=0; n<npoints; ++n) { int const m = locations.maps.AT(n); rvect const pos (locations.coords[0].AT(n), @@ -487,10 +596,11 @@ namespace CarpetInterp2 { locations.coords[2].AT(n)); gh const * const hh = Carpet::vhh.AT(m); - ibbox const & baseext = hh->baseextent(Carpet::mglevel, 0); - rvect const rpos = - (pos - lower.AT(m)) / (upper.AT(m) - lower.AT(m)) * - rvect (baseext.upper() - baseext.lower()); + // ibbox const & baseext = hh->baseextent(Carpet::mglevel, 0); + // rvect const rpos = + // (pos - lower.AT(m)) / (upper.AT(m) - lower.AT(m)) * + // rvect (baseext.upper() - baseext.lower()); + rvect const rpos = (pos - lower.AT(m)) * idelta.AT(m); // Find refinement level and component int rl, c; @@ -499,38 +609,69 @@ namespace CarpetInterp2 { Carpet::mglevel, Carpet::reflevel, Carpet::reflevel+1, rl, c, ipos); - ASSERT (rl>=0 and c>=0); + if (not (rl>=0 and c>=0)) { +#pragma omp critical + { + ostringstream msg; + msg << "Interpolation point " << n << " on map " << m << " " + << "at " << pos << " is outside of the grid hierarchy"; + CCTK_WARN (CCTK_WARN_ABORT, msg.str().c_str()); + } + } + assert (rl>=0 and c>=0); ibbox const & ext = Carpet::vdd.AT(m)->boxes.AT(Carpet::mglevel).AT(rl).AT(c).exterior; rvect dpos = rpos - rvect(ipos); // Convert from Carpet indexing to grid point indexing - ASSERT (all (ipos % ext.stride() == ivect(0))); + assert (all (ipos % ext.stride() == ivect(0))); ipos /= ext.stride(); dpos /= rvect(ext.stride()); - ASSERT (all (abs(dpos) <= rvect(0.5))); + assert (all (abs(dpos) <= rvect(0.5))); ivect const ind = ipos - ext.lower() / ext.stride(); ivect const lsh = ext.shape() / ext.stride(); - int const ind3d = index (lsh, ind); - // TODO: ASSERT that there are enough ghost zones + int const ind3d = index(lsh, ind); +#if 0 + ENTER_SINGLEMAP_MODE (cctkGH, m, CCTK_GF) { + ENTER_LOCAL_MODE (cctkGH, c, CCTK_GF) { + CCTK_REAL const * restrict const xptr = (CCTK_REAL const *) CCTK_VarDataPtr (cctkGH, 0, "grid::x"); + CCTK_REAL const * restrict const yptr = (CCTK_REAL const *) CCTK_VarDataPtr (cctkGH, 0, "grid::y"); + CCTK_REAL const * restrict const zptr = (CCTK_REAL const *) CCTK_VarDataPtr (cctkGH, 0, "grid::z"); + assert (xptr); + assert (yptr); + assert (zptr); + cout << "CI2 map=" << m << " pos=" << pos << " ind=" << ind << " x=" << xptr[ind3d] << " y=" << yptr[ind3d] << " z=" << zptr[ind3d] << endl; + } LEAVE_LOCAL_MODE; + } LEAVE_SINGLEMAP_MODE; +#endif + // TODO: assert that there are enough ghost zones // Store result fasterp_iloc_t & iloc = ilocs.AT(n); iloc.mrc = mrc_t(m, rl, c); +#ifdef CARPET_DEBUG + iloc.pn.p = dist::rank(); + iloc.pn.n = n; + iloc.ipos = ipos * ext.stride(); + iloc.ind = ind; +#endif iloc.ind3d = ind3d; iloc.offset = dpos; // Find source processor int const p = Carpet::vhh.AT(m)->processor(rl,c); proc.AT(n) = p; - if (nlocs.AT(p) == 0) ++n_nz_nlocs; +#pragma omp atomic ++ nlocs.AT(p); // Output if (veryverbose) { - cout << "Point #" << n << " at " << pos << ": iloc " << iloc << endl; +#pragma omp critical + { + cout << "Point #" << n << " at " << pos << ": iloc " << iloc << endl; + } } } @@ -542,6 +683,12 @@ namespace CarpetInterp2 { // communicate. if (verbose) CCTK_INFO ("Determine set of communicating processors"); { + int n_nz_nlocs = 0; + for (int p=0; p<nprocs; ++p) { + if (nlocs.AT(p) > 0) { + ++ n_nz_nlocs; + } + } recv_descr.procs.resize (n_nz_nlocs); recv_descr.procinds.resize (nprocs, -1); int pp = 0; @@ -556,8 +703,8 @@ namespace CarpetInterp2 { offset += nlocs.AT(p); } } - ASSERT (pp == n_nz_nlocs); - ASSERT (offset == npoints); + assert (pp == n_nz_nlocs); + assert (offset == npoints); recv_descr.npoints = npoints; } @@ -574,25 +721,31 @@ namespace CarpetInterp2 { index.AT(pp) = recv_descr.procs.AT(pp).offset; } recv_descr.index.resize (npoints); + // TODO: parallelise with OpenMP for (int n=0; n<npoints; ++n) { int const p = proc.AT(n); int const pp = recv_descr.procinds.AT(p); - ASSERT (pp >= 0); + assert (pp >= 0); recv_descr.index.AT(n) = index.AT(pp); ++index.AT(pp); } for (int pp=0; pp<int(recv_descr.procs.size()); ++pp) { int const recv_npoints = index.AT(pp) - recv_descr.procs.AT(pp).offset; - ASSERT (recv_npoints == recv_descr.procs.AT(pp).npoints); + assert (recv_npoints == recv_descr.procs.AT(pp).npoints); } - vector<bool> received (npoints, false); +#ifndef NDEBUG + vector<bool> received (npoints); + fill (received, false); +#pragma omp parallel for for (int n=0; n<npoints; ++n) { assert (not received.AT(n)); received.AT(n) = true; } +#pragma omp parallel for for (int n=0; n<npoints; ++n) { assert (received.AT(n)); } +#endif } @@ -602,7 +755,8 @@ namespace CarpetInterp2 { if (verbose) { CCTK_INFO ("Count and exchange number of communicated grid points"); } - vector<int> recv_npoints (nprocs, 0), recv_offsets (nprocs); + vector<int> recv_npoints (nprocs), recv_offsets (nprocs); + fill (recv_npoints, 0); fill_with_poison (recv_offsets); { int offset = 0; @@ -612,7 +766,7 @@ namespace CarpetInterp2 { recv_offsets.AT(p) = offset; offset += recv_npoints.AT(p); } - ASSERT (offset == npoints); + assert (offset == npoints); } vector<int> send_npoints (nprocs), send_offsets (nprocs); fill_with_poison (send_npoints); @@ -629,6 +783,7 @@ namespace CarpetInterp2 { // Scatter the location information into a send-array, and // exchange it with MPI vector<fasterp_iloc_t> scattered_ilocs(npoints); +#pragma omp parallel for for (int n=0; n<npoints; ++n) { scattered_ilocs.AT(recv_descr.index.AT(n)) = ilocs.AT(n); } @@ -640,20 +795,35 @@ namespace CarpetInterp2 { fasterp_iloc_t::mpi_datatype(), comm_world); +#ifdef CARPET_DEBUG + // Ensure that the ilocs were sent to the right processor + for (int p=0; p<nprocs; ++p) { +#pragma omp parallel for + for (int n=0; n<send_npoints.at(p); ++n) { + fasterp_iloc_t const & iloc = gathered_ilocs.at(send_offsets.at(p) + n); + mrc_t const & mrc = iloc.mrc; + int const m = mrc.m; + int const rl = mrc.rl; + int const c = mrc.c; + assert (Carpet::vhh.AT(m)->is_local(rl,c)); + } + } +#endif + // Fill in send descriptors send_descr.npoints = npoints_send; { - int n_nz_recv_npoints = 0; + int n_nz_send_npoints = 0; for (int p=0; p<nprocs; ++p) { - if (recv_npoints.AT(p) > 0) ++n_nz_recv_npoints; + if (send_npoints.AT(p) > 0) ++n_nz_send_npoints; } - send_descr.procs.resize (n_nz_recv_npoints); + send_descr.procs.resize (n_nz_send_npoints); int pp = 0; for (int p=0; p<nprocs; ++p) { - if (recv_npoints.AT(p) > 0) { + if (send_npoints.AT(p) > 0) { send_proc_t & send_proc = send_descr.procs.AT(pp); send_proc.p = p; send_proc.offset = send_offsets.AT(p); @@ -661,7 +831,7 @@ namespace CarpetInterp2 { ++pp; } } - ASSERT (pp == n_nz_recv_npoints); + assert (pp == n_nz_send_npoints); } @@ -675,24 +845,27 @@ namespace CarpetInterp2 { vector<int> mrc2comp (maxmrc, -1); vector<int> comp2mrc (maxmrc); fill_with_poison (comp2mrc); - int comps = 0; + int ncomps = 0; - vector<int> npoints_comp (maxmrc, 0); + vector<int> npoints_comp (maxmrc); + fill (npoints_comp, 0); + // TODO: parallelise with OpenMP for (int n=0; n<send_proc.npoints; ++n) { fasterp_iloc_t const & iloc = gathered_ilocs.AT(send_proc.offset + n); int const mrc = iloc.mrc.get_ind(); if (mrc2comp.AT(mrc) == -1) { - mrc2comp.AT(mrc) = comps; - comp2mrc.AT(comps) = mrc; - ++ comps; + mrc2comp.AT(mrc) = ncomps; + comp2mrc.AT(ncomps) = mrc; + ++ ncomps; } ++ npoints_comp.AT(mrc); } - send_proc.comps.resize(comps); + assert (ncomps <= maxmrc); + send_proc.comps.resize(ncomps); int offset = 0; - for (int comp=0; comp<comps; ++comp) { + for (int comp=0; comp<ncomps; ++comp) { send_comp_t & send_comp = send_proc.comps.AT(comp); int const mrc = comp2mrc.AT(comp); send_comp.mrc = mrc; @@ -702,6 +875,7 @@ namespace CarpetInterp2 { int const m = themrc.m; int const rl = themrc.rl; int const c = themrc.c; + assert (Carpet::vhh.AT(m)->is_local(rl,c)); ibbox const & ext = Carpet::vdd.AT(m)->boxes.AT(Carpet::mglevel).AT(rl).AT(c).exterior; send_comp.lsh = ext.shape() / ext.stride(); @@ -710,10 +884,12 @@ namespace CarpetInterp2 { send_comp.npoints = npoints_comp.AT(mrc); offset += send_comp.npoints; } - ASSERT (offset == send_proc.npoints); + assert (offset == send_proc.npoints); send_proc.index.resize (send_proc.npoints); fill_with_poison (send_proc.index); + // TODO: This is not parallel! Loop over comps instead? + // #pragma omp parallel for for (int n=0; n<send_proc.npoints; ++n) { fasterp_iloc_t const & iloc = gathered_ilocs.AT(send_proc.offset + n); int const mrc = iloc.mrc.get_ind(); @@ -727,13 +903,84 @@ namespace CarpetInterp2 { send_comp.locs.push_back (sloc); } - for (int comp=0; comp<comps; ++comp) { + for (int comp=0; comp<ncomps; ++comp) { send_comp_t & send_comp = send_proc.comps.AT(comp); assert (int(send_comp.locs.size()) == send_comp.npoints); } +#ifndef NDEBUG + { + vector<bool> used(send_proc.npoints, false); + for (int n=0; n<send_proc.npoints; ++n) { + assert (not used.AT(send_proc.index.AT(n))); + used.AT(send_proc.index.AT(n)) = true; + } + for (int n=0; n<send_proc.npoints; ++n) { + assert (used.AT(send_proc.index.AT(n))); + } + } +#endif + +#ifdef CARPET_DEBUG + for (int comp=0; comp<ncomps; ++comp) { + send_comp_t const & send_comp = send_proc.comps.at(comp); + assert (int(send_comp.locs.size()) == send_comp.npoints); + for (int n=0; n<send_comp.npoints; ++n) { + fasterp_src_loc_t const & sloc = send_comp.locs.at(n); + assert (sloc.mrc == send_comp.mrc); + assert (all (sloc.saved_lsh == send_comp.lsh)); + } + } +#endif + } // for pp +#ifdef CARPET_DEBUG + { + if (verbose) CCTK_INFO ("Compare send and receive counts"); + + vector<int> recv_count (dist::size()); + fill (recv_count, 0); + for (size_t pp=0; pp<recv_descr.procs.size(); ++pp) { + recv_proc_t const & recv_proc = recv_descr.procs.AT(pp); + assert (recv_count.AT(recv_proc.p) == 0); + assert (recv_proc.npoints > 0); + recv_count.AT(recv_proc.p) = recv_proc.npoints; + } + + vector<int> send_count (dist::size()); + fill (send_count, 0); + for (size_t pp=0; pp<send_descr.procs.size(); ++pp) { + send_proc_t const & send_proc = send_descr.procs.AT(pp); + assert (send_count.AT(send_proc.p) == 0); + assert (send_proc.npoints > 0); + send_count.AT(send_proc.p) = send_proc.npoints; + } + + { + vector<int> tmp_count (dist::size()); + MPI_Alltoall (&send_count.front(), 1, MPI_INT, + &tmp_count .front(), 1, MPI_INT, + dist::comm()); + swap (send_count, tmp_count); + } + bool error = false; + for (int p=0; p<dist::size(); ++p) { + if (not (send_count.AT(p) == recv_count.AT(p))) { + ostringstream buf; + buf << "Error: processor " << p << " " + << "sends me " << send_count.AT(p) << " points, " + << "while I receive " << recv_count.AT(p) << " from it"; + CCTK_WARN (CCTK_WARN_ALERT, buf.str().c_str()); + error = true; + } + } + if (error) { + CCTK_WARN (CCTK_WARN_ABORT, "Internal error in interpolation setup"); + } + } +#endif + if (verbose) CCTK_INFO ("Done."); } @@ -758,33 +1005,36 @@ namespace CarpetInterp2 { { DECLARE_CCTK_PARAMETERS; + // Desired time level + int const tl = 0; // current time + size_t const nvars = varinds.size(); if (verbose) CCTK_VInfo (CCTK_THORNSTRING, "Interpolating %d variables", int(nvars)); - ASSERT (values.size() == nvars); + assert (values.size() == nvars); for (size_t v=0; v<values.size(); ++v) { int const vi = varinds.AT(v); - ASSERT (vi >= 0 and vi <= CCTK_NumVars()); + assert (vi >= 0 and vi <= CCTK_NumVars()); int const gi = CCTK_GroupIndexFromVarI (vi); - ASSERT (gi >= 0); + assert (gi >= 0); cGroup group; { int const ierr = CCTK_GroupData (gi, &group); - ASSERT (not ierr); + assert (not ierr); } - ASSERT (group.grouptype == CCTK_GF); - ASSERT (group.vartype == CCTK_VARIABLE_REAL); - ASSERT (group.dim == dim); + assert (group.grouptype == CCTK_GF); + assert (group.vartype == CCTK_VARIABLE_REAL); + assert (group.dim == dim); #if 0 // Cannot be called in level mode cGroupDynamicData dyndata; { int const ierr = CCTK_GroupDynamicData (cctkGH, gi, &dyndata); - ASSERT (not ierr); + assert (not ierr); } - ASSERT (dyndata.activetimelevels >= 1); + assert (dyndata.activetimelevels > tl); #endif - ASSERT (values.AT(v) != NULL); + assert (values.AT(v) != NULL); } MPI_Comm & comm_world = * (MPI_Comm *) GetMPICommWorld (cctkGH); @@ -795,6 +1045,10 @@ namespace CarpetInterp2 { vector<CCTK_REAL> recv_points (recv_descr.npoints * nvars); fill_with_poison (recv_points); vector<MPI_Request> recv_reqs (recv_descr.procs.size()); +#ifdef CARPET_DEBUG + vector<pn_t> recv_pn (recv_descr.npoints); + vector<MPI_Request> recv_reqs_pn (recv_descr.procs.size()); +#endif for (size_t pp=0; pp<recv_descr.procs.size(); ++pp) { recv_proc_t const & recv_proc = recv_descr.procs.AT(pp); @@ -802,83 +1056,113 @@ namespace CarpetInterp2 { recv_proc.npoints * nvars, dist::datatype<CCTK_REAL>(), recv_proc.p, mpi_tag, comm_world, & recv_reqs.AT(pp)); +#ifdef CARPET_DEBUG + MPI_Irecv (& recv_pn.AT(recv_proc.offset), + recv_proc.npoints * 2, + dist::datatype<int>(), recv_proc.p, mpi_tag, + comm_world, & recv_reqs_pn.AT(pp)); +#endif } // Interpolate data and post Isends if (verbose) CCTK_INFO ("Interpolating and posting MPI_Isends"); + // TODO: Use one arrays per processor? vector<CCTK_REAL> send_points (send_descr.npoints * nvars); fill_with_poison (send_points); vector<MPI_Request> send_reqs (send_descr.procs.size()); +#ifdef CARPET_DEBUG + vector<pn_t> send_pn (send_descr.npoints); + vector<MPI_Request> send_reqs_pn (send_descr.procs.size()); +#endif for (size_t pp=0; pp<send_descr.procs.size(); ++pp) { send_proc_t const & send_proc = send_descr.procs.AT(pp); - vector<CCTK_REAL>::iterator - destit = send_points.begin() + send_proc.offset * nvars; - vector<CCTK_REAL>::iterator const - endit = destit + send_proc.npoints * nvars; - + vector<CCTK_REAL> computed_points (send_proc.npoints * nvars); + fill_with_poison (computed_points); +#ifdef CARPET_DEBUG + vector<pn_t> computed_pn (send_descr.npoints); +#endif for (size_t comp=0; comp<send_proc.comps.size(); ++comp) { send_comp_t const & send_comp = send_proc.comps.AT(comp); int const m = send_comp.mrc.m; int const rl = send_comp.mrc.rl; int const c = send_comp.mrc.c; - int const tl = 0; // Get pointers to 3D data vector<CCTK_REAL const *> varptrs (nvars); for (size_t v=0; v<nvars; ++v) { int const gi = CCTK_GroupIndexFromVarI (varinds.AT(v)); - ASSERT (gi >= 0); + assert (gi >= 0); int const vi = varinds.AT(v) - CCTK_FirstVarIndexI (gi); - ASSERT (vi >= 0); + assert (vi >= 0); varptrs.AT(v) = (CCTK_REAL const *) (* Carpet::arrdata.AT(gi).AT(m).data.AT(vi)) (tl, rl, c, Carpet::mglevel)->storage(); - ASSERT (varptrs.AT(v)); + assert (varptrs.AT(v)); } - for (size_t n=0; n<send_comp.locs.size(); ++n) { - ASSERT (destit + nvars <= endit); +#pragma omp parallel for + for (int n=0; n<int(send_comp.locs.size()); ++n) { + size_t const ind = (send_comp.offset + n) * nvars; send_comp.locs.AT(n).interpolate - (send_comp.lsh, order, varptrs, & * destit); - destit += nvars; + (send_comp.lsh, order, varptrs, &computed_points.AT(ind)); +#ifdef CARPET_DEBUG + computed_pn.AT(send_comp.offset + n) = send_comp.locs.AT(n).pn; +#endif } - } // for comps - - ASSERT (destit == endit); + } // for comp // Gather send points - vector<CCTK_REAL> gathered_send_points (send_proc.npoints * nvars); - fill_with_poison (gathered_send_points); +#pragma omp parallel for for (int n=0; n<send_proc.npoints; ++n) { - int const nn = send_proc.offset + send_proc.index.AT(n); + size_t const nn = send_proc.index.AT(n); for (size_t v=0; v<nvars; ++v) { - gathered_send_points.AT(n * nvars + v) = - send_points.AT(nn * nvars + v); + send_points.AT((send_proc.offset + n) * nvars + v) = + computed_points.AT(nn * nvars + v); } - } - for (int n=0; n<int(send_proc.npoints * nvars); ++n) { - int const nn = send_proc.offset * nvars + n; - send_points.AT(nn) = gathered_send_points.AT(n); +#ifdef CARPET_DEBUG + send_pn.AT(send_proc.offset + n) = computed_pn.AT(nn); +#endif } // TODO: Use a scatter index list instead of a gather index // list, and combine the scattering with the calculation +#ifdef CARPET_DEBUG +#pragma omp parallel for + for (int n=0; n<send_proc.npoints; ++n) { + assert (send_pn.AT(send_proc.offset + n).p == send_proc.p); + if (n>0) { + assert (send_pn.AT(send_proc.offset + n ).n > + send_pn.AT(send_proc.offset + n-1).n); + } + } +#endif + MPI_Isend (& send_points.AT(send_proc.offset * nvars), send_proc.npoints * nvars, dist::datatype<CCTK_REAL>(), send_proc.p, mpi_tag, comm_world, & send_reqs.AT(pp)); +#ifdef CARPET_DEBUG + MPI_Isend (& send_pn.AT(send_proc.offset), + send_proc.npoints * 2, + dist::datatype<int>(), send_proc.p, mpi_tag, + comm_world, & send_reqs_pn.AT(pp)); +#endif } // for pp // Wait for Irecvs to complete if (verbose) CCTK_INFO ("Waiting for MPI_Irevcs to complete"); MPI_Waitall (recv_reqs.size(), & recv_reqs.front(), MPI_STATUSES_IGNORE); +#ifdef CARPET_DEBUG + MPI_Waitall (recv_reqs.size(), & recv_reqs_pn.front(), MPI_STATUSES_IGNORE); +#endif // Gather data if (verbose) CCTK_INFO ("Gathering data"); +#pragma omp parallel for for (int n=0; n<recv_descr.npoints; ++n) { size_t const nn = recv_descr.index.AT(n); for (size_t v=0; v<nvars; ++v) { @@ -887,11 +1171,18 @@ namespace CarpetInterp2 { recv_points.AT(nn * nvars + v) = poison; #endif } +#ifdef CARPET_DEBUG + assert (recv_pn.AT(nn).p == dist::rank()); + assert (recv_pn.AT(nn).n == n); +#endif } // Wait for Isends to complete if (verbose) CCTK_INFO ("Waiting for MPI_Isends to complete"); MPI_Waitall (send_reqs.size(), & send_reqs.front(), MPI_STATUSES_IGNORE); +#ifdef CARPET_DEBUG + MPI_Waitall (send_reqs.size(), & send_reqs_pn.front(), MPI_STATUSES_IGNORE); +#endif if (verbose) CCTK_INFO ("Done."); } diff --git a/Carpet/CarpetInterp2/src/fasterp.hh b/Carpet/CarpetInterp2/src/fasterp.hh index d136ff900..3981f0a67 100644 --- a/Carpet/CarpetInterp2/src/fasterp.hh +++ b/Carpet/CarpetInterp2/src/fasterp.hh @@ -23,13 +23,15 @@ namespace CarpetInterp2 { int const dim = 3; - // An interpolation point descriptor requires (3 * (max_order+1) + - // 1) double precision values of memory + // Each interpolation point descriptor requires + // (dim * (max_order+1) + 1) + // CCTK_REAL values of memory int const max_order = 5; - // Keep a map, refinement level, and component index together + // Keep a map, refinement level, and component index together, and + // map the triple to small integers struct mrc_t { static int maps; static int reflevels; @@ -66,6 +68,11 @@ namespace CarpetInterp2 { return c + components * (rl + reflevels * m); } + bool operator== (mrc_t const & a) const + { + return m == a.m and rl == a.rl and c == a.c; + } + void output (ostream& os) const; }; @@ -73,8 +80,33 @@ namespace CarpetInterp2 { ostream& operator<< (ostream& os, mrc_t const & mrc) { mrc.output(os); return os; } + + + // Keep a processor and an integer index together + struct pn_t { + int p; + int n; + pn_t () + { + } + pn_t (int const p_, int const n_) + : p(p_), n(n_) + { + assert (p>=0 and p<dist::size()); + assert (n>=0); + } + + void output (ostream& os) const; + }; + + inline + ostream& operator<< (ostream& os, pn_t const & pn) + { pn.output(os); return os; } + + + // A global location, given by its global coordinates struct fasterp_glocs_t { vector<CCTK_REAL> coords[dim]; @@ -106,8 +138,12 @@ namespace CarpetInterp2 { struct fasterp_iloc_t { mrc_t mrc; // map, refinement level, component - // ivect idx; - int ind3d; +#ifdef CARPET_DEBUG + pn_t pn; // origin of this point + ivect ipos; // closest grid point (Carpet indexing) + ivect ind; // closest grid point (local indexing) +#endif + int ind3d; // closest grid point rvect offset; // in terms of grid points static MPI_Datatype mpi_datatype (); @@ -130,9 +166,22 @@ namespace CarpetInterp2 { CCTK_REAL coeffs[dim][max_order+1]; // interpolation coefficients bvect exact; - // ivect idx; +#ifdef CARPET_DEBUG + public: + pn_t pn; // origin of this point + mrc_t mrc; // map, refinement level, component + ivect ipos; // closest grid point (Carpet indexing) + ivect ind; // source grid point offset + private: +#endif int ind3d; // source grid point offset +#ifdef CARPET_DEBUG + public: + ivect saved_lsh; // copy of lsh + private: +#endif + public: void calc_stencil (fasterp_iloc_t const & iloc, @@ -211,8 +260,8 @@ namespace CarpetInterp2 { int offset; int npoints; // total number of sent points - vector<int> index; // gather index list: index[mpi-location] = - // calculation-location + // gather index list: index[mpi-location] = calculation-location + vector<int> index; }; struct send_descr_t { @@ -249,7 +298,8 @@ namespace CarpetInterp2 { vector<CCTK_REAL *> & values) const; - size_t get_npoints () + size_t + get_npoints () const { return recv_descr.npoints; |