diff options
author | Erik Schnetter <schnetter@cct.lsu.edu> | 2008-07-24 20:48:14 -0500 |
---|---|---|
committer | Erik Schnetter <schnetter@cct.lsu.edu> | 2008-07-24 20:48:14 -0500 |
commit | c8746c53015a2440ab211cf099e4d8d552cce904 (patch) | |
tree | a46dbd1be74f064cf6108c76b12006613bbbd868 | |
parent | 3061b97f509a3881a575b2ac8379b1d1d2a85c5a (diff) | |
parent | 129af4cef8b226a012edf80e98d2bb7c33cc2c3b (diff) |
Merge /Users/eschnett/Cbeta/carpet
-rw-r--r-- | Carpet/CarpetInterp2/param.ccl | 4 | ||||
-rw-r--r-- | Carpet/CarpetInterp2/src/fasterp.cc | 519 | ||||
-rw-r--r-- | Carpet/CarpetInterp2/src/fasterp.hh | 91 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/dist.hh | 49 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/vect.hh | 14 |
5 files changed, 492 insertions, 185 deletions
diff --git a/Carpet/CarpetInterp2/param.ccl b/Carpet/CarpetInterp2/param.ccl index 9061931b7..00363ad92 100644 --- a/Carpet/CarpetInterp2/param.ccl +++ b/Carpet/CarpetInterp2/param.ccl @@ -1 +1,5 @@ # Parameter definitions for thorn CarpetInterp + +BOOLEAN veryverbose "Produce debugging output" STEERABLE=always +{ +} "no" diff --git a/Carpet/CarpetInterp2/src/fasterp.cc b/Carpet/CarpetInterp2/src/fasterp.cc index ac56ee293..dee352b76 100644 --- a/Carpet/CarpetInterp2/src/fasterp.cc +++ b/Carpet/CarpetInterp2/src/fasterp.cc @@ -1,13 +1,16 @@ +#include <algorithm> #include <cassert> #include <cmath> #include <cstddef> #include <cstdlib> +#include <iostream> #include <cctk.h> #include <mpi.h> #include <carpet.hh> +#include <vect.hh> #include "fasterp.hh" @@ -17,6 +20,40 @@ namespace CarpetInterp2 { + // Erik's gdb cannot print local variables in functions where an + // assert fails. Hence the calls to assert are temporarily moved + // into a function of their own. + void ASSERT (bool const cond) + { + if (cond) return; + abort(); + } + + + + int const ipoison = -1234567890; + CCTK_REAL const poison = -1.0e+12; + + template <typename T> T get_poison () { abort(); } + template<> int get_poison () { return ipoison; } + template<> CCTK_REAL get_poison () { return poison; } + + template <typename T> + void fill (vector<T> & v, T const & val) + { + fill (v.begin(), v.end(), val); + } + + template <typename T> + void fill_with_poison (vector<T> & v) + { +#ifndef NDEBUG + fill (v, get_poison<T>()); +#endif + } + + + // Create an MPI datatype for location information MPI_Datatype fasterp_iloc_t::mpi_datatype () @@ -28,14 +65,11 @@ namespace CarpetInterp2 { #define ARRSIZE(m) (sizeof(s.m) / sizeof(s.m[0])) #define OFFSET(m) ((char*)&(s.m) - (char*)&(s)) // offsetof doesn't work (why?) #define SIZE (sizeof(s)) - CCTK_REAL rdummy; dist::mpi_struct_descr_t const descr[] = { - { 1, OFFSET(m ), MPI_INT }, - { 1, OFFSET(rl ), MPI_INT }, - { 1, OFFSET(c ), MPI_INT }, - { 1, OFFSET(ind3d ), MPI_INT }, - {ARRSIZE(offset), OFFSET(offset), dist::datatype(rdummy)}, - { 1, SIZE , MPI_UB } + { 3, OFFSET(mrc ), MPI_INT }, + { 1, OFFSET(ind3d ), MPI_INT }, + {ARRSIZE(offset), OFFSET(offset), dist::datatype<CCTK_REAL>()}, + { 1, SIZE , MPI_UB } }; #undef ARRSIZE #undef OFFSET @@ -47,6 +81,67 @@ namespace CarpetInterp2 { return newtype; } + void + fasterp_iloc_t:: + output (ostream& os) + const + { + os << "fasterp_iloc_t{" + << "mrc=" << mrc << "," + << "ind3d=" << ind3d << "," + << "offset=" << offset << "}"; + } + + + + ////////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////////// + + + + int mrc_t::maps = -1; + int mrc_t::reflevels = -1; + int mrc_t::components = -1; + + void + mrc_t:: + determine_mrc_info () + { + maps = Carpet::maps; + reflevels = Carpet::reflevels; + components = 0; + for (int m=0; m<maps; ++m) { + for (int rl=0; rl<reflevels; ++rl) { + int const ncomps = Carpet::vhh.AT(m)->components (rl); + components = max (components, ncomps); + } + } + } + + mrc_t:: + mrc_t (int ind) + { + ASSERT (ind >= 0); + int const ind0 = ind; + c = ind % components; + ind /= components; + rl = ind % reflevels; + ind /= reflevels; + m = ind % maps; + ind /= maps; + ASSERT (ind == 0); + ASSERT (get_ind() == ind0); + } + + void + mrc_t:: + output (ostream& os) + const + { + os << "mrc{m=" << m << ",rl=" << rl << ",c=" << c << "}"; + } + ////////////////////////////////////////////////////////////////////////////// @@ -56,32 +151,57 @@ namespace CarpetInterp2 { // Calculate the coefficients for one interpolation stencil + // TODO: Could templatify this function on the order to improve + // efficiency void fasterp_src_loc_t:: calc_stencil (fasterp_iloc_t const & iloc, + ivect const & lsh, int const order) { - assert (order <= max_order); + ASSERT (order <= max_order); CCTK_REAL const rone = 1.0; CCTK_REAL const eps = 1.0e-12; - int const n0 = (order+1) / 2; - CCTK_REAL const offset = order % 2 == 0 ? 0.5 : 0.0; + +#ifndef NDEBUG + // Poison all coefficients for (int d=0; d<dim; ++d) { - // C_n = Pi_m[m!=n] [(x_i - x_m) / (x_n - x_m)] - CCTK_REAL const xi = iloc.offset[d] - offset; - if (order % 2 == 0) { - assert (xi >= -rone/2 and xi < +rone/2); - } else { - assert (xi >= 0 and xi < rone); + for (int n=0; n<=max_order; ++n) { + coeffs[d][n] = poison; + } + } +#endif + + if (order == 0) { + for (int d=0; d<dim; ++d) { + exact[d] = true; } - if (fabs(xi) >= eps) { - assert (fabs(fmod(xi, rone)) <= eps/2); + ind3d = iloc.ind3d; + return; + } + + // Choose stencil anchor (first stencil point) + ivect iorigin; + if (order % 2 == 0) { + iorigin = - ivect(order/2); + } else { + // Potentially shift the stencil anchor for odd interpolation + // orders (i.e., for even numbers of stencil points) + ivect const ioffset (iloc.offset < 0.0); + iorigin = - ivect((order-1)/2) - ioffset; + } + rvect const offset = iloc.offset - rvect(iorigin); + + for (int d=0; d<dim; ++d) { + // C_n = Pi_m[m!=n] [(x_i - x_m) / (x_n - x_m)] + CCTK_REAL const xi = offset[d]; + if (abs(remainder(xi, rone)) < eps) { for (int n=0; n<=order; ++n) { - CCTK_REAL const xn = n - n0; + CCTK_REAL const xn = n; CCTK_REAL c = 1.0; for (int m=0; m<=order; ++m) { if (m != n) { - CCTK_REAL const xm = m - n0; + CCTK_REAL const xm = m; c *= (xi - xm) / (xn - xm); } } @@ -93,12 +213,17 @@ namespace CarpetInterp2 { for (int n=0; n<=order; ++n) { s += coeffs[d][n]; } - assert (fabs(s - rone) <= eps); + ASSERT (fabs(s - rone) <= eps); } else { + // The interpolation point coincides with a grid point; no + // interpolation is necessary (this is a special case) + iorigin[d] += int(round(xi)); exact[d] = true; } } - ind3d = iloc.ind3d; + + // Set 3D location of stencil anchor + ind3d = iloc.ind3d + index(lsh, iorigin); } @@ -119,6 +244,10 @@ namespace CarpetInterp2 { CCTK_REAL * restrict const vals) const { + ASSERT (O0 == 0 or not exact[0]); + ASSERT (O1 == 0 or not exact[1]); + ASSERT (O2 == 0 or not exact[2]); + size_t const di = 1; size_t const dj = di * lsh[0]; size_t const dk = dj * lsh[1]; @@ -128,11 +257,14 @@ namespace CarpetInterp2 { } for (size_t k=0; k<=O2; ++k) { - CCTK_REAL const coeff_k = coeffs[2][k]; + ASSERT (O2 == 0 or coeffs[2][k] != poison); + CCTK_REAL const coeff_k = (O2==0 ? 1.0 : coeffs[2][k]); for (size_t j=0; j<=O1; ++j) { - CCTK_REAL const coeff_jk = coeff_k * coeffs[1][j]; + ASSERT (O1 == 0 or coeffs[1][j] != poison); + CCTK_REAL const coeff_jk = coeff_k * (O1==0 ? 1.0 : coeffs[1][j]); for (size_t i=0; i<=O0; ++i) { - CCTK_REAL const coeff_ijk = coeff_jk * coeffs[0][i]; + ASSERT (O0 == 0 or coeffs[0][i] != poison); + CCTK_REAL const coeff_ijk = coeff_jk * (O0==0 ? 1.0 : coeffs[0][i]); for (size_t v=0; v<varptrs.size(); ++v) { vals[v] += coeff_ijk * varptrs.AT(v)[ind3d + i*di + j*dj + k*dk]; @@ -163,25 +295,25 @@ namespace CarpetInterp2 { if (exact[0]) { interpolate<Z,Z,Z> (lsh, varptrs, vals); } else { - interpolate<Z,Z,O> (lsh, varptrs, vals); + interpolate<O,Z,Z> (lsh, varptrs, vals); } } else { if (exact[0]) { interpolate<Z,O,Z> (lsh, varptrs, vals); } else { - interpolate<Z,O,O> (lsh, varptrs, vals); + interpolate<O,O,Z> (lsh, varptrs, vals); } } } else { if (exact[1]) { if (exact[0]) { - interpolate<O,Z,Z> (lsh, varptrs, vals); + interpolate<Z,Z,O> (lsh, varptrs, vals); } else { interpolate<O,Z,O> (lsh, varptrs, vals); } } else { if (exact[0]) { - interpolate<O,O,Z> (lsh, varptrs, vals); + interpolate<Z,O,O> (lsh, varptrs, vals); } else { interpolate<O,O,O> (lsh, varptrs, vals); } @@ -219,7 +351,7 @@ namespace CarpetInterp2 { default: // Add higher orders here as desired CCTK_WARN (CCTK_WARN_ABORT, "Interpolation orders larger than 11 are not yet implemented"); - assert (0); + ASSERT (0); } } @@ -261,7 +393,7 @@ namespace CarpetInterp2 { (cctkGH, dim, npoints, coords, &local_locations.maps.front(), local_coords, NULL, NULL); - assert (not ierr); + ASSERT (not ierr); } else { // This is a single-patch simulation @@ -301,11 +433,18 @@ namespace CarpetInterp2 { setup (cGH const * restrict const cctkGH, fasterp_llocs_t const & locations) { + DECLARE_CCTK_PARAMETERS; + // Some global properties int const npoints = locations.size(); int const nprocs = CCTK_nProcs (cctkGH); // int const myproc = CCTK_MyProc (cctkGH); + mrc_t::determine_mrc_info(); + int const maxmrc = mrc_t::get_max_ind(); + + MPI_Comm & comm_world = * (MPI_Comm *) GetMPICommWorld (cctkGH); + // Obtain the coordinate ranges for all patches @@ -318,7 +457,7 @@ namespace CarpetInterp2 { GetCoordRange (cctkGH, m, Carpet::mglevel, dim, & gsh[0], & lower.AT(m)[0], & upper.AT(m)[0], & delta.AT(m)[0]); - assert (not ierr); + ASSERT (not ierr); delta.AT(m) /= Carpet::maxspacereflevelfact; } @@ -326,9 +465,10 @@ namespace CarpetInterp2 { // indices vector<fasterp_iloc_t> ilocs (npoints); vector<int> proc (npoints); + fill_with_poison (proc); vector<int> nlocs (nprocs, 0); int n_nz_nlocs = 0; - assert (Carpet::is_level_mode()); + ASSERT (Carpet::is_level_mode()); for (int n=0; n<npoints; ++n) { int const m = locations.maps.AT(n); rvect const pos (locations.coords[0].AT(n), @@ -348,27 +488,26 @@ namespace CarpetInterp2 { Carpet::mglevel, Carpet::reflevel, Carpet::reflevel+1, rl, c, ipos); + ASSERT (rl>=0 and c>=0); ibbox const & ext = Carpet::vdd.AT(m)->boxes.AT(Carpet::mglevel).AT(rl).AT(c).exterior; - rvect dpos = rpos - rvect(ipos); - if (order % 2 != 0) { - // Potentially shift the stencil anchor for odd interpolation - // orders (i.e., for even numbers of stencil points) - ipos -= either (dpos > rvect(0), ext.stride(), ivect(0)); - dpos = rpos - rvect(ipos); - } + + // Convert from Carpet indexing to grid point indexing + ASSERT (all (ipos % ext.stride() == ivect(0))); + ipos /= ext.stride(); + dpos /= rvect(ext.stride()); + ASSERT (all (abs(dpos) <= rvect(0.5))); ivect const ind = ipos - ext.lower() / ext.stride(); ivect const lsh = ext.shape() / ext.stride(); - int const ind3d = ind[0] + lsh[0] * (ind[1] + lsh[1] * ind[2]); + int const ind3d = index (lsh, ind); + // TODO: ASSERT that there are enough ghost zones // Store result fasterp_iloc_t & iloc = ilocs.AT(n); - iloc.m = m; - iloc.rl = rl; - iloc.c = c; + iloc.mrc = mrc_t(m, rl, c); iloc.ind3d = ind3d; iloc.offset = dpos; @@ -377,6 +516,11 @@ namespace CarpetInterp2 { proc.AT(n) = p; if (nlocs.AT(p) == 0) ++n_nz_nlocs; ++ nlocs.AT(p); + + // Output + if (veryverbose) { + cout << "Point #" << n << " at " << pos << ": iloc " << iloc << endl; + } } @@ -400,8 +544,8 @@ namespace CarpetInterp2 { offset += nlocs.AT(p); } } - assert (pp == n_nz_nlocs); - assert (offset == npoints); + ASSERT (pp == n_nz_nlocs); + ASSERT (offset == npoints); recv_descr.npoints = npoints; } @@ -410,6 +554,7 @@ namespace CarpetInterp2 { // from the other processors. { vector<int> index (recv_descr.procs.size()); + fill_with_poison (index); for (int pp=0; pp<int(recv_descr.procs.size()); ++pp) { index.AT(pp) = recv_descr.procs.AT(pp).offset; } @@ -417,13 +562,21 @@ namespace CarpetInterp2 { for (int n=0; n<npoints; ++n) { int const p = proc.AT(n); int const pp = recv_descr.procinds.AT(p); - assert (pp >= 0); + ASSERT (pp >= 0); recv_descr.index.AT(n) = index.AT(pp); ++index.AT(pp); } for (int pp=0; pp<int(recv_descr.procs.size()); ++pp) { int const recv_npoints = index.AT(pp) - recv_descr.procs.AT(pp).offset; - assert (recv_npoints == recv_descr.procs.AT(pp).npoints); + ASSERT (recv_npoints == recv_descr.procs.AT(pp).npoints); + } + vector<bool> received (npoints, false); + for (int n=0; n<npoints; ++n) { + assert (not received.AT(n)); + received.AT(n) = true; + } + for (int n=0; n<npoints; ++n) { + assert (received.AT(n)); } } @@ -432,6 +585,7 @@ namespace CarpetInterp2 { // Count the number of points which have to be sent to other // processors, and exchange this information with MPI vector<int> send_npoints (nprocs, 0), send_offsets (nprocs); + fill_with_poison (send_offsets); { int offset = 0; for (int pp=0; pp<int(recv_descr.procs.size()); ++pp) { @@ -440,12 +594,14 @@ namespace CarpetInterp2 { send_offsets.AT(p) = offset; offset += send_npoints.AT(p); } - assert (offset == npoints); + ASSERT (offset == npoints); } vector<int> recv_npoints (nprocs), recv_offsets (nprocs); + fill_with_poison (recv_npoints); + fill_with_poison (recv_offsets); MPI_Alltoall (&send_npoints.front(), 1, MPI_INT, &recv_npoints.front(), 1, MPI_INT, - MPI_COMM_WORLD); + comm_world); int npoints_source = 0; for (int p=0; p<nprocs; ++p) { recv_offsets.AT(p) = npoints_source; @@ -459,7 +615,6 @@ namespace CarpetInterp2 { scattered_ilocs.AT(recv_descr.index.AT(n)) = ilocs.AT(n); } vector<fasterp_iloc_t> gathered_ilocs(npoints_source); - MPI_Comm & comm_world = * (MPI_Comm *) GetMPICommWorld (cctkGH); MPI_Alltoallv (&scattered_ilocs.front(), &send_npoints.front(), &send_offsets.front(), fasterp_iloc_t::mpi_datatype(), @@ -483,90 +638,81 @@ namespace CarpetInterp2 { if (recv_npoints.AT(p) > 0) { send_proc_t & send_proc = send_descr.procs.AT(pp); send_proc.p = p; - send_proc.offset = recv_offsets.AT(p); - send_proc.npoints = recv_npoints.AT(p); + send_proc.offset = send_offsets.AT(p); + send_proc.npoints = send_npoints.AT(p); ++pp; } } - assert (pp == n_nz_recv_npoints); + ASSERT (pp == n_nz_recv_npoints); } + + + // Calculate stencil coefficients + for (size_t pp=0; pp<send_descr.procs.size(); ++pp) { send_proc_t & send_proc = send_descr.procs.AT(pp); - send_proc.maps.resize (Carpet::maps); - for (int m=0; m<Carpet::maps; ++m) { - send_proc.maps.AT(m).rls.resize (Carpet::reflevels); - for (int rl=0; rl<Carpet::reflevels; ++rl) { - int const ncomps = Carpet::vhh.AT(m)->components (rl); - send_proc.maps.AT(m).rls.AT(rl).compinds.resize (ncomps, -1); - } - } - - vector<vector<vector<int> > > npoints_comp (Carpet::maps); - for (int m=0; m<Carpet::maps; ++m) { - npoints_comp.AT(m).resize(Carpet::reflevels); - for (int rl=0; rl<Carpet::reflevels; ++rl) { - int const ncomps = Carpet::vhh.AT(m)->components (rl); - npoints_comp.AT(m).AT(rl).resize(ncomps, 0); - } - } + vector<int> mrc2comp (maxmrc, -1); + vector<int> comp2mrc (maxmrc); + fill_with_poison (comp2mrc); + int comps = 0; - vector<vector<int> > n_nz_npoints_comp (Carpet::maps); - for (int m=0; m<Carpet::maps; ++m) { - n_nz_npoints_comp.AT(m).resize(Carpet::reflevels, 0); - } + vector<int> npoints_comp (maxmrc, 0); for (int n=0; n<send_proc.npoints; ++n) { - fasterp_iloc_t const & iloc = gathered_ilocs.AT(send_proc.offset + pp); - int const m = iloc.m; - int const rl = iloc.rl; - int const c = iloc.c; - if (npoints_comp.AT(m).AT(rl).AT(c) == 0) { - ++ n_nz_npoints_comp.AT(m).AT(rl); + fasterp_iloc_t const & iloc = gathered_ilocs.AT(send_proc.offset + n); + int const mrc = iloc.mrc.get_ind(); + if (mrc2comp.AT(mrc) == -1) { + mrc2comp.AT(mrc) = comps; + comp2mrc.AT(comps) = mrc; + ++ comps; } - ++ npoints_comp.AT(m).AT(rl).AT(c); + ++ npoints_comp.AT(mrc); } + send_proc.comps.resize(comps); - for (int m=0; m<Carpet::maps; ++m) { - for (int rl=0; rl<Carpet::reflevels; ++rl) { - send_proc.maps.AT(m).rls.AT(rl).comps.resize - (n_nz_npoints_comp.AT(m).AT(rl)); - int cc = 0; - int const ncomps = Carpet::vhh.AT(m)->components (rl); - for (int c=0; c<ncomps; ++c) { - if (npoints_comp.AT(m).AT(rl).AT(c) > 0) { - send_comp_t & comp = send_proc.maps.AT(m).rls.AT(rl).comps.AT(cc); - comp.locs.clear(); - comp.locs.reserve (npoints_comp.AT(m).AT(rl).AT(c)); - comp.c = c; - send_proc.maps.AT(m).rls.AT(rl).compinds.AT(c) = cc; - ++cc; - } - } - } + int offset = 0; + for (int comp=0; comp<comps; ++comp) { + send_comp_t & send_comp = send_proc.comps.AT(comp); + int const mrc = comp2mrc.AT(comp); + send_comp.mrc = mrc; + send_comp.locs.reserve (npoints_comp.AT(mrc)); + + mrc_t const themrc (mrc); + int const m = themrc.m; + int const rl = themrc.rl; + int const c = themrc.c; + ibbox const & ext = + Carpet::vdd.AT(m)->boxes.AT(Carpet::mglevel).AT(rl).AT(c).exterior; + send_comp.lsh = ext.shape() / ext.stride(); + + send_comp.offset = offset; + send_comp.npoints = npoints_comp.AT(mrc); + offset += send_comp.npoints; } + ASSERT (offset == send_proc.npoints); - } // for pp - - - - // Calculate stencil coefficients - for (size_t pp=0; pp<send_descr.procs.size(); ++pp) { - send_proc_t & send_proc = send_descr.procs.AT(pp); + send_proc.index.resize (send_proc.npoints); + fill_with_poison (send_proc.index); for (int n=0; n<send_proc.npoints; ++n) { - fasterp_iloc_t const & iloc = gathered_ilocs.AT(send_proc.offset+pp); - int const m = iloc.m; - int const rl = iloc.rl; - int const c = iloc.c; + fasterp_iloc_t const & iloc = gathered_ilocs.AT(send_proc.offset + n); + int const mrc = iloc.mrc.get_ind(); + int const comp = mrc2comp.AT(mrc); + send_comp_t & send_comp = send_proc.comps.AT(comp); - fasterp_src_loc_t sloc; - sloc.calc_stencil (iloc, order); + send_proc.index.AT(n) = send_comp.offset + send_comp.locs.size(); - int const cc = send_proc.maps.AT(m).rls.AT(rl).compinds.AT(c); - send_comp_t & comp = send_proc.maps.AT(m).rls.AT(rl).comps.AT(cc); - comp.locs.push_back (sloc); + fasterp_src_loc_t sloc; + sloc.calc_stencil (iloc, send_comp.lsh, order); + send_comp.locs.push_back (sloc); + } + + for (int comp=0; comp<comps; ++comp) { + send_comp_t & send_comp = send_proc.comps.AT(comp); + assert (int(send_comp.locs.size()) == send_comp.npoints); } + } // for pp } @@ -590,94 +736,127 @@ namespace CarpetInterp2 { const { size_t const nvars = varinds.size(); - assert (values.size() == nvars); + ASSERT (values.size() == nvars); for (size_t v=0; v<values.size(); ++v) { - assert (varinds.AT(v) >= 0 and varinds.AT(v) <= CCTK_NumVars()); - // Ensure that variable is GF - // Ensure that variable has "good" type - // Ensure that variable has storage - assert (values.AT(v) != NULL); + int const vi = varinds.AT(v); + ASSERT (vi >= 0 and vi <= CCTK_NumVars()); + int const gi = CCTK_GroupIndexFromVarI (vi); + ASSERT (gi >= 0); + cGroup group; + { + int const ierr = CCTK_GroupData (gi, &group); + ASSERT (not ierr); + } + ASSERT (group.grouptype == CCTK_GF); + ASSERT (group.vartype == CCTK_VARIABLE_REAL); + ASSERT (group.dim == dim); +#if 0 + // Cannot be called in level mode + cGroupDynamicData dyndata; + { + int const ierr = CCTK_GroupDynamicData (cctkGH, gi, &dyndata); + ASSERT (not ierr); + } + ASSERT (dyndata.activetimelevels >= 1); +#endif + ASSERT (values.AT(v) != NULL); } MPI_Comm & comm_world = * (MPI_Comm *) GetMPICommWorld (cctkGH); + int const mpi_tag = 0; // Post Irecvs vector<CCTK_REAL> recv_points (recv_descr.npoints * nvars); + fill_with_poison (recv_points); vector<MPI_Request> recv_reqs (recv_descr.procs.size()); for (size_t pp=0; pp<recv_descr.procs.size(); ++pp) { recv_proc_t const & recv_proc = recv_descr.procs.AT(pp); - CCTK_REAL rdummy; - int const tag = 0; MPI_Irecv (& recv_points.AT(recv_proc.offset * nvars), recv_proc.npoints * nvars, - dist::datatype (rdummy), recv_proc.p, tag, + dist::datatype<CCTK_REAL>(), recv_proc.p, mpi_tag, comm_world, & recv_reqs.AT(pp)); } // Interpolate data and post Isends - vector<CCTK_REAL> send_points (send_descr.npoints); - vector<CCTK_REAL>::iterator destit = send_points.begin(); + vector<CCTK_REAL> send_points (send_descr.npoints * nvars); + fill_with_poison (send_points); vector<MPI_Request> send_reqs (send_descr.procs.size()); for (size_t pp=0; pp<send_descr.procs.size(); ++pp) { send_proc_t const & send_proc = send_descr.procs.AT(pp); - for (size_t m=0; m<send_proc.maps.size(); ++m) { - send_map_t const & send_map = send_proc.maps.AT(m); - for (size_t rl=0; rl<send_map.rls.size(); ++rl) { - send_rl_t const & send_rl = send_map.rls.AT(rl); - for (size_t cc=0; cc<send_rl.comps.size(); ++cc) { - send_comp_t const & send_comp = send_rl.comps.AT(cc); - int const c = send_comp.c; - - dh const & dd = * Carpet::vdd.AT(m); - ibbox const & - ext = dd.boxes.AT(Carpet::mglevel).AT(rl).AT(c).exterior; - ivect const lsh = ext.shape() / ext.stride(); - - // Get pointers to 3D data - vector<CCTK_REAL const *> varptrs (nvars); - for (size_t v=0; v<nvars; ++v) { - int const gi = CCTK_GroupIndexFromVarI (varinds.AT(v)); - assert (gi >= 0); - int const vi = varinds.AT(v) - CCTK_FirstVarIndexI (gi); - assert (vi >= 0); - int const tl = 0; - varptrs.AT(v) = - (CCTK_REAL const *) - (* Carpet::arrdata.AT(gi).AT(m).data.AT(vi)) - (tl, rl, c, Carpet::mglevel)->storage(); - assert (varptrs.AT(v)); - } - - for (size_t n=0; n<send_comp.locs.size(); ++n) { - assert (destit + nvars <= send_points.end()); - send_comp.locs.AT(n).interpolate - (lsh, order, varptrs, & * destit); - destit += nvars; - } - - } // for cc - } // for rl - } // for m + vector<CCTK_REAL>::iterator + destit = send_points.begin() + send_proc.offset * nvars; + vector<CCTK_REAL>::iterator const + endit = destit + send_proc.npoints * nvars; + + for (size_t comp=0; comp<send_proc.comps.size(); ++comp) { + send_comp_t const & send_comp = send_proc.comps.AT(comp); + + int const m = send_comp.mrc.m; + int const rl = send_comp.mrc.rl; + int const c = send_comp.mrc.c; + int const tl = 0; + + // Get pointers to 3D data + vector<CCTK_REAL const *> varptrs (nvars); + for (size_t v=0; v<nvars; ++v) { + int const gi = CCTK_GroupIndexFromVarI (varinds.AT(v)); + ASSERT (gi >= 0); + int const vi = varinds.AT(v) - CCTK_FirstVarIndexI (gi); + ASSERT (vi >= 0); + varptrs.AT(v) = + (CCTK_REAL const *) + (* Carpet::arrdata.AT(gi).AT(m).data.AT(vi)) + (tl, rl, c, Carpet::mglevel)->storage(); + ASSERT (varptrs.AT(v)); + } + + for (size_t n=0; n<send_comp.locs.size(); ++n) { + ASSERT (destit + nvars <= endit); + send_comp.locs.AT(n).interpolate + (send_comp.lsh, order, varptrs, & * destit); + destit += nvars; + } + + } // for comps + + ASSERT (destit == endit); + + // Gather send points + vector<CCTK_REAL> gathered_send_points (send_proc.npoints * nvars); + fill_with_poison (gathered_send_points); + for (int n=0; n<send_proc.npoints; ++n) { + int const nn = send_proc.offset + send_proc.index.AT(n); + for (size_t v=0; v<nvars; ++v) { + gathered_send_points.AT(n * nvars + v) = + send_points.AT(nn * nvars + v); + } + } + for (int n=0; n<int(send_proc.npoints * nvars); ++n) { + int const nn = send_proc.offset * nvars + n; + send_points.AT(nn) = gathered_send_points.AT(n); + } + // TODO: Use a scatter index list instead of a gather index + // list, and combine the scattering with the calculation - CCTK_REAL rdummy; - int const tag = 0; MPI_Isend (& send_points.AT(send_proc.offset * nvars), send_proc.npoints * nvars, - dist::datatype (rdummy), send_proc.p, tag, + dist::datatype<CCTK_REAL>(), send_proc.p, mpi_tag, comm_world, & send_reqs.AT(pp)); } // for pp - assert (destit == send_points.end()); // Wait for Irecvs to complete MPI_Waitall (recv_reqs.size(), & recv_reqs.front(), MPI_STATUSES_IGNORE); // Gather data for (int n=0; n<recv_descr.npoints; ++n) { - int const nn = recv_descr.index.AT(n); + size_t const nn = recv_descr.index.AT(n); for (size_t v=0; v<nvars; ++v) { - values.AT(v)[n] = recv_points.AT(nn); + values.AT(v)[n] = recv_points.AT(nn * nvars + v); +#ifndef NDEBUG + recv_points.AT(nn * nvars + v) = poison; +#endif } } diff --git a/Carpet/CarpetInterp2/src/fasterp.hh b/Carpet/CarpetInterp2/src/fasterp.hh index c34b9bf32..d136ff900 100644 --- a/Carpet/CarpetInterp2/src/fasterp.hh +++ b/Carpet/CarpetInterp2/src/fasterp.hh @@ -1,7 +1,9 @@ #ifndef FASTERP_HH #define FASTERP_HH +#include <cassert> #include <cstdlib> +#include <iostream> #include <vector> #include <cctk.h> @@ -27,6 +29,52 @@ namespace CarpetInterp2 { + // Keep a map, refinement level, and component index together + struct mrc_t { + static int maps; + static int reflevels; + static int components; + + static void determine_mrc_info (); + static int get_max_ind () + { + return maps * reflevels * components; + } + + int m; + int rl; + int c; + + mrc_t () + { + } + + mrc_t (int const m_, int const rl_, int const c_) + : m(m_), rl(rl_), c(c_) + { + assert (m>=0 and m<maps); + assert (rl>=0 and rl<reflevels); + assert (c>=0 and c<components); + } + + // Create a mrc from an integer index + mrc_t (int ind); + + // Convert a mrc into an integer index + int get_ind () const + { + return c + components * (rl + reflevels * m); + } + + void output (ostream& os) const; + }; + + inline + ostream& operator<< (ostream& os, mrc_t const & mrc) + { mrc.output(os); return os; } + + + // A global location, given by its global coordinates struct fasterp_glocs_t { vector<CCTK_REAL> coords[dim]; @@ -56,15 +104,21 @@ namespace CarpetInterp2 { // An integer location, given by map, refinement level, and // component struct fasterp_iloc_t { - int m, rl, c; + mrc_t mrc; // map, refinement level, component // ivect idx; int ind3d; rvect offset; // in terms of grid points static MPI_Datatype mpi_datatype (); + + void output (ostream& os) const; }; + inline + ostream& operator<< (ostream& os, fasterp_iloc_t const & iloc) + { iloc.output(os); return os; } + struct fasterp_dest_loc_t { @@ -82,6 +136,7 @@ namespace CarpetInterp2 { public: void calc_stencil (fasterp_iloc_t const & iloc, + ivect const & lsh, int order); void interpolate (ivect const & lsh, @@ -103,8 +158,15 @@ namespace CarpetInterp2 { vector<CCTK_REAL const *> const & varptrs, CCTK_REAL * restrict vals) const; + + public: + void output (ostream& os) const; }; + inline + ostream& operator<< (ostream& os, fasterp_src_loc_t const & sloc) + { sloc.output(os); return os; } + // A receive descriptor, describing what is received from other @@ -120,36 +182,37 @@ namespace CarpetInterp2 { vector<int> procinds; int npoints; // total number of received points - vector<int> index; // gather index list + vector<int> index; // gather index list: index[user-location] = + // mpi-location }; // A send descriptor; describing what to send to other processors struct send_comp_t { // This structure does not exist for all components -- components // which are not accessed are not described, making this a sparse - // data structure. The field c contains the component number. + // data structure. The fields m, rl, and c identify the + // component. vector<fasterp_src_loc_t> locs; - int c; // source component - }; - - struct send_rl_t { - vector<send_comp_t> comps; - vector<int> compinds; + + mrc_t mrc; // source map, refinement level, component + ivect lsh; + int offset; + int npoints; }; - struct send_map_t { - vector<send_rl_t> rls; - }; - struct send_proc_t { // This structure does not exist for all processors -- processors // with which there is no communication are not described, making // this a sparse data structure. The field p contains the // processor number. - vector<send_map_t> maps; + vector<send_comp_t> comps; + int p; // receiving processor int offset; int npoints; // total number of sent points + + vector<int> index; // gather index list: index[mpi-location] = + // calculation-location }; struct send_descr_t { diff --git a/Carpet/CarpetLib/src/dist.hh b/Carpet/CarpetLib/src/dist.hh index cf4cbd70c..6868d85ce 100644 --- a/Carpet/CarpetLib/src/dist.hh +++ b/Carpet/CarpetLib/src/dist.hh @@ -172,7 +172,30 @@ namespace dist { inline unsigned int c_ndatatypes () { return 16; } - + template <typename T> unsigned int c_datatype () { abort(); } + template<> inline unsigned int c_datatype <char> () { return 0; } + template<> inline unsigned int c_datatype <signed char> () { return 1; } + template<> inline unsigned int c_datatype <unsigned char> () { return 2; } + template<> inline unsigned int c_datatype <short> () { return 3; } + template<> inline unsigned int c_datatype <unsigned short> () { return 4; } + template<> inline unsigned int c_datatype <int> () { return 5; } + template<> inline unsigned int c_datatype <unsigned int> () { return 6; } + template<> inline unsigned int c_datatype <long> () { return 7; } + template<> inline unsigned int c_datatype <unsigned long> () { return 8; } + template<> inline unsigned int c_datatype <long long> () { return 9; } + template<> inline unsigned int c_datatype <float> () { return 10; } + template<> inline unsigned int c_datatype <double> () { return 11; } + template<> inline unsigned int c_datatype <long double> () { return 12; } +#ifdef HAVE_CCTK_COMPLEX8 + template<> inline unsigned int c_datatype <CCTK_COMPLEX8> () { return 13; } +#endif +#ifdef HAVE_CCTK_COMPLEX16 + template<> inline unsigned int c_datatype <CCTK_COMPLEX16> () { return 14; } +#endif +#ifdef HAVE_CCTK_COMPLEX32 + template<> inline unsigned int c_datatype <CCTK_COMPLEX32> () { return 15; } +#endif + ///////////////////////////////////////////////////////////////// // MPI Datatype helpers // Map a C datatype to its corresponding MPI datatype. @@ -231,6 +254,30 @@ namespace dist { { return mpi_complex32; } #endif + template <typename T> MPI_Datatype datatype () { abort(); } + template<> inline MPI_Datatype datatype <char> () { return MPI_CHAR; } + template<> inline MPI_Datatype datatype <signed char> () { return MPI_CHAR; } + template<> inline MPI_Datatype datatype <unsigned char> () { return MPI_UNSIGNED_CHAR; } + template<> inline MPI_Datatype datatype <short> () { return MPI_SHORT; } + template<> inline MPI_Datatype datatype <unsigned short> () { return MPI_UNSIGNED_SHORT; } + template<> inline MPI_Datatype datatype <int> () { return MPI_INT; } + template<> inline MPI_Datatype datatype <unsigned int> () { return MPI_UNSIGNED; } + template<> inline MPI_Datatype datatype <long> () { return MPI_LONG; } + template<> inline MPI_Datatype datatype <unsigned long> () { return MPI_UNSIGNED_LONG; } + template<> inline MPI_Datatype datatype <long long> () { return MPI_LONG_LONG_INT; } + template<> inline MPI_Datatype datatype <float> () { return MPI_FLOAT; } + template<> inline MPI_Datatype datatype <double> () { return MPI_DOUBLE; } + template<> inline MPI_Datatype datatype <long double> () { return MPI_LONG_DOUBLE; } +#ifdef HAVE_CCTK_COMPLEX8 + template<> inline MPI_Datatype datatype <CCTK_COMPLEX8> () { return mpi_complex8; } +#endif +#ifdef HAVE_CCTK_COMPLEX16 + template<> inline MPI_Datatype datatype <CCTK_COMPLEX16> () { return mpi_complex16; } +#endif +#ifdef HAVE_CCTK_COMPLEX32 + template<> inline MPI_Datatype datatype <CCTK_COMPLEX32> () { return mpi_complex32; } +#endif + } // namespace dist diff --git a/Carpet/CarpetLib/src/vect.hh b/Carpet/CarpetLib/src/vect.hh index e3f6b086b..3c4232525 100644 --- a/Carpet/CarpetLib/src/vect.hh +++ b/Carpet/CarpetLib/src/vect.hh @@ -434,6 +434,20 @@ inline int minloc (const vect<T,D>& a) { return r; } +/** Return the n-dimensional linear array index. */ +template<typename T,int D> +inline T index (const vect<T,D>& lsh, const vect<T,D>& ind) PURE; +template<typename T,int D> +inline T index (const vect<T,D>& lsh, const vect<T,D>& ind) { + T r(0); + for (int d=D-1; d>=0; --d) { + assert (lsh[d]>=0); + assert (ind[d]>=0 and ind[d]<lsh[d]); + r = r * lsh[d] + ind[d]; + } + return r; +} + // Higher order functions |