#include #include #include #include #include #include #include #include "fasterp.hh" namespace CarpetInterp2 { // Erik's gdb cannot print local variables in functions where an // assert fails. Hence the calls to assert are temporarily moved // into a function of their own. void ASSERT (bool const cond) { if (cond) return; abort(); } // Create an MPI datatype for location information MPI_Datatype fasterp_iloc_t::mpi_datatype () { static bool initialised = false; static MPI_Datatype newtype; if (not initialised) { static fasterp_iloc_t s; #define ARRSIZE(m) (sizeof(s.m) / sizeof(s.m[0])) #define OFFSET(m) ((char*)&(s.m) - (char*)&(s)) // offsetof doesn't work (why?) #define SIZE (sizeof(s)) CCTK_REAL rdummy; dist::mpi_struct_descr_t const descr[] = { { 1, OFFSET(m ), MPI_INT }, { 1, OFFSET(rl ), MPI_INT }, { 1, OFFSET(c ), MPI_INT }, { 1, OFFSET(ind3d ), MPI_INT }, {ARRSIZE(offset), OFFSET(offset), dist::datatype(rdummy)}, { 1, SIZE , MPI_UB } }; #undef ARRSIZE #undef OFFSET #undef SIZE dist::create_mpi_datatype (sizeof(descr) / sizeof(descr[0]), descr, newtype); initialised = true; } return newtype; } ////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// // Calculate the coefficients for one interpolation stencil void fasterp_src_loc_t:: calc_stencil (fasterp_iloc_t const & iloc, int const order) { ASSERT (order <= max_order); CCTK_REAL const rone = 1.0; CCTK_REAL const eps = 1.0e-12; int const n0 = (order+1) / 2; for (int d=0; d= eps) { // The interpolation point does not coincide with the anchor // (this is the regular case): ASSERT (fabs(remainder(xi, rone)) > eps/2); for (int n=0; n<=order; ++n) { CCTK_REAL const xn = n - n0; CCTK_REAL c = 1.0; for (int m=0; m<=order; ++m) { if (m != n) { CCTK_REAL const xm = m - n0; c *= (xi - xm) / (xn - xm); } } coeffs[d][n] = c; } exact[d] = false; // The sum of the coefficients must be one CCTK_REAL s = 0.0; for (int n=0; n<=order; ++n) { s += coeffs[d][n]; } ASSERT (fabs(s - rone) <= eps); } else { // The interpolation point coincides with the anchor; no // interpolation is necessary (this is a special case) exact[d] = true; } } ind3d = iloc.ind3d; } // Interpolate a set of variables at the same location, with a given // set of interpolation coefficients. The template mechanism allows // the order in each direction to be different; in particular, the // order can be 0 if the interpolation location coincides with a // source grid point. For interpolation to order O, this function // should be instantiated eight times, with On=O and On=0, for // n=[0,1,2]. We hope that the compiler optimises the for loops // away if On=0. template void fasterp_src_loc_t:: interpolate (ivect const & lsh, vector const & varptrs, CCTK_REAL * restrict const vals) const { size_t const di = 1; size_t const dj = di * lsh[0]; size_t const dk = dj * lsh[1]; for (size_t v=0; v void fasterp_src_loc_t:: interpolate (ivect const & lsh, vector const & varptrs, CCTK_REAL * restrict const vals) const { int const Z = 0; if (exact[2]) { if (exact[1]) { if (exact[0]) { interpolate (lsh, varptrs, vals); } else { interpolate (lsh, varptrs, vals); } } else { if (exact[0]) { interpolate (lsh, varptrs, vals); } else { interpolate (lsh, varptrs, vals); } } } else { if (exact[1]) { if (exact[0]) { interpolate (lsh, varptrs, vals); } else { interpolate (lsh, varptrs, vals); } } else { if (exact[0]) { interpolate (lsh, varptrs, vals); } else { interpolate (lsh, varptrs, vals); } } } } // Interpolate a set of variables at the same location, with a given // set of interpolation coefficients. This calls a specialised // interpolation function, depending on whether interpolation is // exact in each of the directions. void fasterp_src_loc_t:: interpolate (ivect const & lsh, int const order, vector const & varptrs, CCTK_REAL * restrict const vals) const { switch (order) { case 0: interpolate< 0> (lsh, varptrs, vals); break; case 1: interpolate< 1> (lsh, varptrs, vals); break; case 2: interpolate< 2> (lsh, varptrs, vals); break; case 3: interpolate< 3> (lsh, varptrs, vals); break; case 4: interpolate< 4> (lsh, varptrs, vals); break; case 5: interpolate< 5> (lsh, varptrs, vals); break; case 6: interpolate< 6> (lsh, varptrs, vals); break; case 7: interpolate< 7> (lsh, varptrs, vals); break; case 8: interpolate< 8> (lsh, varptrs, vals); break; case 9: interpolate< 9> (lsh, varptrs, vals); break; case 10: interpolate<10> (lsh, varptrs, vals); break; case 11: interpolate<11> (lsh, varptrs, vals); break; default: // Add higher orders here as desired CCTK_WARN (CCTK_WARN_ABORT, "Interpolation orders larger than 11 are not yet implemented"); ASSERT (0); } } ////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// // Set up an interpolation starting from global coordinates fasterp_setup_t:: fasterp_setup_t (cGH const * restrict const cctkGH, fasterp_glocs_t const & locations, int const order_) : order (order_) { // Some global properties int const npoints = locations.size(); // Calculate patch numbers and local coordinates fasterp_llocs_t local_locations (npoints); if (CCTK_IsFunctionAliased ("MultiPatch_GlobalToLocal")) { // This is a muulti-patch simulation: convert global to local // coordinates CCTK_REAL const * coords[dim]; CCTK_REAL * local_coords[dim]; for (int d=0; d lower (Carpet::maps); vector upper (Carpet::maps); vector delta (Carpet::maps); // spacing on finest possible grid for (int m=0; m ilocs (npoints); vector proc (npoints); vector nlocs (nprocs, 0); int n_nz_nlocs = 0; ASSERT (Carpet::is_level_mode()); for (int n=0; nbaseextent(Carpet::mglevel, 0); rvect const rpos = (pos - lower.AT(m)) / (upper.AT(m) - lower.AT(m)) * rvect (baseext.upper() - baseext.lower()); // Find refinement level and component int rl, c; ivect ipos; hh->locate_position (rpos, Carpet::mglevel, Carpet::reflevel, Carpet::reflevel+1, rl, c, ipos); ibbox const & ext = Carpet::vdd.AT(m)->boxes.AT(Carpet::mglevel).AT(rl).AT(c).exterior; rvect dpos = rpos - rvect(ipos); ASSERT (all (ipos % ext.stride() == ivect(0))); ipos /= ext.stride(); dpos /= rvect(ext.stride()); ASSERT (all (abs(dpos) <= rvect(0.5))); if (order % 2 != 0) { // Potentially shift the stencil anchor for odd interpolation // orders (i.e., for even numbers of stencil points) ivect const ioff = either (dpos > rvect(0.0), ivect(1), ivect(0)); ipos -= ioff; dpos += rvect(ioff); ASSERT (all (dpos >= rvect(0.0) and dpos < rvect(1.0))); } ivect const ind = ipos - ext.lower() / ext.stride(); ivect const lsh = ext.shape() / ext.stride(); int const ind3d = ind[0] + lsh[0] * (ind[1] + lsh[1] * ind[2]); // Store result fasterp_iloc_t & iloc = ilocs.AT(n); iloc.m = m; iloc.rl = rl; iloc.c = c; iloc.ind3d = ind3d; iloc.offset = dpos; // Find source processor int const p = Carpet::vhh.AT(m)->processor(rl,c); proc.AT(n) = p; if (nlocs.AT(p) == 0) ++n_nz_nlocs; ++ nlocs.AT(p); } // Find mapping from processors to "processor indices": It may be // too expensive to store data for all processors, so we store // only data about those processors with which we actually // communicate. { recv_descr.procs.resize (n_nz_nlocs); recv_descr.procinds.resize (nprocs, -1); int pp = 0; int offset = 0; for (int p=0; p 0) { recv_descr.procs.AT(pp).p = p; recv_descr.procs.AT(pp).offset = offset; recv_descr.procs.AT(pp).npoints = nlocs.AT(p); recv_descr.procinds.AT(p) = pp; ++ pp; offset += nlocs.AT(p); } } ASSERT (pp == n_nz_nlocs); ASSERT (offset == npoints); recv_descr.npoints = npoints; } // Create a mapping "index" from location index, as specified by // the user, to the index order in which the data are received // from the other processors. { vector index (recv_descr.procs.size()); for (int pp=0; pp= 0); recv_descr.index.AT(n) = index.AT(pp); ++index.AT(pp); } for (int pp=0; pp send_npoints (nprocs, 0), send_offsets (nprocs); { int offset = 0; for (int pp=0; pp recv_npoints (nprocs), recv_offsets (nprocs); MPI_Alltoall (&send_npoints.front(), 1, MPI_INT, &recv_npoints.front(), 1, MPI_INT, MPI_COMM_WORLD); int npoints_source = 0; for (int p=0; p scattered_ilocs(npoints); for (int n=0; n gathered_ilocs(npoints_source); MPI_Comm & comm_world = * (MPI_Comm *) GetMPICommWorld (cctkGH); MPI_Alltoallv (&scattered_ilocs.front(), &send_npoints.front(), &send_offsets.front(), fasterp_iloc_t::mpi_datatype(), &gathered_ilocs.front(), &recv_npoints.front(), &recv_offsets.front(), fasterp_iloc_t::mpi_datatype(), comm_world); // Fill in send descriptors send_descr.npoints = npoints_source; { int n_nz_recv_npoints = 0; for (int p=0; p 0) ++n_nz_recv_npoints; } send_descr.procs.resize (n_nz_recv_npoints); int pp = 0; for (int p=0; p 0) { send_proc_t & send_proc = send_descr.procs.AT(pp); send_proc.p = p; send_proc.offset = recv_offsets.AT(p); send_proc.npoints = recv_npoints.AT(p); ++pp; } } ASSERT (pp == n_nz_recv_npoints); } for (size_t pp=0; ppcomponents (rl); send_proc.maps.AT(m).rls.AT(rl).compinds.resize (ncomps, -1); } } vector > > npoints_comp (Carpet::maps); for (int m=0; mcomponents (rl); npoints_comp.AT(m).AT(rl).resize(ncomps, 0); } } vector > n_nz_npoints_comp (Carpet::maps); for (int m=0; mcomponents (rl); for (int c=0; c 0) { send_comp_t & comp = send_proc.maps.AT(m).rls.AT(rl).comps.AT(cc); comp.locs.clear(); comp.locs.reserve (npoints_comp.AT(m).AT(rl).AT(c)); comp.c = c; send_proc.maps.AT(m).rls.AT(rl).compinds.AT(c) = cc; ++cc; } } } } } // for pp // Calculate stencil coefficients for (size_t pp=0; pp const & varinds, vector & values) const { size_t const nvars = varinds.size(); ASSERT (values.size() == nvars); for (size_t v=0; v= 0 and vi <= CCTK_NumVars()); int const gi = CCTK_GroupIndexFromVarI (vi); ASSERT (gi >= 0); cGroup group; { int const ierr = CCTK_GroupData (gi, &group); ASSERT (not ierr); } ASSERT (group.grouptype == CCTK_GF); ASSERT (group.vartype == CCTK_VARIABLE_REAL); ASSERT (group.dim == dim); #if 0 // Cannot be called in level mode cGroupDynamicData dyndata; { int const ierr = CCTK_GroupDynamicData (cctkGH, gi, &dyndata); ASSERT (not ierr); } ASSERT (dyndata.activetimelevels >= 1); #endif ASSERT (values.AT(v) != NULL); } MPI_Comm & comm_world = * (MPI_Comm *) GetMPICommWorld (cctkGH); // Post Irecvs vector recv_points (recv_descr.npoints * nvars); vector recv_reqs (recv_descr.procs.size()); for (size_t pp=0; pp send_points (send_descr.npoints * nvars); vector::iterator destit = send_points.begin(); vector send_reqs (send_descr.procs.size()); for (size_t pp=0; pp varptrs (nvars); for (size_t v=0; v= 0); int const vi = varinds.AT(v) - CCTK_FirstVarIndexI (gi); ASSERT (vi >= 0); int const tl = 0; varptrs.AT(v) = (CCTK_REAL const *) (* Carpet::arrdata.AT(gi).AT(m).data.AT(vi)) (tl, rl, c, Carpet::mglevel)->storage(); ASSERT (varptrs.AT(v)); } for (size_t n=0; n