#include #include #include #include #include #include #include #include #include #ifdef CCTK_MPI # include #else # include "nompi.h" #endif #include #include #include #include #include "fasterp.hh" #ifdef CARPETINTERP2_CHECK # define CI2C(x,y) x,y #else # define CI2C(x,y) #endif namespace CarpetInterp2 { int const ipoison = -1234567890; CCTK_REAL const poison = -1.0e+12; int get_poison (int const &) CCTK_ATTRIBUTE_CONST; int get_poison (int const &) { return ipoison; } CCTK_REAL get_poison (CCTK_REAL const &) CCTK_ATTRIBUTE_CONST; CCTK_REAL get_poison (CCTK_REAL const &) { return poison; } template void fill (vector & v, T const & val) { // fill (v.begin(), v.end(), val); #pragma omp parallel for for (int n=0; n void fill_with_poison (vector & v) { #ifndef NDEBUG T dummy; fill (v, get_poison(dummy)); // fill (v.begin(), v.end(), get_poison(dummy)); #endif } template void check_for_poison (vector const & v) { #pragma omp parallel for for (int n=0; n(), /* find MPI datatype */ \ STRINGIFY(name), /* field name */ \ STRINGIFY(type), /* type name */ \ } dist::mpi_struct_descr_t const descr[] = { ENTRY(int, mrc), #ifdef CARPETINTERP2_CHECK ENTRY(int, pn), ENTRY(int, ipos), ENTRY(int, ind), #endif ENTRY(int, ind3d), ENTRY(CCTK_REAL, offset), {1, sizeof(s), MPI_UB, "MPI_UB", "MPI_UB"} }; #undef ENTRY newtype = dist::create_mpi_datatype (sizeof descr / sizeof descr[0], descr, "fasterp_iloc_t::mpi_datatype", sizeof s); initialised = true; } return newtype; } void fasterp_iloc_t:: output (ostream& os) const { os << "fasterp_iloc_t{" << "mrc=" << mrc << "," #ifdef CARPETINTERP2_CHECK << "pn=" << pn << "," << "ipos=" << ipos << "," << "ind=" << ind << "," #endif << "ind3d=" << ind3d << "," << "offset=" << offset << "}"; } ////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// int mrc_t::maps = -1; int mrc_t::reflevels = -1; int mrc_t::components = -1; void mrc_t:: determine_mrc_info () { maps = Carpet::maps; reflevels = Carpet::reflevels; components = 0; for (int m=0; mcomponents (rl); components = max (components, ncomps); } } } mrc_t:: mrc_t (int ind) { assert (ind >= 0); int const ind0 = ind; c = ind % components; ind /= components; rl = ind % reflevels; ind /= reflevels; m = ind % maps; ind /= maps; assert (ind == 0); assert (get_ind() == ind0); } void mrc_t:: output (ostream& os) const { os << "mrc{m=" << m << ",rl=" << rl << ",c=" << c << "}"; } void pn_t:: output (ostream& os) const { os << "pn{p=" << p << ",n=" << n << "}"; } ////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// // Calculate the coefficients for one interpolation stencil // TODO: Could templatify this function on the order to improve // efficiency int fasterp_src_loc_t:: calc_stencil (fasterp_iloc_t const & iloc, ivect const & ash, #ifdef CARPETINTERP2_CHECK ivect const & lsh, #endif int const order) { assert (order <= max_order); CCTK_REAL const eps = 1.0e-12; #ifdef CARPETINTERP2_CHECK mrc = iloc.mrc; pn = iloc.pn; ipos = iloc.ipos; // Save ash saved_ash = ash; #endif #ifndef NDEBUG // Poison all coefficients for (int d=0; d= CCTK_REAL(0.5)*(order-1) and offset < CCTK_REAL(0.5)*(order+1))); for (int d=0; d=0 and ind+either(exact,0,order) void fasterp_src_loc_t:: interpolate (ivect const & ash, #ifdef CARPETINTERP2_CHECK ivect const & lsh, #endif vector const & varptrs, CCTK_REAL * restrict const vals) const { #ifdef CARPETINTERP2_CHECK assert (all (ash == saved_ash)); #endif assert (O0 == 0 or not exact[0]); assert (O1 == 0 or not exact[1]); assert (O2 == 0 or not exact[2]); size_t const di = 1; size_t const dj = di * ash[0]; size_t const dk = dj * ash[1]; #ifdef CARPETINTERP2_CHECK assert (all (ind>=0 and ind+ivect(O0,O1,O2) void fasterp_src_loc_t:: interpolate (ivect const & ash, #ifdef CARPETINTERP2_CHECK ivect const & lsh, #endif vector const & varptrs, CCTK_REAL * restrict const vals) const { int const Z = 0; if (exact[2]) { if (exact[1]) { if (exact[0]) { interpolate (ash, CI2C(lsh,) varptrs, vals); } else { interpolate (ash, CI2C(lsh,) varptrs, vals); } } else { if (exact[0]) { interpolate (ash, CI2C(lsh,) varptrs, vals); } else { interpolate (ash, CI2C(lsh,) varptrs, vals); } } } else { if (exact[1]) { if (exact[0]) { interpolate (ash, CI2C(lsh,) varptrs, vals); } else { interpolate (ash, CI2C(lsh,) varptrs, vals); } } else { if (exact[0]) { interpolate (ash, CI2C(lsh,) varptrs, vals); } else { interpolate (ash, CI2C(lsh,) varptrs, vals); } } } } // Interpolate a set of variables at the same location, with a given // set of interpolation coefficients. This calls a specialised // interpolation function, depending on whether interpolation is // exact in each of the directions. void fasterp_src_loc_t:: interpolate (ivect const & ash, #ifdef CARPETINTERP2_CHECK ivect const & lsh, #endif int const order, vector const & varptrs, CCTK_REAL * restrict const vals) const { switch (order) { case 0: interpolate< 0> (ash, CI2C(lsh,) varptrs, vals); break; case 1: interpolate< 1> (ash, CI2C(lsh,) varptrs, vals); break; case 2: interpolate< 2> (ash, CI2C(lsh,) varptrs, vals); break; case 3: interpolate< 3> (ash, CI2C(lsh,) varptrs, vals); break; case 4: interpolate< 4> (ash, CI2C(lsh,) varptrs, vals); break; case 5: interpolate< 5> (ash, CI2C(lsh,) varptrs, vals); break; case 6: interpolate< 6> (ash, CI2C(lsh,) varptrs, vals); break; case 7: interpolate< 7> (ash, CI2C(lsh,) varptrs, vals); break; case 8: interpolate< 8> (ash, CI2C(lsh,) varptrs, vals); break; case 9: interpolate< 9> (ash, CI2C(lsh,) varptrs, vals); break; case 10: interpolate<10> (ash, CI2C(lsh,) varptrs, vals); break; case 11: interpolate<11> (ash, CI2C(lsh,) varptrs, vals); break; default: // Add higher orders here as desired CCTK_WARN (CCTK_WARN_ABORT, "Interpolation orders larger than 11 are not yet implemented"); assert (0); } } void fasterp_src_loc_t:: output (ostream& os) const { os << "fasterp_src_loc_t{"; os << "coeffs=["; for (int d=0; d0) os << ","; os << "["; for (int n=0; n<=max_order; ++n) { if (n>0) os << ","; os << coeffs[d][n]; } os << "]"; } os << "],"; os << "exact=" << exact << ","; #ifdef CARPETINTERP2_CHECK os << "pn=" << pn << ","; os << "mrc=" << mrc << ","; os << "ipos=" << ipos << ","; os << "ind=" << ind << ","; #endif os << "ind3d=" << ind3d; #ifdef CARPETINTERP2_CHECK os << "," << "saved_ash=" << saved_ash; #endif os << "}"; } ////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// // Calculate the coefficients for one interpolation stencil // TODO: Could templatify this function on the order to improve // efficiency int fasterp_eno2_src_loc_t:: calc_stencil (fasterp_iloc_t const & iloc, ivect const & ash, #ifdef CARPETINTERP2_CHECK ivect const & lsh, #endif int /*order*/) { //assert (order <= max_order); CCTK_REAL const eps = 1.0e-12; #ifdef CARPETINTERP2_CHECK mrc = iloc.mrc; pn = iloc.pn; ipos = iloc.ipos; // Save ash saved_ash = ash; #endif // first we setup 1st order stencil! { const int order = 1; #ifndef NDEBUG // Poison all coefficients for (int d=0; d= CCTK_REAL(0.5)*(order-1) and offset < CCTK_REAL(0.5)*(order+1))); for (int d=0; d= CCTK_REAL(1.0) and offset <= CCTK_REAL(2.0))); for (int d=0; d=0 and ind+either(exact,0,order)= CCTK_REAL(0.0) and offset <= CCTK_REAL(1.0))); for (int d=0; d void fasterp_eno2_src_loc_t:: interpolate (ivect const & ash, #ifdef CARPETINTERP2_CHECK ivect const & lsh, #endif vector const & varptrs, CCTK_REAL * restrict const vals) const { #ifdef CARPETINTERP2_CHECK assert (all (ash == saved_ash)); #endif assert (O0 == 0 or not exact[0]); assert (O1 == 0 or not exact[1]); assert (O2 == 0 or not exact[2]); size_t const di = 1; size_t const dj = di * ash[0]; size_t const dk = dj * ash[1]; #ifdef CARPETINTERP2_CHECK assert (all (ind>=0 and ind+ivect(O0,O1,O2) qz(4); for (size_t k=0; k<=3; ++k) { vector qy(4); for (size_t j=0; j<=3; ++j) { vector qx(4); for (size_t i=0; i<=3; ++i) { qx[i] = varptrs.AT(v)[ind3d + i*di + j*dj + k*dk]; } switch (O0) { case 0: qy[j] = qx[0]; break; default: { const CCTK_REAL diffleft = qx[0] + qx[2] - 2.0*qx[1]; const CCTK_REAL diffright = qx[1] + qx[3] - 2.0*qx[2]; CCTK_REAL res; if (fabs(diffleft) < fabs(diffright)) res = coeffsLeft[0][0]*qx[0] + coeffsLeft[0][1]*qx[1] + coeffsLeft[0][2]*qx[2]; else res = coeffsRight[0][0]*qx[1] + coeffsRight[0][1]*qx[2] + coeffsRight[0][2]*qx[3]; if (diffleft * diffright <= 0.0 || (res-qx[1])*(qx[2]-res) < 0.0) { res = coeffs[0][0]*qx[1] + coeffs[0][1]*qx[2]; } qy[j] = res; break; } } } switch (O1) { case 0: qz[k] = qy[0]; break; default: { const CCTK_REAL diffleft = qy[0] + qy[2] - 2.0*qy[1]; const CCTK_REAL diffright = qy[1] + qy[3] - 2.0*qy[2]; CCTK_REAL res; if (fabs(diffleft) < fabs(diffright)) res = coeffsLeft[1][0]*qy[0] + coeffsLeft[1][1]*qy[1] + coeffsLeft[1][2]*qy[2]; else res = coeffsRight[1][0]*qy[1] + coeffsRight[1][1]*qy[2] + coeffsRight[1][2]*qy[3]; if (diffleft * diffright <= 0.0 || (res-qy[1])*(qy[2]-res) < 0.0) { res = coeffs[1][0]*qy[1] + coeffs[1][1]*qy[2]; } qz[k] = res; break; } } } switch (O2) { case 0: vals[v] = qz[0]; break; default: { const CCTK_REAL diffleft = qz[0] + qz[2] - 2.0*qz[1]; const CCTK_REAL diffright = qz[1] + qz[3] - 2.0*qz[2]; CCTK_REAL res; if (fabs(diffleft) < fabs(diffright)) res = coeffsLeft[2][0]*qz[0] + coeffsLeft[2][1]*qz[1] + coeffsLeft[2][2]*qz[2]; else res = coeffsRight[2][0]*qz[1] + coeffsRight[2][1]*qz[2] + coeffsRight[2][2]*qz[3]; if (diffleft * diffright <= 0.0 || (res-qz[1])*(qz[2]-res) < 0.0) { res = coeffs[2][0]*qz[1] + coeffs[2][1]*qz[2]; } vals[v] = res; break; } } } // for v #ifdef CARPETINTERP2_CHECK # if 0 for (size_t v=0; v void fasterp_eno2_src_loc_t:: interpolate (ivect const & ash, #ifdef CARPETINTERP2_CHECK ivect const & lsh, #endif vector const & varptrs, CCTK_REAL * restrict const vals) const { int const Z = 0; if (exact[2]) { if (exact[1]) { if (exact[0]) { interpolate (ash, CI2C(lsh,) varptrs, vals); } else { interpolate (ash, CI2C(lsh,) varptrs, vals); } } else { if (exact[0]) { interpolate (ash, CI2C(lsh,) varptrs, vals); } else { interpolate (ash, CI2C(lsh,) varptrs, vals); } } } else { if (exact[1]) { if (exact[0]) { interpolate (ash, CI2C(lsh,) varptrs, vals); } else { interpolate (ash, CI2C(lsh,) varptrs, vals); } } else { if (exact[0]) { interpolate (ash, CI2C(lsh,) varptrs, vals); } else { interpolate (ash, CI2C(lsh,) varptrs, vals); } } } } // Interpolate a set of variables at the same location, with a given // set of interpolation coefficients. This calls a specialised // interpolation function, depending on whether interpolation is // exact in each of the directions. void fasterp_eno2_src_loc_t:: interpolate (ivect const & ash, #ifdef CARPETINTERP2_CHECK ivect const & lsh, #endif int const order, vector const & varptrs, CCTK_REAL * restrict const vals) const { switch (order) { case 2: interpolate< 2> (ash, CI2C(lsh,) varptrs, vals); break; default: // Add higher orders here as desired CCTK_WARN (CCTK_WARN_ABORT, "Interpolation orders other than 2 don't make sense for eno2"); assert (0); } } void fasterp_eno2_src_loc_t:: output (ostream& os) const { os << "fasterp_src_loc_t{"; os << "coeffs=["; for (int d=0; d0) os << ","; os << "["; for (int n=0; n<=max_order; ++n) { if (n>0) os << ","; os << coeffs[d][n]; } os << "]"; } os << "],"; os << "exact=" << exact << ","; #ifdef CARPETINTERP2_CHECK os << "pn=" << pn << ","; os << "mrc=" << mrc << ","; os << "ipos=" << ipos << ","; os << "ind=" << ind << ","; #endif os << "ind3d=" << ind3d; #ifdef CARPETINTERP2_CHECK os << "," << "saved_ash=" << saved_ash; #endif os << "}"; } ////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// // Set up an interpolation starting from global coordinates template fasterp_setup_gen_t:: fasterp_setup_gen_t (cGH const * restrict const cctkGH, fasterp_glocs_t const & locations, int const order_) : order (order_), reflevel (Carpet::reflevel), regridding_epoch (reflevel == -1 ? Carpet::regridding_epoch : Carpet::level_regridding_epochs.AT(reflevel)) { // Some global properties int const npoints = locations.size(); // Calculate patch numbers and local coordinates fasterp_llocs_t local_locations (npoints); if (CCTK_IsFunctionAliased ("MultiPatch_GlobalToLocal")) { // This is a multi-patch simulation: convert global to local // coordinates CCTK_REAL const * coords[dim]; CCTK_REAL * local_coords[dim]; for (int d=0; d fasterp_setup_gen_t:: fasterp_setup_gen_t (cGH const * restrict const cctkGH, fasterp_llocs_t const & locations, int const order_) : order (order_) { setup (cctkGH, locations); } // Helper for setting up an interpolation template void fasterp_setup_gen_t:: setup (cGH const * restrict const cctkGH, fasterp_llocs_t const & locations) { DECLARE_CCTK_PARAMETERS; if (verbose) CCTK_VInfo (CCTK_THORNSTRING, "Setting up interpolation for %d grid points", int(locations.size())); if (order < 0) { CCTK_WARN (CCTK_WARN_ABORT, "Interpolation order must be non-negative"); } if (order > max_order) { CCTK_VWarn (CCTK_WARN_ABORT, __LINE__, __FILE__, CCTK_THORNSTRING, "Interpolation order cannot be larger than max_order=%d; " "order=%d was requested. " "(You can increase the compile time constant max_order " "in thorn CarpetInterp2.)", max_order, order); } // Some global properties int const npoints = locations.size(); int const nprocs = CCTK_nProcs (cctkGH); // int const myproc = CCTK_MyProc (cctkGH); mrc_t::determine_mrc_info(); int const maxmrc = mrc_t::get_max_ind(); MPI_Comm const & comm_world = * (MPI_Comm const *) GetMPICommWorld (cctkGH); // Obtain the coordinate ranges for all patches vector lower (Carpet::maps); vector upper (Carpet::maps); vector delta (Carpet::maps); // spacing on finest possible grid vector idelta (Carpet::maps); // inverse spacing for (int m=0; mbaseextent(Carpet::mglevel, 0); delta.AT(m) /= baseext.stride(); idelta.AT(m) = CCTK_REAL(1.0) / delta.AT(m); if (veryverbose) { cout << "GetCoordRange[" << m << "]: lower=" << lower.AT(m) << " upper=" << upper.AT(m) << " delta=" << delta.AT(m) << endl; } } // Calculate refinement levels, components, and integer grid point // indices if (verbose) CCTK_INFO ("Mapping points onto components"); vector ilocs (npoints); vector proc (npoints); fill_with_poison (proc); vector nlocs (nprocs, 0); int min_rl, max_rl; if (Carpet::is_level_mode()) { min_rl = Carpet::reflevel; max_rl = Carpet::reflevel + 1; } else if (Carpet::is_global_mode()) { min_rl = 0; max_rl = Carpet::reflevels; } #pragma omp parallel for for (int n=0; nbaseextent(Carpet::mglevel, 0); // rvect const rpos = // (pos - lower.AT(m)) / (upper.AT(m) - lower.AT(m)) * // rvect (baseext.upper() - baseext.lower()); rvect const rpos = (pos - lower.AT(m)) * idelta.AT(m); // Find refinement level and component int rl, c; ivect ipos; hh->locate_position (rpos, Carpet::mglevel, min_rl, max_rl, rl, c, ipos); if (not (rl>=0 and c>=0)) { #pragma omp critical { ostringstream msg; msg << "Interpolation point " << n << " on map " << m << " " << "at " << pos << " is outside of the grid hierarchy"; msg << "\n" << "rl=" << rl << " c=" << c << "\n" << "rpos=" << rpos << "\n" << "ipos=" << ipos << "\n" << "lower=" << lower << "\n" << "upper=" << upper << "\n" << "delta=" << delta << "\n" << "idelta=" << idelta << "\n" << "hh=" << *hh << "\n"; CCTK_WARN (CCTK_WARN_ABORT, msg.str().c_str()); } } assert (rl>=0 and c>=0); ibbox const & ext = Carpet::vdd.AT(m) ->light_boxes.AT(Carpet::mglevel).AT(rl).AT(c).exterior; rvect dpos = rpos - rvect(ipos); // Convert from Carpet indexing to grid point indexing assert (all (ipos % ext.stride() == ivect(0))); ipos /= ext.stride(); dpos /= rvect(ext.stride()); if (not (all (fabs(dpos) <= rvect(0.5)))) { cout << "fasterp.cc:659\n" << " dpos=" << dpos << "\n" << " ext=" << ext << "\n"; } assert (all (fabs(dpos) <= rvect(0.5))); ivect const ind = ipos - ext.lower() / ext.stride(); ivect const ash = pad_shape(ext); int const ind3d = index(ash, ind); #if 0 ENTER_SINGLEMAP_MODE (cctkGH, m, CCTK_GF) { ENTER_LOCAL_MODE (cctkGH, c, CCTK_GF) { CCTK_REAL const * restrict const xptr = (CCTK_REAL const *) CCTK_VarDataPtr (cctkGH, 0, "grid::x"); CCTK_REAL const * restrict const yptr = (CCTK_REAL const *) CCTK_VarDataPtr (cctkGH, 0, "grid::y"); CCTK_REAL const * restrict const zptr = (CCTK_REAL const *) CCTK_VarDataPtr (cctkGH, 0, "grid::z"); assert (xptr); assert (yptr); assert (zptr); cout << "CI2 map=" << m << " pos=" << pos << " ind=" << ind << " x=" << xptr[ind3d] << " y=" << yptr[ind3d] << " z=" << zptr[ind3d] << endl; } LEAVE_LOCAL_MODE; } LEAVE_SINGLEMAP_MODE; #endif // TODO: assert that there are enough ghost zones // TODO: store for every face/direction/component/map/reflevel // how wide the boundaries are in every direction. Then check // against this when the stencils are set up. // Store result fasterp_iloc_t & iloc = ilocs.AT(n); iloc.mrc = mrc_t(m, rl, c); #ifdef CARPETINTERP2_CHECK iloc.pn.p = dist::rank(); iloc.pn.n = n; iloc.ipos = ipos * ext.stride(); iloc.ind = ind; #endif iloc.ind3d = ind3d; iloc.offset = dpos; // Find source processor int const p = Carpet::vhh.AT(m)->processor(rl,c); proc.AT(n) = p; int& nlocs_p = nlocs.AT(p); #pragma omp atomic ++ nlocs_p; // Output if (veryverbose) { #pragma omp critical { cout << "Point #" << n << " at " << pos << ": iloc " << iloc << endl; } } } // Find mapping from processors to "processor indices": It may be // too expensive to store data for all processors, so we store // only data about those processors with which we actually // communicate. if (verbose) CCTK_INFO ("Determine set of communicating processors"); { int n_nz_nlocs = 0; for (int p=0; p 0) { ++ n_nz_nlocs; } } recv_descr.procs.resize (n_nz_nlocs); recv_descr.procinds.resize (nprocs, -1); int pp = 0; int offset = 0; for (int p=0; p 0) { recv_descr.procs.AT(pp).p = p; recv_descr.procs.AT(pp).offset = offset; recv_descr.procs.AT(pp).npoints = nlocs.AT(p); recv_descr.procinds.AT(p) = pp; ++ pp; offset += nlocs.AT(p); } } assert (pp == n_nz_nlocs); assert (offset == npoints); recv_descr.npoints = npoints; } // Create a mapping "index" from location index, as specified by // the user, to the index order in which the data are received // from the other processors. if (verbose) { CCTK_INFO ("Determine inter-processor gather index permutation"); } { vector index (recv_descr.procs.size()); fill_with_poison (index); for (int pp=0; pp= 0); recv_descr.index.AT(n) = index.AT(pp); ++index.AT(pp); } for (int pp=0; pp received (npoints); fill (received, false); // NOTE: Can't use OMP parallel here -- vector has the // wrong memory layout for this for (int n=0; n recv_npoints (nprocs), recv_offsets (nprocs); fill (recv_npoints, 0); fill_with_poison (recv_offsets); { int offset = 0; for (int pp=0; pp send_npoints (nprocs), send_offsets (nprocs); fill_with_poison (send_npoints); fill_with_poison (send_offsets); MPI_Alltoall (&recv_npoints.front(), 1, MPI_INT, &send_npoints.front(), 1, MPI_INT, comm_world); int npoints_send = 0; for (int p=0; p scattered_ilocs(npoints); #pragma omp parallel for for (int n=0; n gathered_ilocs(npoints_send); MPI_Alltoallv (&scattered_ilocs.front(), &recv_npoints.front(), &recv_offsets.front(), fasterp_iloc_t::mpi_datatype(), &gathered_ilocs.front(), &send_npoints.front(), &send_offsets.front(), fasterp_iloc_t::mpi_datatype(), comm_world); #ifdef CARPETINTERP2_CHECK // Ensure that the ilocs were sent to the right processor for (int p=0; pis_local(rl,c)); } } #endif // Fill in send descriptors send_descr.npoints = npoints_send; { int n_nz_send_npoints = 0; for (int p=0; p 0) ++n_nz_send_npoints; } send_descr.procs.resize (n_nz_send_npoints); int pp = 0; for (int p=0; p 0) { send_proc_t & send_proc = send_descr.procs.AT(pp); send_proc.p = p; send_proc.offset = send_offsets.AT(p); send_proc.npoints = send_npoints.AT(p); ++pp; } } assert (pp == n_nz_send_npoints); } // Calculate stencil coefficients if (verbose) CCTK_INFO ("Calculate stencil coefficients"); for (size_t pp=0; pp & send_proc = send_descr.procs.AT(pp); vector mrc2comp (maxmrc, -1); vector comp2mrc (maxmrc); fill_with_poison (comp2mrc); int ncomps = 0; vector npoints_comp (maxmrc); fill (npoints_comp, 0); // TODO: parallelise with OpenMP for (int n=0; n & send_comp = send_proc.comps.AT(comp); int const mrc = comp2mrc.AT(comp); send_comp.mrc = mrc; send_comp.locs.reserve (npoints_comp.AT(mrc)); mrc_t const themrc (mrc); int const m = themrc.m; int const rl = themrc.rl; int const c = themrc.c; assert (Carpet::vhh.AT(m)->is_local(rl,c)); ibbox const & ext = Carpet::vdd.AT(m)->light_boxes.AT(Carpet::mglevel).AT(rl).AT(c).exterior; send_comp.ash = pad_shape(ext); #ifdef CARPETINTERP2_CHECK send_comp.lsh = ext.shape() / ext.stride(); #endif send_comp.offset = offset; send_comp.npoints = npoints_comp.AT(mrc); offset += send_comp.npoints; } assert (offset == send_proc.npoints); send_proc.index.resize (send_proc.npoints); fill_with_poison (send_proc.index); // TODO: This is not parallel! Loop over comps instead? // #pragma omp parallel for for (int n=0; n & send_comp = send_proc.comps.AT(comp); send_proc.index.AT(n) = send_comp.offset + send_comp.locs.size(); //fasterp_src_loc_t sloc; FASTERP sloc; int const ierr = sloc.calc_stencil (iloc, send_comp.ash, CI2C(send_comp.lsh,) order); if (ierr) { CCTK_VWarn (CCTK_WARN_ABORT, __LINE__, __FILE__, CCTK_THORNSTRING, "Could not determine valid interpolation stencil for point %d on map %d, refinement level %d, component %d", n, iloc.mrc.m, iloc.mrc.rl, iloc.mrc.c); } send_comp.locs.push_back (sloc); } for (int comp=0; comp & send_comp = send_proc.comps.AT(comp); assert (int(send_comp.locs.size()) == send_comp.npoints); } #ifndef NDEBUG { vector used(send_proc.npoints, false); for (int n=0; n const & send_comp = send_proc.comps.at(comp); assert (int(send_comp.locs.size()) == send_comp.npoints); for (int n=0; n recv_count (dist::size()); fill (recv_count, 0); for (size_t pp=0; pp 0); recv_count.AT(recv_proc.p) = recv_proc.npoints; } vector send_count (dist::size()); fill (send_count, 0); for (size_t pp=0; pp const & send_proc = send_descr.procs.AT(pp); assert (send_count.AT(send_proc.p) == 0); assert (send_proc.npoints > 0); send_count.AT(send_proc.p) = send_proc.npoints; } { vector tmp_count (dist::size()); MPI_Alltoall (&send_count.front(), 1, MPI_INT, &tmp_count .front(), 1, MPI_INT, dist::comm()); swap (send_count, tmp_count); } bool error = false; for (int p=0; p fasterp_setup_gen_t:: ~fasterp_setup_gen_t () { // do nothing -- C++ calls destructors automatically } // Interpolate template void fasterp_setup_gen_t:: interpolate (cGH const * restrict const cctkGH, vector const & varinds, vector & values) const { DECLARE_CCTK_PARAMETERS; // Check regridding epoch if (outofdate()) { if (reflevel == -1) { CCTK_VWarn (CCTK_WARN_ABORT, __LINE__, __FILE__, CCTK_THORNSTRING, "The Carpet grid structure was changed since this fasterp_setup was created"); } else { CCTK_VWarn (CCTK_WARN_ABORT, __LINE__, __FILE__, CCTK_THORNSTRING, "The Carpet grid structure on level %d was changed since this fasterp_setup was created", reflevel); } } // Desired time level int const tl = 0; // current time size_t const nvars = varinds.size(); if (verbose) CCTK_VInfo (CCTK_THORNSTRING, "Interpolating %d variables", int(nvars)); assert (values.size() == nvars); if (nvars == 0) return; if (interp_barrier) { static Timers::Timer barrier_timer ("PreBarrier",0,true); barrier_timer.start(); CCTK_Barrier(cctkGH); barrier_timer.stop(); } for (size_t v=0; v= 0 and vi <= CCTK_NumVars()); int const gi = CCTK_GroupIndexFromVarI (vi); assert (gi >= 0); cGroup group; check (not CCTK_GroupData (gi, &group)); assert (group.grouptype == CCTK_GF); assert (group.vartype == CCTK_VARIABLE_REAL); assert (group.dim == dim); #if 0 // Cannot be called in level mode cGroupDynamicData dyndata; { int const ierr = CCTK_GroupDynamicData (cctkGH, gi, &dyndata); assert (not ierr); } assert (dyndata.activetimelevels > tl); #endif assert (recv_descr.npoints == 0 or values.AT(v) != NULL); } MPI_Comm const & comm_world = * (MPI_Comm const *) GetMPICommWorld (cctkGH); int const mpi_tag = 0; // Post Irecvs if (verbose) CCTK_INFO ("Posting MPI_Irecvs"); static Timers::Timer irecvs_timer ("PostIrecvs",0,true); irecvs_timer.start(); vector recv_points (recv_descr.npoints * nvars); fill_with_poison (recv_points); vector recv_reqs (recv_descr.procs.size()); #ifdef CARPETINTERP2_CHECK vector recv_pn (recv_descr.npoints); vector recv_reqs_pn (recv_descr.procs.size()); #endif for (size_t pp=0; pp(), recv_proc.p, mpi_tag, comm_world, & recv_reqs.AT(pp)); #ifdef CARPETINTERP2_CHECK MPI_Irecv (& recv_pn.AT(recv_proc.offset), recv_proc.npoints * 2, dist::mpi_datatype(), recv_proc.p, mpi_tag, comm_world, & recv_reqs_pn.AT(pp)); #endif } irecvs_timer.stop(); // Interpolate data and post Isends if (verbose) CCTK_INFO ("Interpolating and posting MPI_Isends"); static Timers::Timer interpolate_timer ("Interpolate",0,true); interpolate_timer.instantiate(); // TODO: Use one array per processor? vector send_points (send_descr.npoints * nvars); fill_with_poison (send_points); vector send_reqs (send_descr.procs.size()); #ifdef CARPETINTERP2_CHECK vector send_pn (send_descr.npoints); vector send_reqs_pn (send_descr.procs.size()); #endif for (size_t pp=0; pp const & send_proc = send_descr.procs.AT(pp); vector computed_points (send_proc.npoints * nvars); fill_with_poison (computed_points); #ifdef CARPETINTERP2_CHECK vector computed_pn (send_descr.npoints); #endif for (size_t comp=0; comp const & send_comp = send_proc.comps.AT(comp); int const m = send_comp.mrc.m; int const rl = send_comp.mrc.rl; int const c = send_comp.mrc.c; int const lc = Carpet::vhh.AT(m)->get_local_component(rl,c); // Get pointers to 3D data vector varptrs (nvars); for (size_t v=0; v= 0); int const vi = varinds.AT(v) - CCTK_FirstVarIndexI (gi); assert (vi >= 0); ggf const *const ff = Carpet::arrdata.AT(gi).AT(m).data.AT(vi); void const *const ptr = ff->data_pointer(tl, rl, lc, Carpet::mglevel)->storage(); varptrs.AT(v) = (CCTK_REAL const *)ptr; assert (varptrs.AT(v)); } // TODO: This loops seems unbalanced. Maybe the different // interpolations have different costs. interpolate_timer.start(); #pragma omp parallel for schedule (dynamic, 1000) for (int n=0; n0) { assert (send_pn.AT(send_proc.offset + n ).n > send_pn.AT(send_proc.offset + n-1).n); } } #endif MPI_Isend (& send_points.AT(send_proc.offset * nvars), send_proc.npoints * nvars, dist::mpi_datatype(), send_proc.p, mpi_tag, comm_world, & send_reqs.AT(pp)); #ifdef CARPETINTERP2_CHECK MPI_Isend (& send_pn.AT(send_proc.offset), send_proc.npoints * 2, dist::mpi_datatype(), send_proc.p, mpi_tag, comm_world, & send_reqs_pn.AT(pp)); #endif } // for pp // Wait for Irecvs to complete if (verbose) CCTK_INFO ("Waiting for MPI_Irevcs to complete"); static Timers::Timer waitall_ir_timer ("WaitAll_Irecvs",0,true); waitall_ir_timer.start(); MPI_Waitall (recv_reqs.size(), & recv_reqs.front(), MPI_STATUSES_IGNORE); #ifdef CARPETINTERP2_CHECK MPI_Waitall (recv_reqs.size(), & recv_reqs_pn.front(), MPI_STATUSES_IGNORE); #endif waitall_ir_timer.stop(); // Gather data if (verbose) CCTK_INFO ("Gathering data"); static Timers::Timer gather_timer ("Gather",0,true); gather_timer.start(); #pragma omp parallel for for (int n=0; n; template class fasterp_setup_gen_t; } // namespace CarpetInterp2