#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef CCTK_MPI # include #else # include "nompi.h" #endif #include #include #include #include #include #include #include #include #include #include #include #include #include namespace Carpet { using namespace std; // Helper routines for spliting regions automatically // The cost for a region, assuming a cost of 1 per interior point static rvect cost (region_t const & reg) { DECLARE_CCTK_PARAMETERS; static rvect costfactor; static bool initialised = false; if (not initialised) { costfactor = rvect(1.0); if (dim > 0) costfactor[0] = 1.0 / aspect_ratio_x; if (dim > 1) costfactor[1] = 1.0 / aspect_ratio_y; if (dim > 2) costfactor[2] = 1.0 / aspect_ratio_z; } if (reg.extent.empty()) return rvect(0); return rvect (reg.extent.shape() / reg.extent.stride()) * costfactor; } static void ClassifyPoints (cGH const * cctkGH, int rl); static void SplitRegionsMaps_Automatic_Recursively (bvect const & dims, int const firstproc, int const nprocs, region_t & superreg, vector & newregs); static void SplitRegions_AsSpecified (cGH const * cctkGH, vector & superregs, vector & regs); void CheckRegions (gh::mregs const & regsss) { char const * const where = "CheckRegions"; static Timer timer (where); timer.start(); // At least one multigrid level if (regsss.size() == 0) { CCTK_WARN (0, "I cannot set up a grid hierarchy with zero multigrid levels."); } assert (regsss.size() > 0); // At least one refinement level if (regsss.AT(0).size() == 0) { CCTK_WARN (0, "I cannot set up a grid hierarchy with zero refinement levels."); } assert (regsss.AT(0).size() > 0); // At most maxreflevels levels if ((int)regsss.AT(0).size() > maxreflevels) { CCTK_VWarn (0, __LINE__, __FILE__, CCTK_THORNSTRING, "I cannot set up a grid hierarchy with more than Carpet::max_refinement_levels refinement levels. I found Carpet::max_refinement_levels=%d, while %d levels were requested.", (int)maxreflevels, (int)regsss.AT(0).size()); } assert ((int)regsss.AT(0).size() <= maxreflevels); for (int ml=0; ml<(int)regsss.size(); ++ml) { int num_regions = 0; for (int rl=0; rl<(int)regsss.AT(0).size(); ++rl) { // No empty levels // (but allow some empty maps) // assert (regsss.AT(ml).AT(rl).size() > 0); num_regions += regsss.AT(ml).AT(rl).size(); for (int c=0; c<(int)regsss.AT(ml).AT(rl).size(); ++c) { // Check sizes // (but allow processors with zero grid points) // assert (all(regsss.AT(rl).AT(c).AT(ml).extent.lower() <= // regsss.AT(rl).AT(c).AT(ml).extent.upper())); // Check strides ivect const str = (maxspacereflevelfact / spacereffacts.AT(rl) * ipow(mgfact, ml)); assert (all(regsss.AT(ml).AT(rl).AT(c).extent.stride() % str == 0)); // Check alignments assert (all(regsss.AT(ml).AT(rl).AT(c).extent.lower() % str == 0)); assert (all(regsss.AT(ml).AT(rl).AT(c).extent.upper() % str == 0)); } } // No empty levels assert (num_regions > 0); } timer.stop(); } bool Regrid (cGH const * const cctkGH, bool const force_recompose, bool const do_init) { DECLARE_CCTK_PARAMETERS; char const * const where = "Regrid"; static Timer timer (where); timer.start(); Checkpoint ("Regridding level %d...", reflevel); assert (is_level_mode()); bool const have_regrid = CCTK_IsFunctionAliased ("Carpet_Regrid"); bool const have_regridmaps = CCTK_IsFunctionAliased ("Carpet_RegridMaps"); bool const use_regridmaps = regrid_in_level_mode and have_regridmaps; if (not use_regridmaps and not have_regrid) { static bool didtell = false; if (maxreflevels > 1 and not didtell) { CCTK_WARN (2, "The regridding routine Carpet_Regrid has not been provided. There will be no regridding. Maybe you forgot to activate a regridding thorn?"); didtell = true; } timer.stop(); return false; } if (regrid_in_level_mode and not have_regridmaps) { static bool didtell = false; if (maps > 1 and maxreflevels > 1 and not didtell) { CCTK_WARN (2, "The regridding routine Carpet_RegridMaps has not been provided. Regridding will be performed in singlemap mode instead of level mode."); didtell = true; } } bool did_change = false; if (not use_regridmaps) { BEGIN_MAP_LOOP(cctkGH, CCTK_GF) { gh::rregs superregss = vhh.AT(map)->superregions; gh::mregs regsss; // Check whether to recompose CCTK_INT const do_recompose = Carpet_Regrid (cctkGH, & superregss, & regsss, force_recompose); assert (do_recompose >= 0); did_change = did_change or do_recompose; if (do_recompose) { RegridMap (cctkGH, map, superregss, regsss, do_init); } } END_MAP_LOOP; } else { vector superregsss (maps); vector regssss (maps); BEGIN_MAP_LOOP(cctkGH, CCTK_GF) { superregsss.AT(map) = vhh.AT(map)->superregions; } END_MAP_LOOP; // Check whether to recompose CCTK_INT const do_recompose = Carpet_RegridMaps (cctkGH, & superregsss, & regssss, force_recompose); assert (do_recompose >= 0); // TODO #if 1 // #ifdef CARPET_DEBUG { int ival = do_recompose; MPI_Bcast (& ival, 1, MPI_INT, 0, dist::comm()); assert (ival == do_recompose); } #endif did_change = did_change or do_recompose; if (do_recompose) { BEGIN_MAP_LOOP(cctkGH, CCTK_GF) { gh::rregs const & superregss = superregsss.AT(map); gh::mregs const & regsss = regssss.AT(map); RegridMap (cctkGH, map, superregss, regsss, do_init); } END_MAP_LOOP; } } // if regrid in level mode if (did_change) { PostRegrid (cctkGH); } // if did change timer.stop(); if (enable_no_storage) { CCTK_WARN (CCTK_WARN_ALERT, "Carpet completed its internal setup, and would now normally go on to allocate memory. Since the parameter Carpet::enable_no_storage has been set, Carpet will exit instead."); CCTK_Exit ((cGH*) cctkGH, 0); } return did_change; } void RegridMap (cGH const * const cctkGH, int const m, gh::rregs const & superregss, gh::mregs const & regsss, bool const do_init) { DECLARE_CCTK_PARAMETERS; char const * const where = "RegridMap"; static Timer timer (where); timer.start(); Waypoint ("Regridding map %d...", m); // TODO: keep levels fixed here #if 0 // // Keep this level fixed if it is not evolved // if (regrid_only_evolved_levels) { int type; bool const use_tapered_grids = * static_cast (CCTK_ParameterGet ("use_tapered_grids", "Carpet", &type)); assert (type == PARAMETER_BOOLEAN); int const do_every = use_tapered_grids ? maxtimereflevelfact / timereffacts.AT(max(0,rl-1)): maxtimereflevelfact / timereffacts.AT( rl ); bool const regrid_this_level = (cctkGH->cctk_iteration - 1) % do_every == 0; if (not regrid_this_level) { // Set regions from current grid structure regions.AT(rl) = ...; } } #endif // Check the regions CheckRegions (regsss); // TODO: check also that the current and all coarser levels did // not change // Regrid vhh.AT(m)->regrid (superregss, regsss, do_init); // Output grid structure to screen OutputSuperregions (cctkGH, m, * vhh.AT(m), superregss); OutputGrids (cctkGH, m, * vhh.AT(m), * vdd.AT(m)); // Write grid structure to file OutputGridStructure (cctkGH, m, regsss); OutputGridCoordinates (cctkGH, m, regsss); Waypoint ("Done regridding map %d.", m); timer.stop(); } void PostRegrid (cGH const * const cctkGH) { DECLARE_CCTK_PARAMETERS; char const * const where = "PostRegrid"; static Timer timer (where); timer.start(); // Calculate new number of levels int const oldreflevels = reflevels; reflevels = vhh.AT(0)->reflevels(); for (int m=0; mreflevels() == reflevels); } // TODO #if 1 // #ifdef CARPET_DEBUG { // All processes must use the same number of levels int ival = reflevels; MPI_Bcast (& ival, 1, MPI_INT, 0, dist::comm()); assert (ival == reflevels); } #endif // One cannot switch off the current level assert (reflevels > reflevel); // Set new number of active time levels for (int n=0; n=0 and rlreflevels()); did_recompose |= vhh.AT(m)->recompose (rl, do_init); Waypoint ("Done recomposing the grid hierarchy for map %d level %d.", m, rl); } ClassifyPoints (cctkGH, rl); if (did_recompose) { ++ level_regridding_epochs.at(rl); } timer.stop(); return did_recompose; } void RegridFree (cGH const * const cctkGH, bool const do_init) { char const * const where = "RegridFree"; static Timer timer (where); timer.start(); Checkpoint ("Freeing after regridding level %d...", reflevel); assert (is_level_mode()); // Free old grid structure for (int m=0; mregrid_free (do_init); } timer.stop(); } void OutputSuperregions (cGH const * const cctkGH, int const m, gh const & hh, gh::rregs const & superregss) { CCTK_INFO ("Grid structure (superregions, grid points):"); for (int rl=0; rl<(int)superregss.size(); ++rl) { assert (rl < hh.reflevels()); for (int c=0; c<(int)superregss.AT(rl).size(); ++c) { const ivect & levfact = spacereffacts.AT(rl); const ibbox & ext = superregss.AT(rl).AT(c).extent; const ivect & lower = ext.lower(); const ivect & upper = ext.upper(); const ivect & stride = ext.stride(); assert (all(lower * levfact % maxspacereflevelfact == 0)); assert (all(upper * levfact % maxspacereflevelfact == 0)); cout << " [" << rl << "][" << m << "][" << c << "]" << " exterior: " << lower / stride << " : " << upper / stride << " (" << (upper - lower) / stride + 1 << " + PADDING) " << prod ((upper - lower) / stride + 1) << eol; } } CCTK_INFO ("Grid structure (superregions, coordinates):"); for (int rl=0; rl<(int)superregss.size(); ++rl) { assert (rl < hh.reflevels()); for (int c=0; c<(int)superregss.AT(rl).size(); ++c) { const rvect origin = domainspecs.AT(m).exterior_min; const rvect delta = (domainspecs.AT(m).exterior_max - domainspecs.AT(m).exterior_min) / rvect (domainspecs.AT(m).npoints - 1); const ibbox & ext = superregss.AT(rl).AT(c).extent; const ivect & ilower = ext.lower(); const ivect & iupper = ext.upper(); const ivect & levfact = spacereffacts.AT(rl); const ibbox & base = hh.baseextent(0,0); const ivect & bstride = base.stride(); cout << " [" << rl << "][" << m << "][" << c << "]" << " exterior: " << origin + delta * rvect(ilower) / rvect(bstride) << " : " << origin + delta * rvect(iupper) / rvect(bstride) << " : " << delta / rvect(levfact) << eol; } } fflush (stdout); } void OutputGrids (cGH const * const cctkGH, int const m, gh const & hh, dh const & dd) { DECLARE_CCTK_PARAMETERS; if (verbose or veryverbose) { CCTK_INFO ("Grid structure (grid points):"); for (int ml=0; mlcctk_iteration << eol; file << "maps " << maps << eol; } file << m << " mglevels " << regsss.size() << eol; for (int ml=0; ml<(int)regsss.size(); ++ml) { file << m << " " << ml << " reflevels " << regsss.AT(ml).size() << eol; for (int rl=0; rl<(int)regsss.AT(ml).size(); ++rl) { file << m << " " << ml << " " << rl << " components " << regsss.AT(ml).AT(rl).size() << eol; for (int c=0; c<(int)regsss.AT(ml).AT(rl).size(); ++c) { file << m << " " << ml << " " << rl << " " << c << " " << regsss.AT(ml).AT(rl).AT(c).processor << " " << regsss.AT(ml).AT(rl).AT(c).extent << " " << regsss.AT(ml).AT(rl).AT(c).outer_boundaries << eol; } } } file.close(); assert (file.good()); } void OutputGridCoordinates (cGH const * const cctkGH, int const m, gh::mregs const & regsss) { DECLARE_CCTK_PARAMETERS; // Output only on the root processor if (CCTK_MyProc(cctkGH) != 0) return; // Output only if output is desired if (CCTK_EQUALS (grid_coordinates_filename, "")) return; // Create the output directory CCTK_CreateDirectory (0755, out_dir); ostringstream filenamebuf; filenamebuf << out_dir << "/" << grid_coordinates_filename; // we need a persistent temporary here string filenamestr = filenamebuf.str(); const char * filename = filenamestr.c_str(); ofstream file; static bool do_truncate = true; if (do_truncate) { do_truncate = false; struct stat fileinfo; if (IO_TruncateOutputFiles (cctkGH) or stat(filename, &fileinfo)!=0) { file.open (filename, ios::out | ios::trunc); assert (file.good()); file << "# grid coordinates" << eol << "# format: map reflevel region mglevel bounding-box" << eol; assert (file.good()); } } if (not file.is_open()) { file.open (filename, ios::app); assert (file.good()); } // Affine transformation between index space and coordinate space rvect const origin = domainspecs.AT(m).exterior_min; rvect const spacing = (domainspecs.AT(m).exterior_max - domainspecs.AT(m).exterior_min) / rvect (domainspecs.AT(m).npoints - 1); rvect const scale = rvect (vhh.AT(m)->baseextents.AT(0).AT(0).stride()) / spacing; file << "iteration " << cctkGH->cctk_iteration << eol; file << "maps " << maps << eol; file << m << " mglevels " << regsss.size() << eol; for (int ml=0; ml<(int)regsss.size(); ++ml) { file << m << " " << ml << " reflevels " << regsss.AT(ml).size() << eol; for (int rl=0; rl<(int)regsss.AT(ml).size(); ++rl) { ibset const extents (regsss.AT(ml).AT(rl), & region_t::extent); file << m << " " << ml << " " << rl << " regions " << extents.setsize() << eol; int c=0; for (ibset::const_iterator bi = extents.begin(); bi != extents.end(); ++ bi) { #if 0 ibbox const & ext = * bi; ibbox const & baseext = vhh.AT(m)->baseextents.AT(ml).AT(rl); ibbox const & coarseext = vhh.AT(m)->baseextents.AT(ml).AT(0 ); // This is nice, but is wrong since CartGrid3D has not yet // initialised the coordinates ivect const cctk_levfac = spacereffacts.AT(rl); ivect const cctk_levoff = baseext.lower() - coarseext.lower(); ivect const cctk_levoffdenom = baseext.stride(); ivect const cctk_lbnd = (ext.lower() - baseext.lower()) / ext.stride(); ivect const cctk_ubnd = (ext.upper() - baseext.lower()) / ext.stride(); rvect const cctk_origin_space = origin_space.AT(m).AT(ml); rvect const cctk_delta_space = delta_space.AT(m) * rvect (ipow (mgfact, ml)); rvect const CCTK_ORIGIN_SPACE = cctk_origin_space + cctk_delta_space / rvect (cctk_levfac) * rvect (cctk_levoff) / rvect (cctk_levoffdenom); rvect const CCTK_DELTA_SPACE = cctk_delta_space / rvect (cctk_levfac); rvect const rlower = CCTK_ORIGIN_SPACE + rvect (cctk_lbnd) * CCTK_DELTA_SPACE; rvect const rupper = CCTK_ORIGIN_SPACE + rvect (cctk_ubnd) * CCTK_DELTA_SPACE; rvect const rdelta = CCTK_DELTA_SPACE; #endif ibbox const & ext = * bi; rvect const rlower = origin + rvect (ext.lower()) / scale; rvect const rupper = origin + rvect (ext.upper()) / scale; rvect const rdelta = rvect (ext.stride()) / scale; rbbox const rb (rlower, rupper, rdelta); file << m << " " << ml << " " << rl << " " << c << " " << rb << eol; ++c; } } } file << eol; file.close(); assert (file.good()); } // TODO: this routine should go into CarpetRegrid (except maybe // SplitRegions_AlongZ for grid arrays) void SplitRegions (cGH const * const cctkGH, vector & superregs, vector & regs) { DECLARE_CCTK_PARAMETERS; assert (regs.empty()); if (CCTK_EQUALS (processor_topology, "along-z")) { SplitRegions_AlongZ (cctkGH, superregs, regs); } else if (CCTK_EQUALS (processor_topology, "along-dir")) { SplitRegions_AlongDir (cctkGH, superregs, regs, split_direction); } else if (CCTK_EQUALS (processor_topology, "automatic")) { SplitRegions_Automatic (cctkGH, superregs, regs); } else if (CCTK_EQUALS (processor_topology, "manual")) { SplitRegions_AsSpecified (cctkGH, superregs, regs); } else { assert (0); } } void OutputGridStatistics (cGH const * const cctkGH) { // Grid array statistics int num_arrays = 0; int num_gfs = 0; int size_gfs = 0; CCTK_REAL num_active_array_points = 0; CCTK_REAL num_total_array_points = 0; CCTK_REAL size_total_array_points = 0; for (int g=0; gcomponents(0); ++c) { dh::light_dboxes const & b = dd->light_boxes.AT(0).AT(0).AT(c); num_active_array_points += num_tl * num_vars * b.active_size; num_total_array_points += num_tl * num_vars * b.exterior_size; size_total_array_points += num_tl * size_vars * b.exterior_size; } break; } case CCTK_GF: num_gfs += num_tl * num_vars; size_gfs += num_tl * size_vars; break; default: assert (0); } } // Grid function statistics int num_comps = 0; int num_steps = 0; CCTK_REAL num_active_mem_points = 0; CCTK_REAL num_owned_mem_points = 0; CCTK_REAL num_total_mem_points = 0; CCTK_REAL size_total_mem_points = 0; CCTK_REAL num_active_cpu_points = 0; CCTK_REAL num_owned_cpu_points = 0; CCTK_REAL num_total_cpu_points = 0; for (int m=0; mcomponents(rl); ++c) { ++ num_comps; dh::light_dboxes const & b = dd->light_boxes.AT(ml).AT(rl).AT(c); num_active_mem_points += num_gfs * b.active_size; num_owned_mem_points += num_gfs * b.owned_size; num_total_mem_points += num_gfs * b.exterior_size; size_total_mem_points += size_gfs * b.exterior_size; num_active_cpu_points += trf * b.active_size; num_owned_cpu_points += trf * b.owned_size; num_total_cpu_points += trf * b.exterior_size; } } } } num_active_cpu_points /= num_steps * delta_time; num_owned_cpu_points /= num_steps * delta_time; num_total_cpu_points /= num_steps * delta_time; // Load balance statistics CCTK_REAL const rzero = 0; CCTK_REAL const rmax = numeric_limits::max(); vector min_active (reflevels, rmax); vector max_active (reflevels, 0); vector avg_active (reflevels, 0); vector sdv_active (reflevels, 0); for (int rl=0; rl num_active_per_proc (dist::size(), 0); for (int m=0; mcomponents(rl); ++c) { int const p = hh->processor(rl,c); dh::light_dboxes const & b = dd->light_boxes.AT(ml).AT(rl).AT(c); num_active_per_proc.AT(p) += num_gfs * b.active_size; } } } for (int p=0; p & superregs, vector & regs) { assert (regs.empty()); SplitRegions_AlongDir (cctkGH, superregs, regs, dim - 1); } void SplitRegions_AlongDir (cGH const * const cctkGH, vector & superregs, vector & regs, int const dir) { assert (regs.empty()); // Something to do? if (superregs.size() == 0) { return; } const int nprocs = CCTK_nProcs (cctkGH); assert (superregs.size() == 1); regs = superregs; if (nprocs == 1) { regs.AT(0).processor = 0; pseudoregion_t const preg (regs.AT(0).extent, regs.AT(0).processor); assert (superregs.AT(0).processors == NULL); superregs.AT(0).processors = new ipfulltree (preg); return; } assert (dir>=0 and dir bounds (nprocs+1); vector subtrees (nprocs); for (int c=0; c rub0[dir]) clb[dir] = rub0[dir]; if (cub[dir] > rub0[dir]) cub[dir] = rub0[dir]; assert (clb[dir] <= cub[dir]); assert (cub[dir] <= rub0[dir]); region_t & reg = regs.AT(c); ibbox & ext = reg.extent; b2vect & obnd = reg.outer_boundaries; int & proc = reg.processor; ext = ibbox(clb, cub-cstr, cstr); obnd = obnd0; obnd[0][dir] &= clb[dir] == rlb0[dir]; obnd[1][dir] &= cub[dir] == rub0[dir]; proc = c; pseudoregion_t const preg (reg.extent, c); bounds.AT(c) = reg.extent.lower()[dir]; subtrees.AT(c) = new ipfulltree (preg); } bounds.AT(nprocs) = rub0[dir] + rstr0[dir]; assert (superregs.AT(0).processors == NULL); superregs.AT(0).processors = new ipfulltree (dir, bounds, subtrees); for (int c=0; c<(int)regs.size(); ++c) { assert (regs.AT(c).processor == c); } } void SplitRegions_Automatic (cGH const * const cctkGH, vector & superregs, vector & regs) { assert (regs.empty()); vector > superregss (1, superregs); vector > regss (1); SplitRegionsMaps_Automatic (cctkGH, superregss, regss); assert (superregss.size() == 1); superregs = superregss.AT(0); assert (regss.size() == 1); regs = regss.AT(0); } static void SplitRegions_AsSpecified (cGH const * const cctkGH, vector & superregs, vector & regs) { DECLARE_CCTK_PARAMETERS; assert (regs.empty()); // Something to do? if (superregs.size() == 0) { return; } const int nprocs = CCTK_nProcs (cctkGH); assert (superregs.size() == 1); region_t const & reg0 = superregs.AT(0); const ivect rstr0 = reg0.extent.stride(); const ivect rlb0 = reg0.extent.lower(); const ivect rub0 = reg0.extent.upper() + rstr0; const b2vect obnd0 = reg0.outer_boundaries; const ivect nprocs_dir (processor_topology_3d_x, processor_topology_3d_y, processor_topology_3d_z); assert (all (nprocs_dir > 0)); if (prod (nprocs_dir) != nprocs) { CCTK_VWarn (0, __LINE__, __FILE__, CCTK_THORNSTRING, "The specified processor topology [%d,%d,%d] requires %d processors, but there are %d processors", nprocs_dir[0], nprocs_dir[1], nprocs_dir[2], prod (nprocs_dir), nprocs); } assert (prod (nprocs_dir) == nprocs); regs.resize (nprocs); const ivect cstr = rstr0; const ivect glonp = (rub0 - rlb0) / cstr; // const ivect locnp = (glonp + nprocs_dir - 1) / nprocs_dir; const ivect locnp = glonp / nprocs_dir; const ivect rem = glonp % nprocs_dir; const ivect step = locnp * cstr; assert (dim==3); vector boundsz(nprocs_dir[2]); vector subtreesz(nprocs_dir[2]+1); for (int k=0; k boundsy(nprocs_dir[2]); vector subtreesy(nprocs_dir[2]+1); for (int j=0; j boundsx(nprocs_dir[2]); vector subtreesx(nprocs_dir[2]+1); for (int i=0; i= 0)); assert (all (clb <= cub)); assert (all (cub <= rub0)); assert (all (not (ipos==0) or clb==rlb0)); assert (all (not (ipos==nprocs_dir-1) or cub==rub0)); region_t & reg = regs.AT(c); ibbox & ext = reg.extent; b2vect & obnd = reg.outer_boundaries; int & proc = reg.processor; ext = ibbox(clb, cub-cstr, cstr); obnd = obnd0; obnd[0] &= clb == rlb0; obnd[1] &= cub == rub0; proc = c; pseudoregion_t preg (reg.extent, c); subtreesx.AT(i) = new ipfulltree (preg); } boundsx.AT(nprocs_dir[0]) = rub0[0] + rstr0[0]; subtreesy.AT(j) = new ipfulltree (0, boundsx, subtreesx); } boundsy.AT(nprocs_dir[1]) = rub0[1] + rstr0[1]; subtreesz.AT(k) = new ipfulltree (1, boundsy, subtreesy); } boundsz.AT(nprocs_dir[2]) = rub0[2] + rstr0[2]; assert (superregs.AT(0).processors == NULL); superregs.AT(0).processors = new ipfulltree (2, boundsz, subtreesz); for (int c=0; c<(int)regs.size(); ++c) { assert (regs.AT(c).processor == c); } } // TODO: this routine should go into CarpetRegrid (except maybe // SplitRegions_AlongZ for grid arrays) void SplitRegionsMaps (cGH const * const cctkGH, vector > & superregss, vector > & regss) { DECLARE_CCTK_PARAMETERS; assert (regss.size() == superregss.size()); for (size_t m=0; m & newregs) { DECLARE_CCTK_PARAMETERS; if (recompose_verbose) cout << "SRMAR enter" << endl; // Check preconditions assert (nprocs >= 1); // Remember old size size_t const oldsize = newregs.size(); // Are we done? if (all(dims)) { if (recompose_verbose) cout << "SRMAR bottom" << endl; // Check precondition assert (nprocs == 1); // Return argument region_t newreg = superreg; newreg.processor = firstproc; newregs.push_back (newreg); pseudoregion_t const preg (newreg.extent, newreg.processor); superreg.processors = new ipfulltree (preg); // Check postcondition assert (newregs.size() == oldsize + nprocs); if (recompose_verbose) cout << "SRMAR exit" << endl; return; } // Special case if (superreg.extent.empty()) { if (recompose_verbose) cout << "SRMAR empty" << endl; // Choose a direction int mydim = -1; for (int d=0; d=0 and mydim bounds (nprocs+1); vector subtrees (nprocs); for (int p=0; p=0; --d) { if (not dims[d]) { ++ alldims; CCTK_REAL const thiscost = rcost[d]; if (thiscost >= 0.999999 * mycost) { mydim = d; mycost = thiscost; } totalcost *= thiscost; } } assert (mydim>=0 and mydim=0); if (recompose_verbose) cout << "SRMAR mydim " << mydim << endl; if (recompose_verbose) cout << "SRMAR mycost " << mycost << endl; // Choose a number of slices for this direction CCTK_REAL const mycost1 = mycost * pow(nprocs / totalcost, CCTK_REAL(1) / alldims); nslices = min (nprocs, int (floor (mycost1 + CCTK_REAL(0.5)))); } assert (nslices <= nprocs); if (recompose_verbose) cout << "SRMAR " << mydim << " nprocs " << nprocs << endl; if (recompose_verbose) cout << "SRMAR " << mydim << " nslices " << nslices << endl; // Mark this direction as done assert (not dims[mydim]); bvect const newdims = dims.replace(mydim, true); // Split the remaining processors vector mynprocs(nslices); int const mynprocs_base = nprocs / nslices; int const mynprocs_left = nprocs - nslices * mynprocs_base; int sum_mynprocs = 0; for (int n=0; n mynpoints(nslices); int const npoints = (superreg.extent.shape() / superreg.extent.stride())[mydim]; int const npoints_bnd_lo = superreg.outer_boundaries[0][mydim] ? granularity_boundary : 0; int const npoints_bnd_hi = superreg.outer_boundaries[1][mydim] ? granularity_boundary : 0; // Keep track of how many points and processors we have left to // distribute int npoints_left = npoints; int nprocs_left = nprocs; for (int n=0; n 0); CCTK_REAL const npoints1 = CCTK_REAL(1) * npoints_left * mynprocs.AT(n) / nprocs_left; mynpoints.AT(n) = int (floor (npoints1 / granularity + CCTK_REAL(0.5))) * granularity; if (n == 0) mynpoints.AT(n) += npoints_bnd_lo; if (npoints_left - mynpoints.AT(n) <= npoints_bnd_hi) mynpoints.AT(n) += npoints_bnd_hi; mynpoints.AT(n) = min(npoints_left, mynpoints.AT(n)); assert (mynpoints.AT(n) >= 0 and mynpoints.AT(n) <= npoints_left); npoints_left -= mynpoints.AT(n); nprocs_left -= mynprocs.AT(n); assert (npoints_left >= 0); assert (nprocs_left >= 0); } assert (npoints_left == 0); assert (nprocs_left == 0); if (recompose_verbose) cout << "SRMAR " << mydim << " mynpoints " << mynpoints << endl; // Create the regions and recurse if (recompose_verbose) cout << "SRMAR " << mydim << ": create bboxes" << endl; ivect lo = superreg.extent.lower(); ivect up = superreg.extent.upper(); ivect const str = superreg.extent.stride(); int p = firstproc; vector bounds (nslices+1); vector subtrees (nslices); for (int n=0; n 0) { newob[0][mydim] = false; } if (n < nslices-1) { up[mydim] = lo[mydim] + (mynpoints.AT(n) - 1) * str[mydim]; newob[1][mydim] = false; } region_t newreg (superreg); newreg.extent = ibbox(lo, up, str); newreg.outer_boundaries = newob; if (recompose_verbose) cout << "SRMAR " << mydim << " newreg " << newreg << endl; // Recurse bounds.AT(n) = lo[mydim]; SplitRegionsMaps_Automatic_Recursively (newdims, p, mynprocs.AT(n), newreg, newregs); if (recompose_verbose) cout << "SRMAR " << mydim << " newregs " << newregs << endl; assert (newreg.processors != NULL); subtrees.AT(n) = newreg.processors; newreg.processors = NULL; newreg.processor = p; // Next slice lo[mydim] = up[mydim] + str[mydim]; p += mynprocs.AT(n); } assert (up[mydim] == superreg.extent.upper()[mydim]); assert (p == firstproc + nprocs); bounds.AT(nslices) = up[mydim] + str[mydim]; // Create tree assert (superreg.processors == NULL); superreg.processors = new ipfulltree (mydim, bounds, subtrees); // Check postcondition assert (newregs.size() == oldsize + nprocs); if (recompose_verbose) cout << "SRMAR exit" << endl; } void SplitRegionsMaps_Automatic (cGH const * const cctkGH, vector > & superregss, vector > & regss) { DECLARE_CCTK_PARAMETERS; if (recompose_verbose) cout << "SRMA enter" << endl; int const nmaps = superregss.size(); int minmap = 1000000000; for (int m=0; m superregs; { for (int m=0; m 0); int const newnregs = nprocs * ncomps; if (recompose_verbose) cout << "SRMA newnregs " << newnregs << endl; if (recompose_verbose) cout << "SRMA: distributing processors to regions" << endl; vector mycosts(nregs); for (int r=0; r mynprocs(nregs); for (int r=0; r 0) { if (recompose_verbose) cout << "SRMA nregs_left " << nregs_left << endl; int maxr = -1; CCTK_REAL maxratio = -1; for (int r=0; r maxratio) { maxr=r; maxratio=ratio; } } assert (maxr>=0 and maxr newregs; newregs.reserve (newnregs); for (int r=0, p=0; r myncomps(nmaps, 0); #if 0 vector empty_comps(nmaps, 0); #endif for (int r=0; r=0 and m mynregs(nmaps, 0); #if 0 vector empty_regs(nmaps, 0); #endif for (int r=0; r=0 and m= 0 and newregs.AT(r).processor < nprocs); } { vector tmpncomps(nmaps, 0); for (int r=0; r=0 and m >& superregss, vector >& regss) { DECLARE_CCTK_PARAMETERS; // Find map numbers int const nmaps = superregss.size(); vector map_index(maps, -1); for (int m=0; m regs; for (int m=0; m > split_regss; int const nprocs = CCTK_nProcs(cctkGH); int const nworkers = nprocs; balance(regs, split_regss, nworkers, maximum_imbalance, same_number_of_components_on_each_process); // Assign process numbers, and make the tree structure // inaccessible from the regions for (int p=0; pis_leaf()); assert(split_regss.AT(p).AT(c).processors->payload().component == -1); split_regss.AT(p).AT(c).processors->payload().component = p; // Set process number in region split_regss.AT(p).AT(c).processors = NULL; assert(split_regss.AT(p).AT(c).processor == -1); split_regss.AT(p).AT(c).processor = p; } } // Distribute by map, implicitly assigning component numbers for (int p=0; p & regs) { DECLARE_CCTK_PARAMETERS; regs.resize (mglevels, reg); if (mglevels > 1) { // boundary offsets jjvect nboundaryzones, is_internal, is_staggered, shiftout; if (domain_from_multipatch and CCTK_IsFunctionAliased ("MultiPatch_GetBoundarySpecification")) { check (not MultiPatch_GetBoundarySpecification (m, 2*dim, &nboundaryzones[0][0], &is_internal[0][0], &is_staggered[0][0], &shiftout[0][0])); } else { check (not GetBoundarySpecification (2*dim, &nboundaryzones[0][0], &is_internal[0][0], &is_staggered[0][0], &shiftout[0][0])); } // (distance in grid points between the exterior and the physical boundary) iivect offset; for (int d=0; d bases(mglevels); bases.AT(0) = base; for (int ml=1; ml > const & regss, vector > > & regsss) { regsss.resize (mglevels); for (int ml=0; ml mg_regs; MakeMultigridBoxes (cctkGH, m, base, regss.AT(rl).AT(c), mg_regs); assert ((int)mg_regs.size() == mglevels); for (int ml=0; ml > > const & regsss, vector > > > & regssss) { for (int m = 0; m < maps; ++m) { MakeMultigridBoxes (cctkGH, m, regsss.AT(m), regssss.AT(m)); } // for m } static void ClassifyPoints (cGH const * const cctkGH, int const rl) { // negative: needs to be set explicitly (e.g. boundary) // zero: unused (e.g. ghost) // positive: needs to be evolved // -1: boundary point (needs to be set explicitly) // 0: unused (e.g. ghost point, or restriction target) // n=1..N: evolved, used for integrator substeps i<=n // (i=N..1, counting backwards; see MoL documentation) // i.e.: n=1: used between time steps (i.e., should be visualised) // n>1: used only while time stepping (e.g. buffer zones) BEGIN_META_MODE(cctkGH) { BEGIN_MGLEVEL_LOOP (cctkGH) { ENTER_LEVEL_MODE (cctkGH, rl) { BEGIN_LOCAL_MAP_LOOP(cctkGH, CCTK_GF) { BEGIN_LOCAL_COMPONENT_LOOP(cctkGH, CCTK_GF) { DECLARE_CCTK_ARGUMENTS; #pragma omp parallel CCTK_LOOP3_ALL(CarpetClassifyPoints, cctkGH, i,j,k) { int const ind = CCTK_GFINDEX3D (cctkGH, i, j, k); point_class[ind] = 1; } CCTK_ENDLOOP3_ALL(CarpetClassifyPoints); } END_LOCAL_COMPONENT_LOOP; } END_LOCAL_MAP_LOOP; } LEAVE_LEVEL_MODE; } END_MGLEVEL_LOOP; } END_META_MODE; } } // namespace Carpet