From e690bb6fb2ef027f20b542eb93a3fce645115a80 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Fri, 29 Feb 2008 23:52:39 -0600 Subject: Move communication schedule into its own data structure Split the grid hierarchy and communication schedule into two separate data structures. Correct error in determining the processor owning a certain region when calculating the communication schedule. --- Carpet/CarpetLib/src/defs.cc | 5 +++ Carpet/CarpetLib/src/dh.cc | 104 ++++++++++++++++++++++++++----------------- Carpet/CarpetLib/src/dh.hh | 28 ++++++++++++ Carpet/CarpetLib/src/ggf.cc | 20 ++++----- Carpet/CarpetLib/src/ggf.hh | 4 +- 5 files changed, 107 insertions(+), 54 deletions(-) diff --git a/Carpet/CarpetLib/src/defs.cc b/Carpet/CarpetLib/src/defs.cc index a3d43d88d..13ef01301 100644 --- a/Carpet/CarpetLib/src/defs.cc +++ b/Carpet/CarpetLib/src/defs.cc @@ -252,8 +252,10 @@ template size_t memoryof (vector > const & v); template size_t memoryof (vector > const & v); template size_t memoryof (vector > > const & v); template size_t memoryof (vector > const & v); +template size_t memoryof (vector > const & v); template size_t memoryof (vector > const & v); template size_t memoryof (vector > > const & v); +template size_t memoryof (vector > > const & v); template size_t memoryof (vector > > const & v); template size_t memoryof (vector > > const & v); template size_t memoryof (vector > > > const & v); @@ -286,6 +288,7 @@ template ostream& output (ostream& os, const vector >& v); template ostream& output (ostream& os, const vector >& v); template ostream& output (ostream& os, const vector,3> >& v); template ostream& output (ostream& os, const vector & v); +template ostream& output (ostream& os, const vector & v); template ostream& output (ostream& os, const vector& v); template ostream& output (ostream& os, const vector& v); template ostream& output (ostream& os, const vector& v); @@ -296,8 +299,10 @@ template ostream& output (ostream& os, const vector > >& v); template 
ostream& output (ostream& os, const vector > >& v); template ostream& output (ostream& os, const vector,3> > >& v); template ostream& output (ostream& os, const vector > & b); +template ostream& output (ostream& os, const vector > & b); template ostream& output (ostream& os, const vector >& v); template ostream& output (ostream& os, const vector > >& v); template ostream& output (ostream& os, const vector > > >& v); template ostream& output (ostream& os, const vector > > & b); +template ostream& output (ostream& os, const vector > > & b); template ostream& output (ostream& os, const vector > >& v); diff --git a/Carpet/CarpetLib/src/dh.cc b/Carpet/CarpetLib/src/dh.cc index 843020ada..cfdcecd0c 100644 --- a/Carpet/CarpetLib/src/dh.cc +++ b/Carpet/CarpetLib/src/dh.cc @@ -67,25 +67,26 @@ prolongation_stencil_size () // Calculate this quantity on this processor? It does not need to be // calculated if it won't be used later on. -static inline +inline int -this_proc (int const c) +dh::this_proc (int const rl, int const c) const { - return c % dist::size(); + // return c % dist::size(); + return h.processor (rl, c); } -static inline +inline bool -on_this_proc (int const c) +dh::on_this_proc (int const rl, int const c) const { - return this_proc (c) == dist::rank(); + return this_proc (rl, c) == dist::rank(); } -static inline +inline bool -on_this_proc (int const c, int const cc) +dh::on_this_proc (int const rl, int const c, int const cc) const { - return on_this_proc (c) or on_this_proc (cc); + return on_this_proc (rl, c) or on_this_proc (rl, cc); } @@ -164,16 +165,22 @@ regrid () oldboxes.clear(); swap (boxes, oldboxes); + fast_oldboxes.clear(); + swap (fast_boxes, fast_oldboxes); boxes.resize (h.mglevels()); + fast_boxes.resize (h.mglevels()); for (int ml = 0; ml < h.mglevels(); ++ ml) { boxes.AT(ml).resize (h.reflevels()); + fast_boxes.AT(ml).resize (h.reflevels()); for (int rl = 0; rl < h.reflevels(); ++ rl) { boxes.AT(ml).AT(rl).resize (h.components(rl)); + 
fast_boxes.AT(ml).AT(rl).resize (dist::size()); cboxes & level = boxes.AT(ml).AT(rl); + fast_cboxes & fast_level = fast_boxes.AT(ml).AT(rl); @@ -500,9 +507,9 @@ regrid () ibbox const send = recv.expanded_for (obox.interior); ASSERT_c (send <= obox.exterior, "Multigrid restriction: Send region must be contained in exterior"); - if (on_this_proc (c)) { + if (on_this_proc (rl, c)) { int const p = dist::rank(); - level.AT(p).fast_mg_rest_sendrecv.push_back + fast_level.AT(p).fast_mg_rest_sendrecv.push_back (sendrecv_pseudoregion_t (send, c, recv, c)); } } @@ -552,9 +559,9 @@ regrid () recv.expanded_for (box.interior).expand (stencil_size); ASSERT_c (send <= box.exterior, "Multigrid prolongation: Send region must be contained in exterior"); - if (on_this_proc (c)) { + if (on_this_proc (rl, c)) { int const p = dist::rank(); - level.AT(p).fast_mg_prol_sendrecv.push_back + fast_level.AT(p).fast_mg_prol_sendrecv.push_back (sendrecv_pseudoregion_t (send, c, recv, c)); } } @@ -611,9 +618,9 @@ regrid () recv.expanded_for (obox.interior).expand (stencil_size); ASSERT_c (send <= obox.exterior, "Refinement prolongation: Send region must be contained in exterior"); - if (on_this_proc (c, cc)) { + if (on_this_proc (rl, c, cc)) { int const p = dist::rank(); - level.AT(p).fast_ref_prol_sendrecv.push_back + fast_level.AT(p).fast_ref_prol_sendrecv.push_back (sendrecv_pseudoregion_t (send, cc, recv, c)); } } @@ -670,9 +677,9 @@ regrid () { ibbox const & recv = * ri; ibbox const & send = recv; - if (on_this_proc (c, cc)) { + if (on_this_proc (rl, c, cc)) { int const p = dist::rank(); - level.AT(p).fast_sync_sendrecv.push_back + fast_level.AT(p).fast_sync_sendrecv.push_back (sendrecv_pseudoregion_t (send, cc, recv, c)); } } @@ -734,9 +741,9 @@ regrid () recv.expanded_for (obox.interior).expand (stencil_size); ASSERT_c (send <= obox.exterior, "Boundary prolongation: Send region must be contained in exterior"); - if (on_this_proc (c, cc)) { + if (on_this_proc (rl, c, cc)) { int const p = 
dist::rank(); - level.AT(p).fast_ref_bnd_prol_sendrecv.push_back + fast_level.AT(p).fast_ref_bnd_prol_sendrecv.push_back (sendrecv_pseudoregion_t (send, cc, recv, c)); } } @@ -764,7 +771,7 @@ regrid () if (rl > 0) { int const orl = rl - 1; - cboxes & olevel = boxes.AT(ml).AT(orl); + fast_cboxes & fast_olevel = fast_boxes.AT(ml).AT(orl); ibset needrecv; for (int c = 0; c < h.components(rl); ++ c) { @@ -808,9 +815,9 @@ regrid () ibbox const send = recv.expanded_for (box.interior); ASSERT_c (send <= box.active, "Refinement restriction: Send region must be contained in active part"); - if (on_this_proc (c, cc)) { + if (on_this_proc (rl, c, cc)) { int const p = dist::rank(); - olevel.AT(p).fast_ref_rest_sendrecv.push_back + fast_olevel.AT(p).fast_ref_rest_sendrecv.push_back (sendrecv_pseudoregion_t (send, c, recv, cc)); } } @@ -843,9 +850,11 @@ regrid () // Synchronisation: - if (int (oldboxes.size()) > ml and int (oldboxes.AT(ml).size()) > rl) { + if (int (fast_oldboxes.size()) > ml and + int (fast_oldboxes.AT(ml).size()) > rl) + { - int const oldcomponents = oldboxes.AT(ml).AT(rl).size(); + int const oldcomponents = fast_oldboxes.AT(ml).AT(rl).size(); // Synchronisation copies from the same level of the old // grid structure. 
It should fill as many active points as @@ -862,9 +871,9 @@ regrid () { ibbox const & recv = * ri; ibbox const & send = recv; - if (on_this_proc (c, cc)) { + if (on_this_proc (rl, c, cc)) { int const p = dist::rank(); - level.AT(p).fast_old2new_sync_sendrecv.push_back + fast_level.AT(p).fast_old2new_sync_sendrecv.push_back (sendrecv_pseudoregion_t (send, cc, recv, c)); } } @@ -920,9 +929,9 @@ regrid () recv.expanded_for (obox.interior).expand (stencil_size); ASSERT_c (send <= obox.exterior, "Regridding prolongation: Send region must be contained in exterior"); - if (on_this_proc (c, cc)) { + if (on_this_proc (rl, c, cc)) { int const p = dist::rank(); - level.AT(p).fast_old2new_ref_prol_sendrecv.push_back + fast_level.AT(p).fast_old2new_ref_prol_sendrecv.push_back (sendrecv_pseudoregion_t (send, cc, recv, c)); } } @@ -935,7 +944,9 @@ regrid () } // if rl > 0 - if (int (oldboxes.size()) > ml and int (oldboxes.AT(ml).size()) > 0) { + if (int (fast_oldboxes.size()) > ml and + int (fast_oldboxes.AT(ml).size()) > 0) + { // All points must now have been received, either through // synchronisation or through prolongation ASSERT_c (needrecv.empty(), @@ -1038,7 +1049,8 @@ memory () memoryof (buffer_width) + memoryof (prolongation_order_space) + memoryof (boxes) + - memoryof (oldboxes) + + memoryof (fast_boxes) + + memoryof (fast_oldboxes) + memoryof (gfs); } @@ -1059,7 +1071,15 @@ memory () memoryof (sync) + memoryof (bndref) + memoryof (ghosts) + - memoryof (interior) + + memoryof (interior); +} + +size_t +dh::fast_dboxes:: +memory () + const +{ + return memoryof (fast_mg_rest_sendrecv) + memoryof (fast_mg_prol_sendrecv) + memoryof (fast_ref_prol_sendrecv) + @@ -1084,6 +1104,7 @@ output (ostream & os) << "buffer_width=" << buffer_width << "," << "prolongation_order_space=" << prolongation_order_space << "," << "boxes=" << boxes << "," + << "fast_boxes=" << fast_boxes << "," << "gfs={"; { bool isfirst = true; @@ -1103,11 +1124,8 @@ dh::dboxes:: output (ostream & os) const { 
- os << "dh::dboxes:" << eol; - // Regions: - - os << "regions:" << eol; + os << "dh::dboxes:" << eol; os << "exterior:" << exterior << eol; os << "is_outer_boundary:" << is_outer_boundary << eol; os << "outer_boundaries:" << outer_boundaries << eol; @@ -1120,21 +1138,23 @@ output (ostream & os) os << "bndref:" << bndref << eol; os << "ghosts:" << ghosts << eol; os << "interior:" << interior << eol; - + return os; +} + +ostream & +dh::fast_dboxes:: +output (ostream & os) + const +{ // Communication schedule: - - os << "communication:" << eol; + os << "dh::fast_dboxes:" << eol; os << "fast_mg_rest_sendrecv: " << fast_mg_rest_sendrecv << eol; os << "fast_mg_prol_sendrecv: " << fast_mg_prol_sendrecv << eol; os << "fast_ref_prol_sendrecv: " << fast_ref_prol_sendrecv << eol; os << "fast_ref_rest_sendrecv: " << fast_ref_rest_sendrecv << eol; os << "fast_sync_sendrecv: " << fast_sync_sendrecv << eol; os << "fast_ref_bnd_prol_sendrecv: " << fast_ref_bnd_prol_sendrecv << eol; - - // Regridding schedule: - os << "fast_old2new_sync_sendrecv:" << fast_old2new_sync_sendrecv << eol; os << "fast_old2new_ref_prol_sendrecv:" << fast_old2new_ref_prol_sendrecv << eol; - return os; } diff --git a/Carpet/CarpetLib/src/dh.hh b/Carpet/CarpetLib/src/dh.hh index 09d3579e5..8e6f67e06 100644 --- a/Carpet/CarpetLib/src/dh.hh +++ b/Carpet/CarpetLib/src/dh.hh @@ -61,6 +61,12 @@ public: ibset ghosts; // ghost zones, as seen from Cactus ibbox interior; // interior (without ghost zones) + size_t memory () const; + ostream & output (ostream & os) const; + }; + + struct fast_dboxes { + // Communication schedule: srpvect fast_mg_rest_sendrecv; @@ -85,6 +91,10 @@ private: typedef vector rboxes; // ... for each refinement level typedef vector mboxes; // ... for each multigrid level + typedef vector fast_cboxes; // ... for each component + typedef vector fast_rboxes; // ... for each refinement level + typedef vector fast_mboxes; // ... 
for each multigrid level + void @@ -101,6 +111,8 @@ public: // should be readonly mboxes boxes; // grid hierarchy mboxes oldboxes; // old grid hierarchy, used during regridding + fast_mboxes fast_boxes; // grid hierarchy + fast_mboxes fast_oldboxes; list gfs; // list of all grid functions @@ -121,6 +133,12 @@ public: void regrid (); void recompose (int rl, bool do_prolongate); +private: + int this_proc (int rl, int c) const; + bool on_this_proc (int rl, int c) const; + bool on_this_proc (int rl, int c, int cc) const; + +public: // Grid function management void add (ggf * f); void remove (ggf * f); @@ -137,6 +155,11 @@ inline size_t memoryof (dh::dboxes const & b) return b.memory (); } +inline size_t memoryof (dh::fast_dboxes const & b) +{ + return b.memory (); +} + inline size_t memoryof (dh const & d) { return d.memory (); @@ -147,6 +170,11 @@ inline ostream & operator<< (ostream & os, dh::dboxes const & b) return b.output (os); } +inline ostream & operator<< (ostream & os, dh::fast_dboxes const & b) +{ + return b.output (os); +} + inline ostream & operator<< (ostream & os, dh const & d) { return d.output (os); diff --git a/Carpet/CarpetLib/src/ggf.cc b/Carpet/CarpetLib/src/ggf.cc index b5140e22c..b6eefa499 100644 --- a/Carpet/CarpetLib/src/ggf.cc +++ b/Carpet/CarpetLib/src/ggf.cc @@ -175,7 +175,7 @@ void ggf::recompose_fill (comm_state & state, int const rl, for (int tl = 0; tl < timelevels (ml, rl); ++tl) { transfer_from_all (state, tl, rl, ml, - & dh::dboxes::fast_old2new_sync_sendrecv, + & dh::fast_dboxes::fast_old2new_sync_sendrecv, tl, rl, ml, & oldstorage); } // for tl @@ -189,7 +189,7 @@ void ggf::recompose_fill (comm_state & state, int const rl, for (int tl = 0; tl < timelevels (ml, rl); ++tl) { transfer_from_all (state, tl, rl, ml, - & dh::dboxes::fast_old2new_ref_prol_sendrecv, + & dh::fast_dboxes::fast_old2new_ref_prol_sendrecv, tls, rl - 1, ml, t.time (tl, rl, ml)); } // for tl @@ -306,7 +306,7 @@ sync_all (comm_state & state, timer.start (); 
transfer_from_all (state, tl,rl,ml, - & dh::dboxes::fast_sync_sendrecv, + & dh::fast_dboxes::fast_sync_sendrecv, tl,rl,ml); timer.stop (0); } @@ -346,7 +346,7 @@ ref_bnd_prolongate_all (comm_state & state, } transfer_from_all (state, tl ,rl ,ml, - & dh::dboxes::fast_ref_bnd_prol_sendrecv, + & dh::fast_dboxes::fast_ref_bnd_prol_sendrecv, tl2s,rl-1,ml, time); timer.stop (0); @@ -370,7 +370,7 @@ mg_restrict_all (comm_state & state, vector const tl2s(1,tl); transfer_from_all (state, tl ,rl,ml, - & dh::dboxes::fast_mg_rest_sendrecv, + & dh::fast_dboxes::fast_mg_rest_sendrecv, tl2s,rl,ml-1, time); timer.stop (0); @@ -394,7 +394,7 @@ mg_prolongate_all (comm_state & state, vector const tl2s(1,tl); transfer_from_all (state, tl ,rl,ml, - & dh::dboxes::fast_mg_prol_sendrecv, + & dh::fast_dboxes::fast_mg_prol_sendrecv, tl2s,rl,ml+1, time); timer.stop (0); @@ -419,7 +419,7 @@ ref_restrict_all (comm_state & state, vector const tl2s(1,tl); transfer_from_all (state, tl ,rl ,ml, - & dh::dboxes::fast_ref_rest_sendrecv, + & dh::fast_dboxes::fast_ref_rest_sendrecv, tl2s,rl+1,ml, time); timer.stop (0); @@ -445,7 +445,7 @@ ref_prolongate_all (comm_state & state, for (int i=0; i<=prolongation_order_time; ++i) tl2s.AT(i) = i; transfer_from_all (state, tl ,rl ,ml, - & dh::dboxes::fast_ref_prol_sendrecv, + & dh::fast_dboxes::fast_ref_prol_sendrecv, tl2s,rl-1,ml, time); timer.stop (0); @@ -458,7 +458,7 @@ void ggf:: transfer_from_all (comm_state & state, int const tl1, int const rl1, int const ml1, - srpvect const dh::dboxes::* sendrecvs, + srpvect const dh::fast_dboxes::* sendrecvs, vector const & tl2s, int const rl2, int const ml2, CCTK_REAL const & time, mdata * const srcstorage_) @@ -468,7 +468,7 @@ transfer_from_all (comm_state & state, assert (tl1>=0 and tl1 const & tl2s, int rl2, int ml2, CCTK_REAL const & time, mdata * srcstorage = 0); @@ -161,7 +161,7 @@ protected: void transfer_from_all (comm_state & state, int tl1, int rl1, int ml1, - srpvect const dh::dboxes::* sendrecvs, + 
srpvect const dh::fast_dboxes::* sendrecvs, int tl2, int rl2, int ml2, mdata * srcstorage = 0) { -- cgit v1.2.3