diff options
-rw-r--r-- | Carpet/CarpetLib/src/dh.cc | 681 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/dh.hh | 42 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/ggf.cc | 260 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/ggf.hh | 36 |
4 files changed, 787 insertions, 232 deletions
diff --git a/Carpet/CarpetLib/src/dh.cc b/Carpet/CarpetLib/src/dh.cc index 50d1466e6..26f2b01a8 100644 --- a/Carpet/CarpetLib/src/dh.cc +++ b/Carpet/CarpetLib/src/dh.cc @@ -19,6 +19,18 @@ using namespace CarpetLib; +ostream & operator<< (ostream & os, pseudoregion const & p) +{ + return os << p.extent << "/p:" << p.processor; +} + +ostream & operator<< (ostream & os, sendrecv_pseudoregion const & srp) +{ + return os << "(send:" << srp.send << ",recv:" << srp.recv << ")"; +} + + + // Constructors dh:: dh (gh & h_, @@ -65,6 +77,31 @@ prolongation_stencil_size () // Modifiers +// Calculate this quantity on this processor? It does not need to be +// calculated if it won't be used later on. +static inline +int +this_proc (int const c) +{ + return c % dist::size(); +} + +static inline +bool +on_this_proc (int const c) +{ + return this_proc (c) == dist::rank(); +} + +static inline +bool +on_this_proc (int const c, int const cc) +{ + return on_this_proc (c) or on_this_proc (cc); +} + + + bool there_was_an_error = false; static @@ -447,93 +484,109 @@ regrid () // Multigrid restriction: - if (ml > 0) { - int const oml = ml - 1; - - // Multigrid restriction must fill all active points - - dboxes const & obox = boxes.AT(oml).AT(rl).AT(c); - - ibset needrecv = box.active; - - ibset contracted_oactive; - for (ibset::const_iterator - ai = obox.active.begin(); ai != obox.active.end(); ++ ai) - { - ibbox const & oactive = * ai; - contracted_oactive += oactive.contracted_for (box.interior); - } - contracted_oactive.normalize(); - - ibset ovlp = needrecv & contracted_oactive; - ovlp.normalize(); - - for (ibset::const_iterator ri = ovlp.begin(); ri != ovlp.end(); ++ ri) - { - ibbox const & recv = * ri; - box.mg_rest_recv.push_back (recv); - ibbox const send = recv.expanded_for (obox.interior); - ASSERT_c (send <= obox.exterior, - "Multigrid restriction: Send region must be contained in exterior"); - box.mg_rest_send.push_back (send); - } - - needrecv -= ovlp; - needrecv.normalize(); + if (true or on_this_proc (c)) { + if (ml > 0) { + int const oml = ml - 1; - // All points must have been received - ASSERT_c (needrecv.empty(), - "Multigrid restriction: All points must have been received"); - - } // if ml > 0 + // Multigrid restriction must fill all active points + + dboxes const & obox = boxes.AT(oml).AT(rl).AT(c); + + ibset needrecv = box.active; + + ibset contracted_oactive; + for (ibset::const_iterator + ai = obox.active.begin(); ai != obox.active.end(); ++ ai) + { + ibbox const & oactive = * ai; + contracted_oactive += oactive.contracted_for (box.interior); + } + contracted_oactive.normalize(); + + ibset ovlp = needrecv & contracted_oactive; + ovlp.normalize(); + + for (ibset::const_iterator + ri = ovlp.begin(); ri != ovlp.end(); ++ ri) + { + ibbox const & recv = * ri; + box.mg_rest_recv.push_back (recv); + ibbox const send = recv.expanded_for (obox.interior); + ASSERT_c (send <= obox.exterior, + "Multigrid restriction: Send region must be contained in exterior"); + box.mg_rest_send.push_back (send); + if (on_this_proc (c)) { + int const p = dist::rank(); + level.AT(p).fast_mg_rest_sendrecv.push_back + (sendrecv_pseudoregion (send, c, recv, c)); + } + } + + needrecv -= ovlp; + needrecv.normalize(); + + // All points must have been received + ASSERT_c (needrecv.empty(), + "Multigrid restriction: All points must have been received"); + + } // if ml > 0 + } // Multigrid prolongation: - if (ml > 0) { - int const oml = ml - 1; - - // Multigrid prolongation must fill all active points (this - // could probably be relaxed) - - dboxes const & obox = boxes.AT(oml).AT(rl).AT(c); - - ibset oneedrecv = obox.active; - - i2vect const stencil_size = i2vect (prolongation_stencil_size()); - - ibset expanded_active; - for (ibset::const_iterator - ai = box.active.begin(); ai != box.active.end(); ++ ai) - { - ibbox const & active = * ai; - expanded_active += active.expanded_for (obox.interior); - } - expanded_active.normalize(); - - ibset ovlp = oneedrecv & expanded_active; - ovlp.normalize(); + if (true or on_this_proc (c)) { + if (ml > 0) { + int const oml = ml - 1; - for (ibset::const_iterator ri = ovlp.begin(); ri != ovlp.end(); ++ ri) - { - ibbox const & recv = * ri; - box.mg_prol_recv.push_back (recv); - ibbox const send = - recv.expanded_for (box.interior).expand (stencil_size); - ASSERT_c (send <= box.exterior, - "Multigrid prolongation: Send region must be contained in exterior"); - box.mg_prol_send.push_back (send); - } - - oneedrecv -= ovlp; - oneedrecv.normalize(); - - // All points must have been received - ASSERT_c (oneedrecv.empty(), - "Multigrid prolongation: All points must have been received"); - - } // if ml > 0 + // Multigrid prolongation must fill all active points + // (this could probably be relaxed) + + dboxes const & obox = boxes.AT(oml).AT(rl).AT(c); + + ibset oneedrecv = obox.active; + + i2vect const stencil_size = i2vect (prolongation_stencil_size()); + + ibset expanded_active; + for (ibset::const_iterator + ai = box.active.begin(); ai != box.active.end(); ++ ai) + { + ibbox const & active = * ai; + expanded_active += active.expanded_for (obox.interior); + } + expanded_active.normalize(); + + ibset ovlp = oneedrecv & expanded_active; + ovlp.normalize(); + + for (ibset::const_iterator + ri = ovlp.begin(); ri != ovlp.end(); ++ ri) + { + ibbox const & recv = * ri; + box.mg_prol_recv.push_back (recv); + ibbox const send = + recv.expanded_for (box.interior).expand (stencil_size); + ASSERT_c (send <= box.exterior, + "Multigrid prolongation: Send region must be contained in exterior"); + box.mg_prol_send.push_back (send); + if (on_this_proc (c)) { + int const p = dist::rank(); + level.AT(p).fast_mg_prol_sendrecv.push_back + (sendrecv_pseudoregion (send, c, recv, c)); + } + } + + oneedrecv -= ovlp; + oneedrecv.normalize(); + + // All points must have been received + ASSERT_c (oneedrecv.empty(), + "Multigrid prolongation: All points must have been received"); + + } // if ml > 0 + } @@ -557,36 +610,43 @@ regrid () i2vect (h.reffacts.at(rl) / h.reffacts.at(orl)); for (int cc = 0; cc < h.components(orl); ++ cc) { - dboxes const & obox = boxes.AT(ml).AT(orl).AT(cc); - - ibset contracted_oactive; - for (ibset::const_iterator - ai = obox.active.begin(); ai != obox.active.end(); ++ ai) - { - ibbox const & oactive = * ai; - // untested for cell centering - contracted_oactive += - oactive.contracted_for (box.interior).expand (reffact); + if (true or on_this_proc (c, cc)) { + dboxes const & obox = boxes.AT(ml).AT(orl).AT(cc); + + ibset contracted_oactive; + for (ibset::const_iterator + ai = obox.active.begin(); ai != obox.active.end(); ++ ai) + { + ibbox const & oactive = * ai; + // untested for cell centering + contracted_oactive += + oactive.contracted_for (box.interior).expand (reffact); + } + contracted_oactive.normalize(); + + ibset ovlp = needrecv & contracted_oactive; + ovlp.normalize(); + + for (ibset::const_iterator + ri =ovlp.begin(); ri != ovlp.end(); ++ ri) + { + ibbox const & recv = * ri; + box.ref_prol_recv.AT(cc).push_back (recv); + ibbox const send = + recv.expanded_for (obox.interior).expand (stencil_size); + ASSERT_c (send <= obox.exterior, + "Refinement prolongation: Send region must be contained in exterior"); + box.ref_prol_send.AT(cc).push_back (send); + if (on_this_proc (c, cc)) { + int const p = dist::rank(); + level.AT(p).fast_ref_prol_sendrecv.push_back + (sendrecv_pseudoregion (send, cc, recv, c)); + } + } + + needrecv -= ovlp; + } - contracted_oactive.normalize(); - - ibset ovlp = needrecv & contracted_oactive; - ovlp.normalize(); - - for (ibset::const_iterator ri = - ovlp.begin(); ri != ovlp.end(); ++ ri) - { - ibbox const & recv = * ri; - box.ref_prol_recv.AT(cc).push_back (recv); - ibbox const send = - recv.expanded_for (obox.interior).expand (stencil_size); - ASSERT_c (send <= obox.exterior, - "Refinement prolongation: Send region must be contained in exterior"); - box.ref_prol_send.AT(cc).push_back (send); - } - - needrecv -= ovlp; - } // for cc needrecv.normalize(); @@ -621,31 +681,39 @@ regrid () ibset & sync = box.sync; for (int cc = 0; cc < h.components(rl); ++ cc) { - dboxes const & obox = boxes.AT(ml).AT(rl).AT(cc); - + if (true or on_this_proc (c, cc)) { + dboxes const & obox = boxes.AT(ml).AT(rl).AT(cc); + #if 0 - ibset ovlp = needrecv & obox.owned; + ibset ovlp = needrecv & obox.owned; #else - ibset ovlp = needrecv & obox.interior; + ibset ovlp = needrecv & obox.interior; #endif - ovlp.normalize(); - - if (cc == c) { - ASSERT_cc (ovlp.empty(), - "A region may not synchronise from itself"); - } - - for (ibset::const_iterator ri = ovlp.begin(); ri != ovlp.end(); ++ ri) - { - ibbox const & recv = * ri; - box.sync_recv.AT(cc).push_back (recv); - ibbox const & send = recv; - box.sync_send.AT(cc).push_back (send); + ovlp.normalize(); + + if (cc == c) { + ASSERT_cc (ovlp.empty(), + "A region may not synchronise from itself"); + } + + for (ibset::const_iterator + ri = ovlp.begin(); ri != ovlp.end(); ++ ri) + { + ibbox const & recv = * ri; + box.sync_recv.AT(cc).push_back (recv); + ibbox const & send = recv; + box.sync_send.AT(cc).push_back (send); + if (on_this_proc (c, cc)) { + int const p = dist::rank(); + level.AT(p).fast_sync_sendrecv.push_back + (sendrecv_pseudoregion (send, cc, recv, c)); + } + } + + needrecv -= ovlp; + sync += ovlp; + } - - needrecv -= ovlp; - sync += ovlp; - } // for cc needrecv.normalize(); @@ -679,37 +747,44 @@ regrid () i2vect (h.reffacts.at(rl) / h.reffacts.at(orl)); for (int cc = 0; cc < h.components(orl); ++ cc) { - dboxes const & obox = boxes.AT(ml).AT(orl).AT(cc); - - ibset contracted_oactive; - for (ibset::const_iterator - ai = obox.active.begin(); ai != obox.active.end(); ++ ai) - { - ibbox const & oactive = * ai; - // untested for cell centering - contracted_oactive += - oactive.contracted_for (box.interior).expand (reffact); + if (true or on_this_proc (c, cc)) { + dboxes const & obox = boxes.AT(ml).AT(orl).AT(cc); + + ibset contracted_oactive; + for (ibset::const_iterator + ai = obox.active.begin(); ai != obox.active.end(); ++ ai) + { + ibbox const & oactive = * ai; + // untested for cell centering + contracted_oactive += + oactive.contracted_for (box.interior).expand (reffact); + } + contracted_oactive.normalize(); + + ibset ovlp = needrecv & contracted_oactive; + ovlp.normalize(); + + for (ibset::const_iterator + ri = ovlp.begin(); ri != ovlp.end(); ++ ri) + { + ibbox const & recv = * ri; + box.ref_bnd_prol_recv.AT(cc).push_back (recv); + ibbox const send = + recv.expanded_for (obox.interior).expand (stencil_size); + ASSERT_c (send <= obox.exterior, + "Boundary prolongation: Send region must be contained in exterior"); + box.ref_bnd_prol_send.AT(cc).push_back (send); + if (on_this_proc (c, cc)) { + int const p = dist::rank(); + level.AT(p).fast_ref_bnd_prol_sendrecv.push_back + (sendrecv_pseudoregion (send, cc, recv, c)); + } + } + + needrecv -= ovlp; + bndref += ovlp; + } - contracted_oactive.normalize(); - - ibset ovlp = needrecv & contracted_oactive; - ovlp.normalize(); - - for (ibset::const_iterator ri = - ovlp.begin(); ri != ovlp.end(); ++ ri) - { - ibbox const & recv = * ri; - box.ref_bnd_prol_recv.AT(cc).push_back (recv); - ibbox const send = - recv.expanded_for (obox.interior).expand (stencil_size); - ASSERT_c (send <= obox.exterior, - "Boundary prolongation: Send region must be contained in exterior"); - box.ref_bnd_prol_send.AT(cc).push_back (send); - } - - needrecv -= ovlp; - bndref += ovlp; - } // for cc needrecv.normalize(); @@ -754,10 +829,37 @@ regrid () +#if 0 + optimise2_field + (level, + & dboxes::fast_mg_rest_send, & dboxes::fast_mg_rest_recv, + & dboxes::fast_mg_rest_sendrecv); + optimise2_field + (level, + & dboxes::fast_mg_prol_send, & dboxes::fast_mg_prol_recv, + & dboxes::fast_mg_prol_sendrecv); + + optimise2_field + (level, + & dboxes::fast_ref_prol_send, & dboxes::fast_ref_prol_recv, + & dboxes::fast_ref_prol_sendrecv); + optimise2_field + (level, + & dboxes::fast_sync_send, & dboxes::fast_sync_recv, + & dboxes::fast_sync_sendrecv); + optimise2_field + (level, + & dboxes::fast_ref_bnd_prol_send, & dboxes::fast_ref_bnd_prol_recv, + & dboxes::fast_ref_bnd_prol_sendrecv); +#endif + + + // Refinement restriction: if (rl > 0) { int const orl = rl - 1; + cboxes & olevel = boxes.AT(ml).AT(orl); ibset needrecv; for (int c = 0; c < h.components(rl); ++ c) { @@ -783,33 +885,40 @@ regrid () obox.ref_rest_send.resize (h.components(rl)); for (int c = 0; c < h.components(rl); ++ c) { - dboxes const & box = boxes.AT(ml).AT(rl).AT(c); - - ibset contracted_active; - for (ibset::const_iterator - ai = box.active.begin(); ai != box.active.end(); ++ ai) - { - ibbox const & active = * ai; - contracted_active += active.contracted_for (obox.interior); + if (true or on_this_proc (c, cc)) { + dboxes const & box = boxes.AT(ml).AT(rl).AT(c); + + ibset contracted_active; + for (ibset::const_iterator + ai = box.active.begin(); ai != box.active.end(); ++ ai) + { + ibbox const & active = * ai; + contracted_active += active.contracted_for (obox.interior); + } + contracted_active.normalize(); + + ibset ovlp = obox.active & contracted_active; + ovlp.normalize(); + + for (ibset::const_iterator + ri =ovlp.begin(); ri != ovlp.end(); ++ ri) + { + ibbox const & recv = * ri; + obox.ref_rest_recv.AT(c).push_back (recv); + ibbox const send = recv.expanded_for (box.interior); + ASSERT_c (send <= box.active, + "Refinement restriction: Send region must be contained in active part"); + obox.ref_rest_send.AT(c).push_back (send); + if (on_this_proc (c, cc)) { + int const p = dist::rank(); + olevel.AT(p).fast_ref_rest_sendrecv.push_back + (sendrecv_pseudoregion (send, c, recv, cc)); + } + } + + needrecv -= ovlp; + } - contracted_active.normalize(); - - ibset ovlp = obox.active & contracted_active; - ovlp.normalize(); - - for (ibset::const_iterator ri = - ovlp.begin(); ri != ovlp.end(); ++ ri) - { - ibbox const & recv = * ri; - obox.ref_rest_recv.AT(c).push_back (recv); - ibbox const send = recv.expanded_for (box.interior); - ASSERT_c (send <= box.active, - "Refinement restriction: Send region must be contained in active part"); - obox.ref_rest_send.AT(c).push_back (send); - } - - needrecv -= ovlp; - } // for c optimise_field @@ -825,6 +934,12 @@ regrid () ASSERT_rl (needrecv.empty(), "Refinement restriction: All points must have been received"); +#if 0 + optimise2_field + (olevel, & dboxes::fast_ref_rest_send, & dboxes::fast_ref_rest_recv, + & dboxes::fast_ref_rest_sendrecv); +#endif + } // if rl > 0 @@ -853,22 +968,29 @@ regrid () box.old2new_sync_send.resize (oldcomponents); for (int cc = 0; cc < oldcomponents; ++ cc) { - dboxes const & obox = oldboxes.AT(ml).AT(rl).AT(cc); - - ibset ovlp = needrecv & obox.owned; - ovlp.normalize(); - - for (ibset::const_iterator ri = - ovlp.begin(); ri != ovlp.end(); ++ ri) - { - ibbox const & recv = * ri; - box.old2new_sync_recv.AT(cc).push_back (recv); - ibbox const & send = recv; - box.old2new_sync_send.AT(cc).push_back (send); + if (true or on_this_proc (c, cc)) { + dboxes const & obox = oldboxes.AT(ml).AT(rl).AT(cc); + + ibset ovlp = needrecv & obox.owned; + ovlp.normalize(); + + for (ibset::const_iterator + ri =ovlp.begin(); ri != ovlp.end(); ++ ri) + { + ibbox const & recv = * ri; + box.old2new_sync_recv.AT(cc).push_back (recv); + ibbox const & send = recv; + box.old2new_sync_send.AT(cc).push_back (send); + if (on_this_proc (c, cc)) { + int const p = dist::rank(); + level.AT(p).fast_old2new_sync_sendrecv.push_back + (sendrecv_pseudoregion (send, cc, recv, c)); + } + } + + needrecv -= ovlp; + } - - needrecv -= ovlp; - } // for cc needrecv.normalize(); @@ -897,36 +1019,43 @@ regrid () i2vect (h.reffacts.at(rl) / h.reffacts.at(orl)); for (int cc = 0; cc < h.components(orl); ++ cc) { - dboxes const & obox = boxes.AT(ml).AT(orl).AT(cc); - - ibset contracted_oactive; - for (ibset::const_iterator - ai = obox.active.begin(); ai != obox.active.end(); ++ ai) - { - ibbox const & oactive = * ai; - // untested for cell centering - contracted_oactive += - oactive.contracted_for (box.interior).expand (reffact); + if (true or on_this_proc (c, cc)) { + dboxes const & obox = boxes.AT(ml).AT(orl).AT(cc); + + ibset contracted_oactive; + for (ibset::const_iterator + ai = obox.active.begin(); ai != obox.active.end(); ++ ai) + { + ibbox const & oactive = * ai; + // untested for cell centering + contracted_oactive += + oactive.contracted_for (box.interior).expand (reffact); + } + contracted_oactive.normalize(); + + ibset ovlp = needrecv & contracted_oactive; + ovlp.normalize(); + + for (ibset::const_iterator + ri = ovlp.begin(); ri != ovlp.end(); ++ ri) + { + ibbox const & recv = * ri; + box.old2new_ref_prol_recv.AT(cc).push_back (recv); + ibbox const send = + recv.expanded_for (obox.interior).expand (stencil_size); + ASSERT_c (send <= obox.exterior, + "Regridding prolongation: Send region must be contained in exterior"); + box.old2new_ref_prol_send.AT(cc).push_back (send); + if (on_this_proc (c, cc)) { + int const p = dist::rank(); + level.AT(p).fast_old2new_ref_prol_sendrecv.push_back + (sendrecv_pseudoregion (send, cc, recv, c)); + } + } + + needrecv -= ovlp; + } - contracted_oactive.normalize(); - - ibset ovlp = needrecv & contracted_oactive; - ovlp.normalize(); - - for (ibset::const_iterator ri = - ovlp.begin(); ri != ovlp.end(); ++ ri) - { - ibbox const & recv = * ri; - box.old2new_ref_prol_recv.AT(cc).push_back (recv); - ibbox const send = - recv.expanded_for (obox.interior).expand (stencil_size); - ASSERT_c (send <= obox.exterior, - "Regridding prolongation: Send region must be contained in exterior"); - box.old2new_ref_prol_send.AT(cc).push_back (send); - } - - needrecv -= ovlp; - } // for cc needrecv.normalize(); @@ -963,6 +1092,19 @@ regrid () } // for c +#if 0 + optimise2_field + (level, + & dboxes::fast_old2new_sync_send, + & dboxes::fast_old2new_sync_recv, + & dboxes::fast_old2new_sync_sendrecv); + optimise2_field + (level, + & dboxes::fast_old2new_ref_prol_send, + & dboxes::fast_old2new_ref_prol_recv, + & dboxes::fast_old2new_ref_prol_sendrecv); +#endif + } // for rl } // for m @@ -1069,6 +1211,72 @@ optimise_field (dboxes & box, +#if 0 +void +dh:: +optimise2_field (cboxes & boxes, + iblistvect const dboxes::* const field_send, + iblistvect const dboxes::* const field_recv, + srpvect dboxes::* const fast_field_sendrecv) +{ + int const nc = (int)boxes.size(); + for (int c = 0; c < nc; ++ c) { + assert ((boxes.AT(c).*fast_field_sendrecv).empty()); + } + + int const p = dist::rank(); + dboxes & b = boxes.AT(p); + + size_t num_regions = 0; + + for (int sc = 0; sc < nc; ++ sc) { + for (int rc = 0; rc < nc; ++ rc) { + if (sc == p or rc == p) { + + iblist const & sbl = (boxes.AT(rc).*field_send).AT(sc); + iblist const & rbl = (boxes.AT(rc).*field_recv).AT(sc); + assert (sbl.size() == rbl.size()); + + num_regions += sbl.size(); + + } // if + } // for rc + } // for sc + + (b.*fast_field_sendrecv).reserve (num_regions); + + for (int sc = 0; sc < nc; ++ sc) { + for (int rc = 0; rc < nc; ++ rc) { + if (sc == p or rc == p) { + + iblist const & sbl = (boxes.AT(rc).*field_send).AT(sc); + iblist const & rbl = (boxes.AT(rc).*field_recv).AT(sc); + + for (iblist::const_iterator sbi = srl.begin(), rbi = rrl.begin(); + sbi != sbl.end(); ++ sbi, ++ rbi) + { + ibbox const & sb = * sbi; + ibbox const & rb = * rbi; + + sendrecv_pseudoregion srp; + srp.send.extent = sb; + srp.send.processor = p; + srp.recv.extent = rb; + srp.recv.processor = c; + + (b.*fast_field_sendrecv).push_back (srp); + } + + } // if + } // for rc + } // for sc + + assert ((b.*fast_field_sendrecv).size() == num_regions); +} +#endif + + + void dh:: recompose (int const rl, bool const do_prolongate) @@ -1185,5 +1393,18 @@ output (ostream & os) os << "old2new_ref_prol_recv:" << old2new_ref_prol_recv << eol; os << "old2new_ref_prol_send:" << old2new_ref_prol_send << eol; +#if 0 + // Fast schedule: + + os << " fast_mg_rest_sendrecv:" << fast_mg_rest_sendrecv << eol; + os << " fast_mg_prol_sendrecv:" << fast_mg_prol_sendrecv << eol; + os << " fast_ref_prol_sendrecv:" << fast_ref_prol_sendrecv << eol; + os << " fast_ref_rest_sendrecv:" << fast_ref_rest_sendrecv << eol; + os << " fast_sync_sendrecv:" << fast_sync_sendrecv << eol; + os << " fast_ref_bnd_prol_sendrecv:" << fast_ref_bnd_prol_sendrecv << eol; + os << " fast_old2new_sync_sendrecv:" << fast_old2new_sync_sendrecv << eol; + os << " fast_old2new_ref_prol_sendrecv:" << fast_old2new_ref_prol_sendrecv << eol; +#endif + return os; } diff --git a/Carpet/CarpetLib/src/dh.hh b/Carpet/CarpetLib/src/dh.hh index a4df5a516..9011525d7 100644 --- a/Carpet/CarpetLib/src/dh.hh +++ b/Carpet/CarpetLib/src/dh.hh @@ -24,8 +24,32 @@ using namespace std; struct pseudoregion { ibbox extent; int processor; + pseudoregion () + { + } + pseudoregion (ibbox const & extent_, int const processor_) + : extent (extent_), processor (processor_) + { + } }; +ostream & operator<< (ostream & os, pseudoregion const & p); + +struct sendrecv_pseudoregion { + pseudoregion send, recv; + sendrecv_pseudoregion () + { + } + sendrecv_pseudoregion (ibbox const & send_extent, int const send_processor, + ibbox const & recv_extent, int const recv_processor) + : send (pseudoregion (send_extent, send_processor)), + recv (pseudoregion (recv_extent, recv_processor)) + { + } +}; + +ostream & operator<< (ostream & os, sendrecv_pseudoregion const & srp); + // Forward declaration @@ -43,6 +67,7 @@ public: typedef vector<iblist> iblistvect; // vector of lists typedef vector <pseudoregion> pvect; + typedef vector <sendrecv_pseudoregion> srpvect; @@ -101,6 +126,13 @@ public: pvect fast_ref_bnd_prol_recv; pvect fast_ref_bnd_prol_send; + srpvect fast_mg_rest_sendrecv; + srpvect fast_mg_prol_sendrecv; + srpvect fast_ref_prol_sendrecv; + srpvect fast_ref_rest_sendrecv; + srpvect fast_sync_sendrecv; + srpvect fast_ref_bnd_prol_sendrecv; + // Regridding schedule: iblistvect old2new_sync_recv; @@ -113,6 +145,9 @@ public: pvect fast_old2new_ref_prol_recv; pvect fast_old2new_ref_prol_send; + srpvect fast_old2new_sync_sendrecv; + srpvect fast_old2new_ref_prol_sendrecv; + ostream & output (ostream & os) const; }; @@ -139,6 +174,13 @@ private: iblist const dboxes::* field, pvect dboxes::* fast_field); + static + void + optimise2_field (cboxes & bs, + pvect const dboxes::* fast_field_recv, + pvect const dboxes::* fast_field_send, + srpvect dboxes::* fast_field_sendrecv); + public: // should be readonly // Fields diff --git a/Carpet/CarpetLib/src/ggf.cc b/Carpet/CarpetLib/src/ggf.cc index f698835ec..70fe3884b 100644 --- a/Carpet/CarpetLib/src/ggf.cc +++ b/Carpet/CarpetLib/src/ggf.cc @@ -167,6 +167,7 @@ void ggf::recompose_fill (comm_state & state, int const rl, } } +#if 0 for (int c = 0; c < h.components (rl); ++c) { // Initialise from the same level of the old hierarchy, where @@ -200,6 +201,36 @@ void ggf::recompose_fill (comm_state & state, int const rl, } // if do_prolongate } // for c +#endif + + // Initialise from the same level of the old hierarchy, where + // possible + if (rl < (int)oldstorage.AT(ml).size()) { + for (int tl = 0; tl < timelevels (ml, rl); ++tl) { + transfer_from_all (state, + tl, rl, ml, + & dh::dboxes::fast_old2new_sync_sendrecv, + tl, rl, ml, + & oldstorage); + } // for tl + } // if rl + + if (do_prolongate) { + // Initialise from a coarser level of the new hierarchy, where + // possible + if (rl > 0) { + if (transport_operator != op_none and transport_operator != op_sync) { + for (int tl = 0; tl < timelevels (ml, rl); ++tl) { + transfer_from_all (state, + tl, rl, ml, + & dh::dboxes::fast_old2new_ref_prol_sendrecv, + tls, rl - 1, ml, + t.time (tl, rl, ml)); + } // for tl + } // if transport_operator + } // if rl + } // if do_prolongate + } // for ml timer.stop (0); @@ -274,13 +305,13 @@ void ggf::fill (int rl, int c, int ml) { -// Synchronise the boundaries a component +// Synchronise the boundaries of a component void ggf:: sync (comm_state & state, int const tl, int const rl, int const c, int const ml) { - if(transport_operator == op_none) return; + if (transport_operator == op_none) return; // Copy static Timer timer ("sync"); timer.start (); @@ -292,6 +323,23 @@ sync (comm_state & state, timer.stop (0); } +// Synchronise the boundaries of all components +void +ggf:: +sync_all (comm_state & state, + int const tl, int const rl, int const ml) +{ + if (transport_operator == op_none) return; + // Copy + static Timer timer ("sync_all"); + timer.start (); + transfer_from_all (state, + tl,rl,ml, + & dh::dboxes::fast_sync_sendrecv, + tl,rl,ml); + timer.stop (0); +} + // Prolongate the boundaries of a component @@ -334,6 +382,45 @@ ref_bnd_prolongate (comm_state & state, timer.stop (0); } +// Prolongate the boundaries of all components +void +ggf:: +ref_bnd_prolongate_all (comm_state & state, + int const tl, int const rl, int const ml, + CCTK_REAL const time) +{ + // Interpolate + assert (rl>=1); + if (transport_operator == op_none or transport_operator == op_sync) return; + vector<int> tl2s; + static Timer timer ("ref_bnd_prolongate_all"); + timer.start (); + if (transport_operator != op_copy) { + // Interpolation in time + if (not (timelevels(ml,rl) >= prolongation_order_time+1)) { + char * const fullname = CCTK_FullName (varindex); + CCTK_VWarn (0, __LINE__, __FILE__, CCTK_THORNSTRING, + "The variable \"%s\" has only %d active time levels, which is not enough for boundary prolongation of order %d", + fullname ? fullname : "<unknown variable>", + timelevels(ml,rl), prolongation_order_time); + free (fullname); + } + assert (timelevels(ml,rl) >= prolongation_order_time+1); + tl2s.resize(prolongation_order_time+1); + for (int i=0; i<=prolongation_order_time; ++i) tl2s.AT(i) = i; + } else { + assert (timelevels(ml,rl) >= 1); + tl2s.resize(1); + tl2s.AT(0) = 0; + } + transfer_from_all (state, + tl ,rl ,ml, + & dh::dboxes::fast_ref_bnd_prol_sendrecv, + tl2s,rl-1,ml, + time); + timer.stop (0); +} + // Restrict a multigrid level @@ -359,6 +446,28 @@ mg_restrict (comm_state & state, timer.stop (0); } +// Restrict a multigrid level +void +ggf:: +mg_restrict_all (comm_state & state, + int const tl, int const rl, int const ml, + CCTK_REAL const time) +{ + static Timer timer ("mg_restrict_all"); + timer.start (); + // Require same times + static_assert (abs(0.1) > 0, "Function CarpetLib::abs has wrong signature"); + assert (abs(t.get_time(rl,ml) - t.get_time(rl,ml-1)) + <= 1.0e-8 * abs(t.get_time(rl,ml))); + vector<int> const tl2s(1,tl); + transfer_from_all (state, + tl ,rl,ml, + & dh::dboxes::fast_mg_rest_sendrecv, + tl2s,rl,ml-1, + time); + timer.stop (0); +} + // Prolongate a multigrid level @@ -384,6 +493,28 @@ mg_prolongate (comm_state & state, timer.stop (0); } +// Prolongate a multigrid level +void +ggf:: +mg_prolongate_all (comm_state & state, + int const tl, int const rl, int const ml, + CCTK_REAL const time) +{ + static Timer timer ("mg_prolongate_all"); + timer.start (); + // Require same times + static_assert (abs(0.1) > 0, "Function CarpetLib::abs has wrong signature"); + assert (abs(t.get_time(rl,ml) - t.get_time(rl,ml+1)) + <= 1.0e-8 * abs(t.get_time(rl,ml))); + vector<int> const tl2s(1,tl); + transfer_from_all (state, + tl ,rl,ml, + & dh::dboxes::fast_mg_prol_sendrecv, + tl2s,rl,ml+1, + time); + timer.stop (0); +} + // Restrict a refinement level @@ -410,6 +541,29 @@ ref_restrict (comm_state & state, timer.stop (0); } +// Restrict a refinement level +void +ggf:: +ref_restrict_all (comm_state & state, + int const tl, int const rl, int const ml, + CCTK_REAL const time) +{ + // Require same times + static_assert (abs(0.1) > 0, "Function CarpetLib::abs has wrong signature"); + assert (abs(t.get_time(rl,ml) - t.get_time(rl+1,ml)) + <= 1.0e-8 * abs(t.get_time(rl,ml))); + if (transport_operator == op_none or transport_operator == op_sync) return; + static Timer timer ("ref_restrict_all"); + timer.start (); + vector<int> const tl2s(1,tl); + transfer_from_all (state, + tl ,rl ,ml, + & dh::dboxes::fast_ref_rest_sendrecv, + tl2s,rl+1,ml, + time); + timer.stop (0); +} + // Prolongate a refinement level @@ -437,6 +591,30 @@ ref_prolongate (comm_state & state, timer.stop (0); } +// Prolongate a refinement level +void +ggf:: +ref_prolongate_all (comm_state & state, + int const tl, int const rl, int const ml, + CCTK_REAL const time) +{ + assert (rl>=1); + if (transport_operator == op_none or transport_operator == op_sync) return; + static Timer timer ("ref_prolongate_all"); + timer.start (); + vector<int> tl2s; + // Interpolation in time + assert (timelevels(ml,rl) >= prolongation_order_time+1); + tl2s.resize(prolongation_order_time+1); + for (int i=0; i<=prolongation_order_time; ++i) tl2s.AT(i) = i; + transfer_from_all (state, + tl ,rl ,ml, + & dh::dboxes::fast_ref_prol_sendrecv, + tl2s,rl-1,ml, + time); + timer.stop (0); +} + // Transfer regions @@ -459,6 +637,9 @@ transfer_from (comm_state & state, pvect const & psends = d.boxes.AT(ml1).AT(rl1).AT(c1).*sends; assert (precvs.size() == psends.size()); + // Return early if this communication does not concern us + if (precvs.empty()) return; + mdata & srcstorage = srcstorage_ ? * srcstorage_ : storage; if (not precvs.empty()) { @@ -475,6 +656,7 @@ transfer_from (comm_state & state, total.start (); // Interpolation orders + assert (transport_operator != op_none); int const pos = transport_operator == op_copy ? 0 : d.prolongation_order_space; int const pot = @@ -513,3 +695,77 @@ transfer_from (comm_state & state, total.stop (0); } + +// Transfer regions of all components +void +ggf:: +transfer_from_all (comm_state & state, + int const tl1, int const rl1, int const ml1, + srpvect const dh::dboxes::* sendrecvs, + vector<int> const & tl2s, int const rl2, int const ml2, + CCTK_REAL const & time, + mdata * const srcstorage_) +{ + assert (rl1>=0 and rl1<h.reflevels()); + assert (ml1>=0 and ml1<h.mglevels()); + assert (tl1>=0 and tl1<timelevels(ml1,rl1)); + + int const p = dist::rank(); + srpvect const & psendrecvs = d.boxes.AT(ml1).AT(rl1).AT(p).*sendrecvs; + + // Return early if this communication does not concern us + if (psendrecvs.empty()) return; + + static Timer total ("transfer_from_all"); + total.start (); + + mdata & srcstorage = srcstorage_ ? * srcstorage_ : storage; + + assert ( ml2<(int)srcstorage.size()); + assert (rl2>=0 and rl2<(int)srcstorage.AT(ml2).size()); + for (size_t i = 0; i < tl2s.size(); ++ i) { + int const tl2 = tl2s.AT(i); + assert (tl2>=0 and tl2<(int)srcstorage.AT(ml2).AT(rl2).AT(0).size()); + } + + // Set up source times + vector<CCTK_REAL> times(tl2s.size()); + for (size_t i=0; i<tl2s.size(); ++i) { + assert (tl2s.AT(i)>=0 and tl2s.AT(i)<timelevels(ml2,rl2)); + for (size_t j=0; j<i; ++j) { + assert (tl2s.AT(i) != tl2s.AT(j)); + } + times.AT(i) = t.time(tl2s.AT(i),rl2,ml2); + } + + // Interpolation orders + assert (transport_operator != op_none); + int const pos = + transport_operator == op_copy ? 0 : d.prolongation_order_space; + int const pot = + transport_operator == op_copy ? 0 : prolongation_order_time; + + vector<const gdata*> gsrcs(tl2s.size()); + + // Walk all regions + for (srpvect::const_iterator ipsendrecv = psendrecvs.begin(); + ipsendrecv!=psendrecvs.end(); ++ ipsendrecv) + { + pseudoregion const & psend = (* ipsendrecv).send; + pseudoregion const & precv = (* ipsendrecv).recv; + ibbox const & send = psend.extent; + ibbox const & recv = precv.extent; + int const c2 = psend.processor; + int const c1 = precv.processor; + + // Source and destination data + gdata * const dst = storage.AT(ml1).AT(rl1).AT(c1).AT(tl1); + cdata const & srcs = srcstorage.AT(ml2).AT(rl2); + for (int i=0; i<(int)gsrcs.size(); ++i) { + gsrcs.AT(i) = srcs.AT(c2).AT(tl2s.AT(i)); + } + dst->transfer_from (state, gsrcs, times, recv, send, time, pos, pot); + } + + total.stop (0); +} diff --git a/Carpet/CarpetLib/src/ggf.hh b/Carpet/CarpetLib/src/ggf.hh index 99eae2167..199d3b5a1 100644 --- a/Carpet/CarpetLib/src/ggf.hh +++ b/Carpet/CarpetLib/src/ggf.hh @@ -34,6 +34,7 @@ class ggf { typedef vector<iblist> iblistvect; typedef vector <pseudoregion> pvect; + typedef vector <sendrecv_pseudoregion> srpvect; typedef gdata* tdata; // data ... typedef vector<tdata> fdata; // ... for each time level @@ -114,26 +115,37 @@ public: // Synchronise the boundaries of a component void sync (comm_state& state, int tl, int rl, int c, int ml); + void sync_all (comm_state& state, int tl, int rl, int ml); // Prolongate the boundaries of a component void ref_bnd_prolongate (comm_state& state, int tl, int rl, int c, int ml, CCTK_REAL time); + void ref_bnd_prolongate_all (comm_state& state, + int tl, int rl, int ml, CCTK_REAL time); // Restrict a multigrid level void mg_restrict (comm_state& state, int tl, int rl, int c, int ml, CCTK_REAL time); + void mg_restrict_all (comm_state& state, + int tl, int rl, int ml, CCTK_REAL time); // Prolongate a multigrid level void mg_prolongate (comm_state& state, int tl, int rl, int c, int ml, CCTK_REAL time); + void mg_prolongate_all (comm_state& state, + int tl, int rl, int ml, CCTK_REAL time); // Restrict a refinement level void ref_restrict (comm_state& state, int tl, int rl, int c, int ml, CCTK_REAL time); + void ref_restrict_all (comm_state& state, + int tl, int rl, int ml, CCTK_REAL time); // Prolongate a refinement level void ref_prolongate (comm_state& state, int tl, int rl, int c, int ml, CCTK_REAL time); + void ref_prolongate_all (comm_state& state, + int tl, int rl, int ml, CCTK_REAL time); @@ -156,6 +168,13 @@ protected: vector<int> const & tl2s, int rl2, int ml2, CCTK_REAL const & time, mdata * srcstorage = 0); + void + transfer_from_all (comm_state & state, + int tl1, int rl1, int ml1, + srpvect const dh::dboxes::* sendrecvs, + vector<int> const & tl2s, int rl2, int ml2, + CCTK_REAL const & time, + mdata * srcstorage = 0); void transfer_from (comm_state & state, @@ -175,6 +194,23 @@ protected: time, srcstorage); } + void + transfer_from_all (comm_state & state, + int tl1, int rl1, int ml1, + srpvect const dh::dboxes::* sendrecvs, + int tl2, int rl2, int ml2, + mdata * srcstorage = 0) + { + vector <int> tl2s(1); + tl2s.AT(0) = tl2; + CCTK_REAL const time = t.time (tl2,rl2,ml2); + transfer_from_all (state, + tl1, rl1, ml1, + sendrecvs, + tl2s, rl2, ml2, + time, + srcstorage); + } |