aboutsummaryrefslogtreecommitdiff
path: root/Carpet
diff options
context:
space:
mode:
authorErik Schnetter <schnetter@cct.lsu.edu>2008-01-14 15:22:00 +0000
committerErik Schnetter <schnetter@cct.lsu.edu>2008-01-14 15:22:00 +0000
commit17a5e3d6aba8a5e25035e8971c644a23c4fbe544 (patch)
treeabbbdf1214ee21926c416cf40b88b6488b0ac1ab /Carpet
parent3b10725507279e70233706cdf63bce2072079bd2 (diff)
CarpetLib: Introduce new communication schedule
Store the communication schedule more efficiently: store only the non-zero parts. This saves much time when traversing the schedule. Add new communication routines which communicate not between only two individual components, but between all components. darcs-hash:20080114152229-dae7b-7b7ba51bd8b5de0a0009ea236f4a894667b0281b.gz
Diffstat (limited to 'Carpet')
-rw-r--r--Carpet/CarpetLib/src/dh.cc681
-rw-r--r--Carpet/CarpetLib/src/dh.hh42
-rw-r--r--Carpet/CarpetLib/src/ggf.cc260
-rw-r--r--Carpet/CarpetLib/src/ggf.hh36
4 files changed, 787 insertions, 232 deletions
diff --git a/Carpet/CarpetLib/src/dh.cc b/Carpet/CarpetLib/src/dh.cc
index 50d1466e6..26f2b01a8 100644
--- a/Carpet/CarpetLib/src/dh.cc
+++ b/Carpet/CarpetLib/src/dh.cc
@@ -19,6 +19,18 @@ using namespace CarpetLib;
+ostream & operator<< (ostream & os, pseudoregion const & p)
+{
+ return os << p.extent << "/p:" << p.processor;
+}
+
+ostream & operator<< (ostream & os, sendrecv_pseudoregion const & srp)
+{
+ return os << "(send:" << srp.send << ",recv:" << srp.recv << ")";
+}
+
+
+
// Constructors
dh::
dh (gh & h_,
@@ -65,6 +77,31 @@ prolongation_stencil_size ()
// Modifiers
+// Calculate this quantity on this processor? It does not need to be
+// calculated if it won't be used later on.
+static inline
+int
+this_proc (int const c)
+{
+ return c % dist::size();
+}
+
+static inline
+bool
+on_this_proc (int const c)
+{
+ return this_proc (c) == dist::rank();
+}
+
+static inline
+bool
+on_this_proc (int const c, int const cc)
+{
+ return on_this_proc (c) or on_this_proc (cc);
+}
+
+
+
bool there_was_an_error = false;
static
@@ -447,93 +484,109 @@ regrid ()
// Multigrid restriction:
- if (ml > 0) {
- int const oml = ml - 1;
-
- // Multigrid restriction must fill all active points
-
- dboxes const & obox = boxes.AT(oml).AT(rl).AT(c);
-
- ibset needrecv = box.active;
-
- ibset contracted_oactive;
- for (ibset::const_iterator
- ai = obox.active.begin(); ai != obox.active.end(); ++ ai)
- {
- ibbox const & oactive = * ai;
- contracted_oactive += oactive.contracted_for (box.interior);
- }
- contracted_oactive.normalize();
-
- ibset ovlp = needrecv & contracted_oactive;
- ovlp.normalize();
-
- for (ibset::const_iterator ri = ovlp.begin(); ri != ovlp.end(); ++ ri)
- {
- ibbox const & recv = * ri;
- box.mg_rest_recv.push_back (recv);
- ibbox const send = recv.expanded_for (obox.interior);
- ASSERT_c (send <= obox.exterior,
- "Multigrid restriction: Send region must be contained in exterior");
- box.mg_rest_send.push_back (send);
- }
-
- needrecv -= ovlp;
- needrecv.normalize();
+ if (true or on_this_proc (c)) {
+ if (ml > 0) {
+ int const oml = ml - 1;
- // All points must have been received
- ASSERT_c (needrecv.empty(),
- "Multigrid restriction: All points must have been received");
-
- } // if ml > 0
+ // Multigrid restriction must fill all active points
+
+ dboxes const & obox = boxes.AT(oml).AT(rl).AT(c);
+
+ ibset needrecv = box.active;
+
+ ibset contracted_oactive;
+ for (ibset::const_iterator
+ ai = obox.active.begin(); ai != obox.active.end(); ++ ai)
+ {
+ ibbox const & oactive = * ai;
+ contracted_oactive += oactive.contracted_for (box.interior);
+ }
+ contracted_oactive.normalize();
+
+ ibset ovlp = needrecv & contracted_oactive;
+ ovlp.normalize();
+
+ for (ibset::const_iterator
+ ri = ovlp.begin(); ri != ovlp.end(); ++ ri)
+ {
+ ibbox const & recv = * ri;
+ box.mg_rest_recv.push_back (recv);
+ ibbox const send = recv.expanded_for (obox.interior);
+ ASSERT_c (send <= obox.exterior,
+ "Multigrid restriction: Send region must be contained in exterior");
+ box.mg_rest_send.push_back (send);
+ if (on_this_proc (c)) {
+ int const p = dist::rank();
+ level.AT(p).fast_mg_rest_sendrecv.push_back
+ (sendrecv_pseudoregion (send, c, recv, c));
+ }
+ }
+
+ needrecv -= ovlp;
+ needrecv.normalize();
+
+ // All points must have been received
+ ASSERT_c (needrecv.empty(),
+ "Multigrid restriction: All points must have been received");
+
+ } // if ml > 0
+ }
// Multigrid prolongation:
- if (ml > 0) {
- int const oml = ml - 1;
-
- // Multigrid prolongation must fill all active points (this
- // could probably be relaxed)
-
- dboxes const & obox = boxes.AT(oml).AT(rl).AT(c);
-
- ibset oneedrecv = obox.active;
-
- i2vect const stencil_size = i2vect (prolongation_stencil_size());
-
- ibset expanded_active;
- for (ibset::const_iterator
- ai = box.active.begin(); ai != box.active.end(); ++ ai)
- {
- ibbox const & active = * ai;
- expanded_active += active.expanded_for (obox.interior);
- }
- expanded_active.normalize();
-
- ibset ovlp = oneedrecv & expanded_active;
- ovlp.normalize();
+ if (true or on_this_proc (c)) {
+ if (ml > 0) {
+ int const oml = ml - 1;
- for (ibset::const_iterator ri = ovlp.begin(); ri != ovlp.end(); ++ ri)
- {
- ibbox const & recv = * ri;
- box.mg_prol_recv.push_back (recv);
- ibbox const send =
- recv.expanded_for (box.interior).expand (stencil_size);
- ASSERT_c (send <= box.exterior,
- "Multigrid prolongation: Send region must be contained in exterior");
- box.mg_prol_send.push_back (send);
- }
-
- oneedrecv -= ovlp;
- oneedrecv.normalize();
-
- // All points must have been received
- ASSERT_c (oneedrecv.empty(),
- "Multigrid prolongation: All points must have been received");
-
- } // if ml > 0
+ // Multigrid prolongation must fill all active points
+ // (this could probably be relaxed)
+
+ dboxes const & obox = boxes.AT(oml).AT(rl).AT(c);
+
+ ibset oneedrecv = obox.active;
+
+ i2vect const stencil_size = i2vect (prolongation_stencil_size());
+
+ ibset expanded_active;
+ for (ibset::const_iterator
+ ai = box.active.begin(); ai != box.active.end(); ++ ai)
+ {
+ ibbox const & active = * ai;
+ expanded_active += active.expanded_for (obox.interior);
+ }
+ expanded_active.normalize();
+
+ ibset ovlp = oneedrecv & expanded_active;
+ ovlp.normalize();
+
+ for (ibset::const_iterator
+ ri = ovlp.begin(); ri != ovlp.end(); ++ ri)
+ {
+ ibbox const & recv = * ri;
+ box.mg_prol_recv.push_back (recv);
+ ibbox const send =
+ recv.expanded_for (box.interior).expand (stencil_size);
+ ASSERT_c (send <= box.exterior,
+ "Multigrid prolongation: Send region must be contained in exterior");
+ box.mg_prol_send.push_back (send);
+ if (on_this_proc (c)) {
+ int const p = dist::rank();
+ level.AT(p).fast_mg_prol_sendrecv.push_back
+ (sendrecv_pseudoregion (send, c, recv, c));
+ }
+ }
+
+ oneedrecv -= ovlp;
+ oneedrecv.normalize();
+
+ // All points must have been received
+ ASSERT_c (oneedrecv.empty(),
+ "Multigrid prolongation: All points must have been received");
+
+ } // if ml > 0
+ }
@@ -557,36 +610,43 @@ regrid ()
i2vect (h.reffacts.at(rl) / h.reffacts.at(orl));
for (int cc = 0; cc < h.components(orl); ++ cc) {
- dboxes const & obox = boxes.AT(ml).AT(orl).AT(cc);
-
- ibset contracted_oactive;
- for (ibset::const_iterator
- ai = obox.active.begin(); ai != obox.active.end(); ++ ai)
- {
- ibbox const & oactive = * ai;
- // untested for cell centering
- contracted_oactive +=
- oactive.contracted_for (box.interior).expand (reffact);
+ if (true or on_this_proc (c, cc)) {
+ dboxes const & obox = boxes.AT(ml).AT(orl).AT(cc);
+
+ ibset contracted_oactive;
+ for (ibset::const_iterator
+ ai = obox.active.begin(); ai != obox.active.end(); ++ ai)
+ {
+ ibbox const & oactive = * ai;
+ // untested for cell centering
+ contracted_oactive +=
+ oactive.contracted_for (box.interior).expand (reffact);
+ }
+ contracted_oactive.normalize();
+
+ ibset ovlp = needrecv & contracted_oactive;
+ ovlp.normalize();
+
+ for (ibset::const_iterator
+ ri =ovlp.begin(); ri != ovlp.end(); ++ ri)
+ {
+ ibbox const & recv = * ri;
+ box.ref_prol_recv.AT(cc).push_back (recv);
+ ibbox const send =
+ recv.expanded_for (obox.interior).expand (stencil_size);
+ ASSERT_c (send <= obox.exterior,
+ "Refinement prolongation: Send region must be contained in exterior");
+ box.ref_prol_send.AT(cc).push_back (send);
+ if (on_this_proc (c, cc)) {
+ int const p = dist::rank();
+ level.AT(p).fast_ref_prol_sendrecv.push_back
+ (sendrecv_pseudoregion (send, cc, recv, c));
+ }
+ }
+
+ needrecv -= ovlp;
+
}
- contracted_oactive.normalize();
-
- ibset ovlp = needrecv & contracted_oactive;
- ovlp.normalize();
-
- for (ibset::const_iterator ri =
- ovlp.begin(); ri != ovlp.end(); ++ ri)
- {
- ibbox const & recv = * ri;
- box.ref_prol_recv.AT(cc).push_back (recv);
- ibbox const send =
- recv.expanded_for (obox.interior).expand (stencil_size);
- ASSERT_c (send <= obox.exterior,
- "Refinement prolongation: Send region must be contained in exterior");
- box.ref_prol_send.AT(cc).push_back (send);
- }
-
- needrecv -= ovlp;
-
} // for cc
needrecv.normalize();
@@ -621,31 +681,39 @@ regrid ()
ibset & sync = box.sync;
for (int cc = 0; cc < h.components(rl); ++ cc) {
- dboxes const & obox = boxes.AT(ml).AT(rl).AT(cc);
-
+ if (true or on_this_proc (c, cc)) {
+ dboxes const & obox = boxes.AT(ml).AT(rl).AT(cc);
+
#if 0
- ibset ovlp = needrecv & obox.owned;
+ ibset ovlp = needrecv & obox.owned;
#else
- ibset ovlp = needrecv & obox.interior;
+ ibset ovlp = needrecv & obox.interior;
#endif
- ovlp.normalize();
-
- if (cc == c) {
- ASSERT_cc (ovlp.empty(),
- "A region may not synchronise from itself");
- }
-
- for (ibset::const_iterator ri = ovlp.begin(); ri != ovlp.end(); ++ ri)
- {
- ibbox const & recv = * ri;
- box.sync_recv.AT(cc).push_back (recv);
- ibbox const & send = recv;
- box.sync_send.AT(cc).push_back (send);
+ ovlp.normalize();
+
+ if (cc == c) {
+ ASSERT_cc (ovlp.empty(),
+ "A region may not synchronise from itself");
+ }
+
+ for (ibset::const_iterator
+ ri = ovlp.begin(); ri != ovlp.end(); ++ ri)
+ {
+ ibbox const & recv = * ri;
+ box.sync_recv.AT(cc).push_back (recv);
+ ibbox const & send = recv;
+ box.sync_send.AT(cc).push_back (send);
+ if (on_this_proc (c, cc)) {
+ int const p = dist::rank();
+ level.AT(p).fast_sync_sendrecv.push_back
+ (sendrecv_pseudoregion (send, cc, recv, c));
+ }
+ }
+
+ needrecv -= ovlp;
+ sync += ovlp;
+
}
-
- needrecv -= ovlp;
- sync += ovlp;
-
} // for cc
needrecv.normalize();
@@ -679,37 +747,44 @@ regrid ()
i2vect (h.reffacts.at(rl) / h.reffacts.at(orl));
for (int cc = 0; cc < h.components(orl); ++ cc) {
- dboxes const & obox = boxes.AT(ml).AT(orl).AT(cc);
-
- ibset contracted_oactive;
- for (ibset::const_iterator
- ai = obox.active.begin(); ai != obox.active.end(); ++ ai)
- {
- ibbox const & oactive = * ai;
- // untested for cell centering
- contracted_oactive +=
- oactive.contracted_for (box.interior).expand (reffact);
+ if (true or on_this_proc (c, cc)) {
+ dboxes const & obox = boxes.AT(ml).AT(orl).AT(cc);
+
+ ibset contracted_oactive;
+ for (ibset::const_iterator
+ ai = obox.active.begin(); ai != obox.active.end(); ++ ai)
+ {
+ ibbox const & oactive = * ai;
+ // untested for cell centering
+ contracted_oactive +=
+ oactive.contracted_for (box.interior).expand (reffact);
+ }
+ contracted_oactive.normalize();
+
+ ibset ovlp = needrecv & contracted_oactive;
+ ovlp.normalize();
+
+ for (ibset::const_iterator
+ ri = ovlp.begin(); ri != ovlp.end(); ++ ri)
+ {
+ ibbox const & recv = * ri;
+ box.ref_bnd_prol_recv.AT(cc).push_back (recv);
+ ibbox const send =
+ recv.expanded_for (obox.interior).expand (stencil_size);
+ ASSERT_c (send <= obox.exterior,
+ "Boundary prolongation: Send region must be contained in exterior");
+ box.ref_bnd_prol_send.AT(cc).push_back (send);
+ if (on_this_proc (c, cc)) {
+ int const p = dist::rank();
+ level.AT(p).fast_ref_bnd_prol_sendrecv.push_back
+ (sendrecv_pseudoregion (send, cc, recv, c));
+ }
+ }
+
+ needrecv -= ovlp;
+ bndref += ovlp;
+
}
- contracted_oactive.normalize();
-
- ibset ovlp = needrecv & contracted_oactive;
- ovlp.normalize();
-
- for (ibset::const_iterator ri =
- ovlp.begin(); ri != ovlp.end(); ++ ri)
- {
- ibbox const & recv = * ri;
- box.ref_bnd_prol_recv.AT(cc).push_back (recv);
- ibbox const send =
- recv.expanded_for (obox.interior).expand (stencil_size);
- ASSERT_c (send <= obox.exterior,
- "Boundary prolongation: Send region must be contained in exterior");
- box.ref_bnd_prol_send.AT(cc).push_back (send);
- }
-
- needrecv -= ovlp;
- bndref += ovlp;
-
} // for cc
needrecv.normalize();
@@ -754,10 +829,37 @@ regrid ()
+#if 0
+ optimise2_field
+ (level,
+ & dboxes::fast_mg_rest_send, & dboxes::fast_mg_rest_recv,
+ & dboxes::fast_mg_rest_sendrecv);
+ optimise2_field
+ (level,
+ & dboxes::fast_mg_prol_send, & dboxes::fast_mg_prol_recv,
+ & dboxes::fast_mg_prol_sendrecv);
+
+ optimise2_field
+ (level,
+ & dboxes::fast_ref_prol_send, & dboxes::fast_ref_prol_recv,
+ & dboxes::fast_ref_prol_sendrecv);
+ optimise2_field
+ (level,
+ & dboxes::fast_sync_send, & dboxes::fast_sync_recv,
+ & dboxes::fast_sync_sendrecv);
+ optimise2_field
+ (level,
+ & dboxes::fast_ref_bnd_prol_send, & dboxes::fast_ref_bnd_prol_recv,
+ & dboxes::fast_ref_bnd_prol_sendrecv);
+#endif
+
+
+
// Refinement restriction:
if (rl > 0) {
int const orl = rl - 1;
+ cboxes & olevel = boxes.AT(ml).AT(orl);
ibset needrecv;
for (int c = 0; c < h.components(rl); ++ c) {
@@ -783,33 +885,40 @@ regrid ()
obox.ref_rest_send.resize (h.components(rl));
for (int c = 0; c < h.components(rl); ++ c) {
- dboxes const & box = boxes.AT(ml).AT(rl).AT(c);
-
- ibset contracted_active;
- for (ibset::const_iterator
- ai = box.active.begin(); ai != box.active.end(); ++ ai)
- {
- ibbox const & active = * ai;
- contracted_active += active.contracted_for (obox.interior);
+ if (true or on_this_proc (c, cc)) {
+ dboxes const & box = boxes.AT(ml).AT(rl).AT(c);
+
+ ibset contracted_active;
+ for (ibset::const_iterator
+ ai = box.active.begin(); ai != box.active.end(); ++ ai)
+ {
+ ibbox const & active = * ai;
+ contracted_active += active.contracted_for (obox.interior);
+ }
+ contracted_active.normalize();
+
+ ibset ovlp = obox.active & contracted_active;
+ ovlp.normalize();
+
+ for (ibset::const_iterator
+ ri =ovlp.begin(); ri != ovlp.end(); ++ ri)
+ {
+ ibbox const & recv = * ri;
+ obox.ref_rest_recv.AT(c).push_back (recv);
+ ibbox const send = recv.expanded_for (box.interior);
+ ASSERT_c (send <= box.active,
+ "Refinement restriction: Send region must be contained in active part");
+ obox.ref_rest_send.AT(c).push_back (send);
+ if (on_this_proc (c, cc)) {
+ int const p = dist::rank();
+ olevel.AT(p).fast_ref_rest_sendrecv.push_back
+ (sendrecv_pseudoregion (send, c, recv, cc));
+ }
+ }
+
+ needrecv -= ovlp;
+
}
- contracted_active.normalize();
-
- ibset ovlp = obox.active & contracted_active;
- ovlp.normalize();
-
- for (ibset::const_iterator ri =
- ovlp.begin(); ri != ovlp.end(); ++ ri)
- {
- ibbox const & recv = * ri;
- obox.ref_rest_recv.AT(c).push_back (recv);
- ibbox const send = recv.expanded_for (box.interior);
- ASSERT_c (send <= box.active,
- "Refinement restriction: Send region must be contained in active part");
- obox.ref_rest_send.AT(c).push_back (send);
- }
-
- needrecv -= ovlp;
-
} // for c
optimise_field
@@ -825,6 +934,12 @@ regrid ()
ASSERT_rl (needrecv.empty(),
"Refinement restriction: All points must have been received");
+#if 0
+ optimise2_field
+ (olevel, & dboxes::fast_ref_rest_send, & dboxes::fast_ref_rest_recv,
+ & dboxes::fast_ref_rest_sendrecv);
+#endif
+
} // if rl > 0
@@ -853,22 +968,29 @@ regrid ()
box.old2new_sync_send.resize (oldcomponents);
for (int cc = 0; cc < oldcomponents; ++ cc) {
- dboxes const & obox = oldboxes.AT(ml).AT(rl).AT(cc);
-
- ibset ovlp = needrecv & obox.owned;
- ovlp.normalize();
-
- for (ibset::const_iterator ri =
- ovlp.begin(); ri != ovlp.end(); ++ ri)
- {
- ibbox const & recv = * ri;
- box.old2new_sync_recv.AT(cc).push_back (recv);
- ibbox const & send = recv;
- box.old2new_sync_send.AT(cc).push_back (send);
+ if (true or on_this_proc (c, cc)) {
+ dboxes const & obox = oldboxes.AT(ml).AT(rl).AT(cc);
+
+ ibset ovlp = needrecv & obox.owned;
+ ovlp.normalize();
+
+ for (ibset::const_iterator
+ ri =ovlp.begin(); ri != ovlp.end(); ++ ri)
+ {
+ ibbox const & recv = * ri;
+ box.old2new_sync_recv.AT(cc).push_back (recv);
+ ibbox const & send = recv;
+ box.old2new_sync_send.AT(cc).push_back (send);
+ if (on_this_proc (c, cc)) {
+ int const p = dist::rank();
+ level.AT(p).fast_old2new_sync_sendrecv.push_back
+ (sendrecv_pseudoregion (send, cc, recv, c));
+ }
+ }
+
+ needrecv -= ovlp;
+
}
-
- needrecv -= ovlp;
-
} // for cc
needrecv.normalize();
@@ -897,36 +1019,43 @@ regrid ()
i2vect (h.reffacts.at(rl) / h.reffacts.at(orl));
for (int cc = 0; cc < h.components(orl); ++ cc) {
- dboxes const & obox = boxes.AT(ml).AT(orl).AT(cc);
-
- ibset contracted_oactive;
- for (ibset::const_iterator
- ai = obox.active.begin(); ai != obox.active.end(); ++ ai)
- {
- ibbox const & oactive = * ai;
- // untested for cell centering
- contracted_oactive +=
- oactive.contracted_for (box.interior).expand (reffact);
+ if (true or on_this_proc (c, cc)) {
+ dboxes const & obox = boxes.AT(ml).AT(orl).AT(cc);
+
+ ibset contracted_oactive;
+ for (ibset::const_iterator
+ ai = obox.active.begin(); ai != obox.active.end(); ++ ai)
+ {
+ ibbox const & oactive = * ai;
+ // untested for cell centering
+ contracted_oactive +=
+ oactive.contracted_for (box.interior).expand (reffact);
+ }
+ contracted_oactive.normalize();
+
+ ibset ovlp = needrecv & contracted_oactive;
+ ovlp.normalize();
+
+ for (ibset::const_iterator
+ ri = ovlp.begin(); ri != ovlp.end(); ++ ri)
+ {
+ ibbox const & recv = * ri;
+ box.old2new_ref_prol_recv.AT(cc).push_back (recv);
+ ibbox const send =
+ recv.expanded_for (obox.interior).expand (stencil_size);
+ ASSERT_c (send <= obox.exterior,
+ "Regridding prolongation: Send region must be contained in exterior");
+ box.old2new_ref_prol_send.AT(cc).push_back (send);
+ if (on_this_proc (c, cc)) {
+ int const p = dist::rank();
+ level.AT(p).fast_old2new_ref_prol_sendrecv.push_back
+ (sendrecv_pseudoregion (send, cc, recv, c));
+ }
+ }
+
+ needrecv -= ovlp;
+
}
- contracted_oactive.normalize();
-
- ibset ovlp = needrecv & contracted_oactive;
- ovlp.normalize();
-
- for (ibset::const_iterator ri =
- ovlp.begin(); ri != ovlp.end(); ++ ri)
- {
- ibbox const & recv = * ri;
- box.old2new_ref_prol_recv.AT(cc).push_back (recv);
- ibbox const send =
- recv.expanded_for (obox.interior).expand (stencil_size);
- ASSERT_c (send <= obox.exterior,
- "Regridding prolongation: Send region must be contained in exterior");
- box.old2new_ref_prol_send.AT(cc).push_back (send);
- }
-
- needrecv -= ovlp;
-
} // for cc
needrecv.normalize();
@@ -963,6 +1092,19 @@ regrid ()
} // for c
+#if 0
+ optimise2_field
+ (level,
+ & dboxes::fast_old2new_sync_send,
+ & dboxes::fast_old2new_sync_recv,
+ & dboxes::fast_old2new_sync_sendrecv);
+ optimise2_field
+ (level,
+ & dboxes::fast_old2new_ref_prol_send,
+ & dboxes::fast_old2new_ref_prol_recv,
+ & dboxes::fast_old2new_ref_prol_sendrecv);
+#endif
+
} // for rl
} // for m
@@ -1069,6 +1211,72 @@ optimise_field (dboxes & box,
+#if 0
+void
+dh::
+optimise2_field (cboxes & boxes,
+ iblistvect const dboxes::* const field_send,
+ iblistvect const dboxes::* const field_recv,
+ srpvect dboxes::* const fast_field_sendrecv)
+{
+ int const nc = (int)boxes.size();
+ for (int c = 0; c < nc; ++ c) {
+ assert ((boxes.AT(c).*fast_field_sendrecv).empty());
+ }
+
+ int const p = dist::rank();
+ dboxes & b = boxes.AT(p);
+
+ size_t num_regions = 0;
+
+ for (int sc = 0; sc < nc; ++ sc) {
+ for (int rc = 0; rc < nc; ++ rc) {
+ if (sc == p or rc == p) {
+
+ iblist const & sbl = (boxes.AT(rc).*field_send).AT(sc);
+ iblist const & rbl = (boxes.AT(rc).*field_recv).AT(sc);
+ assert (sbl.size() == rbl.size());
+
+ num_regions += sbl.size();
+
+ } // if
+ } // for rc
+ } // for sc
+
+ (b.*fast_field_sendrecv).reserve (num_regions);
+
+ for (int sc = 0; sc < nc; ++ sc) {
+ for (int rc = 0; rc < nc; ++ rc) {
+ if (sc == p or rc == p) {
+
+ iblist const & sbl = (boxes.AT(rc).*field_send).AT(sc);
+ iblist const & rbl = (boxes.AT(rc).*field_recv).AT(sc);
+
+ for (iblist::const_iterator sbi = srl.begin(), rbi = rrl.begin();
+ sbi != sbl.end(); ++ sbi, ++ rbi)
+ {
+ ibbox const & sb = * sbi;
+ ibbox const & rb = * rbi;
+
+ sendrecv_pseudoregion srp;
+ srp.send.extent = sb;
+ srp.send.processor = p;
+ srp.recv.extent = rb;
+ srp.recv.processor = c;
+
+ (b.*fast_field_sendrecv).push_back (srp);
+ }
+
+ } // if
+ } // for rc
+ } // for sc
+
+ assert ((b.*fast_field_sendrecv).size() == num_regions);
+}
+#endif
+
+
+
void
dh::
recompose (int const rl, bool const do_prolongate)
@@ -1185,5 +1393,18 @@ output (ostream & os)
os << "old2new_ref_prol_recv:" << old2new_ref_prol_recv << eol;
os << "old2new_ref_prol_send:" << old2new_ref_prol_send << eol;
+#if 0
+ // Fast schedule:
+
+ os << " fast_mg_rest_sendrecv:" << fast_mg_rest_sendrecv << eol;
+ os << " fast_mg_prol_sendrecv:" << fast_mg_prol_sendrecv << eol;
+ os << " fast_ref_prol_sendrecv:" << fast_ref_prol_sendrecv << eol;
+ os << " fast_ref_rest_sendrecv:" << fast_ref_rest_sendrecv << eol;
+ os << " fast_sync_sendrecv:" << fast_sync_sendrecv << eol;
+ os << " fast_ref_bnd_prol_sendrecv:" << fast_ref_bnd_prol_sendrecv << eol;
+ os << " fast_old2new_sync_sendrecv:" << fast_old2new_sync_sendrecv << eol;
+ os << " fast_old2new_ref_prol_sendrecv:" << fast_old2new_ref_prol_sendrecv << eol;
+#endif
+
return os;
}
diff --git a/Carpet/CarpetLib/src/dh.hh b/Carpet/CarpetLib/src/dh.hh
index a4df5a516..9011525d7 100644
--- a/Carpet/CarpetLib/src/dh.hh
+++ b/Carpet/CarpetLib/src/dh.hh
@@ -24,8 +24,32 @@ using namespace std;
struct pseudoregion {
ibbox extent;
int processor;
+ pseudoregion ()
+ {
+ }
+ pseudoregion (ibbox const & extent_, int const processor_)
+ : extent (extent_), processor (processor_)
+ {
+ }
};
+ostream & operator<< (ostream & os, pseudoregion const & p);
+
+struct sendrecv_pseudoregion {
+ pseudoregion send, recv;
+ sendrecv_pseudoregion ()
+ {
+ }
+ sendrecv_pseudoregion (ibbox const & send_extent, int const send_processor,
+ ibbox const & recv_extent, int const recv_processor)
+ : send (pseudoregion (send_extent, send_processor)),
+ recv (pseudoregion (recv_extent, recv_processor))
+ {
+ }
+};
+
+ostream & operator<< (ostream & os, sendrecv_pseudoregion const & srp);
+
// Forward declaration
@@ -43,6 +67,7 @@ public:
typedef vector<iblist> iblistvect; // vector of lists
typedef vector <pseudoregion> pvect;
+ typedef vector <sendrecv_pseudoregion> srpvect;
@@ -101,6 +126,13 @@ public:
pvect fast_ref_bnd_prol_recv;
pvect fast_ref_bnd_prol_send;
+ srpvect fast_mg_rest_sendrecv;
+ srpvect fast_mg_prol_sendrecv;
+ srpvect fast_ref_prol_sendrecv;
+ srpvect fast_ref_rest_sendrecv;
+ srpvect fast_sync_sendrecv;
+ srpvect fast_ref_bnd_prol_sendrecv;
+
// Regridding schedule:
iblistvect old2new_sync_recv;
@@ -113,6 +145,9 @@ public:
pvect fast_old2new_ref_prol_recv;
pvect fast_old2new_ref_prol_send;
+ srpvect fast_old2new_sync_sendrecv;
+ srpvect fast_old2new_ref_prol_sendrecv;
+
ostream & output (ostream & os) const;
};
@@ -139,6 +174,13 @@ private:
iblist const dboxes::* field,
pvect dboxes::* fast_field);
+ static
+ void
+ optimise2_field (cboxes & bs,
+ pvect const dboxes::* fast_field_recv,
+ pvect const dboxes::* fast_field_send,
+ srpvect dboxes::* fast_field_sendrecv);
+
public: // should be readonly
// Fields
diff --git a/Carpet/CarpetLib/src/ggf.cc b/Carpet/CarpetLib/src/ggf.cc
index f698835ec..70fe3884b 100644
--- a/Carpet/CarpetLib/src/ggf.cc
+++ b/Carpet/CarpetLib/src/ggf.cc
@@ -167,6 +167,7 @@ void ggf::recompose_fill (comm_state & state, int const rl,
}
}
+#if 0
for (int c = 0; c < h.components (rl); ++c) {
// Initialise from the same level of the old hierarchy, where
@@ -200,6 +201,36 @@ void ggf::recompose_fill (comm_state & state, int const rl,
} // if do_prolongate
} // for c
+#endif
+
+ // Initialise from the same level of the old hierarchy, where
+ // possible
+ if (rl < (int)oldstorage.AT(ml).size()) {
+ for (int tl = 0; tl < timelevels (ml, rl); ++tl) {
+ transfer_from_all (state,
+ tl, rl, ml,
+ & dh::dboxes::fast_old2new_sync_sendrecv,
+ tl, rl, ml,
+ & oldstorage);
+ } // for tl
+ } // if rl
+
+ if (do_prolongate) {
+ // Initialise from a coarser level of the new hierarchy, where
+ // possible
+ if (rl > 0) {
+ if (transport_operator != op_none and transport_operator != op_sync) {
+ for (int tl = 0; tl < timelevels (ml, rl); ++tl) {
+ transfer_from_all (state,
+ tl, rl, ml,
+ & dh::dboxes::fast_old2new_ref_prol_sendrecv,
+ tls, rl - 1, ml,
+ t.time (tl, rl, ml));
+ } // for tl
+ } // if transport_operator
+ } // if rl
+ } // if do_prolongate
+
} // for ml
timer.stop (0);
@@ -274,13 +305,13 @@ void ggf::fill (int rl, int c, int ml) {
-// Synchronise the boundaries a component
+// Synchronise the boundaries of a component
void
ggf::
sync (comm_state & state,
int const tl, int const rl, int const c, int const ml)
{
- if(transport_operator == op_none) return;
+ if (transport_operator == op_none) return;
// Copy
static Timer timer ("sync");
timer.start ();
@@ -292,6 +323,23 @@ sync (comm_state & state,
timer.stop (0);
}
+// Synchronise the boundaries of all components
+void
+ggf::
+sync_all (comm_state & state,
+ int const tl, int const rl, int const ml)
+{
+ if (transport_operator == op_none) return;
+ // Copy
+ static Timer timer ("sync_all");
+ timer.start ();
+ transfer_from_all (state,
+ tl,rl,ml,
+ & dh::dboxes::fast_sync_sendrecv,
+ tl,rl,ml);
+ timer.stop (0);
+}
+
// Prolongate the boundaries of a component
@@ -334,6 +382,45 @@ ref_bnd_prolongate (comm_state & state,
timer.stop (0);
}
+// Prolongate the boundaries of all components
+void
+ggf::
+ref_bnd_prolongate_all (comm_state & state,
+ int const tl, int const rl, int const ml,
+ CCTK_REAL const time)
+{
+ // Interpolate
+ assert (rl>=1);
+ if (transport_operator == op_none or transport_operator == op_sync) return;
+ vector<int> tl2s;
+ static Timer timer ("ref_bnd_prolongate_all");
+ timer.start ();
+ if (transport_operator != op_copy) {
+ // Interpolation in time
+ if (not (timelevels(ml,rl) >= prolongation_order_time+1)) {
+ char * const fullname = CCTK_FullName (varindex);
+ CCTK_VWarn (0, __LINE__, __FILE__, CCTK_THORNSTRING,
+ "The variable \"%s\" has only %d active time levels, which is not enough for boundary prolongation of order %d",
+ fullname ? fullname : "<unknown variable>",
+ timelevels(ml,rl), prolongation_order_time);
+ free (fullname);
+ }
+ assert (timelevels(ml,rl) >= prolongation_order_time+1);
+ tl2s.resize(prolongation_order_time+1);
+ for (int i=0; i<=prolongation_order_time; ++i) tl2s.AT(i) = i;
+ } else {
+ assert (timelevels(ml,rl) >= 1);
+ tl2s.resize(1);
+ tl2s.AT(0) = 0;
+ }
+ transfer_from_all (state,
+ tl ,rl ,ml,
+ & dh::dboxes::fast_ref_bnd_prol_sendrecv,
+ tl2s,rl-1,ml,
+ time);
+ timer.stop (0);
+}
+
// Restrict a multigrid level
@@ -359,6 +446,28 @@ mg_restrict (comm_state & state,
timer.stop (0);
}
+// Restrict a multigrid level
+void
+ggf::
+mg_restrict_all (comm_state & state,
+ int const tl, int const rl, int const ml,
+ CCTK_REAL const time)
+{
+ static Timer timer ("mg_restrict_all");
+ timer.start ();
+ // Require same times
+ static_assert (abs(0.1) > 0, "Function CarpetLib::abs has wrong signature");
+ assert (abs(t.get_time(rl,ml) - t.get_time(rl,ml-1))
+ <= 1.0e-8 * abs(t.get_time(rl,ml)));
+ vector<int> const tl2s(1,tl);
+ transfer_from_all (state,
+ tl ,rl,ml,
+ & dh::dboxes::fast_mg_rest_sendrecv,
+ tl2s,rl,ml-1,
+ time);
+ timer.stop (0);
+}
+
// Prolongate a multigrid level
@@ -384,6 +493,28 @@ mg_prolongate (comm_state & state,
timer.stop (0);
}
+// Prolongate a multigrid level
+void
+ggf::
+mg_prolongate_all (comm_state & state,
+ int const tl, int const rl, int const ml,
+ CCTK_REAL const time)
+{
+ static Timer timer ("mg_prolongate_all");
+ timer.start ();
+ // Require same times
+ static_assert (abs(0.1) > 0, "Function CarpetLib::abs has wrong signature");
+ assert (abs(t.get_time(rl,ml) - t.get_time(rl,ml+1))
+ <= 1.0e-8 * abs(t.get_time(rl,ml)));
+ vector<int> const tl2s(1,tl);
+ transfer_from_all (state,
+ tl ,rl,ml,
+ & dh::dboxes::fast_mg_prol_sendrecv,
+ tl2s,rl,ml+1,
+ time);
+ timer.stop (0);
+}
+
// Restrict a refinement level
@@ -410,6 +541,29 @@ ref_restrict (comm_state & state,
timer.stop (0);
}
+// Restrict a refinement level
+void
+ggf::
+ref_restrict_all (comm_state & state,
+ int const tl, int const rl, int const ml,
+ CCTK_REAL const time)
+{
+ // Require same times
+ static_assert (abs(0.1) > 0, "Function CarpetLib::abs has wrong signature");
+ assert (abs(t.get_time(rl,ml) - t.get_time(rl+1,ml))
+ <= 1.0e-8 * abs(t.get_time(rl,ml)));
+ if (transport_operator == op_none or transport_operator == op_sync) return;
+ static Timer timer ("ref_restrict_all");
+ timer.start ();
+ vector<int> const tl2s(1,tl);
+ transfer_from_all (state,
+ tl ,rl ,ml,
+ & dh::dboxes::fast_ref_rest_sendrecv,
+ tl2s,rl+1,ml,
+ time);
+ timer.stop (0);
+}
+
// Prolongate a refinement level
@@ -437,6 +591,30 @@ ref_prolongate (comm_state & state,
timer.stop (0);
}
+// Prolongate a refinement level
+void
+ggf::
+ref_prolongate_all (comm_state & state,
+ int const tl, int const rl, int const ml,
+ CCTK_REAL const time)
+{
+ assert (rl>=1);
+ if (transport_operator == op_none or transport_operator == op_sync) return;
+ static Timer timer ("ref_prolongate_all");
+ timer.start ();
+ vector<int> tl2s;
+ // Interpolation in time
+ assert (timelevels(ml,rl) >= prolongation_order_time+1);
+ tl2s.resize(prolongation_order_time+1);
+ for (int i=0; i<=prolongation_order_time; ++i) tl2s.AT(i) = i;
+ transfer_from_all (state,
+ tl ,rl ,ml,
+ & dh::dboxes::fast_ref_prol_sendrecv,
+ tl2s,rl-1,ml,
+ time);
+ timer.stop (0);
+}
+
// Transfer regions
@@ -459,6 +637,9 @@ transfer_from (comm_state & state,
pvect const & psends = d.boxes.AT(ml1).AT(rl1).AT(c1).*sends;
assert (precvs.size() == psends.size());
+ // Return early if this communication does not concern us
+ if (precvs.empty()) return;
+
mdata & srcstorage = srcstorage_ ? * srcstorage_ : storage;
if (not precvs.empty()) {
@@ -475,6 +656,7 @@ transfer_from (comm_state & state,
total.start ();
// Interpolation orders
+ assert (transport_operator != op_none);
int const pos =
transport_operator == op_copy ? 0 : d.prolongation_order_space;
int const pot =
@@ -513,3 +695,77 @@ transfer_from (comm_state & state,
total.stop (0);
}
+
+// Transfer regions of all components
+void
+ggf::
+transfer_from_all (comm_state & state,
+ int const tl1, int const rl1, int const ml1,
+ srpvect const dh::dboxes::* sendrecvs,
+ vector<int> const & tl2s, int const rl2, int const ml2,
+ CCTK_REAL const & time,
+ mdata * const srcstorage_)
+{
+ assert (rl1>=0 and rl1<h.reflevels());
+ assert (ml1>=0 and ml1<h.mglevels());
+ assert (tl1>=0 and tl1<timelevels(ml1,rl1));
+
+ int const p = dist::rank();
+ srpvect const & psendrecvs = d.boxes.AT(ml1).AT(rl1).AT(p).*sendrecvs;
+
+ // Return early if this communication does not concern us
+ if (psendrecvs.empty()) return;
+
+ static Timer total ("transfer_from_all");
+ total.start ();
+
+ mdata & srcstorage = srcstorage_ ? * srcstorage_ : storage;
+
+ assert ( ml2<(int)srcstorage.size());
+ assert (rl2>=0 and rl2<(int)srcstorage.AT(ml2).size());
+ for (size_t i = 0; i < tl2s.size(); ++ i) {
+ int const tl2 = tl2s.AT(i);
+ assert (tl2>=0 and tl2<(int)srcstorage.AT(ml2).AT(rl2).AT(0).size());
+ }
+
+ // Set up source times
+ vector<CCTK_REAL> times(tl2s.size());
+ for (size_t i=0; i<tl2s.size(); ++i) {
+ assert (tl2s.AT(i)>=0 and tl2s.AT(i)<timelevels(ml2,rl2));
+ for (size_t j=0; j<i; ++j) {
+ assert (tl2s.AT(i) != tl2s.AT(j));
+ }
+ times.AT(i) = t.time(tl2s.AT(i),rl2,ml2);
+ }
+
+ // Interpolation orders
+ assert (transport_operator != op_none);
+ int const pos =
+ transport_operator == op_copy ? 0 : d.prolongation_order_space;
+ int const pot =
+ transport_operator == op_copy ? 0 : prolongation_order_time;
+
+ vector<const gdata*> gsrcs(tl2s.size());
+
+ // Walk all regions
+ for (srpvect::const_iterator ipsendrecv = psendrecvs.begin();
+ ipsendrecv!=psendrecvs.end(); ++ ipsendrecv)
+ {
+ pseudoregion const & psend = (* ipsendrecv).send;
+ pseudoregion const & precv = (* ipsendrecv).recv;
+ ibbox const & send = psend.extent;
+ ibbox const & recv = precv.extent;
+ int const c2 = psend.processor;
+ int const c1 = precv.processor;
+
+ // Source and destination data
+ gdata * const dst = storage.AT(ml1).AT(rl1).AT(c1).AT(tl1);
+ cdata const & srcs = srcstorage.AT(ml2).AT(rl2);
+ for (int i=0; i<(int)gsrcs.size(); ++i) {
+ gsrcs.AT(i) = srcs.AT(c2).AT(tl2s.AT(i));
+ }
+ dst->transfer_from (state, gsrcs, times, recv, send, time, pos, pot);
+ }
+
+ total.stop (0);
+}
diff --git a/Carpet/CarpetLib/src/ggf.hh b/Carpet/CarpetLib/src/ggf.hh
index 99eae2167..199d3b5a1 100644
--- a/Carpet/CarpetLib/src/ggf.hh
+++ b/Carpet/CarpetLib/src/ggf.hh
@@ -34,6 +34,7 @@ class ggf {
typedef vector<iblist> iblistvect;
typedef vector <pseudoregion> pvect;
+ typedef vector <sendrecv_pseudoregion> srpvect;
typedef gdata* tdata; // data ...
typedef vector<tdata> fdata; // ... for each time level
@@ -114,26 +115,37 @@ public:
// Synchronise the boundaries of a component
void sync (comm_state& state, int tl, int rl, int c, int ml);
+ void sync_all (comm_state& state, int tl, int rl, int ml);
// Prolongate the boundaries of a component
void ref_bnd_prolongate (comm_state& state,
int tl, int rl, int c, int ml, CCTK_REAL time);
+ void ref_bnd_prolongate_all (comm_state& state,
+ int tl, int rl, int ml, CCTK_REAL time);
// Restrict a multigrid level
void mg_restrict (comm_state& state,
int tl, int rl, int c, int ml, CCTK_REAL time);
+ void mg_restrict_all (comm_state& state,
+ int tl, int rl, int ml, CCTK_REAL time);
// Prolongate a multigrid level
void mg_prolongate (comm_state& state,
int tl, int rl, int c, int ml, CCTK_REAL time);
+ void mg_prolongate_all (comm_state& state,
+ int tl, int rl, int ml, CCTK_REAL time);
// Restrict a refinement level
void ref_restrict (comm_state& state,
int tl, int rl, int c, int ml, CCTK_REAL time);
+ void ref_restrict_all (comm_state& state,
+ int tl, int rl, int ml, CCTK_REAL time);
// Prolongate a refinement level
void ref_prolongate (comm_state& state,
int tl, int rl, int c, int ml, CCTK_REAL time);
+ void ref_prolongate_all (comm_state& state,
+ int tl, int rl, int ml, CCTK_REAL time);
@@ -156,6 +168,13 @@ protected:
vector<int> const & tl2s, int rl2, int ml2,
CCTK_REAL const & time,
mdata * srcstorage = 0);
+ void
+ transfer_from_all (comm_state & state,
+ int tl1, int rl1, int ml1,
+ srpvect const dh::dboxes::* sendrecvs,
+ vector<int> const & tl2s, int rl2, int ml2,
+ CCTK_REAL const & time,
+ mdata * srcstorage = 0);
void
transfer_from (comm_state & state,
@@ -175,6 +194,23 @@ protected:
time,
srcstorage);
}
+ void
+ transfer_from_all (comm_state & state,
+ int tl1, int rl1, int ml1,
+ srpvect const dh::dboxes::* sendrecvs,
+ int tl2, int rl2, int ml2,
+ mdata * srcstorage = 0)
+ {
+ vector <int> tl2s(1);
+ tl2s.AT(0) = tl2;
+ CCTK_REAL const time = t.time (tl2,rl2,ml2);
+ transfer_from_all (state,
+ tl1, rl1, ml1,
+ sendrecvs,
+ tl2s, rl2, ml2,
+ time,
+ srcstorage);
+ }