From a9bd55ce62a101ce9c2c8c40b9fda40ff894a250 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Thu, 3 Sep 2009 16:19:15 -0500 Subject: Import Carpet Ignore-this: 309b4dd613f4af2b84aa5d6743fdb6b3 --- Carpet/CarpetLib/README | 6 +- Carpet/CarpetLib/interface.ccl | 3 + Carpet/CarpetLib/param.ccl | 61 +- Carpet/CarpetLib/schedule.ccl | 4 +- Carpet/CarpetLib/src/bbox.cc | 58 +- Carpet/CarpetLib/src/bbox.hh | 19 +- Carpet/CarpetLib/src/bboxset.cc | 51 +- Carpet/CarpetLib/src/bboxset.hh | 102 +- Carpet/CarpetLib/src/commstate.cc | 710 ++++++----- Carpet/CarpetLib/src/commstate.hh | 135 ++- Carpet/CarpetLib/src/copy_3d.cc | 2 +- Carpet/CarpetLib/src/copy_4d.cc | 142 +++ Carpet/CarpetLib/src/data.cc | 418 +++++-- Carpet/CarpetLib/src/data.hh | 30 +- Carpet/CarpetLib/src/defs.cc | 163 ++- Carpet/CarpetLib/src/defs.hh | 302 +++-- Carpet/CarpetLib/src/dh.cc | 1232 +++++++++++++++----- Carpet/CarpetLib/src/dh.hh | 123 +- Carpet/CarpetLib/src/dist.cc | 212 +++- Carpet/CarpetLib/src/dist.hh | 295 +++-- Carpet/CarpetLib/src/fulltree.cc | 46 +- Carpet/CarpetLib/src/fulltree.hh | 4 +- Carpet/CarpetLib/src/gdata.cc | 198 ++-- Carpet/CarpetLib/src/gdata.hh | 43 +- Carpet/CarpetLib/src/gf.cc | 12 +- Carpet/CarpetLib/src/gf.hh | 12 +- Carpet/CarpetLib/src/ggf.cc | 174 ++- Carpet/CarpetLib/src/ggf.hh | 22 +- Carpet/CarpetLib/src/gh.cc | 136 ++- Carpet/CarpetLib/src/gh.hh | 51 +- Carpet/CarpetLib/src/interpolate_3d_2tl.cc | 2 +- Carpet/CarpetLib/src/interpolate_3d_3tl.cc | 2 +- Carpet/CarpetLib/src/interpolate_3d_4tl.cc | 2 +- Carpet/CarpetLib/src/interpolate_3d_5tl.cc | 2 +- Carpet/CarpetLib/src/interpolate_eno_3d_3tl.cc | 2 +- Carpet/CarpetLib/src/limits.cc | 95 ++ Carpet/CarpetLib/src/limits.hh | 8 + Carpet/CarpetLib/src/make.code.defn | 12 +- Carpet/CarpetLib/src/mem.cc | 101 +- Carpet/CarpetLib/src/mem.hh | 11 +- Carpet/CarpetLib/src/mpi_string.cc | 480 ++++++++ Carpet/CarpetLib/src/mpi_string.hh | 55 + Carpet/CarpetLib/src/operator_prototypes.hh | 241 ---- 
Carpet/CarpetLib/src/operator_prototypes_3d.hh | 305 +++++ Carpet/CarpetLib/src/operator_prototypes_4d.hh | 92 ++ Carpet/CarpetLib/src/operators.hh | 4 +- Carpet/CarpetLib/src/prolongate_3d_cc_o0_rf2.cc | 320 +++++ Carpet/CarpetLib/src/prolongate_3d_cc_o1_rf2.cc | 390 +++++++ Carpet/CarpetLib/src/prolongate_3d_cc_o2_rf2.cc | 545 +++++++++ Carpet/CarpetLib/src/prolongate_3d_cc_rf2.cc | 45 +- Carpet/CarpetLib/src/prolongate_3d_o11_rf2.cc | 44 +- Carpet/CarpetLib/src/prolongate_3d_o1_rf2.cc | 44 +- Carpet/CarpetLib/src/prolongate_3d_o3_rf2.cc | 46 +- .../CarpetLib/src/prolongate_3d_o5_monotone_rf2.cc | 851 ++++++++++++++ Carpet/CarpetLib/src/prolongate_3d_o5_rf2.cc | 44 +- Carpet/CarpetLib/src/prolongate_3d_o7_rf2.cc | 44 +- Carpet/CarpetLib/src/prolongate_3d_o9_rf2.cc | 44 +- Carpet/CarpetLib/src/prolongate_4d_o1_rf2.cc | 602 ++++++++++ Carpet/CarpetLib/src/region.cc | 99 +- Carpet/CarpetLib/src/region.hh | 50 +- Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc | 2 +- Carpet/CarpetLib/src/restrict_3d_rf2.cc | 2 +- Carpet/CarpetLib/src/restrict_4d_rf2.cc | 141 +++ Carpet/CarpetLib/src/startup_time.cc | 82 ++ Carpet/CarpetLib/src/startup_time.hh | 6 + Carpet/CarpetLib/src/th.cc | 27 +- Carpet/CarpetLib/src/th.hh | 15 +- Carpet/CarpetLib/src/timestat.cc | 175 ++- Carpet/CarpetLib/src/vect.cc | 39 +- Carpet/CarpetLib/src/vect.hh | 286 +++-- Carpet/CarpetLib/src/vect_helpers.hh | 184 +-- 71 files changed, 8382 insertions(+), 1925 deletions(-) create mode 100644 Carpet/CarpetLib/src/copy_4d.cc create mode 100644 Carpet/CarpetLib/src/limits.cc create mode 100644 Carpet/CarpetLib/src/limits.hh create mode 100644 Carpet/CarpetLib/src/mpi_string.cc create mode 100644 Carpet/CarpetLib/src/mpi_string.hh create mode 100644 Carpet/CarpetLib/src/operator_prototypes_3d.hh create mode 100644 Carpet/CarpetLib/src/operator_prototypes_4d.hh create mode 100644 Carpet/CarpetLib/src/prolongate_3d_cc_o0_rf2.cc create mode 100644 Carpet/CarpetLib/src/prolongate_3d_cc_o1_rf2.cc create mode 100644 
Carpet/CarpetLib/src/prolongate_3d_cc_o2_rf2.cc create mode 100644 Carpet/CarpetLib/src/prolongate_3d_o5_monotone_rf2.cc create mode 100644 Carpet/CarpetLib/src/prolongate_4d_o1_rf2.cc create mode 100644 Carpet/CarpetLib/src/restrict_4d_rf2.cc create mode 100644 Carpet/CarpetLib/src/startup_time.cc create mode 100644 Carpet/CarpetLib/src/startup_time.hh (limited to 'Carpet/CarpetLib') diff --git a/Carpet/CarpetLib/README b/Carpet/CarpetLib/README index 82e529534..6e2e17abc 100644 --- a/Carpet/CarpetLib/README +++ b/Carpet/CarpetLib/README @@ -1,7 +1,9 @@ Cactus Code Thorn CarpetLib -Authors : Erik Schnetter +Author(s) : Erik Schnetter +Maintainer(s): Erik Schnetter +Licence : GPLv2+ -------------------------------------------------------------------------- -Purpose of the thorn: +1. Purpose This thorn contains the backend library that provides mesh refinement. diff --git a/Carpet/CarpetLib/interface.ccl b/Carpet/CarpetLib/interface.ccl index 6e7462678..c88cf032d 100644 --- a/Carpet/CarpetLib/interface.ccl +++ b/Carpet/CarpetLib/interface.ccl @@ -2,6 +2,8 @@ IMPLEMENTS: CarpetLib +includes header: mpi_string.hh in mpi_string.hh + includes header: defs.hh in defs.hh includes header: dist.hh in dist.hh includes header: typeprops.hh in typeprops.hh @@ -28,6 +30,7 @@ includes header: th.hh in th.hh includes header: operators.hh in operators.hh uses include header: carpet_typecase.hh +uses include header: CarpetTimers.hh diff --git a/Carpet/CarpetLib/param.ccl b/Carpet/CarpetLib/param.ccl index 45724f25d..31a43a135 100644 --- a/Carpet/CarpetLib/param.ccl +++ b/Carpet/CarpetLib/param.ccl @@ -41,19 +41,48 @@ BOOLEAN poison_new_memory "Try to catch uninitialised data by setting newly allo { } "no" +RESTRICTED: + CCTK_INT poison_value "Integer value (0..255) used to poison new timelevels (with memset)" STEERABLE=always { 0:255 :: "Must fit into a byte. Use 0 for zero, 255 for nan, and e.g. 113 for a large value." 
} 255 +CCTK_INT deadbeef "A strange integer value that indicates that something has gone wrong; the integer equivalent of a nan" STEERABLE=always +{ + *:* :: "should be large and positive" +} 666 # 7353315 + +PRIVATE: + +# System limits + +INT max_core_size_MB "Maximum size of a core file, set via setrlimit" STEERABLE=recover +{ + -2 :: "unchanged" + -1 :: "unlimited" + 0:* :: "limited" +} -2 + +INT max_memory_size_MB "Maximum amount of memory per MPI process, set via setrlimit" STEERABLE=recover +{ + -2 :: "unchanged" + -1 :: "unlimited" + 0:* :: "limited" +} -2 + + + +# Statistics + INT print_timestats_every "Print timing statistics periodically" STEERABLE=always { -1 :: "don't report" - 0 :: "don't report" + 0 :: "report after initialisation" 1:* :: "report every so many iterations" -} 0 +} -1 STRING timestat_file "File name in which timestat output is collected (because stdout from the root node may not be enough)" STEERABLE=always { @@ -70,7 +99,7 @@ INT print_memstats_every "Report periodically how much memory is used per proces 1:* :: "report every so many iterations" } 0 -INT max_allowed_memory_MB "Maximum allowed amount of memory per process (in Megabytes)" STEERABLE=always +INT max_allowed_memory_MB "Maximum allowed amount of memory per process that can be allocated for grid variables (in Megabytes)" STEERABLE=always { -1 :: "no maximum" 0 :: "no maximum" @@ -85,21 +114,29 @@ STRING memstat_file "File name in which memstat output is collected (because std -# Experimental recomposing parameters - BOOLEAN combine_recompose "Recompose all grid functions of one refinement levels at once" STEERABLE=always { -} "no" +} "yes" -# Experimental communication parameters +# Communication experiment parameters -BOOLEAN interleave_communications "Try to interleave communications with each other; each processor begins to communicate with its 'right neighbour' in rank, instead of with the root processor" STEERABLE=always +INT message_size_multiplier "Enlarge size of 
transmitted messages by this factor" STEERABLE=always { -} "no" + 1:* :: "" +} 1 + +INT message_count_multiplier "Transmit messages this many times" STEERABLE=always +{ + 1:* :: "" +} 1 -BOOLEAN vary_tags "Use different tags for each communication" STEERABLE=always + + +# Experimental communication parameters + +BOOLEAN interleave_communications "Try to interleave communications with each other; each processor begins to communicate with its 'right neighbour' in rank, instead of with the root processor" STEERABLE=always { } "no" @@ -107,11 +144,11 @@ BOOLEAN barrier_between_stages "Add a barrier between the communication stages ( { } "no" -BOOLEAN combine_sends "Send data together and in order of processor ranks" STEERABLE=always +BOOLEAN check_communication_schedule "Check the communication schedule at run time (expensive)" STEERABLE=always { } "no" -BOOLEAN reduce_mpi_waitall "Call MPI_Waitall only for requests that are not null" STEERABLE=always +BOOLEAN combine_sends "Send data together and in order of processor ranks" STEERABLE=always { } "no" diff --git a/Carpet/CarpetLib/schedule.ccl b/Carpet/CarpetLib/schedule.ccl index d371bc7a5..a208c97ca 100644 --- a/Carpet/CarpetLib/schedule.ccl +++ b/Carpet/CarpetLib/schedule.ccl @@ -1,10 +1,10 @@ # Schedule definitions for thorn CarpetLib -SCHEDULE CarpetLib_setmemlimit AT WRAGH +SCHEDULE CarpetLib_registercycleclock AT startup BEFORE Driver_Startup { LANG: C OPTIONS: global -} "Set operating system memory limit" +} "Register cycle based timer" SCHEDULE CarpetLib_printtimestats AT analysis { diff --git a/Carpet/CarpetLib/src/bbox.cc b/Carpet/CarpetLib/src/bbox.cc index a14e1e9a3..164d63884 100644 --- a/Carpet/CarpetLib/src/bbox.cc +++ b/Carpet/CarpetLib/src/bbox.cc @@ -1,4 +1,4 @@ - +#include #include #include #include @@ -6,7 +6,8 @@ #include #include -#include "cctk.h" +#include +#include #include "defs.hh" #include "vect.hh" @@ -21,8 +22,8 @@ using namespace std; template void bbox::assert_bbox_limits () const { - 
assert (all(_stride>T(0))); - assert (all((_upper-_lower)%_stride == T(0))); + ASSERT_BBOX (all(_stride>T(0))); + ASSERT_BBOX (all((_upper-_lower)%_stride == T(0))); if (numeric_limits::is_integer) { // prevent accidental wrap-around if (any (_lower >= numeric_limits::max() / 2) or @@ -30,29 +31,34 @@ void bbox::assert_bbox_limits () const any (_upper >= numeric_limits::max() / 2) or any (_upper <= numeric_limits::min() / 2)) { - ostringstream lbuf, ubuf, sbuf; - lbuf << _lower; - ubuf << _upper; - sbuf << _stride; - string const lstr = lbuf.str(); - string const ustr = ubuf.str(); - string const sstr = sbuf.str(); - CCTK_VWarn (CCTK_WARN_ABORT, __LINE__, __FILE__, CCTK_THORNSTRING, - "Tried to create a very large bbox [%s,%s,%s] of type %s -- it is likely that this would lead to an integer overflow", - lstr.c_str(), ustr.c_str(), sstr.c_str(), - typeid(*this).name()); + ostringstream buf; + T dummy; + buf << "Tried to create a very large bbox [" << _lower << "," << _upper << "," << _stride << "] for the type " << typeid(dummy).name() << " -- it is likely that this would lead to an integer overflow"; + CCTK_WARN (CCTK_WARN_ABORT, buf.str().c_str()); } } } +// Poison +template +bbox bbox::poison () +{ + DECLARE_CCTK_PARAMETERS; + + vect const v (deadbeef); + return bbox (v, v, v); +} + + + // Accessors template typename bbox::size_type bbox::size () const { if (empty()) return 0; const vect sh(shape()/stride()); -#ifdef NDEBUG +#ifndef CARPET_DEBUG return prod(vect(sh)); #else size_type sz = 1, max = numeric_limits::max(); @@ -103,7 +109,7 @@ bool bbox::is_aligned_with (const bbox& b) const { template bool bbox::operator== (const bbox& b) const { if (empty() and b.empty()) return true; - assert (all(stride()==b.stride())); + ASSERT_BBOX (all(stride()==b.stride())); return all(lower()==b.lower() and upper()==b.upper()); } @@ -160,8 +166,8 @@ bool bbox::operator> (const bbox& b) const { template bbox bbox::expand (const vect& lo, const vect& hi) const { // Allow 
expansion only into directions where the extent is not negative - // assert (all(lower()<=upper() or (lo==T(0) and hi==T(0)))); - assert (all(shape()>=vect(0) or (lo==T(0) and hi==T(0)))); + // ASSERT_BBOX (all(lower()<=upper() or (lo==T(0) and hi==T(0)))); + ASSERT_BBOX (all(shape()>=vect(0) or (lo==T(0) and hi==T(0)))); const vect str = stride(); const vect lb = lower() - lo * str; const vect ub = upper() + hi * str; @@ -197,7 +203,7 @@ template bbox bbox::expanded_containing (const bbox& b) const { if (empty()) return b; if (b.empty()) return *this; - assert (is_aligned_with(b)); + ASSERT_BBOX (is_aligned_with(b)); const vect lo = min(lower(), b.lower()); const vect up = max(upper(), b.upper()); const vect str = min(stride(), b.stride()); @@ -268,12 +274,13 @@ void bbox::input (istream& is) { consume (is, '/'); size_type size_dummy; is >> size_dummy; - assert (is.good()); + ASSERT_BBOX (is.good()); skipws (is); } consume (is, ')'); } catch (input_error &err) { - cout << "Input error while reading a bbox" << endl; + T Tdummy; + cout << "Input error while reading a bbox<" << typestring(Tdummy) << "," << D << ">" << endl; throw err; } if (any(_stride<=T(0))) { @@ -286,8 +293,8 @@ void bbox::input (istream& is) { << " The stride does not evenly divide the extent." 
<< endl; throw input_error(); } - assert (all(_stride>T(0))); - assert (all((_upper-_lower)%_stride == T(0))); + ASSERT_BBOX (all(_stride>T(0))); + ASSERT_BBOX (all((_upper-_lower)%_stride == T(0))); } @@ -308,4 +315,5 @@ template class bbox; template class bbox; template class bbox; template class bbox; -template class bbox; +template class bbox; +template class bbox; diff --git a/Carpet/CarpetLib/src/bbox.hh b/Carpet/CarpetLib/src/bbox.hh index a79fbe87b..8386f58a2 100644 --- a/Carpet/CarpetLib/src/bbox.hh +++ b/Carpet/CarpetLib/src/bbox.hh @@ -13,6 +13,14 @@ using namespace std; +#ifdef CARPET_DEBUG +# define ASSERT_BBOX(x) assert(x) +#else +# define ASSERT_BBOX(x) +#endif + + + // Forward declaration template class bbox; @@ -69,11 +77,14 @@ public: const vect& stride_) : _lower(lower_), _upper(upper_), _stride(stride_) { -#ifndef NDEBUG +#ifndef CARPET_DEBUG assert_bbox_limits(); #endif } + // Poison + static bbox poison (); + // Accessors // (Don't return references; *this might be a temporary) @@ -131,7 +142,7 @@ public: bbox b. 
*/ bbox operator& (const bbox& b) const { - assert (all(stride()==b.stride())); + ASSERT_BBOX (all(stride()==b.stride())); vect lo = max(lower(),b.lower()); vect up = min(upper(),b.upper()); return bbox(lo,up,stride()); @@ -179,7 +190,7 @@ public: iterator end () const; // Memory usage - size_t memory () const + size_t memory () const CCTK_ATTRIBUTE_CONST { return memoryof (_lower) + memoryof (_upper) + memoryof (_stride); } @@ -193,6 +204,8 @@ public: // Memory usage +template +inline size_t memoryof (bbox const & b) CCTK_ATTRIBUTE_CONST; template inline size_t memoryof (bbox const & b) { return b.memory(); } diff --git a/Carpet/CarpetLib/src/bboxset.cc b/Carpet/CarpetLib/src/bboxset.cc index eceb264d8..a5748a7c6 100644 --- a/Carpet/CarpetLib/src/bboxset.cc +++ b/Carpet/CarpetLib/src/bboxset.cc @@ -51,6 +51,11 @@ bboxset::bboxset (const vector >& vlb) { normalize(); } +template +bboxset bboxset::poison () { + return bboxset (bbox::poison()); +} + // Invariant @@ -455,16 +460,54 @@ bool bboxset::operator!= (const bboxset& s) const { +// Input +template +istream& bboxset::input (istream& is) { + T Tdummy; + try { + skipws (is); + consume (is, "bboxset<"); + consume (is, typestring(Tdummy)); + consume (is, ","); + int D_; + is >> D_; + if (D_ != D) { + cout << "Input error: Wrong bboxset dimension " << D_ << ", expected " << D << endl; + throw input_error(); + } + consume (is, ">:{"); + consume (is, "size="); + size_type size_; + is >> size_; + consume (is, ","); + consume (is, "setsize="); + int setsize_; + is >> setsize_; + consume (is, ","); + consume (is, "set="); + is >> bs; + consume (is, "}"); + } catch (input_error & err) { + cout << "Input error while reading a bboxset<" << typestring(Tdummy) << "," << D << ">" << endl; + throw err; + } + return is; +} + + + // Output template -void bboxset::output (ostream& os) const { +ostream& bboxset::output (ostream& os) const { T Tdummy; - os << "bboxset<" << typestring(Tdummy) << "," << D << ">:" + os << "bboxset<" << 
typestring(Tdummy) << "," << D << ">:{" << "size=" << size() << "," << "setsize=" << setsize() << "," - << "set=" << bs; + << "set=" << bs + << "}"; + return os; } -template class bboxset; +template class bboxset; diff --git a/Carpet/CarpetLib/src/bboxset.hh b/Carpet/CarpetLib/src/bboxset.hh index 45e0dc0f2..5d206da33 100644 --- a/Carpet/CarpetLib/src/bboxset.hh +++ b/Carpet/CarpetLib/src/bboxset.hh @@ -28,6 +28,10 @@ template class bboxset; // template // bboxset operator- (const bbox& b, const bboxset& s); +// Input +template +istream& operator>> (istream& is, bboxset& s); + // Output template ostream& operator<< (ostream& os, const bboxset& s); @@ -60,6 +64,8 @@ public: bboxset (const list& lb); bboxset (const vector >& vlb); + static bboxset poison (); + // Invariant bool invariant () const; @@ -143,10 +149,13 @@ public: // iterator end () const { return bs.end(); } // Memory usage - size_t memory () const { return memoryof (bs); } + size_t memory () const CCTK_ATTRIBUTE_PURE { return memoryof (bs); } + + // Input + istream& input (istream& is); // Output - void output (ostream& os) const; + ostream& output (ostream& os) const; }; @@ -186,64 +195,112 @@ inline bboxset operator& (const bbox& b, const bboxset& s) { template -inline bool operator== (const bbox& b, const bboxset& s) { +inline bool operator== (const bbox& b, const bboxset& s) + CCTK_ATTRIBUTE_PURE; +template +inline bool operator== (const bbox& b, const bboxset& s) +{ return bboxset(b) == s; } template -inline bool operator!= (const bbox& b, const bboxset& s) { +inline bool operator!= (const bbox& b, const bboxset& s) + CCTK_ATTRIBUTE_PURE; +template +inline bool operator!= (const bbox& b, const bboxset& s) +{ return bboxset(b) != s; } template -inline bool operator< (const bbox& b, const bboxset& s) { +inline bool operator< (const bbox& b, const bboxset& s) + CCTK_ATTRIBUTE_PURE; +template +inline bool operator< (const bbox& b, const bboxset& s) +{ return bboxset(b) < s; } template -inline bool 
operator<= (const bbox& b, const bboxset& s) { +inline bool operator<= (const bbox& b, const bboxset& s) + CCTK_ATTRIBUTE_PURE; +template +inline bool operator<= (const bbox& b, const bboxset& s) +{ return bboxset(b) <= s; } template -inline bool operator> (const bbox& b, const bboxset& s) { +inline bool operator> (const bbox& b, const bboxset& s) + CCTK_ATTRIBUTE_PURE; +template +inline bool operator> (const bbox& b, const bboxset& s) +{ return bboxset(b) > s; } template -inline bool operator>= (const bbox& b, const bboxset& s) { +inline bool operator>= (const bbox& b, const bboxset& s) + CCTK_ATTRIBUTE_PURE; +template +inline bool operator>= (const bbox& b, const bboxset& s) +{ return bboxset(b) >= s; } template -inline bool operator== (const bboxset& s, const bbox& b) { +inline bool operator== (const bboxset& s, const bbox& b) + CCTK_ATTRIBUTE_PURE; +template +inline bool operator== (const bboxset& s, const bbox& b) +{ return s == bboxset(b); } template -inline bool operator!= (const bboxset& s, const bbox& b) { +inline bool operator!= (const bboxset& s, const bbox& b) + CCTK_ATTRIBUTE_PURE; +template +inline bool operator!= (const bboxset& s, const bbox& b) +{ return s != bboxset(b); } template -inline bool operator< (const bboxset& s, const bbox& b) { +inline bool operator< (const bboxset& s, const bbox& b) + CCTK_ATTRIBUTE_PURE; +template +inline bool operator< (const bboxset& s, const bbox& b) +{ return s < bboxset(b); } template -inline bool operator<= (const bboxset& s, const bbox& b) { +inline bool operator<= (const bboxset& s, const bbox& b) + CCTK_ATTRIBUTE_PURE; +template +inline bool operator<= (const bboxset& s, const bbox& b) +{ return s <= bboxset(b); } template -inline bool operator> (const bboxset& s, const bbox& b) { +inline bool operator> (const bboxset& s, const bbox& b) + CCTK_ATTRIBUTE_PURE; +template +inline bool operator> (const bboxset& s, const bbox& b) +{ return s > bboxset(b); } template -inline bool operator>= (const bboxset& s, const 
bbox& b) { +inline bool operator>= (const bboxset& s, const bbox& b) + CCTK_ATTRIBUTE_PURE; +template +inline bool operator>= (const bboxset& s, const bbox& b) +{ return s >= bboxset(b); } @@ -251,15 +308,26 @@ inline bool operator>= (const bboxset& s, const bbox& b) { // Memory usage template -inline size_t memoryof (bboxset const & s) { return s.memory(); } +inline size_t memoryof (bboxset const & s) + CCTK_ATTRIBUTE_PURE; +template +inline size_t memoryof (bboxset const & s) +{ return s.memory(); } + + + +// Input +template +inline istream& operator>> (istream& is, bboxset& s) { + return s.input(is); +} // Output template inline ostream& operator<< (ostream& os, const bboxset& s) { - s.output(os); - return os; + return s.output(os); } diff --git a/Carpet/CarpetLib/src/commstate.cc b/Carpet/CarpetLib/src/commstate.cc index 7a00157c2..ef1b64cdb 100644 --- a/Carpet/CarpetLib/src/commstate.cc +++ b/Carpet/CarpetLib/src/commstate.cc @@ -21,241 +21,463 @@ using namespace CarpetLib; +char const * tostring (astate const & thestate) +{ + switch (thestate) { + case state_get_buffer_sizes: return "state_get_buffer_sizes"; + case state_fill_send_buffers: return "state_fill_send_buffers"; + case state_do_some_work: return "state_do_some_work"; + case state_empty_recv_buffers: return "state_empty_recv_buffers"; + case state_done: return "state_done"; + default: assert(0); abort(); + } + return NULL; +} + + + // Communication state control comm_state::comm_state () { - // A comm_state object will step through - // state_get_buffer_sizes - // state_fill_send_buffers - // state_empty_recv_buffers - DECLARE_CCTK_PARAMETERS; - + static Timer timer ("commstate::create"); timer.start (); thestate = state_get_buffer_sizes; - + typebufs.resize (dist::c_ndatatypes()); -#define INSTANTIATE(T) \ - { \ - T dummy; \ - int const type = dist::c_datatype (dummy); \ - assert (typebufs.AT(type).datatypesize == 0); \ - typebufs.AT(type).datatypesize = sizeof dummy; \ - 
typebufs.AT(type).mpi_datatype = dist::datatype (dummy); \ - typebufs.AT(type).procbufs.resize (dist::size()); \ +#define INSTANTIATE(T) \ + { \ + T dummy; \ + unsigned const type = dist::c_datatype (dummy); \ + typebufs.AT(type).mpi_datatype = dist::mpi_datatype (dummy); \ + typebufs.AT(type).datatypesize = sizeof dummy; \ } #include "instantiate" #undef INSTANTIATE - - srequests.resize (dist::c_ndatatypes() * dist::size(), MPI_REQUEST_NULL); - rrequests.resize (dist::c_ndatatypes() * dist::size(), MPI_REQUEST_NULL); + + srequests.reserve (dist::c_ndatatypes() * dist::size()); + rrequests.reserve (dist::c_ndatatypes() * dist::size()); timer.stop (0); } + void comm_state::step () { DECLARE_CCTK_PARAMETERS; static Timer total ("commstate::step"); total.start (); - assert (thestate != state_done); + + if (barrier_between_stages) { + // Add a barrier, ensuring e.g. that all Irecvs are posted before + // the first Isends are made + if (commstate_verbose) { + CCTK_VInfo (CCTK_THORNSTRING, + "before MPI_Barrier; state=%s", tostring(thestate)); + } + MPI_Barrier (dist::comm()); + if (commstate_verbose) { + CCTK_INFO ("after MPI_Barrier"); + } + } + switch (thestate) { - case state_get_buffer_sizes: + + + case state_get_buffer_sizes: { + + if (check_communication_schedule) { + vector sendcount(dist::size() * dist::c_ndatatypes()); + for (unsigned type = 0; type < dist::c_ndatatypes(); ++ type) { + for (int proc = 0; proc < dist::size(); ++ proc) { + sendcount.AT(proc * dist::c_ndatatypes() + type) = + typebufs.AT(type).in_use ? 
+ typebufs.AT(type).procbufs.AT(proc).sendbufsize : + 0; + } + assert (sendcount.AT(dist::rank() * dist::c_ndatatypes() + type) == 0); + } + vector recvcount(dist::size() * dist::c_ndatatypes()); + if (commstate_verbose) { + CCTK_INFO ("before MPI_Alltoall"); + } + MPI_Alltoall (&sendcount.front(), dist::c_ndatatypes(), MPI_INT, + &recvcount.front(), dist::c_ndatatypes(), MPI_INT, + dist::comm()); + if (commstate_verbose) { + CCTK_INFO ("after MPI_Alltoall"); + } + for (unsigned type = 0; type < dist::c_ndatatypes(); ++ type) { + for (int proc = 0; proc < dist::size(); ++ proc) { + assert (recvcount.AT(proc * dist::c_ndatatypes() + type) == + (typebufs.AT(type).in_use ? + int (typebufs.AT(type).procbufs.AT(proc).recvbufsize) : + 0)); + } + assert (recvcount.AT(dist::rank() * dist::c_ndatatypes() + type) == 0); + } + } + // The sizes of the collective communication buffers are known so // now allocate them. // The receive operations are also posted here already (a clever // MPI layer may take advantage of such early posting). - num_posted_recvs = num_completed_recvs = 0; - for (int proc1 = 0; proc1 < dist::size(); ++ proc1) { - size_t const proc = - interleave_communications - ? 
(proc1 + dist::rank()) % dist::size() - : proc1; - - for (size_t type = 0; type < typebufs.size(); type++) { + for (unsigned type = 0; type < dist::c_ndatatypes(); ++ type) { + if (typebufs.AT(type).in_use) { - // skip unused datatype buffers - if (not typebufs.AT(type).in_use) continue; - - int datatypesize = typebufs.AT(type).datatypesize; - procbufdesc& procbuf = typebufs.AT(type).procbufs.AT(proc); - - assert (procbuf.sendbufbase.empty()); - assert (procbuf.recvbufbase.empty()); - procbuf.sendbufbase.resize (procbuf.sendbufsize*datatypesize); - procbuf.recvbufbase.resize (procbuf.recvbufsize*datatypesize); - // TODO: this may be a bit extreme, and it is only for - // internal consistency checking - if (poison_new_memory) { - memset (&procbuf.sendbufbase.front(), poison_value, - procbuf.sendbufsize*datatypesize); - memset (&procbuf.recvbufbase.front(), poison_value, - procbuf.recvbufsize*datatypesize); - } - procbuf.sendbuf = &procbuf.sendbufbase.front(); - procbuf.recvbuf = &procbuf.recvbufbase.front(); - - if (procbuf.recvbufsize > 0) { - static Timer timer ("commstate_sizes_irecv"); - timer.start (); - int const tag = - vary_tags - ? (dist::rank() + dist::size() * (proc + dist::size() * type)) % 32768 - : type; - if (commstate_verbose) { - CCTK_VInfo (CCTK_THORNSTRING, - "About to MPI_Irecv from %d", (int)proc); + for (int proc1 = 0; proc1 < dist::size(); ++ proc1) { + int const proc = + interleave_communications ? 
+ (proc1 + dist::rank()) % dist::size() : + proc1; + + int const datatypesize = typebufs.AT(type).datatypesize; + procbufdesc & procbuf = typebufs.AT(type).procbufs.AT(proc); + + assert (procbuf.sendbufbase.empty()); + assert (procbuf.recvbufbase.empty()); + procbuf.sendbufbase.resize + (procbuf.sendbufsize * datatypesize * message_size_multiplier); + procbuf.recvbufbase.resize + (procbuf.recvbufsize * datatypesize * message_size_multiplier); + // TODO: this may be a bit extreme, and it is only for + // internal consistency checking + if (poison_new_memory) { + memset (&procbuf.sendbufbase.front(), poison_value, + procbuf.sendbufsize * datatypesize * message_size_multiplier); + memset (&procbuf.recvbufbase.front(), poison_value, + procbuf.recvbufsize * datatypesize * message_size_multiplier); } - MPI_Irecv (&procbuf.recvbufbase.front(), procbuf.recvbufsize, - typebufs.AT(type).mpi_datatype, proc, tag, - dist::comm(), &rrequests.AT(dist::size()*type + proc)); - if (commstate_verbose) { - CCTK_INFO ("Finished MPI_Irecv"); + procbuf.sendbuf = &procbuf.sendbufbase.front(); + procbuf.recvbuf = &procbuf.recvbufbase.front(); + + if (procbuf.recvbufsize > 0) { + static Timer timer ("commstate::sizes_irecv"); + timer.start (); + int const tag = type; + if (commstate_verbose) { + CCTK_VInfo (CCTK_THORNSTRING, + "About to MPI_Irecv from processor %d for type %s", + proc, dist::c_datatype_name(type)); + } + MPI_Irecv (&procbuf.recvbufbase.front(), + procbuf.recvbufsize * message_size_multiplier, + typebufs.AT(type).mpi_datatype, proc, tag, + dist::comm(), &push_back(rrequests)); + if (commstate_verbose) { + CCTK_INFO ("Finished MPI_Irecv"); + } + assert (not procbuf.did_post_recv); + procbuf.did_post_recv = true; + timer.stop (procbuf.recvbufsize * datatypesize); } - timer.stop (procbuf.recvbufsize * datatypesize); - num_posted_recvs++; - } + + } // for proc + } - } + } // for type - if (barrier_between_stages) { - // Add a barrier, to try to ensure that all Irecvs are posted 
- // before the first Isends are made - // (Alternative: Use MPI_Alltoallv instead) - MPI_Barrier (dist::comm()); + if (check_communication_schedule) { + for (unsigned type = 0; type < dist::c_ndatatypes(); ++ type) { + if (typebufs.AT(type).in_use) { + for (int proc = 0; proc < dist::size(); ++ proc) { + procbufdesc const & procbuf = typebufs.AT(type).procbufs.AT(proc); + assert (procbuf.did_post_recv == (procbuf.recvbufsize > 0)); + } + } + } } - // Now go and get the send buffers filled with data. - // Once a buffer is full it will be posted right away - // (see gdata::copy_into_sendbuffer() and - // gdata::interpolate_into_sendbuffer()). thestate = state_fill_send_buffers; break; + } + - case state_fill_send_buffers: + + case state_fill_send_buffers: { if (combine_sends) { - // Send the data. Do not send them sequentially, but try to - // intersperse the communications - for (int proc1 = 0; proc1 < dist::size(); ++ proc1) { - int const proc = - interleave_communications - ? (proc1 + dist::size() - dist::rank()) % dist::size() - : proc1; - - for (size_t type = 0; type < typebufs.size(); type++) { - // skip unused datatype buffers - if (not typebufs.AT(type).in_use) continue; + for (unsigned type = 0; type < dist::c_ndatatypes(); ++ type) { + if (typebufs.AT(type).in_use) { - int const datatypesize = typebufs.AT(type).datatypesize; - procbufdesc const & procbuf = typebufs.AT(type).procbufs.AT(proc); - - size_t const fillstate = - procbuf.sendbuf - &procbuf.sendbufbase.front(); - assert (fillstate == procbuf.sendbufsize * datatypesize); - - if (procbuf.sendbufsize > 0) { - int const tag = - vary_tags - ? 
(proc + dist::size() * (dist::rank() + dist::size() * type)) % 32768 - : type; - if (use_mpi_send) { - // use MPI_Send - static Timer timer ("commstate_send"); - timer.start (); - if (commstate_verbose) { - CCTK_VInfo (CCTK_THORNSTRING, - "About to MPI_Send to %d", (int)proc); + for (int proc1 = 0; proc1 < dist::size(); ++ proc1) { + int const proc = + interleave_communications + ? (proc1 + dist::size() - dist::rank()) % dist::size() + : proc1; + + procbufdesc & procbuf = typebufs.AT(type).procbufs.AT(proc); + if (procbuf.sendbufsize > 0) { + + int const datatypesize = typebufs.AT(type).datatypesize; + + size_t const fillstate = + procbuf.sendbuf - &procbuf.sendbufbase.front(); + assert (fillstate == procbuf.sendbufsize * datatypesize); + + // Enlarge messages for performance testing + if (message_size_multiplier > 1) { + size_t const nbytes = + procbuf.sendbufsize * datatypesize * + (message_size_multiplier - 1); +#warning "TODO" + // memset (procbuf.sendbuf, poison_value, nbytes); + memset (procbuf.sendbuf, 0, nbytes); } - MPI_Send (const_cast(&procbuf.sendbufbase.front()), - procbuf.sendbufsize, - typebufs.AT(type).mpi_datatype, proc, tag, - dist::comm()); - if (commstate_verbose) { - CCTK_INFO ("Finished MPI_Send"); + + int const tag = type; + if (use_mpi_send) { + // use MPI_Send + static Timer timer ("commstate::send"); + timer.start (); + if (commstate_verbose) { + CCTK_VInfo (CCTK_THORNSTRING, + "About to MPI_Send to processor %d for type %s", + proc, dist::c_datatype_name(type)); + } + MPI_Send (const_cast(&procbuf.sendbufbase.front()), + procbuf.sendbufsize * message_size_multiplier, + typebufs.AT(type).mpi_datatype, proc, tag, + dist::comm()); + assert (not procbuf.did_post_send); + procbuf.did_post_send = true; + if (commstate_verbose) { + CCTK_INFO ("Finished MPI_Send"); + } + timer.stop (procbuf.sendbufsize * datatypesize); + } else if (use_mpi_ssend) { + // use MPI_Ssend + static Timer timer ("commstate::ssend"); + timer.start (); + if 
(commstate_verbose) { + CCTK_VInfo (CCTK_THORNSTRING, + "About to MPI_Ssend to processor %d for type %s", + proc, dist::c_datatype_name(type)); + } + MPI_Ssend (const_cast(&procbuf.sendbufbase.front()), + procbuf.sendbufsize * message_size_multiplier, + typebufs.AT(type).mpi_datatype, proc, tag, + dist::comm()); + assert (not procbuf.did_post_send); + procbuf.did_post_send = true; + if (commstate_verbose) { + CCTK_INFO ("Finished MPI_Ssend"); + } + timer.stop (procbuf.sendbufsize * datatypesize); + } else { + // use MPI_Isend + static Timer timer ("commstate::isend"); + timer.start (); + if (commstate_verbose) { + CCTK_VInfo (CCTK_THORNSTRING, + "About to MPI_Isend to processor %d for type %s", + proc, dist::c_datatype_name(type)); + } + MPI_Isend (const_cast(&procbuf.sendbufbase.front()), + procbuf.sendbufsize * message_size_multiplier, + typebufs.AT(type).mpi_datatype, proc, tag, + dist::comm(), &push_back(srequests)); + assert (not procbuf.did_post_send); + procbuf.did_post_send = true; + if (commstate_verbose) { + CCTK_INFO ("Finished MPI_Isend"); + } + timer.stop (procbuf.sendbufsize * datatypesize); } - srequests.AT(dist::size()*type + proc) = MPI_REQUEST_NULL; - timer.stop (procbuf.sendbufsize * datatypesize); - } else if (use_mpi_ssend) { - // use MPI_Ssend - static Timer timer ("commstate_ssend"); + + } + } // for proc + + } + } // for type + } // if combine_sends + + if (check_communication_schedule) { + for (unsigned type = 0; type < dist::c_ndatatypes(); ++ type) { + if (typebufs.AT(type).in_use) { + for (int proc = 0; proc < dist::size(); ++ proc) { + procbufdesc const & procbuf = typebufs.AT(type).procbufs.AT(proc); + assert (procbuf.did_post_send == (procbuf.sendbufsize > 0)); + } + } + } + } + + thestate = state_do_some_work; + break; + } + + + + case state_do_some_work: { + static Timer timer ("commstate::do_some_work::waitall"); + timer.start (); + if (commstate_verbose) { + CCTK_INFO ("About to MPI_Waitall"); + } + MPI_Waitall (rrequests.size(), 
&rrequests.front(), MPI_STATUSES_IGNORE); + if (commstate_verbose) { + CCTK_INFO ("Finished MPI_Waitall"); + } + timer.stop (0); + + thestate = state_empty_recv_buffers; + break; + } + + + + case state_empty_recv_buffers: { + static Timer timer ("commstate::empty_recv_buffers::waitall"); + timer.start (); + if (commstate_verbose) { + CCTK_INFO ("About to MPI_Waitall"); + } + MPI_Waitall (srequests.size(), &srequests.front(), MPI_STATUSES_IGNORE); + if (commstate_verbose) { + CCTK_INFO ("Finished MPI_Waitall"); + } + timer.stop (0); + + // Transfer messages again for performance testing + for (int n = 1; n < message_count_multiplier; ++ n) { + + srequests.clear(); + srequests.reserve (dist::c_ndatatypes() * dist::size()); + rrequests.clear(); + rrequests.reserve (dist::c_ndatatypes() * dist::size()); + + // Irecv + for (unsigned type = 0; type < dist::c_ndatatypes(); ++ type) { + if (typebufs.AT(type).in_use) { + + for (int proc1 = 0; proc1 < dist::size(); ++ proc1) { + int const proc = + interleave_communications ? 
+ (proc1 + dist::rank()) % dist::size() : + proc1; + + procbufdesc & procbuf = typebufs.AT(type).procbufs.AT(proc); + + if (procbuf.recvbufsize > 0) { + static Timer timer ("commstate::message_count_multiplier::irecv"); timer.start (); + int const tag = type; if (commstate_verbose) { CCTK_VInfo (CCTK_THORNSTRING, - "About to MPI_Ssend to %d", (int)proc); + "About to MPI_Irecv from processor %d for type %s", + proc, dist::c_datatype_name(type)); } - MPI_Ssend (const_cast(&procbuf.sendbufbase.front()), - procbuf.sendbufsize, + MPI_Irecv (&procbuf.recvbufbase.front(), + procbuf.recvbufsize * message_size_multiplier, typebufs.AT(type).mpi_datatype, proc, tag, - dist::comm()); + dist::comm(), &push_back(rrequests)); if (commstate_verbose) { - CCTK_INFO ("Finished MPI_Ssend"); + CCTK_INFO ("Finished MPI_Irecv"); } - srequests.AT(dist::size()*type + proc) = MPI_REQUEST_NULL; - timer.stop (procbuf.sendbufsize * datatypesize); - } else { - // use MPI_Isend - static Timer timer ("commstate_isend"); + timer.stop (procbuf.recvbufsize * typebufs.AT(type).datatypesize); + } + + } // for proc + + } + } // for type + + // Isend + for (unsigned type = 0; type < dist::c_ndatatypes(); ++ type) { + if (typebufs.AT(type).in_use) { + + for (int proc1 = 0; proc1 < dist::size(); ++ proc1) { + int const proc = + interleave_communications + ? 
(proc1 + dist::size() - dist::rank()) % dist::size() + : proc1; + + procbufdesc & procbuf = typebufs.AT(type).procbufs.AT(proc); + + if (procbuf.sendbufsize > 0) { + int const tag = type; + assert (not use_mpi_send); + assert (not use_mpi_ssend); + static Timer timer ("commstate::message_count_multiplier::isend"); timer.start (); if (commstate_verbose) { - CCTK_VWarn (3, __LINE__, __FILE__, CCTK_THORNSTRING, - "About to MPI_Isend to %d", (int)proc); + CCTK_VInfo (CCTK_THORNSTRING, + "About to MPI_Isend to processor %d for type %s", + proc, dist::c_datatype_name(type)); } MPI_Isend (const_cast(&procbuf.sendbufbase.front()), - procbuf.sendbufsize, + procbuf.sendbufsize * message_size_multiplier, typebufs.AT(type).mpi_datatype, proc, tag, - dist::comm(), &srequests.AT(dist::size()*type + proc)); + dist::comm(), &push_back(srequests)); if (commstate_verbose) { CCTK_INFO ("Finished MPI_Isend"); } - timer.stop (procbuf.sendbufsize * datatypesize); + timer.stop (procbuf.sendbufsize * typebufs.AT(type).datatypesize); } - } + + } // for proc - } // for type - - } // for proc - } + } + } // for type + + // Waitall + { + static Timer timer ("commstate::message_count_multiplier::waitall(irecv)"); + timer.start (); + if (commstate_verbose) { + CCTK_INFO ("About to MPI_Waitall"); + } + MPI_Waitall (rrequests.size(), &rrequests.front(), MPI_STATUSES_IGNORE); + if (commstate_verbose) { + CCTK_INFO ("Finished MPI_Waitall"); + } + timer.stop (0); + } + + // Waitall + { + static Timer timer ("commstate::message_count_multiplier::waitall(isend)"); + timer.start (); + if (commstate_verbose) { + CCTK_INFO ("About to MPI_Waitall"); + } + MPI_Waitall (srequests.size(), &srequests.front(), MPI_STATUSES_IGNORE); + if (commstate_verbose) { + CCTK_INFO ("Finished MPI_Waitall"); + } + timer.stop (0); + } + + } // for n - // Now fall through to the next state in which the recv buffers - // are emptied as soon as data has arrived. 
- thestate = state_do_some_work; + thestate = state_done; break; + } + + + + case state_done: { + assert (0); abort(); + } - case state_do_some_work: - // Now fall through to the next state in which the recv buffers - // are emptied as soon as data has arrived. - thestate = state_empty_recv_buffers; - case state_empty_recv_buffers: - // Finish (at least one of) the posted communications - if (not AllPostedCommunicationsFinished ()) { - // No state change if there are still outstanding - // communications; do another comm_state loop iteration. - } else { - // Everything is done so release the collective communication buffers. - for (size_t type = 0; type < typebufs.size(); type++) { - for (size_t proc = 0; proc < typebufs.AT(type).procbufs.size(); proc++) { - typebufs.AT(type).procbufs.AT(proc).sendbufbase.clear(); - typebufs.AT(type).procbufs.AT(proc).recvbufbase.clear(); - } - } - thestate = state_done; - } - break; default: - assert (0 && "invalid state"); + assert (0); abort(); } + + + total.stop (0); } -bool comm_state::done () +bool comm_state::done () const { return thestate == state_done; } @@ -264,116 +486,16 @@ bool comm_state::done () comm_state::~comm_state () { DECLARE_CCTK_PARAMETERS; - + assert (thestate == state_done or thestate == state_get_buffer_sizes); } -// wait for completion of posted collective buffer sends/receives -// -// This function will wait for all of the posted receive operations to -// finish. -// -// It returns true if all posted communications have been completed. 
-bool comm_state::AllPostedCommunicationsFinished () -{ - DECLARE_CCTK_PARAMETERS; - - // check if all outstanding receives have been completed already - if (num_posted_recvs == num_completed_recvs) { - // finalize the outstanding sends in one go - if (reduce_mpi_waitall) { - size_t nreqs = 0; - for (size_t i=0; i reqs(nreqs); - nreqs = 0; - for (size_t i=0; i reqs(nreqs); - nreqs = 0; - for (size_t i=0; i= 0 and proc < dist::size()); assert (npoints >= 0); typebufdesc & typebuf = typebufs.AT(type); + if (not typebuf.in_use) { + typebuf.procbufs.resize (dist::size()); + typebuf.in_use = true; + } procbufdesc & procbuf = typebuf.procbufs.AT(proc); procbuf.sendbufsize += npoints; - typebuf.in_use = true; } + + void comm_state:: -reserve_recv_space (unsigned int const type, +reserve_recv_space (unsigned const type, int const proc, int const npoints) { @@ -396,19 +523,25 @@ reserve_recv_space (unsigned int const type, assert (proc >= 0 and proc < dist::size()); assert (npoints >= 0); typebufdesc & typebuf = typebufs.AT(type); + if (not typebuf.in_use) { + typebuf.procbufs.resize (dist::size()); + typebuf.in_use = true; + } procbufdesc & procbuf = typebuf.procbufs.AT(proc); procbuf.recvbufsize += npoints; - typebuf.in_use = true; } + + void * comm_state:: -send_buffer (unsigned int const type, +send_buffer (unsigned const type, int const proc, int const npoints) { assert (type < dist::c_ndatatypes()); assert (proc >= 0 and proc < dist::size()); + assert (npoints > 0); typebufdesc const & typebuf = typebufs.AT(type); procbufdesc const & procbuf = typebuf.procbufs.AT(proc); @@ -419,14 +552,17 @@ send_buffer (unsigned int const type, return procbuf.sendbuf; } + + void * comm_state:: -recv_buffer (unsigned int const type, +recv_buffer (unsigned const type, int const proc, int const npoints) { assert (type < dist::c_ndatatypes()); assert (proc >= 0 and proc < dist::size()); + assert (npoints > 0); typebufdesc const & typebuf = typebufs.AT(type); procbufdesc const & procbuf = 
typebuf.procbufs.AT(proc); @@ -437,9 +573,11 @@ recv_buffer (unsigned int const type, return procbuf.recvbuf; } + + void comm_state:: -commit_send_space (unsigned int const type, +commit_send_space (unsigned const type, int const proc, int const npoints) { @@ -448,6 +586,7 @@ commit_send_space (unsigned int const type, assert (type < dist::c_ndatatypes()); assert (proc >= 0 and proc < dist::size()); assert (npoints >= 0); + assert (npoints > 0); typebufdesc & typebuf = typebufs.AT(type); procbufdesc & procbuf = typebuf.procbufs.AT(proc); procbuf.sendbuf += npoints * typebuf.datatypesize; @@ -461,16 +600,30 @@ commit_send_space (unsigned int const type, &procbuf.sendbufbase.front() + procbuf.sendbufsize * typebuf.datatypesize) { + if (message_size_multiplier > 1) { + size_t const nbytes = + procbuf.sendbufsize * typebuf.datatypesize * + (message_size_multiplier - 1); + memset (procbuf.sendbuf, poison_value, nbytes); + } + static Timer timer ("commit_send_space::isend"); timer.start (); if (commstate_verbose) { CCTK_VInfo (CCTK_THORNSTRING, - "About to MPI_Isend to %d", (int)proc); + "About to MPI_Isend to processor %d for type %s", + proc, dist::c_datatype_name(type)); } + int const tag = type; + assert (procbuf.sendbufsize > 0); + assert (not use_mpi_send); + assert (not use_mpi_ssend); MPI_Isend (&procbuf.sendbufbase.front(), - procbuf.sendbufsize, typebuf.mpi_datatype, - proc, type, dist::comm(), - & srequests.AT(type * dist::size() + proc)); + procbuf.sendbufsize * message_size_multiplier, + typebuf.mpi_datatype, proc, tag, + dist::comm(), &push_back(srequests)); + assert (not procbuf.did_post_send); + procbuf.did_post_send = true; if (commstate_verbose) { CCTK_INFO ("Finished MPI_Isend"); } @@ -479,15 +632,18 @@ commit_send_space (unsigned int const type, } } + + void comm_state:: -commit_recv_space (unsigned int const type, +commit_recv_space (unsigned const type, int const proc, int const npoints) { assert (type < dist::c_ndatatypes()); assert (proc >= 0 and 
proc < dist::size()); assert (npoints >= 0); + assert (npoints > 0); typebufdesc & typebuf = typebufs.AT(type); procbufdesc & procbuf = typebuf.procbufs.AT(proc); procbuf.recvbuf += npoints * typebuf.datatypesize; diff --git a/Carpet/CarpetLib/src/commstate.hh b/Carpet/CarpetLib/src/commstate.hh index c01f732da..e59ce7cb4 100644 --- a/Carpet/CarpetLib/src/commstate.hh +++ b/Carpet/CarpetLib/src/commstate.hh @@ -2,7 +2,7 @@ #define COMMSTATE_HH #include -#include +#include #include #include @@ -29,115 +29,132 @@ enum astate { state_done }; +char const * tostring (astate const & thestate); + +inline ostream& operator<< (ostream& os, astate const & thestate) +{ + return os << tostring(thestate); +} + + + struct comm_state { astate thestate; - + comm_state (); void step (); - bool done (); + bool done () const; ~comm_state (); - + private: // Forbid copying and passing by value comm_state (comm_state const &); comm_state& operator= (comm_state const &); - -public: - - ////////////////////////////////////////////////////////////////////////// - // the following members are used for collective communications - ////////////////////////////////////////////////////////////////////////// - -public: - // structure describing a per-processor buffer for collective communications + + + + // structure describing a per-processor buffer struct procbufdesc { - // the allocated communication buffers + // allocated communication buffers vector sendbufbase; vector recvbufbase; - - // the sizes of communication buffers (in elements of type ) + + // sizes of the communication buffers (in elements of type ) size_t sendbufsize; size_t recvbufsize; - + // pointers to step through the communication buffers // (these get advanced by the routines which fill/empty the buffers) char* sendbuf; char* recvbuf; - + + bool did_post_send; + bool did_post_recv; + // constructor for an instance of this structure - procbufdesc() : sendbufsize(0), recvbufsize(0), - sendbuf(NULL), recvbuf(NULL) + 
procbufdesc() : + sendbufsize(0), recvbufsize(0), + sendbuf(NULL), recvbuf(NULL), + did_post_send(false), did_post_recv(false) { } }; - + + + // structure describing a collective communications buffer for a C datatype struct typebufdesc { // flag indicating whether this buffer is in use bool in_use; - + + // the MPI datatype + MPI_Datatype mpi_datatype; + // the size of this datatype (in bytes) int datatypesize; - - // the corresponding MPI datatype - MPI_Datatype mpi_datatype; - + // per-processor buffers - vector procbufs; // [dist::size()] - + vector procbufs; // [dist::size()] + // constructor for an instance of this structure - typebufdesc() : in_use(false), datatypesize(0), - mpi_datatype(MPI_DATATYPE_NULL) + typebufdesc() : + in_use(false), + mpi_datatype(MPI_DATATYPE_NULL), datatypesize(0) { } }; - - // list of datatype buffers - vector typebufs; // [dist::c_ndatatypes()] - + + + + // datatype buffers + vector typebufs; // [type] + + + + // outstanding requests for posted send/recv communications + vector srequests; + vector rrequests; + + static inline + MPI_Request & push_back (vector & reqs) + { + reqs.push_back (MPI_REQUEST_NULL); + return reqs.back(); + } + + + +public: + void - reserve_send_space (unsigned int type, + reserve_send_space (unsigned type, int proc, int npoints); - + void - reserve_recv_space (unsigned int type, + reserve_recv_space (unsigned type, int proc, int npoints); - + void * - send_buffer (unsigned int type, + send_buffer (unsigned type, int proc, int npoints); - + void * - recv_buffer (unsigned int type, + recv_buffer (unsigned type, int proc, int npoints); - + void - commit_send_space (unsigned int type, + commit_send_space (unsigned type, int proc, int npoints); - + void - commit_recv_space (unsigned int type, + commit_recv_space (unsigned type, int proc, int npoints); - -private: - // lists of outstanding requests for posted send/recv communications - vector srequests; // [dist::size() * dist::c_ndatatypes()] - vector 
rrequests; // [dist::size() * dist::c_ndatatypes()] - - // number of posted and already completed receive communications - int num_posted_recvs; - int num_completed_recvs; - - // wait for completion of posted collective buffer sends/receives - bool AllPostedCommunicationsFinished(); }; - - #endif // COMMSTATE_HH diff --git a/Carpet/CarpetLib/src/copy_3d.cc b/Carpet/CarpetLib/src/copy_3d.cc index 36a48df40..06adb0276 100644 --- a/Carpet/CarpetLib/src/copy_3d.cc +++ b/Carpet/CarpetLib/src/copy_3d.cc @@ -7,7 +7,7 @@ #include #include -#include "operator_prototypes.hh" +#include "operator_prototypes_3d.hh" #include "typeprops.hh" using namespace std; diff --git a/Carpet/CarpetLib/src/copy_4d.cc b/Carpet/CarpetLib/src/copy_4d.cc new file mode 100644 index 000000000..c5ba95371 --- /dev/null +++ b/Carpet/CarpetLib/src/copy_4d.cc @@ -0,0 +1,142 @@ +#include +#include +#include +#include +#include + +#include +#include + +#include "operator_prototypes_4d.hh" +#include "typeprops.hh" + +using namespace std; + + + +namespace CarpetLib { + + + +#define SRCIND4(i,j,k,l) \ + index4 (srcioff + (i), srcjoff + (j), srckoff + (k), srcloff + (l), \ + srciext, srcjext, srckext, srclext) +#define DSTIND4(i,j,k,l) \ + index4 (dstioff + (i), dstjoff + (j), dstkoff + (k), dstloff + (l), \ + dstiext, dstjext, dstkext, dstlext) + + + + template + void + copy_4d (T const * restrict const src, + ivect4 const & restrict srcext, + T * restrict const dst, + ivect4 const & restrict dstext, + ibbox4 const & restrict srcbbox, + ibbox4 const & restrict dstbbox, + ibbox4 const & restrict regbbox) + { + if (any (srcbbox.stride() != regbbox.stride() or + dstbbox.stride() != regbbox.stride())) + { + cout << "copy_4d.cc:" << endl + << "srcbbox=" << srcbbox << endl + << "dstbbox=" << dstbbox << endl + << "regbbox=" << regbbox << endl; + CCTK_WARN (0, "Internal error: strides disagree"); + } + + if (any (srcbbox.stride() != dstbbox.stride())) { + CCTK_WARN (0, "Internal error: strides disagree"); + } + + 
// This could be handled, but is likely to point to an error + // elsewhere + if (regbbox.empty()) { + CCTK_WARN (0, "Internal error: region extent is empty"); + } + + if (not regbbox.is_contained_in(srcbbox) or + not regbbox.is_contained_in(dstbbox)) + { + CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); + } + + if (any (srcext != srcbbox.shape() / srcbbox.stride() or + dstext != dstbbox.shape() / dstbbox.stride())) + { + CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); + } + + + + ivect4 const regext = regbbox.shape() / regbbox.stride(); + assert (all ((regbbox.lower() - srcbbox.lower()) % srcbbox.stride() == 0)); + ivect4 const srcoff = (regbbox.lower() - srcbbox.lower()) / srcbbox.stride(); + assert (all ((regbbox.lower() - dstbbox.lower()) % dstbbox.stride() == 0)); + ivect4 const dstoff = (regbbox.lower() - dstbbox.lower()) / dstbbox.stride(); + + + + ptrdiff_t const srciext = srcext[0]; + ptrdiff_t const srcjext = srcext[1]; + ptrdiff_t const srckext = srcext[2]; + ptrdiff_t const srclext = srcext[3]; + + ptrdiff_t const dstiext = dstext[0]; + ptrdiff_t const dstjext = dstext[1]; + ptrdiff_t const dstkext = dstext[2]; + ptrdiff_t const dstlext = dstext[3]; + + ptrdiff_t const regiext = regext[0]; + ptrdiff_t const regjext = regext[1]; + ptrdiff_t const regkext = regext[2]; + ptrdiff_t const reglext = regext[3]; + + ptrdiff_t const srcioff = srcoff[0]; + ptrdiff_t const srcjoff = srcoff[1]; + ptrdiff_t const srckoff = srcoff[2]; + ptrdiff_t const srcloff = srcoff[3]; + + ptrdiff_t const dstioff = dstoff[0]; + ptrdiff_t const dstjoff = dstoff[1]; + ptrdiff_t const dstkoff = dstoff[2]; + ptrdiff_t const dstloff = dstoff[3]; + + + + // Loop over region +#pragma omp parallel for + for (int l=0; l +static +void +call_operator (void + (* the_operator) (T const * restrict const src, + ivect4 const & restrict srcext, + T * restrict const dst, + ivect4 const & restrict dstext, + ibbox4 const & restrict 
srcbbox, + ibbox4 const & restrict dstbbox, + ibbox4 const & restrict regbbox), + T const * restrict const src, + ivect4 const & restrict srcext, + T * restrict const dst, + ivect4 const & restrict dstext, + ibbox4 const & restrict srcbbox, + ibbox4 const & restrict dstbbox, + ibbox4 const & restrict regbbox) +{ +#ifndef _OPENMP + (* the_operator) (src, srcext, dst, dstext, srcbbox, dstbbox, regbbox); +#else +# if ! defined (NDEBUG) && ! defined (CARPET_OPTIMISE) + ibset allregbboxes; +# endif +#pragma omp parallel + { + int const num_threads = omp_get_num_threads(); + int const thread_num = omp_get_thread_num(); + // Parallelise in z direction + // TODO: parallelise along longest extent + int const dir = 2; + int const stride = regbbox.stride()[dir]; + int const first_point = regbbox.lower()[dir]; + int const last_point = regbbox.upper()[dir] + stride; + int const num_points = last_point - first_point; + assert (num_points >= 0); + assert (num_points % stride == 0); + int const my_num_points = + (num_points / stride + num_threads - 1) / num_threads * stride; + int const my_first_point = + min (last_point, first_point + thread_num * my_num_points); + int const my_last_point = + max (my_first_point, min (last_point, my_first_point + my_num_points)); + ibbox4 const myregbbox + (regbbox.lower().replace (dir, my_first_point), + regbbox.upper().replace (dir, my_last_point - stride), + regbbox.stride()); + if (not myregbbox.empty()) { + (* the_operator) (src, srcext, dst, dstext, srcbbox, dstbbox, myregbbox); +# if ! defined (NDEBUG) && ! defined (CARPET_OPTIMISE) +#pragma omp critical + allregbboxes += myregbbox; +# endif + } + } +# if ! defined (NDEBUG) && ! 
defined (CARPET_OPTIMISE) + if (not (allregbboxes == ibset (regbbox))) { + allregbboxes.normalize(); + cout << "allregbboxes=" << allregbboxes << endl + << "regbbox=" << regbbox << endl; + } + assert (allregbboxes == ibset (regbbox)); +# endif +#endif +} + // Fortran wrappers @@ -200,16 +268,13 @@ prolongate_3d_weno (CCTK_REAL8 const * restrict const src, -static const CCTK_REAL eps = 1.0e-10; - // Constructors template data::data (const int varindex_, const centering cent_, const operator_type transport_operator_, const int vectorlength_, const int vectorindex_, - data* const vectorleader_, - const int tag_) - : gdata(varindex_, cent_, transport_operator_, tag_), + data* const vectorleader_) + : gdata(varindex_, cent_, transport_operator_), _memory(NULL), vectorlength(vectorlength_), vectorindex(vectorindex_), vectorleader(vectorleader_) @@ -249,11 +314,10 @@ data::~data () template data* data::make_typed (const int varindex_, const centering cent_, - const operator_type transport_operator_, - const int tag_) + const operator_type transport_operator_) const { - return new data(varindex_, cent_, transport_operator_, 1, 0, NULL, tag_); + return new data(varindex_, cent_, transport_operator_, 1, 0, NULL); } @@ -335,6 +399,7 @@ copy_from_innerloop (gdata const * const gsrc, assert (proc() == src->proc()); assert (dist::rank() == proc()); +#if CARPET_DIM == 3 copy_3d (static_cast (src->storage()), src->shape(), static_cast (this->storage()), @@ -342,6 +407,17 @@ copy_from_innerloop (gdata const * const gsrc, src->extent(), this->extent(), box); +#elif CARPET_DIM == 4 + copy_4d (static_cast (src->storage()), + src->shape(), + static_cast (this->storage()), + this->shape(), + src->extent(), + this->extent(), + box); +#else +# error "Value for CARPET_DIM not supported" +#endif } @@ -382,7 +458,8 @@ transfer_time (vector const & gsrcs, { // Use this timelevel, or interpolate in time if set to -1 int timelevel0, ntimelevels; - find_source_timelevel (times, time, order_time, 
timelevel0, ntimelevels); + find_source_timelevel + (times, time, order_time, transport_operator, timelevel0, ntimelevels); if (ntimelevels > 1) { // Time interpolation is necessary @@ -437,7 +514,8 @@ transfer_p_r (data const * const src, copy_from_innerloop (src, box); } else if (all (src->extent().stride() > this->extent().stride())) { // Prolongate - assert (transport_operator != op_sync); + assert (transport_operator != op_sync and + transport_operator != op_restrict); transfer_p_vc_cc (src, box, order_space); } else if (all (src->extent().stride() < this->extent().stride())) { // Restrict @@ -457,6 +535,9 @@ transfer_p_vc_cc (data const * const src, ibbox const & box, int const order_space) { + transfer_prolongate (src, box, order_space); + +#if 0 if (cent == vertex_centered) { // Vertex centred @@ -501,6 +582,8 @@ transfer_p_vc_cc (data const * const src, newdstbox .contracted_for (tmpsrcbox) .expand (offsetlo, offsethi); // Allocate temporary storage + // TODO: This may not be necessary if the source is already a + // temporary data * const newsrc = new data (src->varindex, vertex_centered, src->transport_operator); newsrc->allocate (newsrcbox, src->proc()); @@ -538,6 +621,7 @@ transfer_p_vc_cc (data const * const src, } else { assert (0); } +#endif } template <> @@ -562,25 +646,139 @@ transfer_prolongate (data const * const src, static Timer total ("prolongate"); total.start (); +#if CARPET_DIM == 3 + switch (transport_operator) { case op_copy: case op_Lagrange: { static Timer timer ("prolongate_Lagrange"); timer.start (); + // enum centering { vertex_centered, cell_centered }; + switch (cent) { + case vertex_centered: + switch (order_space) { + case 1: + call_operator (& prolongate_3d_o1_rf2, + static_cast (src->storage()), + src->shape(), + static_cast (this->storage()), + this->shape(), + src->extent(), + this->extent(), + box); + break; + case 3: + call_operator (& prolongate_3d_o3_rf2, + static_cast (src->storage()), + src->shape(), + static_cast 
(this->storage()), + this->shape(), + src->extent(), + this->extent(), + box); + break; + case 5: + call_operator (& prolongate_3d_o5_rf2, + static_cast (src->storage()), + src->shape(), + static_cast (this->storage()), + this->shape(), + src->extent(), + this->extent(), + box); + break; + case 7: + call_operator (& prolongate_3d_o7_rf2, + static_cast (src->storage()), + src->shape(), + static_cast (this->storage()), + this->shape(), + src->extent(), + this->extent(), + box); + break; + case 9: + call_operator (& prolongate_3d_o9_rf2, + static_cast (src->storage()), + src->shape(), + static_cast (this->storage()), + this->shape(), + src->extent(), + this->extent(), + box); + break; + case 11: + call_operator (& prolongate_3d_o11_rf2, + static_cast (src->storage()), + src->shape(), + static_cast (this->storage()), + this->shape(), + src->extent(), + this->extent(), + box); + break; + default: + CCTK_WARN (CCTK_WARN_ABORT, + "There is no vertex-centred stencil for op=\"LAGRANGE\" with order_space not in {1, 3, 5, 7, 9, 11}"); + break; + } + break; + case cell_centered: + switch (order_space) { + case 0: + call_operator (& prolongate_3d_cc_o0_rf2, + static_cast (src->storage()), + src->shape(), + static_cast (this->storage()), + this->shape(), + src->extent(), + this->extent(), + box); + break; + case 1: + call_operator (& prolongate_3d_cc_o1_rf2, + static_cast (src->storage()), + src->shape(), + static_cast (this->storage()), + this->shape(), + src->extent(), + this->extent(), + box); + break; + case 2: + call_operator (& prolongate_3d_cc_o2_rf2, + static_cast (src->storage()), + src->shape(), + static_cast (this->storage()), + this->shape(), + src->extent(), + this->extent(), + box); + break; + default: + CCTK_WARN (CCTK_WARN_ABORT, + "There is no cell-centred stencil for op=\"LAGRANGE\" with order_space not in {0, 1, 2}"); + break; + } + break; + default: + assert (0); + } + timer.stop (0); + break; + } + + case op_ENO: { + static Timer timer ("prolongate_ENO"); + 
timer.start (); switch (order_space) { case 1: - call_operator (& prolongate_3d_o1_rf2, - static_cast (src->storage()), - src->shape(), - static_cast (this->storage()), - this->shape(), - src->extent(), - this->extent(), - box); + CCTK_WARN (CCTK_WARN_ABORT, + "There is no stencil for op=\"ENO\" with order_space=1"); break; case 3: - call_operator (& prolongate_3d_o3_rf2, + call_operator (& prolongate_3d_eno, static_cast (src->storage()), src->shape(), static_cast (this->storage()), @@ -590,37 +788,10 @@ transfer_prolongate (data const * const src, box); break; case 5: - call_operator (& prolongate_3d_o5_rf2, - static_cast (src->storage()), - src->shape(), - static_cast (this->storage()), - this->shape(), - src->extent(), - this->extent(), - box); - break; - case 7: - call_operator (& prolongate_3d_o7_rf2, - static_cast (src->storage()), - src->shape(), - static_cast (this->storage()), - this->shape(), - src->extent(), - this->extent(), - box); - break; - case 9: - call_operator (& prolongate_3d_o9_rf2, - static_cast (src->storage()), - src->shape(), - static_cast (this->storage()), - this->shape(), - src->extent(), - this->extent(), - box); - break; - case 11: - call_operator (& prolongate_3d_o11_rf2, + // There is only one parameter for the prolongation order, but + // Whisky may want 5th order for spacetime and 3rd order for + // hydro, so we cheat here. 
+ call_operator (& prolongate_3d_eno, static_cast (src->storage()), src->shape(), static_cast (this->storage()), @@ -631,22 +802,26 @@ transfer_prolongate (data const * const src, break; default: CCTK_WARN (CCTK_WARN_ABORT, - "There is no stencil for op=\"LAGRANGE\" with order_space not in {1, 3, 5, 7, 9, 11}"); + "There is no stencil for op=\"ENO\" with order_space!=3"); break; } timer.stop (0); break; } - case op_ENO: { - static Timer timer ("prolongate_ENO"); + case op_WENO: { + static Timer timer ("prolongate_WENO"); timer.start (); switch (order_space) { case 1: CCTK_WARN (CCTK_WARN_ABORT, - "There is no stencil for op=\"ENO\" with order_space=1"); + "There is no stencil for op=\"WENO\" with order_space=1"); break; case 3: + CCTK_WARN (CCTK_WARN_ABORT, + "There is no stencil for op=\"WENO\" with order_space=3"); + break; + case 5: call_operator (& prolongate_3d_eno, static_cast (src->storage()), src->shape(), @@ -656,40 +831,29 @@ transfer_prolongate (data const * const src, this->extent(), box); break; - case 5: - // there is only a parameter for the prolongation order, but Whisky may want 5th order for spacetime and 3rd order for hydro; so this is a trick. 
- call_operator (& prolongate_3d_eno, - static_cast (src->storage()), - src->shape(), - static_cast (this->storage()), - this->shape(), - src->extent(), - this->extent(), - box); - break; default: CCTK_WARN (CCTK_WARN_ABORT, - "There is no stencil for op=\"ENO\" with order_space!=3"); + "There is no stencil for op=\"WENO\" with order_space!=5"); break; } timer.stop (0); break; } - case op_WENO: { - static Timer timer ("prolongate_WENO"); + case op_Lagrange_monotone: { + static Timer timer ("prolongate_Lagrange_monotone"); timer.start (); switch (order_space) { case 1: CCTK_WARN (CCTK_WARN_ABORT, - "There is no stencil for op=\"WENO\" with order_space=1"); + "There is no stencil for op=\"Lagrange_monotone\" with order_space=1"); break; case 3: CCTK_WARN (CCTK_WARN_ABORT, - "There is no stencil for op=\"WENO\" with order_space=3"); + "There is no stencil for op=\"Lagrange_monotone\" with order_space=3"); break; case 5: - call_operator (& prolongate_3d_eno, + call_operator (& prolongate_3d_o5_monotone_rf2, static_cast (src->storage()), src->shape(), static_cast (this->storage()), @@ -700,7 +864,7 @@ transfer_prolongate (data const * const src, break; default: CCTK_WARN (CCTK_WARN_ABORT, - "There is no stencil for op=\"WENO\" with order_space!=5"); + "There is no stencil for op=\"Lagrange_monotone\" with order_space!=5"); break; } timer.stop (0); @@ -711,6 +875,48 @@ transfer_prolongate (data const * const src, assert (0); } // switch (transport_operator) +#elif CARPET_DIM == 4 + + switch (transport_operator) { + + case op_copy: + case op_Lagrange: { + static Timer timer ("prolongate_Lagrange"); + timer.start (); + // enum centering { vertex_centered, cell_centered }; + switch (cent) { + case vertex_centered: + switch (order_space) { + case 1: + call_operator (& prolongate_4d_o1_rf2, + static_cast (src->storage()), + src->shape(), + static_cast (this->storage()), + this->shape(), + src->extent(), + this->extent(), + box); + break; + default: + CCTK_WARN 
(CCTK_WARN_ABORT, + "There is no vertex-centred stencil for op=\"LAGRANGE\" with order_space not in {1}"); + break; + } + break; + default: + assert (0); + } + timer.stop (0); + break; + } + default: + assert (0); + } // switch (transport_operator) + +#else +# error "Value for CARPET_DIM not supported" +#endif + total.stop (0); } @@ -736,12 +942,15 @@ transfer_restrict (data const * const src, static Timer total ("restrict"); total.start (); +#if CARPET_DIM == 3 + switch (transport_operator) { case op_copy: case op_Lagrange: case op_ENO: case op_WENO: + case op_Lagrange_monotone: // enum centering { vertex_centered, cell_centered }; switch (cent) { case vertex_centered: @@ -771,6 +980,36 @@ transfer_restrict (data const * const src, assert (0); } +#elif CARPET_DIM == 4 + + switch (transport_operator) { + + case op_copy: + case op_Lagrange: + // enum centering { vertex_centered, cell_centered }; + switch (cent) { + case vertex_centered: + restrict_4d_rf2 (static_cast (src->storage()), + src->shape(), + static_cast (this->storage()), + this->shape(), + src->extent(), + this->extent(), + box); + break; + default: + assert (0); + } + break; + + default: + assert (0); + } + +#else +# error "Value for CARPET_DIM not supported" +#endif + total.stop (0); } @@ -797,7 +1036,9 @@ time_interpolate (vector const & srcs, { static Timer total ("time_interpolate"); total.start (); - + +#if CARPET_DIM == 3 + switch (transport_operator) { case op_copy: @@ -886,8 +1127,10 @@ time_interpolate (vector const & srcs, } case op_ENO: - case op_WENO: { - // ENO and WENO timer interpolation is the same for order_time <= 2 + case op_WENO: + case op_Lagrange_monotone: { + // ENO, WENO, and Lagrange_monotone time interpolation is the same + // for order_time <= 2 static Timer timer ("time_interpolate_ENO"); timer.start (); switch (order_time) { @@ -935,6 +1178,14 @@ time_interpolate (vector const & srcs, assert (0); } // switch (transport_operator) +#elif CARPET_DIM == 4 + + assert (0); + 
+#else +# error "Value for CARPET_DIM not supported" +#endif + total.stop (0); } @@ -982,22 +1233,9 @@ output (ostream & os) return os; } -template -ostream & -operator << (ostream & os, data const & d) -{ - char const * space = ""; - for (int i = 0; i < d.vectorlength; i++) { - os << space << d[i]; - space = " "; - } - return os; -} - #define INSTANTIATE(T) \ -template class data; \ -template ostream & operator << (ostream & os, data const & d); +template class data; #include "instantiate" #undef INSTANTIATE diff --git a/Carpet/CarpetLib/src/data.hh b/Carpet/CarpetLib/src/data.hh index 6b7a774e1..d321e46c1 100644 --- a/Carpet/CarpetLib/src/data.hh +++ b/Carpet/CarpetLib/src/data.hh @@ -20,9 +20,6 @@ using namespace std; template class data; -template -ostream & operator << ( ostream & os, const data & d ); - // A distributed multi-dimensional array template class data: public gdata @@ -35,7 +32,7 @@ class data: public gdata int vectorlength; // number of vector elements int vectorindex; // index of this vector element data* vectorleader; // if index!=0: first vector element - + private: // Forbid copying and passing by value data (data const &); @@ -48,23 +45,21 @@ public: const centering cent = error_centered, const operator_type transport_operator = op_error, const int vectorlength = 1, const int vectorindex = 0, - data* const vectorleader = NULL, - const int tag = -1); + data* const vectorleader = NULL); data (const int varindex, const centering cent, const operator_type transport_operator, const int vectorlength, const int vectorindex, data* const vectorleader, const ibbox& extent, const int proc); - + // Destructors virtual ~data (); - + // Pseudo constructors virtual data* make_typed (const int varindex, const centering cent, - const operator_type transport_operator, - const int tag) const; - + const operator_type transport_operator) const; + // Storage management virtual void allocate (const ibbox& extent, const int proc, void* const memptr = NULL, size_t 
const memsize = 0); @@ -156,19 +151,10 @@ private: public: // Memory usage - size_t memory () const; + virtual size_t memory () const CCTK_ATTRIBUTE_PURE; // Output - ostream & output (ostream& os) const; - - friend ostream & operator<< (ostream & os, data const & d); + virtual ostream & output (ostream& os) const; }; - -// Memory usage -template -inline size_t memoryof (data const & d) -{ - return d.memory(); -} #endif // DATA_HH diff --git a/Carpet/CarpetLib/src/defs.cc b/Carpet/CarpetLib/src/defs.cc index 357061ee2..ab3b021d8 100644 --- a/Carpet/CarpetLib/src/defs.cc +++ b/Carpet/CarpetLib/src/defs.cc @@ -2,11 +2,13 @@ #include #include #include +#include #include #include #include #include "cctk.h" +#include "cctk_Parameters.h" #include "bbox.hh" #include "defs.hh" @@ -18,6 +20,8 @@ using namespace std; +template +inline T ipow_helper (T x, unsigned int y) CCTK_ATTRIBUTE_CONST; template inline T ipow_helper (T x, unsigned int y) { @@ -31,7 +35,7 @@ inline T ipow_helper (T x, unsigned int y) } template -T ipow (T x, int y) +T ipow (T const x, int const y) { if (y < 0) return T(1) / ipow_helper(x, -y); @@ -41,8 +45,23 @@ T ipow (T x, int y) +// Access to CarpetLib parameters +CCTK_INT get_poison_value() +{ + DECLARE_CCTK_PARAMETERS; + return poison_value; +} + +CCTK_INT get_deadbeef() +{ + DECLARE_CCTK_PARAMETERS; + return deadbeef; +} + + + void skipws (istream& is) { - while (is.good() && isspace(is.peek())) { + while (is.good() and isspace(is.peek())) { is.get(); } } @@ -138,6 +157,60 @@ memoryof (vector const & c) +// List input +template +istream& input (istream& is, list& l) { + l.clear(); + try { + skipws (is); + consume (is, '['); + skipws (is); + while (is.good() and is.peek() != ']') { + T elem; + is >> elem; + l.push_back (elem); + skipws (is); + if (is.peek() != ',') break; + is.get(); + skipws (is); + } + skipws (is); + consume (is, ']'); + } catch (input_error &err) { + cout << "Input error while reading a list<>" << endl + << " The following 
elements have been read so far: " << l << endl; + throw err; + } + return is; +} + +// Set input: clears s, then reads a brace-delimited, comma-separated sequence {a,b,...} +template +istream& input (istream& is, set& s) { + s.clear(); + try { + skipws (is); + consume (is, '{'); + skipws (is); + while (is.good() and is.peek() != '}') { + T elem; + is >> elem; + s.insert (elem); + skipws (is); + if (is.peek() != ',') break; + is.get(); + skipws (is); + } + skipws (is); + consume (is, '}'); + } catch (input_error &err) { + cout << "Input error while reading a set<>" << endl + << " The following elements have been read so far: " << s << endl; + throw err; + } + return is; +} + // Vector input template istream& input (istream& is, vector& v) { @@ -146,7 +219,7 @@ istream& input (istream& is, vector& v) { skipws (is); consume (is, '['); skipws (is); - while (is.good() && is.peek() != ']') { + while (is.good() and is.peek() != ']') { T elem; is >> elem; v.push_back (elem); @@ -179,6 +252,25 @@ ostream& output (ostream& os, const list& l) { return os; } +// Map output +template +ostream& output (ostream& os, const map& m) { + os << "{"; + for (typename map::const_iterator ti=m.begin(); ti!=m.end(); ++ti) { + if (ti!=m.begin()) os << ","; + os << ti->first << ":" << ti->second; + } + os << "}"; + return os; +} + +// Pair output +template +ostream& output (ostream& os, const pair& p) { + os << "(" << p.first << "," << p.second << ")"; + return os; +} + // Set output template ostream& output (ostream& os, const set& s) { @@ -196,7 +288,7 @@ template ostream& output (ostream& os, const stack& s) { stack s2 (s); list l; - while (! 
s2.empty()) { + while (not s2.empty()) { l.insert (l.begin(), s2.top()); s2.pop(); } @@ -231,28 +323,32 @@ ostream& output (ostream& os, const vector& v) { #include "th.hh" #include "vect.hh" +#include "CarpetTimers.hh" + template int ipow (int x, int y); template CCTK_REAL ipow (CCTK_REAL x, int y); -template vect ipow (vect x, int y); +template vect ipow (vect x, int y); -template size_t memoryof (list > const & l); -template size_t memoryof (list > const & l); +template size_t memoryof (list > const & l); +template size_t memoryof (list > const & l); template size_t memoryof (list const & l); +template size_t memoryof (list const & l); +template size_t memoryof (list const & l); template size_t memoryof (list const & l); template size_t memoryof (list const & l); template size_t memoryof (stack const & s); template size_t memoryof (vector const & v); template size_t memoryof (vector const & v); template size_t memoryof (vector const & v); -template size_t memoryof (vector > const & v); -template size_t memoryof (vector > const & v); -template size_t memoryof (vector *> const & f); +template size_t memoryof (vector > const & v); +template size_t memoryof (vector > const & v); +template size_t memoryof (vector *> const & f); template size_t memoryof (vector const & v); template size_t memoryof (vector const & v); template size_t memoryof (vector const & v); template size_t memoryof (vector > const & v); template size_t memoryof (vector > const & v); -template size_t memoryof (vector > > const & v); +template size_t memoryof (vector > > const & v); template size_t memoryof (vector > const & v); template size_t memoryof (vector > const & v); template size_t memoryof (vector > const & v); @@ -262,49 +358,54 @@ template size_t memoryof (vector > > const & v); template size_t memoryof (vector > > const & v); template size_t memoryof (vector > > > const & v); +template istream& input (istream& os, list >& l); +template istream& input (istream& os, set >& s); template 
istream& input (istream& os, vector& v); template istream& input (istream& os, vector& v); -template istream& input (istream& os, vector >& v); -template istream& input (istream& os, vector >& v); -template istream& input (istream& os, vector >& v); -template istream& input (istream& os, vector,3> >& v); +template istream& input (istream& os, vector >& v); +template istream& input (istream& os, vector >& v); +template istream& input (istream& os, vector >& v); +template istream& input (istream& os, vector,dim> >& v); template istream& input (istream& os, vector& v); +template istream& input (istream& os, vector& v); +template istream& input (istream& os, vector& v); template istream& input (istream& os, vector >& v); -template istream& input (istream& os, vector > >& v); -template istream& input (istream& os, vector > >& v); -template istream& input (istream& os, vector,3> > >& v); +template istream& input (istream& os, vector > >& v); +template istream& input (istream& os, vector > >& v); +template istream& input (istream& os, vector,dim> > >& v); template istream& input (istream& os, vector >& v); template istream& input (istream& os, vector > >& v); template istream& input (istream& os, vector > >& v); -template ostream& output (ostream& os, const list >& l); +template ostream& output (ostream& os, const list >& l); template ostream& output (ostream& os, const list& l); -template ostream& output (ostream& os, const set >& s); -template ostream& output (ostream& os, const set >& s); -template ostream& output (ostream& os, const stack >& s); +template ostream& output (ostream& os, const map& m); +template ostream& output (ostream& os, const set >& s); +template ostream& output (ostream& os, const set >& s); +template ostream& output (ostream& os, const stack >& s); template ostream& output (ostream& os, const vector& v); template ostream& output (ostream& os, const vector& v); template ostream& output (ostream& os, const vector& v); -template ostream& output 
(ostream& os, const vector >& v); -template ostream& output (ostream& os, const vector >& v); -template ostream& output (ostream& os, const vector >& v); -template ostream& output (ostream& os, const vector,3> >& v); +template ostream& output (ostream& os, const vector >& v); +template ostream& output (ostream& os, const vector >& v); +template ostream& output (ostream& os, const vector >& v); +template ostream& output (ostream& os, const vector,dim> >& v); template ostream& output (ostream& os, const vector & v); template ostream& output (ostream& os, const vector & v); template ostream& output (ostream& os, const vector& v); template ostream& output (ostream& os, const vector& v); template ostream& output (ostream& os, const vector& v); -template ostream& output (ostream& os, const vector > >& v); +template ostream& output (ostream& os, const vector > >& v); template ostream& output (ostream& os, const vector >& v); template ostream& output (ostream& os, const vector >& v); -template ostream& output (ostream& os, const vector > >& v); -template ostream& output (ostream& os, const vector > >& v); -template ostream& output (ostream& os, const vector,3> > >& v); +template ostream& output (ostream& os, const vector > >& v); +template ostream& output (ostream& os, const vector > >& v); +template ostream& output (ostream& os, const vector,dim> > >& v); template ostream& output (ostream& os, const vector > & b); template ostream& output (ostream& os, const vector > & b); template ostream& output (ostream& os, const vector >& v); template ostream& output (ostream& os, const vector > >& v); -template ostream& output (ostream& os, const vector > > >& v); +template ostream& output (ostream& os, const vector > > >& v); template ostream& output (ostream& os, const vector > > & b); template ostream& output (ostream& os, const vector > > & b); template ostream& output (ostream& os, const vector > >& v); diff --git a/Carpet/CarpetLib/src/defs.hh b/Carpet/CarpetLib/src/defs.hh 
index a44d20ef0..3a1188d21 100644 --- a/Carpet/CarpetLib/src/defs.hh +++ b/Carpet/CarpetLib/src/defs.hh @@ -11,25 +11,24 @@ #include #include #include +#include #include #include #include #include "cctk.h" +#include "typeprops.hh" -using namespace std; - - -// A compile time pseudo assert statement -#define static_assert(_x, _msg) do { typedef int ai[(_x) ? 1 : -1]; } while(0) +using namespace std; -// Check a return value -#define check(_expr) do { bool const _val = (_expr); assert(_val); } while(0) +// Stringify +#define STRINGIFY1(x) #x +#define STRINGIFY(x) STRINGIFY1(x) @@ -40,14 +39,11 @@ using namespace std; -// Use this macro AT instead of vector's operator[] or at(). -// Depending on the macro NDEBUG, this macro AT either checks for -// valid indices or not. -#ifndef CARPET_OPTIMISE -# define AT(index) at(index) -#else -# define AT(index) operator[](index) +// Number of dimensions +#ifndef CARPET_DIM +# define CARPET_DIM 3 #endif +const int dim = CARPET_DIM; @@ -55,9 +51,25 @@ using namespace std; char const * const eol = "\n"; + +// A compile time pseudo assert statement +#define static_assert(_x, _msg) do { typedef int ai[(_x) ? 1 : -1]; } while(0) -// Number of dimensions -const int dim = 3; + + +// Check a return value +#define check(_expr) do { bool const _val = (_expr); assert(_val); } while(0) + + + +// Use this macro AT instead of vector's operator[] or at(). +// Depending on the macro CARPET_OPTIMISE, this macro AT either checks +// for valid indices or not. +#if ! 
defined(CARPET_OPTIMISE) +# define AT(index) at(index) +#else +# define AT(index) operator[](index) +#endif @@ -100,11 +112,19 @@ enum centering { error_centered, vertex_centered, cell_centered }; // Useful helper template +inline T square (const T& x) CCTK_ATTRIBUTE_CONST; +template inline T square (const T& x) { return x*x; } // Another useful helper template -T ipow (T x, int y); +T ipow (T x, int y) CCTK_ATTRIBUTE_CONST; + + + +// Access to CarpetLib parameters +CCTK_INT get_poison_value() CCTK_ATTRIBUTE_CONST; +CCTK_INT get_deadbeef() CCTK_ATTRIBUTE_CONST; @@ -120,52 +140,62 @@ void consume (istream& is, char const * c); // Names for types #ifdef HAVE_CCTK_INT1 -inline const char * typestring (const CCTK_INT1& dummy) +inline const char * typestring (const CCTK_INT1&) CCTK_ATTRIBUTE_CONST; +inline const char * typestring (const CCTK_INT1&) { return "CCTK_INT1"; } #endif #ifdef HAVE_CCTK_INT2 -inline const char * typestring (const CCTK_INT2& dummy) +inline const char * typestring (const CCTK_INT2&) CCTK_ATTRIBUTE_CONST; +inline const char * typestring (const CCTK_INT2&) { return "CCTK_INT2"; } #endif #ifdef HAVE_CCTK_INT4 -inline const char * typestring (const CCTK_INT4& dummy) +inline const char * typestring (const CCTK_INT4&) CCTK_ATTRIBUTE_CONST; +inline const char * typestring (const CCTK_INT4&) { return "CCTK_INT4"; } #endif #ifdef HAVE_CCTK_INT8 -inline const char * typestring (const CCTK_INT8& dummy) +inline const char * typestring (const CCTK_INT8&) CCTK_ATTRIBUTE_CONST; +inline const char * typestring (const CCTK_INT8&) { return "CCTK_INT8"; } #endif #ifdef HAVE_CCTK_REAL4 -inline const char * typestring (const CCTK_REAL4& dummy) +inline const char * typestring (const CCTK_REAL4&) CCTK_ATTRIBUTE_CONST; +inline const char * typestring (const CCTK_REAL4&) { return "CCTK_REAL4"; } #endif #ifdef HAVE_CCTK_REAL8 -inline const char * typestring (const CCTK_REAL8& dummy) +inline const char * typestring (const CCTK_REAL8&) CCTK_ATTRIBUTE_CONST; +inline const 
char * typestring (const CCTK_REAL8&) { return "CCTK_REAL8"; } #endif #ifdef HAVE_CCTK_REAL16 -inline const char * typestring (const CCTK_REAL16& dummy) +inline const char * typestring (const CCTK_REAL16&) CCTK_ATTRIBUTE_CONST; +inline const char * typestring (const CCTK_REAL16&) { return "CCTK_REAL16"; } #endif #ifdef HAVE_CCTK_REAL4 -inline const char * typestring (const CCTK_COMPLEX8& dummy) +inline const char * typestring (const CCTK_COMPLEX8&) CCTK_ATTRIBUTE_CONST; +inline const char * typestring (const CCTK_COMPLEX8&) { return "CCTK_COMPLEX8"; } #endif #ifdef HAVE_CCTK_REAL8 -inline const char * typestring (const CCTK_COMPLEX16& dummy) +inline const char * typestring (const CCTK_COMPLEX16&) CCTK_ATTRIBUTE_CONST; +inline const char * typestring (const CCTK_COMPLEX16&) { return "CCTK_COMPLEX16"; } #endif #ifdef HAVE_CCTK_REAL16 -inline const char * typestring (const CCTK_COMPLEX32& dummy) +inline const char * typestring (const CCTK_COMPLEX32&) CCTK_ATTRIBUTE_CONST; +inline const char * typestring (const CCTK_COMPLEX32&) { return "CCTK_COMPLEX32"; } #endif @@ -174,78 +204,105 @@ inline const char * typestring (const CCTK_COMPLEX32& dummy) namespace CarpetLib { namespace good { - // Explicitly overload abs for all types in the same namespace, to - // circumvent confusion among some compilers + // Explicitly overload some functions for all types in the same + // namespace CarpetLib::good, to circumvent confusion among some + // compilers - // CCTK_BYTE is unsigned - inline CCTK_BYTE abs (CCTK_BYTE const & x) { return x; } + // + // abs + // -#if 0 - // This does not work on AIX, which does not have long long abs - // (long long) -# ifdef HAVE_CCTK_INT1 - inline CCTK_INT1 abs (CCTK_INT1 const & x) { return std::abs (x); } -# endif -# ifdef HAVE_CCTK_INT2 - inline CCTK_INT2 abs (CCTK_INT2 const & x) { return std::abs (x); } -# endif -# ifdef HAVE_CCTK_INT4 - inline CCTK_INT4 abs (CCTK_INT4 const & x) { return std::abs (x); } -# endif -# ifdef HAVE_CCTK_INT8 - inline 
CCTK_INT8 abs (CCTK_INT8 const & x) { return std::abs (x); } -# endif -#endif + template + inline typename typeprops::real abs (T const & x) CCTK_ATTRIBUTE_CONST; + template + inline typename typeprops::real abs (T const & x) + { return std::abs (x); } -#if 0 - // This does not work on Linux with Intel compilers, which do not - // always have long long llabs (long long) - inline signed char abs (signed char const & x) { return ::abs (x); } - inline unsigned char abs (unsigned char const & x) { return ::abs (x); } - inline short abs (short const & x) { return ::abs (x); } - inline int abs (int const & x) { return ::abs (x); } - inline long abs (long const & x) { return ::labs (x); } -# ifdef SIZEOF_LONG_LONG - inline long long abs (long long const & x) { return ::llabs (x); } -# endif -#endif +// // This does not work on Linux with Intel compilers, which do not +// // always have long long llabs (long long) +// template<> inline signed char abs (signed char const & x) CCTK_ATTRIBUTE_CONST { return ::abs (x); } +// template<> inline unsigned char abs (unsigned char const & x) CCTK_ATTRIBUTE_CONST { return ::abs (x); } +// template<> inline short abs (short const & x) { return ::abs (x); } +// template<> inline int abs (int const & x) CCTK_ATTRIBUTE_CONST { return ::abs (x); } +// template<> inline long abs (long const & x) CCTK_ATTRIBUTE_CONST { return ::labs (x); } +// #ifdef SIZEOF_LONG_LONG +// inline long long abs (long long const & x) CCTK_ATTRIBUTE_CONST { return ::llabs (x); } +// #endif -#if 1 -# ifdef HAVE_CCTK_INT1 - inline CCTK_INT1 abs (CCTK_INT1 const & x) { return x < 0 ? - x : x; } -# endif -# ifdef HAVE_CCTK_INT2 - inline CCTK_INT2 abs (CCTK_INT2 const & x) { return x < 0 ? - x : x; } -# endif -# ifdef HAVE_CCTK_INT4 - inline CCTK_INT4 abs (CCTK_INT4 const & x) { return x < 0 ? - x : x; } -# endif -# ifdef HAVE_CCTK_INT8 - inline CCTK_INT8 abs (CCTK_INT8 const & x) { return x < 0 ? 
- x : x; } -# endif +// // This template does not work on AIX, which does not have long +// // long abs (long long) +// #ifdef HAVE_CCTK_INT1 +// template<> inline CCTK_INT1 abs (CCTK_INT1 const & x) CCTK_ATTRIBUTE_CONST { return x < 0 ? - x : x; } +// #endif +// #ifdef HAVE_CCTK_INT2 +// template<> inline CCTK_INT2 abs (CCTK_INT2 const & x) CCTK_ATTRIBUTE_CONST { return x < 0 ? - x : x; } +// #endif +// #ifdef HAVE_CCTK_INT4 +// template<> inline CCTK_INT4 abs (CCTK_INT4 const & x) CCTK_ATTRIBUTE_CONST { return x < 0 ? - x : x; } +// #endif +// #ifdef HAVE_CCTK_INT8 +// template<> inline CCTK_INT8 abs (CCTK_INT8 const & x) CCTK_ATTRIBUTE_CONST { return x < 0 ? - x : x; } +// #endif + +#ifdef HAVE_CCTK_COMPLEX8 + template<> inline CCTK_REAL4 abs (CCTK_COMPLEX8 const & x) CCTK_ATTRIBUTE_CONST; + template<> inline CCTK_REAL4 abs (CCTK_COMPLEX8 const & x) + { return CCTK_Cmplx8Abs (x); } +#endif +#ifdef HAVE_CCTK_COMPLEX16 + template<> inline CCTK_REAL8 abs (CCTK_COMPLEX16 const & x) CCTK_ATTRIBUTE_CONST; + template<> inline CCTK_REAL8 abs (CCTK_COMPLEX16 const & x) + { return CCTK_Cmplx16Abs (x); } +#endif +#ifdef HAVE_CCTK_COMPLEX32 + template<> inline CCTK_REAL16 abs (CCTK_COMPLEX32 const & x) CCTK_ATTRIBUTE_CONST; + template<> inline CCTK_REAL16 abs (CCTK_COMPLEX32 const & x) + { return CCTK_Cmplx32Abs (x); } #endif + // + // isnan + // + +#undef isnan + + // Default implementation, only good for integers + template + inline int isnan (T const & x) CCTK_ATTRIBUTE_CONST; + template + inline int isnan (T const & x) + { return 0; } + #ifdef HAVE_CCTK_REAL4 - inline CCTK_REAL4 abs (CCTK_REAL4 const & x) { return std::abs (x); } + template<> inline int isnan (CCTK_REAL4 const & x) CCTK_ATTRIBUTE_CONST; + template<> inline int isnan (CCTK_REAL4 const & x) + { return ::isnan (x); } #endif #ifdef HAVE_CCTK_REAL8 - inline CCTK_REAL8 abs (CCTK_REAL8 const & x) { return std::abs (x); } + template<> inline int isnan (CCTK_REAL8 const & x) CCTK_ATTRIBUTE_CONST; + template<> 
inline int isnan (CCTK_REAL8 const & x) + { return ::isnan (x); } #endif #ifdef HAVE_CCTK_REAL16 - inline CCTK_REAL16 abs (CCTK_REAL16 const & x) { return std::abs (x); } + template<> inline int isnan (CCTK_REAL16 const & x) CCTK_ATTRIBUTE_CONST; + template<> inline int isnan (CCTK_REAL16 const & x) + { return ::isnan (x); } #endif #ifdef HAVE_CCTK_COMPLEX8 - inline CCTK_REAL4 abs (CCTK_COMPLEX8 const & x) - { return CCTK_Cmplx8Abs (x); } + template<> inline int isnan (CCTK_COMPLEX8 const & x) CCTK_ATTRIBUTE_CONST; + template<> inline int isnan (CCTK_COMPLEX8 const & x) + { return ::isnan (CCTK_Cmplx8Real (x)) or ::isnan (CCTK_Cmplx8Imag (x)); } #endif #ifdef HAVE_CCTK_COMPLEX16 - inline CCTK_REAL8 abs (CCTK_COMPLEX16 const & x) - { return CCTK_Cmplx16Abs (x); } + template<> inline int isnan (CCTK_COMPLEX16 const & x) CCTK_ATTRIBUTE_CONST; + template<> inline int isnan (CCTK_COMPLEX16 const & x) + { return ::isnan (CCTK_Cmplx16Real (x)) or ::isnan (CCTK_Cmplx16Imag (x)); } #endif #ifdef HAVE_CCTK_COMPLEX32 - inline CCTK_REAL16 abs (CCTK_COMPLEX32 const & x) - { return CCTK_Cmplx32Abs (x); } + template<> inline int isnan (CCTK_COMPLEX32 const & x) CCTK_ATTRIBUTE_CONST; + template<> inline int isnan (CCTK_COMPLEX32 const & x) + { return ::isnan (CCTK_Cmplx32Real (x)) or std::isnan (CCTK_Cmplx32Imag (x)); } #endif } // namespace good @@ -254,32 +311,68 @@ namespace CarpetLib { // Container memory usage -inline size_t memoryof (char e) { return sizeof e; } -inline size_t memoryof (short e) { return sizeof e; } -inline size_t memoryof (int e) { return sizeof e; } -inline size_t memoryof (long e) { return sizeof e; } -inline size_t memoryof (long long e) { return sizeof e; } -inline size_t memoryof (unsigned char e) { return sizeof e; } -inline size_t memoryof (unsigned short e) { return sizeof e; } -inline size_t memoryof (unsigned int e) { return sizeof e; } -inline size_t memoryof (unsigned long e) { return sizeof e; } -inline size_t memoryof (unsigned long long e) { 
return sizeof e; } -inline size_t memoryof (float e) { return sizeof e; } -inline size_t memoryof (double e) { return sizeof e; } -inline size_t memoryof (long double e) { return sizeof e; } -inline size_t memoryof (void * e) { return sizeof e; } -template inline size_t memoryof (T * e) { return sizeof e; } -template inline size_t memoryof (T const * e) { return sizeof e; } -template size_t memoryof (list const & c); -template size_t memoryof (set const & c); -template size_t memoryof (stack const & c); -template size_t memoryof (vector const & c); +inline size_t memoryof (char const e) CCTK_ATTRIBUTE_CONST; +inline size_t memoryof (short const e) CCTK_ATTRIBUTE_CONST; +inline size_t memoryof (int const e) CCTK_ATTRIBUTE_CONST; +inline size_t memoryof (long const e) CCTK_ATTRIBUTE_CONST; +inline size_t memoryof (long long const e) CCTK_ATTRIBUTE_CONST; +inline size_t memoryof (unsigned char const e) CCTK_ATTRIBUTE_CONST; +inline size_t memoryof (unsigned short const e) CCTK_ATTRIBUTE_CONST; +inline size_t memoryof (unsigned int const e) CCTK_ATTRIBUTE_CONST; +inline size_t memoryof (unsigned long const e) CCTK_ATTRIBUTE_CONST; +inline size_t memoryof (unsigned long long const e) CCTK_ATTRIBUTE_CONST; +inline size_t memoryof (float const e) CCTK_ATTRIBUTE_CONST; +inline size_t memoryof (double const e) CCTK_ATTRIBUTE_CONST; +inline size_t memoryof (long double const e) CCTK_ATTRIBUTE_CONST; +inline size_t memoryof (void * const e) CCTK_ATTRIBUTE_CONST; +inline size_t memoryof (void const * const e) CCTK_ATTRIBUTE_CONST; +template inline size_t memoryof (T * const e) CCTK_ATTRIBUTE_CONST; +template inline size_t memoryof (T const * const e) CCTK_ATTRIBUTE_CONST; +template inline size_t memoryof (typename list::iterator const & i) CCTK_ATTRIBUTE_CONST; +template inline size_t memoryof (typename list::const_iterator const & i) CCTK_ATTRIBUTE_CONST; + +inline size_t memoryof (char const e) { return sizeof e; } +inline size_t memoryof (short const e) { return sizeof e; } 
+inline size_t memoryof (int const e) { return sizeof e; } +inline size_t memoryof (long const e) { return sizeof e; } +inline size_t memoryof (long long const e) { return sizeof e; } +inline size_t memoryof (unsigned char const e) { return sizeof e; } +inline size_t memoryof (unsigned short const e) { return sizeof e; } +inline size_t memoryof (unsigned int const e) { return sizeof e; } +inline size_t memoryof (unsigned long const e) { return sizeof e; } +inline size_t memoryof (unsigned long long const e) { return sizeof e; } +inline size_t memoryof (float const e) { return sizeof e; } +inline size_t memoryof (double const e) { return sizeof e; } +inline size_t memoryof (long double const e) { return sizeof e; } +inline size_t memoryof (void * const e) { return sizeof e; } +inline size_t memoryof (void const * const e) { return sizeof e; } +template inline size_t memoryof (T * const e) { return sizeof e; } +template inline size_t memoryof (T const * const e) { return sizeof e; } +template inline size_t memoryof (typename list::iterator const & i) { return sizeof i; } +template inline size_t memoryof (typename list::const_iterator const & i) { return sizeof i; } + +template size_t memoryof (list const & c) CCTK_ATTRIBUTE_PURE; +template size_t memoryof (set const & c) CCTK_ATTRIBUTE_PURE; +template size_t memoryof (stack const & c) CCTK_ATTRIBUTE_PURE; +template size_t memoryof (vector const & c) CCTK_ATTRIBUTE_PURE; // Container input +template istream& input (istream& is, list& l); +template istream& input (istream& is, set& s); template istream& input (istream& is, vector& v); +template +inline istream& operator>> (istream& is, list& l) { + return input(is,l); +} + +template +inline istream& operator>> (istream& is, set& s) { + return input(is,s); +} + template inline istream& operator>> (istream& is, vector& v) { return input(is,v); @@ -289,6 +382,8 @@ inline istream& operator>> (istream& is, vector& v) { // Container output template ostream& output (ostream& 
os, const list& l); +template ostream& output (ostream& os, const map& m); +template ostream& output (ostream& os, const pair& p); template ostream& output (ostream& os, const set& s); template ostream& output (ostream& os, const stack& s); template ostream& output (ostream& os, const vector& v); @@ -298,6 +393,11 @@ inline ostream& operator<< (ostream& os, const list& l) { return output(os,l); } +template +inline ostream& operator<< (ostream& os, const map& m) { + return output(os,m); +} + template inline ostream& operator<< (ostream& os, const set& s) { return output(os,s); diff --git a/Carpet/CarpetLib/src/dh.cc b/Carpet/CarpetLib/src/dh.cc index 046600c6b..36b3c310f 100644 --- a/Carpet/CarpetLib/src/dh.cc +++ b/Carpet/CarpetLib/src/dh.cc @@ -3,6 +3,9 @@ #include "cctk.h" #include "cctk_Parameters.h" +#include "CarpetTimers.hh" + +#include "mpi_string.hh" #include "bbox.hh" #include "bboxset.hh" #include "defs.hh" @@ -19,6 +22,10 @@ using namespace CarpetLib; +list dh::alldh; + + + // Constructors dh:: dh (gh & h_, @@ -31,12 +38,14 @@ dh (gh & h_, assert (all (all (ghost_width >= 0))); assert (all (all (buffer_width >= 0))); assert (prolongation_order_space >= 0); - h.add (this); + alldhi = alldh.insert(alldh.end(), this); + gh_handle = h.add (this); CHECKPOINT; - regrid (); + regrid (false); for (int rl = 0; rl < h.reflevels(); ++ rl) { recompose (rl, false); } + regrid_free (false); } @@ -46,7 +55,8 @@ dh:: ~dh () { CHECKPOINT; - h.remove (this); + h.erase (gh_handle); + alldh.erase(alldhi); } @@ -135,6 +145,15 @@ assert_error (char const * restrict const checkstring, there_was_an_error = true; } +#ifdef CARPET_OPTIMISE + +// For highest efficiency, omit all self-checks +#define ASSERT_rl(check, message) +#define ASSERT_c(check, message) +#define ASSERT_cc(check, message) + +#else + #define ASSERT_rl(check, message) \ do { \ if (not (check)) { \ @@ -156,41 +175,60 @@ assert_error (char const * restrict const checkstring, } \ } while (false) +#endif + void dh:: 
-regrid () +regrid (bool const do_init) { DECLARE_CCTK_PARAMETERS; + + static Carpet::Timer timer ("CarpetLib::dh::regrid"); + timer.start(); CHECKPOINT; - static Timer total ("dh::regrid"); + static Timer total ("CarpetLib::dh::regrid"); total.start (); - oldboxes.clear(); + mboxes oldboxes; swap (boxes, oldboxes); - fast_oldboxes.clear(); - swap (fast_boxes, fast_oldboxes); + + full_mboxes full_boxes; + + fast_boxes.clear(); + // cerr << "QQQ: regrid[1]" << endl; boxes.resize (h.mglevels()); + full_boxes.resize (h.mglevels()); fast_boxes.resize (h.mglevels()); for (int ml = 0; ml < h.mglevels(); ++ ml) { + // cerr << "QQQ: regrid[2] ml=" << ml << endl; boxes.AT(ml).resize (h.reflevels()); + full_boxes.AT(ml).resize (h.reflevels()); fast_boxes.AT(ml).resize (h.reflevels()); for (int rl = 0; rl < h.reflevels(); ++ rl) { + // cerr << "QQQ: regrid[3] rl=" << rl << endl; boxes.AT(ml).AT(rl).resize (h.components(rl)); - fast_boxes.AT(ml).AT(rl).resize (dist::size()); + full_boxes.AT(ml).AT(rl).resize (h.components(rl)); cboxes & level = boxes.AT(ml).AT(rl); - fast_cboxes & fast_level = fast_boxes.AT(ml).AT(rl); + full_cboxes & full_level = full_boxes.AT(ml).AT(rl); + fast_dboxes & fast_level = fast_boxes.AT(ml).AT(rl); + + vector fast_level_otherprocs (dist::size()); // Domain: + // cerr << "QQQ: regrid[a]" << endl; + + static Carpet::Timer timer_domain ("CarpetLib::dh::regrid::domain"); + timer_domain.start(); ibbox const & domain_exterior = h.baseextent(ml,rl); // Variables may have size zero @@ -211,17 +249,24 @@ regrid () ibset domain_boundary = domain_exterior - domain_active; domain_boundary.normalize(); + timer_domain.stop(); + + static Carpet::Timer timer_region ("CarpetLib::dh::regrid::region"); + timer_region.start(); + + // cerr << "QQQ: regrid[b]" << endl; for (int c = 0; c < h.components(rl); ++ c) { - dboxes & box = boxes.AT(ml).AT(rl).AT(c); + full_dboxes & box = full_level.AT(c); // Interior: ibbox & intr = box.interior; + intr = ibbox::poison(); // The 
interior of the grid has the extent as specified by the // regridding thorn @@ -237,10 +282,12 @@ regrid () "The interior must be contained in the domain"); // All interiors must be disjunct +#ifdef CARPET_DEBUG for (int cc = 0; cc < c; ++ cc) { - ASSERT_cc (not intr.intersects (level.AT(cc).interior), + ASSERT_cc (not intr.intersects (full_level.AT(cc).interior), "All interiors must be disjunct"); } +#endif @@ -261,6 +308,7 @@ regrid () // Exterior: ibbox & extr = box.exterior; + extr = ibbox::poison(); ASSERT_c (all (all (ghost_width >= 0)), "The gh ghost widths must not be negative"); @@ -280,6 +328,7 @@ regrid () // Cactus ghost zones (which include outer boundaries): ibset & ghosts = box.ghosts; + ghosts = ibset::poison(); ghosts = extr - intr; ghosts.normalize(); @@ -295,6 +344,7 @@ regrid () // Communicated region: ibbox & comm = box.communicated; + comm = ibbox::poison(); comm = extr.expand (i2vect (is_outer_boundary) * (- boundary_width)); @@ -313,6 +363,7 @@ regrid () // Outer boundary: ibset & outer_boundaries = box.outer_boundaries; + outer_boundaries = ibset::poison(); outer_boundaries = extr - comm; outer_boundaries.normalize(); @@ -327,6 +378,7 @@ regrid () // Owned region: ibbox & owned = box.owned; + owned = ibbox::poison(); owned = intr.expand (i2vect (is_outer_boundary) * (- boundary_width)); @@ -341,10 +393,12 @@ regrid () "The owned region must be contained in the active part of the domain"); // All owned regions must be disjunct +#ifdef CARPET_DEBUG for (int cc = 0; cc < c; ++ cc) { - ASSERT_cc (not owned.intersects (level.AT(cc).owned), + ASSERT_cc (not owned.intersects (full_level.AT(cc).owned), "All owned regions must be disjunct"); } +#endif @@ -352,6 +406,7 @@ regrid () // boundaries): ibset & boundaries = box.boundaries; + boundaries = ibset::poison(); boundaries = comm - owned; boundaries.normalize(); @@ -365,9 +420,15 @@ regrid () } // for c + timer_region.stop(); + // Conjunction of all buffer zones: + // cerr << "QQQ: regrid[c]" << 
endl; + + static Carpet::Timer timer_buffers ("CarpetLib::dh::regrid::buffers"); + timer_buffers.start(); // Enlarge active part of domain i2vect const safedist = i2vect (0); @@ -376,7 +437,7 @@ regrid () // All owned regions ibset allowned; for (int c = 0; c < h.components(rl); ++ c) { - dboxes const & box = boxes.AT(ml).AT(rl).AT(c); + full_dboxes const & box = full_level.AT(c); allowned += box.owned; } allowned.normalize(); @@ -409,20 +470,13 @@ regrid () for (int c = 0; c < h.components(rl); ++ c) { - - dboxes & box = boxes.AT(ml).AT(rl).AT(c); - - + full_dboxes & box = full_level.AT(c); // Buffer zones: - box.buffers = box.owned & allbuffers; box.buffers.normalize(); - - // Active region: - box.active = box.owned - box.buffers; box.active.normalize(); @@ -431,22 +485,29 @@ regrid () // The conjunction of all buffer zones must equal allbuffers + // cerr << "QQQ: regrid[d]" << endl; ibset allbuffers1; for (int c = 0; c < h.components(rl); ++ c) { - dboxes const & box = boxes.AT(ml).AT(rl).AT(c); + full_dboxes const & box = full_level.AT(c); allbuffers1 += box.buffers; } allbuffers1.normalize(); ASSERT_rl (allbuffers1 == allbuffers, "Buffer zone consistency check"); + timer_buffers.stop(); + // Test constituency relations: + // cerr << "QQQ: regrid[e]" << endl; + + static Carpet::Timer timer_test ("CarpetLib::dh::regrid::test"); + timer_test.start(); for (int c = 0; c < h.components(rl); ++ c) { - dboxes const & box = boxes.AT(ml).AT(rl).AT(c); + full_dboxes const & box = full_level.AT(c); ASSERT_c ((box.active & box.buffers).empty(), "Consistency check"); @@ -473,24 +534,36 @@ regrid () } // for c + timer_test.stop(); + // Communication schedule: + // cerr << "QQQ: regrid[4]" << endl; - for (int c = 0; c < h.components(rl); ++ c) { + static Carpet::Timer timer_comm ("CarpetLib::dh::regrid::comm"); + timer_comm.start(); + + for (int lc = 0; lc < h.local_components(rl); ++ lc) { + int const c = h.get_component (rl, lc); + // cerr << "QQQ: regrid[4a] lc=" << lc << 
" c=" << c << endl; - dboxes & box = boxes.AT(ml).AT(rl).AT(c); + full_dboxes & box = full_level.AT(c); // Multigrid restriction: + static Carpet::Timer timer_comm_mgrest + ("CarpetLib::dh::regrid::comm::mgrest"); + timer_comm_mgrest.start(); + if (ml > 0) { int const oml = ml - 1; // Multigrid restriction must fill all active points - dboxes const & obox = boxes.AT(oml).AT(rl).AT(c); + full_dboxes const & obox = full_boxes.AT(oml).AT(rl).AT(c); ibset needrecv = box.active; @@ -513,11 +586,8 @@ regrid () ibbox const send = recv.expanded_for (obox.interior); ASSERT_c (send <= obox.exterior, "Multigrid restriction: Send region must be contained in exterior"); - if (on_this_proc (rl, c)) { - int const p = dist::rank(); - fast_level.AT(p).fast_mg_rest_sendrecv.push_back - (sendrecv_pseudoregion_t (send, c, recv, c)); - } + fast_level.fast_mg_rest_sendrecv.push_back + (sendrecv_pseudoregion_t (send, c, recv, c)); } needrecv -= ovlp; @@ -529,9 +599,16 @@ regrid () } // if ml > 0 + timer_comm_mgrest.stop(); + // Multigrid prolongation: + // cerr << "QQQ: regrid[f]" << endl; + + static Carpet::Timer timer_comm_mgprol + ("CarpetLib::dh::regrid::comm::mprol"); + timer_comm_mgprol.start(); if (ml > 0) { int const oml = ml - 1; @@ -539,7 +616,7 @@ regrid () // Multigrid prolongation must fill all active points // (this could probably be relaxed) - dboxes const & obox = boxes.AT(oml).AT(rl).AT(c); + full_dboxes const & obox = full_boxes.AT(oml).AT(rl).AT(c); ibset oneedrecv = obox.active; @@ -565,11 +642,8 @@ regrid () recv.expanded_for (box.interior).expand (stencil_size); ASSERT_c (send <= box.exterior, "Multigrid prolongation: Send region must be contained in exterior"); - if (on_this_proc (rl, c)) { - int const p = dist::rank(); - fast_level.AT(p).fast_mg_prol_sendrecv.push_back - (sendrecv_pseudoregion_t (send, c, recv, c)); - } + fast_level.fast_mg_prol_sendrecv.push_back + (sendrecv_pseudoregion_t (send, c, recv, c)); } oneedrecv -= ovlp; @@ -581,9 +655,16 @@ regrid () } 
// if ml > 0 + timer_comm_mgprol.stop(); + // Refinement prolongation: + // cerr << "QQQ: regrid[g]" << endl; + + static Carpet::Timer timer_comm_refprol + ("CarpetLib::dh::regrid::comm::refprol"); + timer_comm_refprol.start(); if (rl > 0) { int const orl = rl - 1; @@ -600,7 +681,7 @@ regrid () i2vect (h.reffacts.at(rl) / h.reffacts.at(orl)); for (int cc = 0; cc < h.components(orl); ++ cc) { - dboxes const & obox = boxes.AT(ml).AT(orl).AT(cc); + full_dboxes const & obox = full_boxes.AT(ml).AT(orl).AT(cc); ibset contracted_oactive; for (ibset::const_iterator @@ -617,16 +698,19 @@ regrid () ovlp.normalize(); for (ibset::const_iterator - ri =ovlp.begin(); ri != ovlp.end(); ++ ri) + ri = ovlp.begin(); ri != ovlp.end(); ++ ri) { ibbox const & recv = * ri; ibbox const send = recv.expanded_for (obox.interior).expand (stencil_size); ASSERT_c (send <= obox.exterior, "Refinement prolongation: Send region must be contained in exterior"); - if (on_this_proc (rl, c) or on_this_proc (orl, cc)) { - int const p = dist::rank(); - fast_level.AT(p).fast_ref_prol_sendrecv.push_back + fast_level.fast_ref_prol_sendrecv.push_back + (sendrecv_pseudoregion_t (send, cc, recv, c)); + if (not on_this_proc (orl, cc)) { + fast_dboxes & fast_level_otherproc = + fast_level_otherprocs.AT(this_proc(orl, cc)); + fast_level_otherproc.fast_ref_prol_sendrecv.push_back (sendrecv_pseudoregion_t (send, cc, recv, c)); } } @@ -635,83 +719,120 @@ regrid () } // for cc - needrecv.normalize(); - // All points must have been received + needrecv.normalize(); ASSERT_c (needrecv.empty(), "Refinement prolongation: All points must have been received"); } // if rl > 0 + timer_comm_refprol.stop(); + // Synchronisation: + // cerr << "QQQ: regrid[h]" << endl; - // Synchronisation should fill as many boundary points as - // possible + static Carpet::Timer timer_comm_sync + ("CarpetLib::dh::regrid::comm::sync"); + timer_comm_sync.start(); + { + + // Synchronisation should fill as many boundary points as + // possible + 
#if 0 - // Outer boundaries are not synchronised, since they cannot be - // filled by boundary prolongation either, and therefore the - // user code must set them anyway. - ibset needrecv = box.boundaries; + // Outer boundaries are not synchronised, since they cannot + // be filled by boundary prolongation either, and therefore + // the user code must set them anyway. + ibset needrecv = box.boundaries; #else - // Outer boundaries are synchronised for backward - // compatibility. - ibset needrecv = box.ghosts; + // Outer boundaries are synchronised for backward + // compatibility. + ibset needrecv = box.ghosts; #endif - - ibset & sync = box.sync; - - for (int cc = 0; cc < h.components(rl); ++ cc) { - dboxes const & obox = boxes.AT(ml).AT(rl).AT(cc); + ibset const needrecv_orig = needrecv; + ibset & sync = box.sync; + + for (int cc = 0; cc < h.components(rl); ++ cc) { + full_dboxes const & obox = full_level.AT(cc); + #if 0 - ibset ovlp = needrecv & obox.owned; + ibset ovlp = needrecv & obox.owned; #else - ibset ovlp = needrecv & obox.interior; + ibset ovlp = needrecv & obox.interior; #endif - ovlp.normalize(); - - if (cc == c) { - ASSERT_cc (ovlp.empty(), - "A region may not synchronise from itself"); - } - - for (ibset::const_iterator - ri = ovlp.begin(); ri != ovlp.end(); ++ ri) - { - ibbox const & recv = * ri; - ibbox const & send = recv; - if (on_this_proc (rl, c) or on_this_proc (rl, cc)) { - int const p = dist::rank(); - fast_level.AT(p).fast_sync_sendrecv.push_back + ovlp.normalize(); + + if (cc == c) { + ASSERT_cc (ovlp.empty(), + "A region may not synchronise from itself"); + } + + for (ibset::const_iterator + ri = ovlp.begin(); ri != ovlp.end(); ++ ri) + { + ibbox const & recv = * ri; + ibbox const & send = recv; + fast_level.fast_sync_sendrecv.push_back (sendrecv_pseudoregion_t (send, cc, recv, c)); + if (not on_this_proc (rl, cc)) { + fast_dboxes & fast_level_otherproc = + fast_level_otherprocs.AT(this_proc(rl, cc)); + 
fast_level_otherproc.fast_sync_sendrecv.push_back + (sendrecv_pseudoregion_t (send, cc, recv, c)); + } } - } + + needrecv -= ovlp; + sync += ovlp; + + } // for cc - needrecv -= ovlp; - sync += ovlp; + sync.normalize(); - } // for cc + } - needrecv.normalize(); - sync.normalize(); + timer_comm_sync.stop(); // Boundary prolongation: + // cerr << "QQQ: regrid[i]" << endl; + + static Carpet::Timer timer_comm_refbndprol + ("CarpetLib::dh::regrid::comm::refbndprol"); + timer_comm_refbndprol.start(); if (rl > 0) { int const orl = rl - 1; +#if 0 + // Outer boundaries are not synchronised, since they cannot + // be filled by boundary prolongation either, and therefore + // the user code must set them anyway. + ibset needrecv = box.boundaries; +#else + // Outer boundaries are synchronised for backward + // compatibility. + ibset needrecv = box.ghosts; +#endif + + // Points which are synchronised need not be boundary + // prolongated + needrecv -= box.sync; + // Outer boundary points cannot be boundary prolongated needrecv &= box.communicated; // Prolongation must fill what cannot be synchronised, and // also all buffer zones needrecv += box.buffers; + needrecv.normalize(); + ibset const needrecv_orig = needrecv; ibset & bndref = box.bndref; @@ -721,9 +842,10 @@ regrid () "Refinement factors must be integer multiples of each other"); i2vect const reffact = i2vect (h.reffacts.at(rl) / h.reffacts.at(orl)); + ivect const reffact1 = h.reffacts.at(rl) / h.reffacts.at(orl); for (int cc = 0; cc < h.components(orl); ++ cc) { - dboxes const & obox = boxes.AT(ml).AT(orl).AT(cc); + full_dboxes const & obox = full_boxes.AT(ml).AT(orl).AT(cc); ibset contracted_oactive; for (ibset::const_iterator @@ -747,251 +869,522 @@ regrid () recv.expanded_for (obox.interior).expand (stencil_size); ASSERT_c (send <= obox.exterior, "Boundary prolongation: Send region must be contained in exterior"); - if (on_this_proc (rl, c) or on_this_proc (orl, cc)) { - int const p = dist::rank(); - 
fast_level.AT(p).fast_ref_bnd_prol_sendrecv.push_back + fast_level.fast_ref_bnd_prol_sendrecv.push_back + (sendrecv_pseudoregion_t (send, cc, recv, c)); + if (not on_this_proc (orl, cc)) { + fast_dboxes & fast_level_otherproc = + fast_level_otherprocs.AT(this_proc(orl, cc)); + fast_level_otherproc.fast_ref_bnd_prol_sendrecv.push_back (sendrecv_pseudoregion_t (send, cc, recv, c)); } } needrecv -= ovlp; bndref += ovlp; - + } // for cc - needrecv.normalize(); bndref.normalize(); + // All points must now have been received, either through + // synchronisation or through boundary prolongation + needrecv.normalize(); + ASSERT_c (needrecv.empty(), + "Synchronisation and boundary prolongation: All points must have been received"); + } // if rl > 0 - // All points must now have been received, either through - // synchronisation or through boundary prolongation - ASSERT_c (needrecv.empty(), - "Synchronisation and boundary prolongation: All points must have been received"); + timer_comm_refbndprol.stop(); - } // for c + } // for lc // Refinement restriction: + // cerr << "QQQ: regrid[j]" << endl; + + static Carpet::Timer timer_comm_refrest + ("CarpetLib::dh::regrid::comm::refrest"); + timer_comm_refrest.start(); if (rl > 0) { int const orl = rl - 1; - fast_cboxes & fast_olevel = fast_boxes.AT(ml).AT(orl); - - ibset needrecv; - for (int c = 0; c < h.components(rl); ++ c) { - dboxes const & box = boxes.AT(ml).AT(rl).AT(c); - dboxes const & obox0 = boxes.AT(ml).AT(orl).AT(0); - - // Refinement restriction may fill all active points, and - // must use all active points - - for (ibset::const_iterator - ai = box.active.begin(); ai != box.active.end(); ++ ai) - { - ibbox const & active = * ai; - needrecv += active.contracted_for (obox0.interior); - } - needrecv.normalize(); - } // for c + fast_dboxes & fast_olevel = fast_boxes.AT(ml).AT(orl); - for (int cc = 0; cc < h.components(orl); ++ cc) { - dboxes & obox = boxes.AT(ml).AT(orl).AT(cc); - - for (int c = 0; c < h.components(rl); 
++ c) { - dboxes const & box = boxes.AT(ml).AT(rl).AT(c); + if (h.components(orl) > 0) { + for (int lc = 0; lc < h.local_components(rl); ++ lc) { + int const c = h.get_component (rl, lc); + + full_dboxes const & box = full_level.AT(c); + full_dboxes const & obox0 = full_boxes.AT(ml).AT(orl).AT(0); + + // Refinement restriction may fill all active points, and + // must use all active points - ibset contracted_active; + ibset needrecv; for (ibset::const_iterator ai = box.active.begin(); ai != box.active.end(); ++ ai) { ibbox const & active = * ai; - contracted_active += active.contracted_for (obox.interior); + needrecv += active.contracted_for (obox0.interior); } - contracted_active.normalize(); + needrecv.normalize(); - ibset ovlp = obox.active & contracted_active; - ovlp.normalize(); - - for (ibset::const_iterator - ri =ovlp.begin(); ri != ovlp.end(); ++ ri) - { - ibbox const & recv = * ri; - ibbox const send = recv.expanded_for (box.interior); - ASSERT_c (send <= box.active, - "Refinement restriction: Send region must be contained in active part"); - if (on_this_proc (rl, c) or on_this_proc (orl, cc)) { - int const p = dist::rank(); - fast_olevel.AT(p).fast_ref_rest_sendrecv.push_back + for (int cc = 0; cc < h.components(orl); ++ cc) { + full_dboxes & obox = full_boxes.AT(ml).AT(orl).AT(cc); + + ibset contracted_active; + for (ibset::const_iterator + ai = box.active.begin(); ai != box.active.end(); ++ ai) + { + ibbox const & active = * ai; + contracted_active += active.contracted_for (obox0.interior); + } + contracted_active.normalize(); + + ibset ovlp = obox.active & contracted_active; + ovlp.normalize(); + + for (ibset::const_iterator + ri = ovlp.begin(); ri != ovlp.end(); ++ ri) + { + ibbox const & recv = * ri; + ibbox const send = recv.expanded_for (box.interior); + ASSERT_c (send <= box.active, + "Refinement restriction: Send region must be contained in active part"); + fast_olevel.fast_ref_rest_sendrecv.push_back (sendrecv_pseudoregion_t (send, c, recv, 
cc)); + if (not on_this_proc (orl, cc)) { + fast_dboxes & fast_level_otherproc = + fast_level_otherprocs.AT(this_proc(orl, cc)); + fast_level_otherproc.fast_ref_rest_sendrecv.push_back + (sendrecv_pseudoregion_t (send, c, recv, cc)); + } } - } - - needrecv -= ovlp; - } // for c - - } // for cc - - needrecv.normalize(); - - // All points must have been received - ASSERT_rl (needrecv.empty(), - "Refinement restriction: All points must have been received"); + needrecv -= ovlp; + + } // for cc + + // All points must have been received + needrecv.normalize(); + ASSERT_rl (needrecv.empty(), + "Refinement restriction: All points must have been received"); + + } // for lc + } // if orl not empty } // if rl > 0 + timer_comm_refrest.stop(); + + timer_comm.stop(); + // Regridding schedule: + // cerr << "QQQ: regrid[5]" << endl; - for (int c = 0; c < h.components(rl); ++ c) { - - dboxes & box = boxes.AT(ml).AT(rl).AT(c); - - ibset needrecv = box.active; + fast_level.do_init = do_init; + if (do_init) { + static Carpet::Timer timer_regrid ("CarpetLib::dh::regrid::regrid"); + timer_regrid.start(); + for (int lc = 0; lc < h.local_components(rl); ++ lc) { + int const c = h.get_component (rl, lc); + // cerr << "QQQ: regrid[5a] lc=" << lc << " c=" << c << endl; - // Synchronisation: - - if (int (oldboxes.size()) > ml and int (oldboxes.AT(ml).size()) > rl) { + full_dboxes & box = full_level.AT(c); - int const oldcomponents = oldboxes.AT(ml).AT(rl).size(); + ibset needrecv = box.active; - // Synchronisation copies from the same level of the old - // grid structure. 
It should fill as many active points as - // possible - for (int cc = 0; cc < oldcomponents; ++ cc) { - dboxes const & obox = oldboxes.AT(ml).AT(rl).AT(cc); + + // Synchronisation: + // cerr << "QQQ: regrid[k]" << endl; + + static Carpet::Timer timer_regrid_sync + ("CarpetLib::dh::regrid::regrid::sync"); + timer_regrid_sync.start(); + + if (int (oldboxes.size()) > ml and int (oldboxes.AT(ml).size()) > rl) + { - ibset ovlp = needrecv & obox.owned; - ovlp.normalize(); + int const oldcomponents = oldboxes.AT(ml).AT(rl).size(); - for (ibset::const_iterator - ri =ovlp.begin(); ri != ovlp.end(); ++ ri) - { - ibbox const & recv = * ri; - ibbox const & send = recv; - if (on_this_proc (rl, c) or on_this_oldproc (rl, cc)) { - int const p = dist::rank(); - fast_level.AT(p).fast_old2new_sync_sendrecv.push_back + // Synchronisation copies from the same level of the old + // grid structure. It should fill as many active points + // as possible. + + for (int cc = 0; cc < oldcomponents; ++ cc) { + dboxes const & obox = oldboxes.AT(ml).AT(rl).AT(cc); + + ibset ovlp = needrecv & obox.owned; + ovlp.normalize(); + + for (ibset::const_iterator + ri = ovlp.begin(); ri != ovlp.end(); ++ ri) + { + ibbox const & recv = * ri; + ibbox const & send = recv; + fast_level.fast_old2new_sync_sendrecv.push_back (sendrecv_pseudoregion_t (send, cc, recv, c)); + if (not on_this_oldproc (rl, cc)) { + fast_dboxes & fast_level_otherproc = + fast_level_otherprocs.AT(this_proc(rl, cc)); + fast_level_otherproc.fast_old2new_sync_sendrecv.push_back + (sendrecv_pseudoregion_t (send, cc, recv, c)); + } } - } + + needrecv -= ovlp; + + } // for cc - needrecv -= ovlp; + needrecv.normalize(); - } // for cc + } // if not oldboxes.empty - needrecv.normalize(); - - } // if not oldboxes.empty - - - - // Prolongation: - - if (rl > 0) { - int const orl = rl - 1; + timer_regrid_sync.stop(); - // Prolongation interpolates from the next coarser level of - // the new grid structure. 
It must fill what cannot be - // synchronised - - i2vect const stencil_size = i2vect (prolongation_stencil_size()); - ASSERT_c (all (h.reffacts.at(rl) % h.reffacts.at(orl) == 0), - "Refinement factors must be integer multiples of each other"); - i2vect const reffact = - i2vect (h.reffacts.at(rl) / h.reffacts.at(orl)); - for (int cc = 0; cc < h.components(orl); ++ cc) { - dboxes const & obox = boxes.AT(ml).AT(orl).AT(cc); + // Prolongation: + // cerr << "QQQ: regrid[l]" << endl; + + static Carpet::Timer timer_regrid_prolongate + ("CarpetLib::dh::regrid::regrid::prolongate"); + timer_regrid_prolongate.start(); + + if (rl > 0) { + int const orl = rl - 1; - ibset contracted_oactive; - for (ibset::const_iterator - ai = obox.active.begin(); ai != obox.active.end(); ++ ai) - { - ibbox const & oactive = * ai; - // untested for cell centering - contracted_oactive += - oactive.contracted_for (box.interior).expand (reffact); - } - contracted_oactive.normalize(); + // Prolongation interpolates from the next coarser level + // of the new grid structure. It must fill what cannot be + // synchronised. 
- ibset ovlp = needrecv & contracted_oactive; - ovlp.normalize(); + i2vect const stencil_size = i2vect (prolongation_stencil_size()); - for (ibset::const_iterator - ri = ovlp.begin(); ri != ovlp.end(); ++ ri) - { - ibbox const & recv = * ri; - ibbox const send = - recv.expanded_for (obox.interior).expand (stencil_size); - ASSERT_c (send <= obox.exterior, - "Regridding prolongation: Send region must be contained in exterior"); - if (on_this_proc (rl, c) or on_this_proc (orl, cc)) { - int const p = dist::rank(); - fast_level.AT(p).fast_old2new_ref_prol_sendrecv.push_back + ASSERT_c (all (h.reffacts.at(rl) % h.reffacts.at(orl) == 0), + "Refinement factors must be integer multiples of each other"); + i2vect const reffact = + i2vect (h.reffacts.at(rl) / h.reffacts.at(orl)); + + for (int cc = 0; cc < h.components(orl); ++ cc) { + full_dboxes const & obox = full_boxes.AT(ml).AT(orl).AT(cc); + + ibset contracted_oactive; + for (ibset::const_iterator + ai = obox.active.begin(); ai != obox.active.end(); ++ ai) + { + ibbox const & oactive = * ai; + // untested for cell centering + contracted_oactive += + oactive.contracted_for (box.interior).expand (reffact); + } + contracted_oactive.normalize(); + + ibset ovlp = needrecv & contracted_oactive; + ovlp.normalize(); + + for (ibset::const_iterator + ri = ovlp.begin(); ri != ovlp.end(); ++ ri) + { + ibbox const & recv = * ri; + ibbox const send = + recv.expanded_for (obox.interior).expand (stencil_size); + ASSERT_c (send <= obox.exterior, + "Regridding prolongation: Send region must be contained in exterior"); + fast_level.fast_old2new_ref_prol_sendrecv.push_back (sendrecv_pseudoregion_t (send, cc, recv, c)); + if (not on_this_proc (orl, cc)) { + fast_dboxes & fast_level_otherproc = + fast_level_otherprocs.AT(this_proc(orl, cc)); + fast_level_otherproc.fast_old2new_ref_prol_sendrecv. 
+ push_back (sendrecv_pseudoregion_t (send, cc, recv, c)); + } } - } + + needrecv -= ovlp; + + } // for cc - needrecv -= ovlp; + needrecv.normalize(); - } // for cc + } // if rl > 0 - needrecv.normalize(); + if (int (oldboxes.size()) > ml and int (oldboxes.AT(ml).size()) > 0) { + // All points must now have been received, either through + // synchronisation or through prolongation + ASSERT_c (needrecv.empty(), + "Regridding prolongation: All points must have been received"); + } - } // if rl > 0 + timer_regrid_prolongate.stop(); + + } // for lc - if (int (oldboxes.size()) > ml and int (oldboxes.AT(ml).size()) > 0) { - // All points must now have been received, either through - // synchronisation or through prolongation - ASSERT_c (needrecv.empty(), - "Regridding prolongation: All points must have been received"); + timer_regrid.stop(); + + } // if do_init + + + + // cerr << "QQQ: regrid[6]" << endl; + for (int lc = 0; lc < h.local_components(rl); ++ lc) { + int const c = h.get_component (rl, lc); + + level.AT(c).exterior = full_level.AT(c).exterior; + level.AT(c).owned = full_level.AT(c).owned; + level.AT(c).interior = full_level.AT(c).interior; + + level.AT(c).exterior_size = full_level.AT(c).exterior.size(); + level.AT(c).owned_size = full_level.AT(c).owned.size(); + level.AT(c).active_size = full_level.AT(c).active.size(); + + } // for lc + + + + // Broadcast grid structure and communication schedule + // cerr << "QQQ: regrid[7]" << endl; + + { + + static Carpet::Timer timer_bcast_boxes + ("CarpetLib::dh::regrid::bcast_boxes"); + timer_bcast_boxes.start(); + + int const count_send = h.local_components(rl); + vector level_send (count_send); + for (int lc = 0; lc < h.local_components(rl); ++ lc) { + int const c = h.get_component (rl, lc); + level_send.AT(lc) = level.AT(c); + } + // cerr << "QQQ: regrid[7a]" << endl; + vector > const level_recv = + allgatherv (dist::comm(), level_send); + // cerr << "QQQ: regrid[7b]" << endl; + vector count_recv (dist::size(), 0); 
+ for (int c = 0; c < h.components(rl); ++ c) { + int const p = this_proc (rl, c); + if (p != dist::rank()) { + level.AT(c) = level_recv.AT(p).AT(count_recv.AT(p)); + ++ count_recv.AT(p); + } + } + for (int p = 0; p < dist::size(); ++ p) { + if (p != dist::rank()) { + assert (count_recv.AT(p) == int(level_recv.AT(p).size())); + } } + // cerr << "QQQ: regrid[7c]" << endl; - } // for c + timer_bcast_boxes.stop(); + + } - } // for rl - } // for m - - - - // Output: - if (output_bboxes or there_was_an_error) { - - for (int ml = 0; ml < h.mglevels(); ++ ml) { - for (int rl = 0; rl < h.reflevels(); ++ rl) { + { + + static Carpet::Timer timer_bcast_comm + ("CarpetLib::dh::regrid::bcast_comm"); + timer_bcast_comm.start(); + + static Carpet::Timer timer_bcast_comm_ref_prol + ("CarpetLib::dh::regrid::bcast_comm::ref_prol"); + timer_bcast_comm_ref_prol.start(); + broadcast_schedule (fast_level_otherprocs, fast_level, + & fast_dboxes::fast_ref_prol_sendrecv); + timer_bcast_comm_ref_prol.stop(); + + static Carpet::Timer timer_bcast_comm_sync + ("CarpetLib::dh::regrid::bcast_comm::sync"); + timer_bcast_comm_sync.start(); + broadcast_schedule (fast_level_otherprocs, fast_level, + & fast_dboxes::fast_sync_sendrecv); + timer_bcast_comm_sync.stop(); + + static Carpet::Timer timer_bcast_comm_ref_bnd_prol + ("CarpetLib::dh::regrid::bcast_comm::ref_bnd_prol"); + timer_bcast_comm_ref_bnd_prol.start(); + broadcast_schedule (fast_level_otherprocs, fast_level, + & fast_dboxes::fast_ref_bnd_prol_sendrecv); + timer_bcast_comm_ref_bnd_prol.stop(); + + if (rl > 0) { + int const orl = rl - 1; + fast_dboxes & fast_olevel = fast_boxes.AT(ml).AT(orl); + static Carpet::Timer timer_bcast_comm_ref_rest + ("CarpetLib::dh::regrid::bcast_comm::ref_rest"); + timer_bcast_comm_ref_rest.start(); + broadcast_schedule (fast_level_otherprocs, fast_olevel, + & fast_dboxes::fast_ref_rest_sendrecv); + timer_bcast_comm_ref_rest.stop(); + } + + // TODO: Maybe broadcast old2new schedule only if do_init is + // set + 
static Carpet::Timer timer_bcast_comm_old2new_sync + ("CarpetLib::dh::regrid::bcast_comm::old2new_sync"); + timer_bcast_comm_old2new_sync.start(); + broadcast_schedule (fast_level_otherprocs, fast_level, + & fast_dboxes::fast_old2new_sync_sendrecv); + timer_bcast_comm_old2new_sync.stop(); + + static Carpet::Timer timer_bcast_comm_old2new_ref_prol + ("CarpetLib::dh::regrid::bcast_comm::old2new_ref_prol"); + timer_bcast_comm_old2new_ref_prol.start(); + broadcast_schedule (fast_level_otherprocs, fast_level, + & fast_dboxes::fast_old2new_ref_prol_sendrecv); + timer_bcast_comm_old2new_ref_prol.stop(); + + timer_bcast_comm.stop(); + + } + + + + // Output: + if (output_bboxes or there_was_an_error) { + for (int c = 0; c < h.components(rl); ++ c) { - dboxes const & box = boxes.AT(ml).AT(rl).AT(c); - fast_dboxes const & fast_box = fast_boxes.AT(ml).AT(rl).AT(c); + full_dboxes const & box = full_boxes.AT(ml).AT(rl).AT(c); cout << eol; cout << "ml=" << ml << " rl=" << rl << " c=" << c << eol; cout << box; - cout << fast_box; - cout << endl; } // for c - } // for rl - } // for m + + fast_dboxes const & fast_box = fast_boxes.AT(ml).AT(rl); + + cout << eol; + cout << "ml=" << ml << " rl=" << rl << eol; + cout << fast_box; + + } // if output_bboxes + + + + // Free memory early to save space + if (int (oldboxes.size()) > ml and int (oldboxes.AT(ml).size()) > rl) { + oldboxes.AT(ml).AT(rl).clear(); + } + + if (ml > 0) { + if (rl > 0) { + full_boxes.AT(ml-1).AT(rl-1).clear(); + } + if (rl == h.reflevels()-1) { + full_boxes.AT(ml-1).AT(rl).clear(); + } + } + if (ml == h.mglevels()-1) { + if (rl > 0) { + full_boxes.AT(ml).AT(rl-1).clear(); + } + if (rl == h.reflevels()-1) { + full_boxes.AT(ml).AT(rl).clear(); + } + } + + } // for rl + + if (ml > 0) { + full_boxes.AT(ml-1).clear(); + } + if (ml == h.mglevels()-1) { + full_boxes.AT(ml).clear(); + } + + } // for ml + + + + // Output: + if (output_bboxes or there_was_an_error) { + + cout << eol; + cout << "memoryof(gh)=" << memoryof(h) << 
eol; + cout << "memoryof(dh)=" << memoryof(*this) << eol; + cout << "memoryof(dh.boxes)=" << memoryof(boxes) << eol; + cout << "memoryof(dh.fast_boxes)=" << memoryof(fast_boxes) << eol; + int gfcount = 0; + size_t gfmemory = 0; + for (list::const_iterator + gfi = gfs.begin(); gfi != gfs.end(); ++ gfi) + { + ++ gfcount; + gfmemory += memoryof(**gfi); + } + cout << "#gfs=" << gfcount << eol; + cout << "memoryof(gfs)=" << gfmemory << eol; } // if output_bboxes if (there_was_an_error) { CCTK_WARN (CCTK_WARN_ABORT, - "The grid structure is inconsistent. " - "It is impossible to continue."); + "The grid structure is inconsistent. It is impossible to continue."); } total.stop (0); + timer.stop(); +} + + + +void +dh:: +broadcast_schedule (vector & fast_level_otherprocs, + fast_dboxes & fast_level, + srpvect fast_dboxes::* const schedule_item) +{ + // cerr << "QQQ: broadcast_schedule[1]" << endl; + static Carpet::Timer timer_bs1 ("CarpetLib::dh::bs1"); + timer_bs1.start(); + vector send (dist::size()); + for (int p=0; p=0 and rl::iterator f=gfs.begin(); f!=gfs.end(); ++f) { @@ -1017,11 +1410,21 @@ recompose (int const rl, bool const do_prolongate) for (list::iterator f=gfs.begin(); f!=gfs.end(); ++f) { (*f)->recompose_allocate (rl); } +#warning "TODO: If this works, rename do_prolongate to do_init here, and remove the do_prolongate parameter from ggf::recompose_fill" +#if 0 for (comm_state state; not state.done(); state.step()) { for (list::iterator f=gfs.begin(); f!=gfs.end(); ++f) { (*f)->recompose_fill (state, rl, do_prolongate); } } +#endif + if (do_prolongate) { + for (comm_state state; not state.done(); state.step()) { + for (list::iterator f=gfs.begin(); f!=gfs.end(); ++f) { + (*f)->recompose_fill (state, rl, true); + } + } + } for (list::iterator f=gfs.begin(); f!=gfs.end(); ++f) { (*f)->recompose_free_old (rl); } @@ -1030,33 +1433,144 @@ recompose (int const rl, bool const do_prolongate) // but requires less memory. This is the default. 
for (list::iterator f=gfs.begin(); f!=gfs.end(); ++f) { (*f)->recompose_allocate (rl); +#if 0 for (comm_state state; not state.done(); state.step()) { (*f)->recompose_fill (state, rl, do_prolongate); } +#endif + if (do_prolongate) { + for (comm_state state; not state.done(); state.step()) { + (*f)->recompose_fill (state, rl, true); + } + } (*f)->recompose_free_old (rl); } } - timer.stop (0); + timer.stop (); } // Grid function management -void +dh::ggf_handle dh:: add (ggf * const f) { CHECKPOINT; - gfs.push_back (f); + return gfs.insert (gfs.end(), f); } void dh:: -remove (ggf * const f) +erase (ggf_handle const fi) { CHECKPOINT; - gfs.remove (f); + gfs.erase (fi); +} + + + +// Equality + +bool +dh::full_dboxes:: +operator== (full_dboxes const & b) const +{ + return + exterior == b.exterior and + all(all(is_outer_boundary == b.is_outer_boundary)) and + outer_boundaries == b.outer_boundaries and + communicated == b.communicated and + boundaries == b.boundaries and + owned == b.owned and + buffers == b.buffers and + active == b.active and + sync == b.sync and + bndref == b.bndref and + ghosts == b.ghosts and + interior == b.interior; +} + + + +// MPI datatypes + +MPI_Datatype +mpi_datatype (dh::dboxes const &) +{ + static bool initialised = false; + static MPI_Datatype newtype; + if (not initialised) { + static dh::dboxes s; +#define ENTRY(type, name) \ + { \ + sizeof s.name / sizeof(type), /* count elements */ \ + (char*)&s.name - (char*)&s, /* offsetof doesn't work (why?) 
*/ \ + dist::mpi_datatype(), /* find MPI datatype */ \ + STRINGIFY(name), /* field name */ \ + STRINGIFY(type), /* type name */ \ + } + dist::mpi_struct_descr_t const descr[] = { + ENTRY(int, exterior), + ENTRY(int, owned), + ENTRY(int, interior), + ENTRY(dh::dboxes::size_type, exterior_size), + ENTRY(dh::dboxes::size_type, owned_size), + ENTRY(dh::dboxes::size_type, active_size), + {1, sizeof s, MPI_UB, "MPI_UB", "MPI_UB"} + }; +#undef ENTRY + newtype = + dist::create_mpi_datatype (sizeof descr / sizeof descr[0], descr, + "dh::dboxes", sizeof s); +#if 0 + int type_size; + MPI_Type_size (newtype, & type_size); + assert (type_size <= sizeof s); + MPI_Aint type_lb, type_ub; + MPI_Type_lb (newtype, & type_lb); + MPI_Type_ub (newtype, & type_ub); + assert (type_ub - type_lb == sizeof s); +#endif + initialised = true; + } + return newtype; +} + +MPI_Datatype +mpi_datatype (dh::fast_dboxes const &) +{ + static bool initialised = false; + static MPI_Datatype newtype; + if (not initialised) { + static dh::fast_dboxes s; +#define ENTRY(type, name) \ + { \ + sizeof s.name / sizeof(type), /* count elements */ \ + (char*)&s.name - (char*)&s, /* offsetof doesn't work (why?) 
*/ \ + dist::mpi_datatype(), /* find MPI datatype */ \ + STRINGIFY(name), /* field name */ \ + STRINGIFY(type), /* type name */ \ + } + dist::mpi_struct_descr_t const descr[] = { + ENTRY (dh::srpvect, fast_mg_rest_sendrecv), + ENTRY (dh::srpvect, fast_mg_prol_sendrecv), + ENTRY (dh::srpvect, fast_ref_prol_sendrecv), + ENTRY (dh::srpvect, fast_ref_rest_sendrecv), + ENTRY (dh::srpvect, fast_sync_sendrecv), + ENTRY (dh::srpvect, fast_ref_bnd_prol_sendrecv), + ENTRY (dh::srpvect, fast_old2new_sync_sendrecv), + ENTRY (dh::srpvect, fast_old2new_ref_prol_sendrecv), + {1, sizeof s, MPI_UB, "MPI_UB", "MPI_UB"} + }; +#undef ENTRY + newtype = + dist::create_mpi_datatype (sizeof descr / sizeof descr[0], descr, + "dh::fast_dboxes", sizeof s); + initialised = true; + } + return newtype; } @@ -1069,17 +1583,43 @@ memory () const { return + sizeof alldhi + // memoryof (alldhi) + + sizeof & h + // memoryof (& h) + + sizeof gh_handle + // memoryof (gh_handle) + memoryof (ghost_width) + memoryof (buffer_width) + memoryof (prolongation_order_space) + memoryof (boxes) + memoryof (fast_boxes) + - memoryof (fast_oldboxes) + memoryof (gfs); } +size_t +dh:: +allmemory () +{ + size_t mem = memoryof(alldh); + for (list::const_iterator + dhi = alldh.begin(); dhi != alldh.end(); ++ dhi) + { + mem += memoryof(**dhi); + } + return mem; +} + size_t dh::dboxes:: +memory () + const +{ + return + memoryof (exterior) + + memoryof (owned) + + memoryof (interior); +} + +size_t +dh::full_dboxes:: memory () const { @@ -1116,6 +1656,135 @@ memory () +// Input + +istream & +dh::dboxes:: +input (istream & is) +{ + // Regions: + try { + skipws (is); + consume (is, "dh::dboxes:{"); + skipws (is); + consume (is, "exterior:"); + is >> exterior; + exterior_size = exterior.size(); + skipws (is); + consume (is, "owned:"); + is >> owned; + owned_size = owned.size(); + skipws (is); + consume (is, "interior:"); + is >> interior; + skipws (is); + consume (is, "active_size:"); + is >> active_size; + skipws (is); + 
consume (is, "}"); + } catch (input_error & err) { + cout << "Input error while reading a dh::full_dboxes" << endl; + throw err; + } + return is; +} + +istream & +dh::full_dboxes:: +input (istream & is) +{ + // Regions: + try { + skipws (is); + consume (is, "dh::full_dboxes:{"); + skipws (is); + consume (is, "exterior:"); + is >> exterior; + skipws (is); + consume (is, "is_outer_boundary:"); + is >> is_outer_boundary; + skipws (is); + consume (is, "outer_boundaries:"); + is >> outer_boundaries; + skipws (is); + consume (is, "communicated:"); + is >> communicated; + skipws (is); + consume (is, "boundaries:"); + is >> boundaries; + skipws (is); + consume (is, "owned:"); + is >> owned; + skipws (is); + consume (is, "buffers:"); + is >> buffers; + skipws (is); + consume (is, "active:"); + is >> active; + skipws (is); + consume (is, "sync:"); + is >> sync; + skipws (is); + consume (is, "bndref:"); + is >> bndref; + skipws (is); + consume (is, "ghosts:"); + is >> ghosts; + skipws (is); + consume (is, "interior:"); + is >> interior; + skipws (is); + consume (is, "}"); + } catch (input_error & err) { + cout << "Input error while reading a dh::full_dboxes" << endl; + throw err; + } + return is; +} + +istream & +dh::fast_dboxes:: +input (istream & is) +{ + // Communication schedule: + try { + skipws (is); + consume (is, "dh::fast_dboxes:{"); + skipws (is); + consume (is, "fast_mg_rest_sendrecv:"); + is >> fast_mg_rest_sendrecv; + skipws (is); + consume (is, "fast_mg_prol_sendrecv:"); + is >> fast_mg_prol_sendrecv; + skipws (is); + consume (is, "fast_ref_prol_sendrecv:"); + is >> fast_ref_prol_sendrecv; + skipws (is); + consume (is, "fast_ref_rest_sendrecv:"); + is >> fast_ref_rest_sendrecv; + skipws (is); + consume (is, "fast_sync_sendrecv:"); + is >> fast_sync_sendrecv; + skipws (is); + consume (is, "fast_ref_bnd_prol_sendrecv:"); + is >> fast_ref_bnd_prol_sendrecv; + skipws (is); + consume (is, "fast_old2new_sync_sendrecv:"); + is >> fast_old2new_sync_sendrecv; + skipws 
(is); + consume (is, "fast_old2new_ref_prol_sendrecv:"); + is >> fast_old2new_ref_prol_sendrecv; + skipws (is); + consume (is, "}"); + } catch (input_error & err) { + cout << "Input error while reading a dh::fast_dboxes" << endl; + throw err; + } + return is; +} + + + // Output ostream & @@ -1149,19 +1818,35 @@ output (ostream & os) const { // Regions: - os << "dh::dboxes:" << eol; - os << "exterior:" << exterior << eol; - os << "is_outer_boundary:" << is_outer_boundary << eol; - os << "outer_boundaries:" << outer_boundaries << eol; - os << "communicated:" << communicated << eol; - os << "boundaries:" << boundaries << eol; - os << "owned:" << owned << eol; - os << "buffers:" << buffers << eol; - os << "active:" << active << eol; - os << "sync:" << sync << eol; - os << "bndref:" << bndref << eol; - os << "ghosts:" << ghosts << eol; - os << "interior:" << interior << eol; + os << "dh::dboxes:{" << eol + << " exterior: " << exterior << eol + << " owned: " << owned << eol + << " interior: " << interior << eol + << " active_size: " << active_size << eol + << "}" << eol; + return os; +} + +ostream & +dh::full_dboxes:: +output (ostream & os) + const +{ + // Regions: + os << "dh::full_dboxes:{" << eol + << " exterior: " << exterior << eol + << " is_outer_boundary: " << is_outer_boundary << eol + << " outer_boundaries: " << outer_boundaries << eol + << " communicated: " << communicated << eol + << " boundaries: " << boundaries << eol + << " owned: " << owned << eol + << " buffers: " << buffers << eol + << " active: " << active << eol + << " sync: " << sync << eol + << " bndref: " << bndref << eol + << " ghosts: " << ghosts << eol + << " interior: " << interior << eol + << "}" << eol; return os; } @@ -1171,14 +1856,15 @@ output (ostream & os) const { // Communication schedule: - os << "dh::fast_dboxes:" << eol; - os << "fast_mg_rest_sendrecv: " << fast_mg_rest_sendrecv << eol; - os << "fast_mg_prol_sendrecv: " << fast_mg_prol_sendrecv << eol; - os << "fast_ref_prol_sendrecv: 
" << fast_ref_prol_sendrecv << eol; - os << "fast_ref_rest_sendrecv: " << fast_ref_rest_sendrecv << eol; - os << "fast_sync_sendrecv: " << fast_sync_sendrecv << eol; - os << "fast_ref_bnd_prol_sendrecv: " << fast_ref_bnd_prol_sendrecv << eol; - os << "fast_old2new_sync_sendrecv:" << fast_old2new_sync_sendrecv << eol; - os << "fast_old2new_ref_prol_sendrecv:" << fast_old2new_ref_prol_sendrecv << eol; + os << "dh::fast_dboxes:{" << eol + << " fast_mg_rest_sendrecv: " << fast_mg_rest_sendrecv << eol + << " fast_mg_prol_sendrecv: " << fast_mg_prol_sendrecv << eol + << " fast_ref_prol_sendrecv: " << fast_ref_prol_sendrecv << eol + << " fast_ref_rest_sendrecv: " << fast_ref_rest_sendrecv << eol + << " fast_sync_sendrecv: " << fast_sync_sendrecv << eol + << " fast_ref_bnd_prol_sendrecv: " << fast_ref_bnd_prol_sendrecv << eol + << " fast_old2new_sync_sendrecv: " << fast_old2new_sync_sendrecv << eol + << " fast_old2new_ref_prol_sendrecv: " << fast_old2new_ref_prol_sendrecv << eol + << "}" << eol; return os; } diff --git a/Carpet/CarpetLib/src/dh.hh b/Carpet/CarpetLib/src/dh.hh index 078e0b725..93a29f83b 100644 --- a/Carpet/CarpetLib/src/dh.hh +++ b/Carpet/CarpetLib/src/dh.hh @@ -24,9 +24,13 @@ class ggf; class dh; + // A data hierarchy (grid hierarchy plus ghost zones) class dh { + static list alldh; + list::iterator alldhi; + // Types public: typedef list iblist; @@ -41,6 +45,23 @@ public: // Region description: + ibbox exterior; // whole region (including boundaries) + ibbox owned; // evolved in time + ibbox interior; // interior (without ghost zones) + + // Region statistics: + typedef ibbox::size_type size_type; + size_type exterior_size, owned_size, active_size; + + size_t memory () const CCTK_ATTRIBUTE_PURE; + istream & input (istream & is); + ostream & output (ostream & os) const; + }; + + struct full_dboxes { + + // Complete region description: + ibbox exterior; // whole region (including boundaries) b2vect is_outer_boundary; @@ -61,7 +82,14 @@ public: ibset ghosts; 
// ghost zones, as seen from Cactus ibbox interior; // interior (without ghost zones) - size_t memory () const; + bool operator== (full_dboxes const & b) const; + bool operator!= (full_dboxes const & b) const + { + return not operator==(b); + } + + size_t memory () const CCTK_ATTRIBUTE_PURE; + istream & input (istream& is); ostream & output (ostream & os) const; }; @@ -78,10 +106,19 @@ public: // Regridding schedule: + bool do_init; // the srpvects below are only defined + // if this is true srpvect fast_old2new_sync_sendrecv; srpvect fast_old2new_ref_prol_sendrecv; - size_t memory () const; + bool operator== (fast_dboxes const & b) const CCTK_ATTRIBUTE_PURE; + bool operator!= (fast_dboxes const & b) const + { + return not operator==(b); + } + + size_t memory () const CCTK_ATTRIBUTE_PURE; + istream & input (istream & is); ostream & output (ostream & os) const; }; @@ -91,8 +128,11 @@ private: typedef vector rboxes; // ... for each refinement level typedef vector mboxes; // ... for each multigrid level - typedef vector fast_cboxes; // ... for each component - typedef vector fast_rboxes; // ... for each refinement level + typedef vector full_cboxes; // ... for each component + typedef vector full_rboxes; // ... for each refinement level + typedef vector full_mboxes; // ... for each multigrid level + + typedef vector fast_rboxes; // ... for each refinement level typedef vector fast_mboxes; // ... 
for each multigrid level @@ -104,16 +144,17 @@ public: // should be readonly // Fields gh & h; // hierarchy + gh::dh_handle gh_handle; + i2vect ghost_width; // number of ghost zones i2vect buffer_width; // number of buffer zones int prolongation_order_space; // order of spatial prolongation operator mboxes boxes; // grid hierarchy - mboxes oldboxes; // old grid hierarchy, used during regridding fast_mboxes fast_boxes; // grid hierarchy - fast_mboxes fast_oldboxes; + typedef list::iterator ggf_handle; list gfs; // list of all grid functions public: @@ -127,51 +168,101 @@ public: ~dh (); // Helpers - int prolongation_stencil_size () const; + int prolongation_stencil_size () const CCTK_ATTRIBUTE_CONST; // Modifiers - void regrid (); + void regrid (bool do_init); + void regrid_free (bool do_init); void recompose (int rl, bool do_prolongate); private: - int this_proc (int rl, int c) const; - bool on_this_proc (int rl, int c) const; - bool on_this_proc (int rl, int c, int cc) const; - int this_oldproc (int rl, int c) const; - bool on_this_oldproc (int rl, int c) const; + int this_proc (int rl, int c) const CCTK_ATTRIBUTE_PURE; + bool on_this_proc (int rl, int c) const CCTK_ATTRIBUTE_PURE; + bool on_this_proc (int rl, int c, int cc) const CCTK_ATTRIBUTE_PURE; + int this_oldproc (int rl, int c) const CCTK_ATTRIBUTE_PURE; + bool on_this_oldproc (int rl, int c) const CCTK_ATTRIBUTE_PURE; + + static + void + broadcast_schedule (vector & fast_level_otherprocs, + fast_dboxes & fast_level, + srpvect fast_dboxes::* const schedule_item); public: // Grid function management - void add (ggf * f); - void remove (ggf * f); + ggf_handle add (ggf * f); + void erase (ggf_handle fi); // Output - size_t memory () const; + size_t memory () const CCTK_ATTRIBUTE_PURE; + static size_t allmemory () CCTK_ATTRIBUTE_PURE; ostream & output (ostream & os) const; }; +MPI_Datatype mpi_datatype (dh::dboxes const &) CCTK_ATTRIBUTE_CONST; +MPI_Datatype mpi_datatype (dh::fast_dboxes const &); +namespace 
dist { + template<> inline MPI_Datatype mpi_datatype () + CCTK_ATTRIBUTE_CONST; + template<> inline MPI_Datatype mpi_datatype () + { dh::dboxes dummy; return mpi_datatype(dummy); } + template<> inline MPI_Datatype mpi_datatype () + CCTK_ATTRIBUTE_CONST; + template<> inline MPI_Datatype mpi_datatype () + { dh::fast_dboxes dummy; return mpi_datatype(dummy); } +} + +inline size_t memoryof (dh::dboxes const & b) CCTK_ATTRIBUTE_PURE; inline size_t memoryof (dh::dboxes const & b) { return b.memory (); } +inline size_t memoryof (dh::full_dboxes const & b) CCTK_ATTRIBUTE_PURE; +inline size_t memoryof (dh::full_dboxes const & b) +{ + return b.memory (); +} + +inline size_t memoryof (dh::fast_dboxes const & b) CCTK_ATTRIBUTE_PURE; inline size_t memoryof (dh::fast_dboxes const & b) { return b.memory (); } +inline size_t memoryof (dh const & d) CCTK_ATTRIBUTE_PURE; inline size_t memoryof (dh const & d) { return d.memory (); } +inline istream & operator>> (istream & is, dh::dboxes & b) +{ + return b.input (is); +} + +inline istream & operator>> (istream & is, dh::full_dboxes & b) +{ + return b.input (is); +} + +inline istream & operator>> (istream & is, dh::fast_dboxes & b) +{ + return b.input (is); +} + inline ostream & operator<< (ostream & os, dh::dboxes const & b) { return b.output (os); } +inline ostream & operator<< (ostream & os, dh::full_dboxes const & b) +{ + return b.output (os); +} + inline ostream & operator<< (ostream & os, dh::fast_dboxes const & b) { return b.output (os); diff --git a/Carpet/CarpetLib/src/dist.cc b/Carpet/CarpetLib/src/dist.cc index c870990fb..89acacfad 100644 --- a/Carpet/CarpetLib/src/dist.cc +++ b/Carpet/CarpetLib/src/dist.cc @@ -1,4 +1,5 @@ #include +#include #include #ifdef _OPENMP @@ -9,6 +10,8 @@ #include "cctk_Parameters.h" #include "defs.hh" +#include "limits.hh" +#include "startup_time.hh" #include "dist.hh" @@ -20,9 +23,11 @@ namespace dist { MPI_Comm comm_ = MPI_COMM_NULL; - MPI_Datatype mpi_complex8; - MPI_Datatype mpi_complex16; - 
MPI_Datatype mpi_complex32; + MPI_Datatype mpi_complex8 = MPI_DATATYPE_NULL; + MPI_Datatype mpi_complex16 = MPI_DATATYPE_NULL; + MPI_Datatype mpi_complex32 = MPI_DATATYPE_NULL; + + int total_num_threads_ = -1; void init (int& argc, char**& argv) { MPI_Init (&argc, &argv); @@ -34,19 +39,32 @@ namespace dist { #ifdef HAVE_CCTK_REAL4 CCTK_REAL4 dummy4; - MPI_Type_contiguous (2, datatype(dummy4), &mpi_complex8); + MPI_Type_contiguous (2, mpi_datatype(dummy4), &mpi_complex8); MPI_Type_commit (&mpi_complex8); #endif #ifdef HAVE_CCTK_REAL8 CCTK_REAL8 dummy8; - MPI_Type_contiguous (2, datatype(dummy8), &mpi_complex16); + MPI_Type_contiguous (2, mpi_datatype(dummy8), &mpi_complex16); MPI_Type_commit (&mpi_complex16); #endif #ifdef HAVE_CCTK_REAL16 CCTK_REAL16 dummy16; - MPI_Type_contiguous (2, datatype(dummy16), &mpi_complex32); + MPI_Type_contiguous (2, mpi_datatype(dummy16), &mpi_complex32); MPI_Type_commit (&mpi_complex32); #endif + + // Output startup time + // cerr << "QQQ: pseudoinit[1]" << endl; + CarpetLib::output_startup_time (); + // cerr << "QQQ: pseudoinit[2]" << endl; + + // Check and/or modify system limits + CarpetLib::set_system_limits (); + // cerr << "QQQ: pseudoinit[3]" << endl; + + // cerr << "QQQ: pseudoinit[4]" << endl; + collect_total_num_threads (); + // cerr << "QQQ: pseudoinit[5]" << endl; } void finalize () { @@ -56,10 +74,27 @@ namespace dist { // Create an MPI datatype from a C datatype description - void create_mpi_datatype (size_t const count, - mpi_struct_descr_t const descr[], - MPI_Datatype & newtype) + + ostream& operator<< (ostream& os, mpi_struct_descr_t const& descr) { + int type_size; + MPI_Type_size (descr.type, &type_size); + os << "{" + << "blocklength:" << descr.blocklength << "," + << "displacement:" << descr.displacement << "," + << "type:" << descr.type << "," + << "type_size:" << type_size << "," + << "field_name:" << descr.field_name << "," + << "type_name:" << descr.type_name + << "}"; + return os; + } + + MPI_Datatype 
create_mpi_datatype (size_t const count, + mpi_struct_descr_t const descr[], + char const * const name, size_t const size) + { + DECLARE_CCTK_PARAMETERS; int blocklengths[count]; MPI_Aint displacements[count]; MPI_Datatype types[count]; @@ -68,10 +103,128 @@ namespace dist { displacements[n] = descr[n].displacement; types [n] = descr[n].type; } + MPI_Datatype newtype; MPI_Type_struct (count, blocklengths, displacements, types, &newtype); MPI_Type_commit (&newtype); + if (verbose) { + CCTK_VInfo (CCTK_THORNSTRING, + "Creating new MPI type for C type %s:", name); + cout << " Type has " << count << " components" << endl; + for (size_t n=0; n + void + generic_mpi_datatype_t::add_field (size_t const offset, size_t const count, + string const field_name) + { + assert (not type_is_committed); + U u; + entries.push_back (field_t (offset, count, mpi_datatype(u), + field_name, typeid(U).name())); + } + + void + generic_mpi_datatype_t::commit () + { + DECLARE_CCTK_PARAMETERS; + + // Debug output + if (verbose) { + CCTK_VInfo (CCTK_THORNSTRING, + "Creating new MPI type for C type %s:", type_name.c_str()); + cout << *this; + } + + assert (not type_is_committed); + type_is_committed = true; + + // Out of caution -- this could be allowed + assert (not entries.empty()); + + // Create MPI type + size_t const count = entries.size(); + int blocklengths [count+1]; + MPI_Aint displacements[count+1]; + MPI_Datatype types [count+1]; + { + size_t n = 0; + for (list::const_iterator ifield = + entries.begin(); ifield!=entries.end(); ++ifield, ++n) + { + blocklengths [n] = ifield->count; + displacements[n] = ifield->offset; + types [n] = ifield->mpi_datatype; + } + assert (n == count); + // Add MPI_UB + blocklengths [n] = 1; + displacements[n] = type_size(); + types [n] = MPI_UB; + } + + MPI_Type_struct + (count+1, blocklengths, displacements, types, &mpi_datatype); + MPI_Type_commit (&mpi_datatype); + } + + ostream& + generic_mpi_datatype_t::output (ostream& os) const + { + cout << 
"Datatype: " << type_name << endl; + size_t const count = entries.size(); + cout << " Type has " << count << " components" << endl; + { + size_t n = 0; + for (list::const_iterator ifield = + entries.begin(); ifield!=entries.end(); ++ifield, ++n) + { + cout << " [" << n << "]: " << *ifield << endl; + } + assert (n == count); + } + cout << " MPI type ID: " << mpi_datatype << endl; + int datatypesize; + MPI_Type_size (mpi_datatype, &datatypesize); + cout << " C type size: " << size << endl; + cout << " MPI type size: " << datatypesize << endl; + return os; } +#endif + void checkpoint (const char* file, int line) { @@ -105,15 +258,48 @@ namespace dist { } // Global number of threads - int total_num_threads_worker () + void collect_total_num_threads () { - int total_num_threads_; int const mynthreads = num_threads(); + // cerr << "QQQ: collect_total_num_threads[1]" << endl; MPI_Allreduce (const_cast (& mynthreads), & total_num_threads_, 1, MPI_INT, MPI_SUM, comm()); + // cerr << "QQQ: collect_total_num_threads[2]" << endl; assert (total_num_threads_ >= size()); - return total_num_threads_; } - + + + + char const * c_datatype_name (unsigned const type) + { + switch (type) { + case 0: return "char"; + case 1: return "signed char"; + case 2: return "unsigned char"; + case 3: return "short"; + case 4: return "unsigned short"; + case 5: return "int"; + case 6: return "unsigned int"; + case 7: return "long"; + case 8: return "unsigned long"; + case 9: return "long long"; + case 10: return "unsigned long long"; + case 11: return "float"; + case 12: return "double"; + case 13: return "long double"; +#ifdef HAVE_CCTK_COMPLEX8 + case 14: return "CCTK_COMPLEX8"; +#endif +#ifdef HAVE_CCTK_COMPLEX16 + case 15: return "CCTK_COMPLEX16"; +#endif +#ifdef HAVE_CCTK_COMPLEX32 + case 16: return "CCTK_COMPLEX32"; +#endif + } + assert (0); abort(); + return NULL; + } + } // namespace dist diff --git a/Carpet/CarpetLib/src/dist.hh b/Carpet/CarpetLib/src/dist.hh index 6868d85ce..091da31e4 
100644 --- a/Carpet/CarpetLib/src/dist.hh +++ b/Carpet/CarpetLib/src/dist.hh @@ -4,6 +4,7 @@ #include #include #include +#include #include #ifdef _OPENMP @@ -26,20 +27,102 @@ namespace dist { extern MPI_Datatype mpi_complex16; extern MPI_Datatype mpi_complex32; + extern int total_num_threads_; + void init (int& argc, char**& argv); void pseudoinit (MPI_Comm const c); void finalize (); + + // Create MPI datatypes from C structures + struct mpi_struct_descr_t { int blocklength; MPI_Aint displacement; MPI_Datatype type; + char const * field_name; + char const * type_name; + }; + + ostream& operator<< (ostream& os, mpi_struct_descr_t const& descr); + + MPI_Datatype create_mpi_datatype (size_t const count, + mpi_struct_descr_t const descr[], + char const * name, size_t size); +#if 0 + + class generic_mpi_datatype_t { + + string const type_name; + virtual size_t type_size() const = 0; + + struct field_t { + size_t offset; + size_t count; + MPI_Datatype mpi_datatype; + string field_name; + string type_name; + field_t (size_t const offset_, + size_t const count_, + MPI_Datatype const mpi_datatype_, + string const field_name_, + string const type_name_) + : offset(offset_), + count(count_), + mpi_datatype(mpi_datatype_), + field_name(field_name_), + type_name(type_name_) + { + } + ostream& output (ostream& os) const; + }; + friend ostream& operator<< (ostream& os, + generic_mpi_datatype_t::field_t const& field); + + list entries; + + bool type_is_committed; + MPI_Datatype mpi_datatype; + + public: + + generic_mpi_datatype_t (string const type_name_); + + template + void add_field (size_t offset, size_t count, string field_name); + + void commit (); + + MPI_Datatype get () const + { + assert (type_is_committed); + return mpi_datatype; + } + + ostream& output (ostream& os) const; + }; + + template + class mpi_datatype_t: public generic_mpi_datatype_t { + virtual size_t type_size() const + { + return sizeof(T); + } }; + + inline ostream& operator<< (ostream& os, + 
generic_mpi_datatype_t::field_t const& field) + { + return field.output(os); + } - void create_mpi_datatype (size_t const count, - mpi_struct_descr_t const descr[], - MPI_Datatype & newtype); + inline ostream& operator<< (ostream& os, generic_mpi_datatype_t const& type) + { + return type.output(os); + } + +#endif @@ -52,18 +135,21 @@ namespace dist { // Information about the communicator // Return the communicator + inline MPI_Comm comm () CCTK_ATTRIBUTE_CONST; inline MPI_Comm comm () { return comm_; } // Always return a good communicator + inline MPI_Comm goodcomm () CCTK_ATTRIBUTE_CONST; inline MPI_Comm goodcomm () { return comm_ != MPI_COMM_NULL ? comm_ : MPI_COMM_WORLD; } // Rank in the communicator (this processor's number, 0 .. size-1) + inline int rank () CCTK_ATTRIBUTE_CONST; inline int rank () { static int rank_ = -1; @@ -72,6 +158,7 @@ namespace dist { } // Size of the communicator + inline int size () CCTK_ATTRIBUTE_CONST; inline int size () { static int size_ = -1; @@ -83,6 +170,7 @@ namespace dist { void set_num_threads (int num_threads); // Local number of threads + inline int num_threads () CCTK_ATTRIBUTE_CONST; inline int num_threads () { static int num_threads_ = -1; @@ -98,13 +186,10 @@ namespace dist { } // Global number of threads - int total_num_threads_worker (); + void collect_total_num_threads (); + inline int total_num_threads () CCTK_ATTRIBUTE_CONST; inline int total_num_threads () { - static int total_num_threads_ = -1; - if (total_num_threads_ == -1) { - total_num_threads_ = total_num_threads_worker(); - } return total_num_threads_; } @@ -114,168 +199,248 @@ namespace dist { // C Datatype helpers // Map a C datatype to a 0-based index running up to c_ndatatypes(). 
///////////////////////////////////////////////////////////////////////// + inline unsigned int c_datatype (const char&) CCTK_ATTRIBUTE_CONST; inline unsigned int c_datatype (const char&) { return 0; } - inline unsigned int c_datatype (const signed char&) + inline unsigned int c_datatype (const signed char&) CCTK_ATTRIBUTE_CONST; + inline unsigned int c_datatype (const signed char&) { return 1; } + inline unsigned int c_datatype (const unsigned char&) CCTK_ATTRIBUTE_CONST; inline unsigned int c_datatype (const unsigned char&) { return 2; } + inline unsigned int c_datatype (const short&) CCTK_ATTRIBUTE_CONST; inline unsigned int c_datatype (const short&) { return 3; } + inline unsigned int c_datatype (const unsigned short&) CCTK_ATTRIBUTE_CONST; inline unsigned int c_datatype (const unsigned short&) { return 4; } + inline unsigned int c_datatype (const int&) CCTK_ATTRIBUTE_CONST; inline unsigned int c_datatype (const int&) { return 5; } + inline unsigned int c_datatype (const unsigned int&) CCTK_ATTRIBUTE_CONST; inline unsigned int c_datatype (const unsigned int&) { return 6; } + inline unsigned int c_datatype (const long&) CCTK_ATTRIBUTE_CONST; inline unsigned int c_datatype (const long&) { return 7; } + inline unsigned int c_datatype (const unsigned long&) CCTK_ATTRIBUTE_CONST; inline unsigned int c_datatype (const unsigned long&) { return 8; } + inline unsigned int c_datatype (const long long&) CCTK_ATTRIBUTE_CONST; inline unsigned int c_datatype (const long long&) { return 9; } - inline unsigned int c_datatype (const float&) + inline unsigned int c_datatype (const unsigned long long&) CCTK_ATTRIBUTE_CONST; + inline unsigned int c_datatype (const unsigned long long&) { return 10; } - inline unsigned int c_datatype (const double&) + inline unsigned int c_datatype (const float&) CCTK_ATTRIBUTE_CONST; + inline unsigned int c_datatype (const float&) { return 11; } - inline unsigned int c_datatype (const long double&) + inline unsigned int c_datatype (const double&) 
CCTK_ATTRIBUTE_CONST; + inline unsigned int c_datatype (const double&) { return 12; } + inline unsigned int c_datatype (const long double&) CCTK_ATTRIBUTE_CONST; + inline unsigned int c_datatype (const long double&) + { return 13; } + #ifdef HAVE_CCTK_COMPLEX8 + inline unsigned int c_datatype (const CCTK_COMPLEX8&) CCTK_ATTRIBUTE_CONST; inline unsigned int c_datatype (const CCTK_COMPLEX8&) - { return 13; } + { return 14; } #endif #ifdef HAVE_CCTK_COMPLEX16 + inline unsigned int c_datatype (const CCTK_COMPLEX16&) CCTK_ATTRIBUTE_CONST; inline unsigned int c_datatype (const CCTK_COMPLEX16&) - { return 14; } + { return 15; } #endif #ifdef HAVE_CCTK_COMPLEX32 + inline unsigned int c_datatype (const CCTK_COMPLEX32&) CCTK_ATTRIBUTE_CONST; inline unsigned int c_datatype (const CCTK_COMPLEX32&) - { return 15; } + { return 16; } #endif // keep this function's return code consistent with functions above + inline unsigned int c_ndatatypes () CCTK_ATTRIBUTE_CONST; inline unsigned int c_ndatatypes () - { return 16; } + { return 17; } template unsigned int c_datatype () { abort(); } - template<> inline unsigned int c_datatype () { return 0; } - template<> inline unsigned int c_datatype () { return 1; } - template<> inline unsigned int c_datatype () { return 2; } - template<> inline unsigned int c_datatype () { return 3; } - template<> inline unsigned int c_datatype () { return 4; } - template<> inline unsigned int c_datatype () { return 5; } - template<> inline unsigned int c_datatype () { return 6; } - template<> inline unsigned int c_datatype () { return 7; } - template<> inline unsigned int c_datatype () { return 8; } - template<> inline unsigned int c_datatype () { return 9; } - template<> inline unsigned int c_datatype () { return 10; } - template<> inline unsigned int c_datatype () { return 11; } - template<> inline unsigned int c_datatype () { return 12; } + template<> inline unsigned int c_datatype () CCTK_ATTRIBUTE_CONST; + template<> inline unsigned int c_datatype () { 
return 0; } + template<> inline unsigned int c_datatype () CCTK_ATTRIBUTE_CONST; + template<> inline unsigned int c_datatype () { return 1; } + template<> inline unsigned int c_datatype () CCTK_ATTRIBUTE_CONST; + template<> inline unsigned int c_datatype () { return 2; } + template<> inline unsigned int c_datatype () CCTK_ATTRIBUTE_CONST; + template<> inline unsigned int c_datatype () { return 3; } + template<> inline unsigned int c_datatype () CCTK_ATTRIBUTE_CONST; + template<> inline unsigned int c_datatype () { return 4; } + template<> inline unsigned int c_datatype () CCTK_ATTRIBUTE_CONST; + template<> inline unsigned int c_datatype () { return 5; } + template<> inline unsigned int c_datatype () CCTK_ATTRIBUTE_CONST; + template<> inline unsigned int c_datatype () { return 6; } + template<> inline unsigned int c_datatype () CCTK_ATTRIBUTE_CONST; + template<> inline unsigned int c_datatype () { return 7; } + template<> inline unsigned int c_datatype () CCTK_ATTRIBUTE_CONST; + template<> inline unsigned int c_datatype () { return 8; } + template<> inline unsigned int c_datatype () CCTK_ATTRIBUTE_CONST; + template<> inline unsigned int c_datatype () { return 9; } + template<> inline unsigned int c_datatype () CCTK_ATTRIBUTE_CONST; + template<> inline unsigned int c_datatype () { return 10; } + template<> inline unsigned int c_datatype () CCTK_ATTRIBUTE_CONST; + template<> inline unsigned int c_datatype () { return 11; } + template<> inline unsigned int c_datatype () CCTK_ATTRIBUTE_CONST; + template<> inline unsigned int c_datatype () { return 12; } + template<> inline unsigned int c_datatype () CCTK_ATTRIBUTE_CONST; + template<> inline unsigned int c_datatype () { return 13; } #ifdef HAVE_CCTK_COMPLEX8 - template<> inline unsigned int c_datatype () { return 13; } + template<> inline unsigned int c_datatype () CCTK_ATTRIBUTE_CONST; + template<> inline unsigned int c_datatype () { return 14; } #endif #ifdef HAVE_CCTK_COMPLEX16 - template<> inline unsigned int 
c_datatype () { return 14; } + template<> inline unsigned int c_datatype () CCTK_ATTRIBUTE_CONST; + template<> inline unsigned int c_datatype () { return 15; } #endif #ifdef HAVE_CCTK_COMPLEX32 - template<> inline unsigned int c_datatype () { return 15; } + template<> inline unsigned int c_datatype () CCTK_ATTRIBUTE_CONST; + template<> inline unsigned int c_datatype () { return 16; } #endif + // Map a C datatype index to a string + char const * c_datatype_name (unsigned type) CCTK_ATTRIBUTE_CONST; + ///////////////////////////////////////////////////////////////// // MPI Datatype helpers // Map a C datatype to its corresponding MPI datatype. ///////////////////////////////////////////////////////////////// - inline MPI_Datatype datatype (const char&) + inline MPI_Datatype mpi_datatype (const char&) CCTK_ATTRIBUTE_CONST; + inline MPI_Datatype mpi_datatype (const char&) { return MPI_CHAR; } - inline MPI_Datatype datatype (const signed char&) + inline MPI_Datatype mpi_datatype (const signed char&) CCTK_ATTRIBUTE_CONST; + inline MPI_Datatype mpi_datatype (const signed char&) { return MPI_CHAR; } - inline MPI_Datatype datatype (const unsigned char&) + inline MPI_Datatype mpi_datatype (const unsigned char&) CCTK_ATTRIBUTE_CONST; + inline MPI_Datatype mpi_datatype (const unsigned char&) { return MPI_UNSIGNED_CHAR; } - inline MPI_Datatype datatype (const short&) + inline MPI_Datatype mpi_datatype (const short&) CCTK_ATTRIBUTE_CONST; + inline MPI_Datatype mpi_datatype (const short&) { return MPI_SHORT; } - inline MPI_Datatype datatype (const unsigned short&) + inline MPI_Datatype mpi_datatype (const unsigned short&) CCTK_ATTRIBUTE_CONST; + inline MPI_Datatype mpi_datatype (const unsigned short&) { return MPI_UNSIGNED_SHORT; } - inline MPI_Datatype datatype (const int&) + inline MPI_Datatype mpi_datatype (const int&) CCTK_ATTRIBUTE_CONST; + inline MPI_Datatype mpi_datatype (const int&) { return MPI_INT; } - inline MPI_Datatype datatype (const unsigned int&) + inline 
MPI_Datatype mpi_datatype (const unsigned int&) CCTK_ATTRIBUTE_CONST; + inline MPI_Datatype mpi_datatype (const unsigned int&) { return MPI_UNSIGNED; } - inline MPI_Datatype datatype (const long&) + inline MPI_Datatype mpi_datatype (const long&) CCTK_ATTRIBUTE_CONST; + inline MPI_Datatype mpi_datatype (const long&) { return MPI_LONG; } - inline MPI_Datatype datatype (const unsigned long&) + inline MPI_Datatype mpi_datatype (const unsigned long&) CCTK_ATTRIBUTE_CONST; + inline MPI_Datatype mpi_datatype (const unsigned long&) { return MPI_UNSIGNED_LONG; } - inline MPI_Datatype datatype (const long long&) + inline MPI_Datatype mpi_datatype (const long long&) CCTK_ATTRIBUTE_CONST; + inline MPI_Datatype mpi_datatype (const long long&) { return MPI_LONG_LONG_INT; } - inline MPI_Datatype datatype (const float&) + inline MPI_Datatype mpi_datatype (const unsigned long long&) CCTK_ATTRIBUTE_CONST; + inline MPI_Datatype mpi_datatype (const unsigned long long&) + { return MPI_LONG_LONG_INT; } // should be unsigned, but this doesn't exist + + inline MPI_Datatype mpi_datatype (const float&) CCTK_ATTRIBUTE_CONST; + inline MPI_Datatype mpi_datatype (const float&) { return MPI_FLOAT; } - inline MPI_Datatype datatype (const double&) + inline MPI_Datatype mpi_datatype (const double&) CCTK_ATTRIBUTE_CONST; + inline MPI_Datatype mpi_datatype (const double&) { return MPI_DOUBLE; } - inline MPI_Datatype datatype (const long double&) + inline MPI_Datatype mpi_datatype (const long double&) CCTK_ATTRIBUTE_CONST; + inline MPI_Datatype mpi_datatype (const long double&) { return MPI_LONG_DOUBLE; } #ifdef HAVE_CCTK_COMPLEX8 - inline MPI_Datatype datatype (const CCTK_COMPLEX8&) + inline MPI_Datatype mpi_datatype (const CCTK_COMPLEX8&) CCTK_ATTRIBUTE_CONST; + inline MPI_Datatype mpi_datatype (const CCTK_COMPLEX8&) { return mpi_complex8; } #endif #ifdef HAVE_CCTK_COMPLEX16 - inline MPI_Datatype datatype (const CCTK_COMPLEX16&) + inline MPI_Datatype mpi_datatype (const CCTK_COMPLEX16&) 
CCTK_ATTRIBUTE_CONST; + inline MPI_Datatype mpi_datatype (const CCTK_COMPLEX16&) { return mpi_complex16; } #endif #ifdef HAVE_CCTK_COMPLEX32 - inline MPI_Datatype datatype (const CCTK_COMPLEX32&) + inline MPI_Datatype mpi_datatype (const CCTK_COMPLEX32&) CCTK_ATTRIBUTE_CONST; + inline MPI_Datatype mpi_datatype (const CCTK_COMPLEX32&) { return mpi_complex32; } #endif - template MPI_Datatype datatype () { abort(); } - template<> inline MPI_Datatype datatype () { return MPI_CHAR; } - template<> inline MPI_Datatype datatype () { return MPI_CHAR; } - template<> inline MPI_Datatype datatype () { return MPI_UNSIGNED_CHAR; } - template<> inline MPI_Datatype datatype () { return MPI_SHORT; } - template<> inline MPI_Datatype datatype () { return MPI_UNSIGNED_SHORT; } - template<> inline MPI_Datatype datatype () { return MPI_INT; } - template<> inline MPI_Datatype datatype () { return MPI_UNSIGNED; } - template<> inline MPI_Datatype datatype () { return MPI_LONG; } - template<> inline MPI_Datatype datatype () { return MPI_UNSIGNED_LONG; } - template<> inline MPI_Datatype datatype () { return MPI_LONG_LONG_INT; } - template<> inline MPI_Datatype datatype () { return MPI_FLOAT; } - template<> inline MPI_Datatype datatype () { return MPI_DOUBLE; } - template<> inline MPI_Datatype datatype () { return MPI_LONG_DOUBLE; } + template MPI_Datatype mpi_datatype () { abort(); } + template<> inline MPI_Datatype mpi_datatype () CCTK_ATTRIBUTE_CONST; + template<> inline MPI_Datatype mpi_datatype () { return MPI_CHAR; } + template<> inline MPI_Datatype mpi_datatype () CCTK_ATTRIBUTE_CONST; + template<> inline MPI_Datatype mpi_datatype () { return MPI_CHAR; } + template<> inline MPI_Datatype mpi_datatype () CCTK_ATTRIBUTE_CONST; + template<> inline MPI_Datatype mpi_datatype () { return MPI_UNSIGNED_CHAR; } + template<> inline MPI_Datatype mpi_datatype () CCTK_ATTRIBUTE_CONST; + template<> inline MPI_Datatype mpi_datatype () { return MPI_SHORT; } + template<> inline MPI_Datatype mpi_datatype 
() CCTK_ATTRIBUTE_CONST; + template<> inline MPI_Datatype mpi_datatype () { return MPI_UNSIGNED_SHORT; } + template<> inline MPI_Datatype mpi_datatype () CCTK_ATTRIBUTE_CONST; + template<> inline MPI_Datatype mpi_datatype () { return MPI_INT; } + template<> inline MPI_Datatype mpi_datatype () CCTK_ATTRIBUTE_CONST; + template<> inline MPI_Datatype mpi_datatype () { return MPI_UNSIGNED; } + template<> inline MPI_Datatype mpi_datatype () CCTK_ATTRIBUTE_CONST; + template<> inline MPI_Datatype mpi_datatype () { return MPI_LONG; } + template<> inline MPI_Datatype mpi_datatype () CCTK_ATTRIBUTE_CONST; + template<> inline MPI_Datatype mpi_datatype () { return MPI_UNSIGNED_LONG; } + template<> inline MPI_Datatype mpi_datatype () CCTK_ATTRIBUTE_CONST; + template<> inline MPI_Datatype mpi_datatype () { return MPI_LONG_LONG_INT; } + template<> inline MPI_Datatype mpi_datatype () CCTK_ATTRIBUTE_CONST; + template<> inline MPI_Datatype mpi_datatype () { return MPI_LONG_LONG_INT; } // should be unsigned, but this doesn't exist + template<> inline MPI_Datatype mpi_datatype () CCTK_ATTRIBUTE_CONST; + template<> inline MPI_Datatype mpi_datatype () { return MPI_FLOAT; } + template<> inline MPI_Datatype mpi_datatype () CCTK_ATTRIBUTE_CONST; + template<> inline MPI_Datatype mpi_datatype () { return MPI_DOUBLE; } + template<> inline MPI_Datatype mpi_datatype () CCTK_ATTRIBUTE_CONST; + template<> inline MPI_Datatype mpi_datatype () { return MPI_LONG_DOUBLE; } #ifdef HAVE_CCTK_COMPLEX8 - template<> inline MPI_Datatype datatype () { return mpi_complex8; } + template<> inline MPI_Datatype mpi_datatype () CCTK_ATTRIBUTE_CONST; + template<> inline MPI_Datatype mpi_datatype () { return mpi_complex8; } #endif #ifdef HAVE_CCTK_COMPLEX16 - template<> inline MPI_Datatype datatype () { return mpi_complex16; } + template<> inline MPI_Datatype mpi_datatype () CCTK_ATTRIBUTE_CONST; + template<> inline MPI_Datatype mpi_datatype () { return mpi_complex16; } #endif #ifdef HAVE_CCTK_COMPLEX32 - template<> 
inline MPI_Datatype datatype () { return mpi_complex32; } + template<> inline MPI_Datatype mpi_datatype () CCTK_ATTRIBUTE_CONST; + template<> inline MPI_Datatype mpi_datatype () { return mpi_complex32; } #endif } // namespace dist diff --git a/Carpet/CarpetLib/src/fulltree.cc b/Carpet/CarpetLib/src/fulltree.cc index cb6de5116..3234a73dc 100644 --- a/Carpet/CarpetLib/src/fulltree.cc +++ b/Carpet/CarpetLib/src/fulltree.cc @@ -15,6 +15,8 @@ fulltree::fulltree () : type (type_empty) { assert (invariant()); + // This is unused + assert (0); } @@ -193,6 +195,15 @@ fulltree::const_iterator::const_iterator (fulltree const & f_) if (f.is_branch()) { assert (f.subtrees.size() > 0); it = new const_iterator (* f.subtrees.at(i)); + while ((*it).done()) { + delete it; + it = 0; + ++ i; + if (done()) break; + // to do: use a new function "reset iterator" instead + it = new const_iterator (* f.subtrees.at(i)); + } + assert (done() or not (*it).done()); } } @@ -253,6 +264,7 @@ fulltree::const_iterator::operator++ () ++ i; } else { ++ *it; +#if 0 if ((*it).done()) { delete it; it = 0; @@ -260,8 +272,19 @@ fulltree::const_iterator::operator++ () if (not done()) { // to do: use a new function "reset iterator" instead it = new const_iterator (* f.subtrees.at(i)); + assert (not (*it).done()); } } +#endif + while ((*it).done()) { + delete it; + it = 0; + ++ i; + if (done()) break; + // to do: use a new function "reset iterator" instead + it = new const_iterator (* f.subtrees.at(i)); + } + assert (done() or not (*it).done()); } return *this; } @@ -289,6 +312,15 @@ fulltree::iterator::iterator (fulltree & f_) if (f.is_branch()) { assert (f.subtrees.size() > 0); it = new iterator (* f.subtrees.at(i)); + while ((*it).done()) { + delete it; + it = 0; + ++ i; + if (done()) break; + // to do: use a new function "reset iterator" instead + it = new iterator (* f.subtrees.at(i)); + } + assert (done() or not (*it).done()); } } @@ -349,6 +381,7 @@ fulltree::iterator::operator++ () ++ i; } else { ++ 
*it; +#if 0 if ((*it).done()) { delete it; it = 0; @@ -356,8 +389,19 @@ fulltree::iterator::operator++ () if (not done()) { // to do: use a new function "reset iterator" instead it = new iterator (* f.subtrees.at(i)); + assert (not (*it).done()); } } +#endif + while ((*it).done()) { + delete it; + it = 0; + ++ i; + if (done()) break; + // to do: use a new function "reset iterator" instead + it = new iterator (* f.subtrees.at(i)); + } + assert (done() or not (*it).done()); } return *this; } @@ -409,7 +453,7 @@ fulltree::output (ostream & os) const << "dir=" << dir << "," << "subtrees=["; for (size_t i=0; i +inline size_t memoryof (fulltree const & f) CCTK_ATTRIBUTE_PURE; +template inline size_t memoryof (fulltree const & f) { return f.memory(); } diff --git a/Carpet/CarpetLib/src/gdata.cc b/Carpet/CarpetLib/src/gdata.cc index 39c9ebf8c..e1888835c 100644 --- a/Carpet/CarpetLib/src/gdata.cc +++ b/Carpet/CarpetLib/src/gdata.cc @@ -26,34 +26,25 @@ using namespace CarpetLib; -// Hand out the next MPI tag -static int nexttag () -{ - DECLARE_CCTK_PARAMETERS; - - int const min_tag = 100; - static int last = 0; - ++last; - if (last >= 30000) last = 0; - return min_tag + last; -} +list gdata::allgdata; // Constructors gdata::gdata (const int varindex_, const centering cent_, - const operator_type transport_operator_, - const int tag_) + const operator_type transport_operator_) : _storage(NULL), varindex(varindex_), cent(cent_), transport_operator(transport_operator_), _has_storage(false), - comm_active(false), - tag(tag_ >= 0 ? 
tag_ : nexttag()) + comm_active(false) { DECLARE_CCTK_PARAMETERS; + + allgdatai = allgdata.insert(allgdata.end(), this); + if (barriers) { MPI_Barrier (dist::comm()); } @@ -63,6 +54,9 @@ gdata::gdata (const int varindex_, gdata::~gdata () { DECLARE_CCTK_PARAMETERS; + + allgdata.erase(allgdatai); + if (barriers) { MPI_Barrier (dist::comm()); } @@ -76,14 +70,17 @@ void gdata:: copy_from (comm_state & state, gdata const * const src, - ibbox const & box) + ibbox const & box, + int const dstproc, + int const srcproc) { - vector srcs (1, src); + vector const srcs (1, src); CCTK_REAL const time = 0.0; - vector times (1, time); + vector const times (1, time); transfer_from (state, srcs, times, box, box, + dstproc, srcproc, time, 0, 0); } @@ -96,37 +93,52 @@ transfer_from (comm_state & state, vector const & times, ibbox const & dstbox, ibbox const & srcbox, + int const dstproc, + int const srcproc, CCTK_REAL const time, int const order_space, int const order_time) { - assert (has_storage()); - assert (not dstbox.empty()); - assert (all(dstbox.lower() >= extent().lower())); - assert (all(dstbox.upper() <= extent().upper())); - assert (all(dstbox.stride() == extent().stride())); - assert (all((dstbox.lower() - extent().lower()) % dstbox.stride() == 0)); - - assert (not srcbox.empty()); - assert (srcs.size() == times.size() and srcs.size() > 0); - for (int t=0; t<(int)srcs.size(); ++t) { - assert (srcs.AT(t)->has_storage()); - assert (all(srcbox.lower() >= srcs.AT(t)->extent().lower())); - assert (all(srcbox.upper() <= srcs.AT(t)->extent().upper())); + bool const is_dst = dist::rank() == dstproc; + bool const is_src = dist::rank() == srcproc; + // Return early if this communication does not concern us + assert (is_dst or is_src); // why should we be here? 
+ if (not is_dst and not is_src) return; + + if (is_dst) { + assert (proc() == dstproc); + assert (has_storage()); + assert (not dstbox.empty()); + assert (all(dstbox.lower() >= extent().lower())); + assert (all(dstbox.upper() <= extent().upper())); + assert (all(dstbox.stride() == extent().stride())); + assert (all((dstbox.lower() - extent().lower()) % dstbox.stride() == 0)); } - gdata const * const src = srcs.AT(0); - assert (transport_operator != op_error); - if (transport_operator == op_none) return; + if (is_src) { + assert (not srcbox.empty()); + assert (srcs.size() == times.size() and srcs.size() > 0); + for (int t=0; t<(int)srcs.size(); ++t) { + assert (srcs.AT(t)->proc() == srcproc); + assert (srcs.AT(t)->has_storage()); + assert (all(srcbox.lower() >= srcs.AT(t)->extent().lower())); + assert (all(srcbox.upper() <= srcs.AT(t)->extent().upper())); + } + } + gdata const * const src = is_src ? srcs.AT(0) : NULL; - // Return early if this communication does not concern us - if (dist::rank() != proc() and dist::rank() != src->proc()) return; + operator_type const my_transport_operator = + is_dst ? transport_operator : src->transport_operator; + assert (my_transport_operator != op_error); + assert (my_transport_operator != op_none); // why should we be here? 
+ if (my_transport_operator == op_none) return; // Interpolate either on the source or on the destination processor, // depending on whether this increases or reduces the amount of data int timelevel0, ntimelevels; - find_source_timelevel (times, time, order_time, timelevel0, ntimelevels); - assert (int (srcs.size()) >= ntimelevels); + find_source_timelevel + (times, time, order_time, my_transport_operator, timelevel0, ntimelevels); + if (is_src) assert (int (srcs.size()) >= ntimelevels); int const dstpoints = dstbox.size(); int const srcpoints = srcbox.size() * ntimelevels; bool const interp_on_src = dstpoints <= srcpoints; @@ -136,46 +148,45 @@ transfer_from (comm_state & state, case state_get_buffer_sizes: // don't count processor-local copies - if (proc() != src->proc()) { - // if this is a destination processor: advance its recv buffer - // size - if (proc() == dist::rank()) { - state.reserve_recv_space (c_datatype(), src->proc(), npoints); + if (not (is_dst and is_src)) { + if (is_dst) { + // increment the recv buffer size + state.reserve_recv_space (c_datatype(), srcproc, npoints); } - // if this is a source processor: increment its send buffer size - if (src->proc() == dist::rank()) { - state.reserve_send_space (c_datatype(), proc(), npoints); + if (is_src) { + // increment the send buffer size + state.reserve_send_space (src->c_datatype(), dstproc, npoints); } } break; case state_fill_send_buffers: - // if this is a source processor: copy its data into the send - // buffer - if (proc() != src->proc()) { - if (src->proc() == dist::rank()) { + if (not (is_dst and is_src)) { + if (is_src) { + // copy the data into the send buffer if (interp_on_src) { - size_t const sendbufsize = c_datatype_size() * dstbox.size(); + size_t const sendbufsize = src->c_datatype_size() * dstbox.size(); void * const sendbuf = - state.send_buffer (c_datatype(), proc(), dstbox.size()); + state.send_buffer (src->c_datatype(), dstproc, dstbox.size()); gdata * const buf = - make_typed 
(varindex, cent, transport_operator, tag); - buf->allocate (dstbox, src->proc(), sendbuf, sendbufsize); + src->make_typed (src->varindex, src->cent, src->transport_operator); + buf->allocate (dstbox, srcproc, sendbuf, sendbufsize); buf->transfer_from_innerloop (srcs, times, dstbox, time, order_space, order_time); delete buf; - state.commit_send_space (c_datatype(), proc(), dstbox.size()); + state.commit_send_space (src->c_datatype(), dstproc, dstbox.size()); } else { for (int tl = timelevel0; tl < timelevel0 + ntimelevels; ++ tl) { - size_t const sendbufsize = c_datatype_size() * srcbox.size(); + size_t const sendbufsize = src->c_datatype_size() * srcbox.size(); void * const sendbuf = - state.send_buffer (c_datatype(), proc(), srcbox.size()); + state.send_buffer (src->c_datatype(), dstproc, srcbox.size()); gdata * const buf = - make_typed (varindex, cent, transport_operator, tag); - buf->allocate (srcbox, src->proc(), sendbuf, sendbufsize); + src->make_typed (src->varindex, src->cent, + src->transport_operator); + buf->allocate (srcbox, srcproc, sendbuf, sendbufsize); buf->copy_from_innerloop (srcs.AT(tl), srcbox); delete buf; - state.commit_send_space (c_datatype(), proc(), srcbox.size()); + state.commit_send_space (src->c_datatype(), dstproc, srcbox.size()); } } } @@ -184,45 +195,42 @@ transfer_from (comm_state & state, case state_do_some_work: // handle the processor-local case - if (proc() == src->proc()) { - if (proc() == dist::rank()) { - transfer_from_innerloop - (srcs, times, dstbox, time, order_space, order_time); - } + if (is_dst and is_src) { + transfer_from_innerloop + (srcs, times, dstbox, time, order_space, order_time); } break; case state_empty_recv_buffers: - // if this is a destination processor: copy it from the recv - // buffer - if (proc() != src->proc()) { - if (proc() == dist::rank()) { + if (not (is_dst and is_src)) { + if (is_dst) { + // copy from the recv buffer if (interp_on_src) { size_t const recvbufsize = c_datatype_size() * 
dstbox.size(); void * const recvbuf = - state.recv_buffer (c_datatype(), src->proc(), dstbox.size()); - gdata * const buf = - make_typed (varindex, cent, transport_operator, tag); - buf->allocate (dstbox, proc(), recvbuf, recvbufsize); - state.commit_recv_space (c_datatype(), src->proc(), dstbox.size()); + state.recv_buffer (c_datatype(), srcproc, dstbox.size()); + gdata * const buf = make_typed (varindex, cent, transport_operator); + buf->allocate (dstbox, dstproc, recvbuf, recvbufsize); + state.commit_recv_space (c_datatype(), srcproc, dstbox.size()); copy_from_innerloop (buf, dstbox); delete buf; } else { - gdata const * const null = 0; - vector bufs (timelevel0 + ntimelevels, null); - for (int tl = timelevel0; tl < timelevel0 + ntimelevels; ++ tl) { + gdata const * const null = NULL; + vector bufs (ntimelevels, null); + vector timebuf (ntimelevels); + for (int tl = 0; tl < ntimelevels; ++ tl) { size_t const recvbufsize = c_datatype_size() * srcbox.size(); void * const recvbuf = - state.recv_buffer (c_datatype(), src->proc(), srcbox.size()); - gdata * const buf = - make_typed (varindex, cent, transport_operator, tag); - buf->allocate (srcbox, proc(), recvbuf, recvbufsize); - state.commit_recv_space (c_datatype(), src->proc(), srcbox.size()); + state.recv_buffer (c_datatype(), srcproc, srcbox.size()); + gdata * const buf = make_typed (varindex, cent, transport_operator); + buf->allocate (srcbox, dstproc, recvbuf, recvbufsize); + state.commit_recv_space (c_datatype(), srcproc, srcbox.size()); bufs.AT(tl) = buf; + timebuf.AT(tl) = times.AT(timelevel0 + tl); } transfer_from_innerloop - (bufs, times, dstbox, time, order_space, order_time); - for (int tl = timelevel0; tl < timelevel0 + ntimelevels; ++ tl) { + (bufs, timebuf, dstbox, time, order_space, order_time); + for (int tl = 0; tl < ntimelevels; ++ tl) { delete bufs.AT(tl); } } @@ -231,7 +239,7 @@ transfer_from (comm_state & state, break; default: - assert (0); + assert (0); abort(); } } @@ -242,9 +250,9 @@ 
gdata:: find_source_timelevel (vector const & times, CCTK_REAL const time, int const order_time, + operator_type const transport_operator, int & timelevel0, int & ntimelevels) - const { // Ensure that the times are consistent assert (times.size() > 0); @@ -253,7 +261,8 @@ find_source_timelevel (vector const & times, CCTK_REAL const eps = 1.0e-12; CCTK_REAL const min_time = * min_element (times.begin(), times.end()); CCTK_REAL const max_time = * max_element (times.begin(), times.end()); - CCTK_REAL const some_time = abs (min_time) + abs (max_time); + // TODO: Use a real delta-time from somewhere instead of 1.0 + CCTK_REAL const some_time = abs (min_time) + abs (max_time) + 1.0; if (transport_operator != op_copy) { if (time < min_time - eps * some_time or time > max_time + eps * some_time) @@ -303,3 +312,18 @@ find_source_timelevel (vector const & times, assert (timelevel0 >= 0 and timelevel0 < (int)times.size()); assert (ntimelevels > 0); } + + + +size_t +gdata:: +allmemory () +{ + size_t mem = memoryof(allgdata); + for (list::const_iterator + gdatai = allgdata.begin(); gdatai != allgdata.end(); ++ gdatai) + { + mem += memoryof(**gdatai); + } + return mem; +} diff --git a/Carpet/CarpetLib/src/gdata.hh b/Carpet/CarpetLib/src/gdata.hh index 09622fb34..4b62cf564 100644 --- a/Carpet/CarpetLib/src/gdata.hh +++ b/Carpet/CarpetLib/src/gdata.hh @@ -24,14 +24,19 @@ using namespace std; // A generic data storage without type information class gdata { + + static list allgdata; + list::iterator allgdatai; protected: // should be readonly // Fields void * _storage; // A copy of the storage pointer - + +public: const int varindex; // Cactus variable index, or -1 +protected: centering cent; operator_type transport_operator; @@ -47,8 +52,6 @@ protected: // should be readonly bool comm_active; // a communication is going on MPI_Request request; // outstanding MPI request - int tag; // MPI tag for this object - private: // Forbid copying and passing by value gdata (gdata const &); @@ 
-59,8 +62,7 @@ public: // Constructors gdata (const int varindex, const centering cent = error_centered, - const operator_type transport_operator = op_error, - const int tag = -1); + const operator_type transport_operator = op_error); // Destructors virtual ~gdata (); @@ -69,8 +71,7 @@ public: virtual gdata* make_typed (const int varindex, const centering cent = error_centered, - const operator_type transport_operator = op_error, - const int tag = -1) const = 0; + const operator_type transport_operator = op_error) const = 0; // Storage management virtual void allocate (const ibbox& extent, const int proc, @@ -148,7 +149,9 @@ public: void copy_from (comm_state & state, gdata const * src, - ibbox const & box); + ibbox const & box, + int dstproc, + int srcproc); void transfer_from (comm_state & state, @@ -156,18 +159,21 @@ public: vector const & times, ibbox const & dstbox, ibbox const & srcbox, + int dstproc, + int srcproc, CCTK_REAL time, int order_space, int order_time); protected: + static void find_source_timelevel (vector const & times, CCTK_REAL time, int order_time, + operator_type transport_operator, int & timelevel0, - int & ntimelevels) - const; + int & ntimelevels); private: virtual @@ -186,8 +192,25 @@ private: int order_time) = 0; +public: + virtual size_t memory () const CCTK_ATTRIBUTE_PURE = 0; + static size_t allmemory () CCTK_ATTRIBUTE_PURE; + virtual ostream& output (ostream& os) const = 0; }; +inline size_t memoryof (gdata const & d) CCTK_ATTRIBUTE_PURE; +inline size_t memoryof (gdata const & d) +{ + return d.memory (); +} + +inline ostream& operator<< (ostream& os, const gdata& d) +{ + return d.output(os); +} + + + #endif // GDATA_HH diff --git a/Carpet/CarpetLib/src/gf.cc b/Carpet/CarpetLib/src/gf.cc index 37b06db75..696628c59 100644 --- a/Carpet/CarpetLib/src/gf.cc +++ b/Carpet/CarpetLib/src/gf.cc @@ -41,23 +41,23 @@ gf::~gf () // Access to the data template -const data* gf::operator() (int tl, int rl, int c, int ml) const +const data* 
gf::operator() (int tl, int rl, int lc, int ml) const { assert (rl>=0 and rl=0 and c=0 and lc=0 and ml=0 and tl*)storage.AT(ml).AT(rl).AT(c).AT(tl); + return (const data*)storage.AT(ml).AT(rl).AT(lc).AT(tl); } template -data* gf::operator() (int tl, int rl, int c, int ml) +data* gf::operator() (int tl, int rl, int lc, int ml) { assert (rl>=0 and rl=0 and c=0 and lc=0 and ml=0 and tl*)storage.AT(ml).AT(rl).AT(c).AT(tl); + return (data*)storage.AT(ml).AT(rl).AT(lc).AT(tl); } diff --git a/Carpet/CarpetLib/src/gf.hh b/Carpet/CarpetLib/src/gf.hh index d5feb0a63..be0a1bb94 100644 --- a/Carpet/CarpetLib/src/gf.hh +++ b/Carpet/CarpetLib/src/gf.hh @@ -46,13 +46,11 @@ public: // Helpers -protected: - - virtual gdata* typed_data (int tl, int rl, int c, int ml) + virtual gdata* typed_data (int tl, int rl, int lc, int ml) const { data* const vl = this->vectorleader - ? (data*)(*this->vectorleader)(tl,rl,c,ml) + ? (data*)(*this->vectorleader)(tl,rl,lc,ml) : NULL; return new data(this->varindex, h.refcent, this->transport_operator, @@ -64,11 +62,9 @@ protected: // Access to the data -public: - - virtual const data* operator() (int tl, int rl, int c, int ml) const; + virtual const data* operator() (int tl, int rl, int lc, int ml) const; - virtual data* operator() (int tl, int rl, int c, int ml); + virtual data* operator() (int tl, int rl, int lc, int ml); diff --git a/Carpet/CarpetLib/src/ggf.cc b/Carpet/CarpetLib/src/ggf.cc index 41e6787dc..d975a55a1 100644 --- a/Carpet/CarpetLib/src/ggf.cc +++ b/Carpet/CarpetLib/src/ggf.cc @@ -7,6 +7,8 @@ #include "cctk.h" +#include "CarpetTimers.hh" + #include "defs.hh" #include "dh.hh" #include "th.hh" @@ -19,6 +21,10 @@ using namespace CarpetLib; +list ggf::allggf; + + + // Constructors ggf::ggf (const int varindex_, const operator_type transport_operator_, th& t_, dh& d_, @@ -44,12 +50,15 @@ ggf::ggf (const int varindex_, const operator_type transport_operator_, timelevels_.AT(ml).resize(d.h.reflevels(), 0); } - d.add(this); + allggfi = 
allggf.insert(allggf.end(), this); + + dh_handle = d.add(this); } // Destructors ggf::~ggf () { - d.remove(this); + d.erase(dh_handle); + allggf.erase(allggfi); } // Comparison @@ -69,23 +78,24 @@ void ggf::set_timelevels (const int ml, const int rl, const int new_timelevels) if (new_timelevels < timelevels(ml,rl)) { - for (int c=0; c<(int)storage.AT(ml).AT(rl).size(); ++c) { + for (int lc=0; lc timelevels(ml,rl)) { - for (int c=0; c<(int)storage.AT(ml).AT(rl).size(); ++c) { - storage.AT(ml).AT(rl).AT(c).resize (new_timelevels); + for (int lc=0; lcallocate - (d.boxes.AT(ml).AT(rl).AT(c).exterior, h.processor(rl,c)); + storage.AT(ml).AT(rl).AT(lc).AT(tl) = typed_data(tl,rl,lc,ml); + storage.AT(ml).AT(rl).AT(lc).AT(tl)->allocate + (d.boxes.AT(ml).AT(rl).AT(c).exterior, dist::rank()); } // for tl - } // for c + } // for lc } @@ -97,34 +107,39 @@ void ggf::set_timelevels (const int ml, const int rl, const int new_timelevels) void ggf::recompose_crop () { // Free storage that will not be needed - static Timer timer ("ggf::recompose_crop"); + static Carpet::Timer timer ("CarpetLib::ggf::recompose_crop"); timer.start (); for (int ml=0; mlallocate - (d.boxes.AT(ml).AT(rl).AT(c).exterior, h.processor(rl,c)); + storage.AT(ml).AT(rl).AT(lc).AT(tl) = typed_data(tl,rl,lc,ml); + storage.AT(ml).AT(rl).AT(lc).AT(tl)->allocate + (d.boxes.AT(ml).AT(rl).AT(c).exterior, dist::rank()); } // for tl - } // for c + } // for lc } // for ml - timer.stop (0); + timer.stop (); } void ggf::recompose_fill (comm_state & state, int const rl, bool const do_prolongate) { // Initialise the new storage - static Timer timer ("ggf::recompose_fill"); + static Carpet::Timer timer ("CarpetLib::ggf::recompose_fill"); timer.start (); - + for (int ml = 0; ml < h.mglevels(); ++ ml) { + assert (d.fast_boxes.AT(ml).AT(rl).do_init); + vector tls; if (do_prolongate and rl > 0 and - transport_operator != op_none and transport_operator != op_sync) + transport_operator != op_none and transport_operator != op_sync 
and + transport_operator != op_restrict) { int const numtl = timelevels (ml, rl); tls.resize (numtl); @@ -185,7 +204,9 @@ void ggf::recompose_fill (comm_state & state, int const rl, // Initialise from a coarser level of the new hierarchy, where // possible if (rl > 0) { - if (transport_operator != op_none and transport_operator != op_sync) { + if (transport_operator != op_none and transport_operator != op_sync and + transport_operator != op_restrict) + { for (int tl = 0; tl < timelevels (ml, rl); ++tl) { transfer_from_all (state, tl, rl, ml, @@ -199,43 +220,43 @@ void ggf::recompose_fill (comm_state & state, int const rl, } // for ml - timer.stop (0); + timer.stop (); } void ggf::recompose_free_old (const int rl) { // Delete old storage - static Timer timer ("dh::recompose_free_old"); + static Carpet::Timer timer ("dh::recompose_free_old"); timer.start (); for (int ml=0; ml<(int)oldstorage.size(); ++ml) { - for (int c=0; c<(int)oldstorage.AT(ml).AT(rl).size(); ++c) { - for (int tl=0; tl=0 and ml 0); - for (int c=0; c<(int)storage.AT(ml).AT(rl).size(); ++c) { - fdata & fdatas = storage.AT(ml).AT(rl).AT(c); + for (int lc=0; lc<(int)storage.AT(ml).AT(rl).size(); ++lc) { + fdata & fdatas = storage.AT(ml).AT(rl).AT(lc); gdata * const tmpdata = fdatas.AT(ntl-1); for (int tl=ntl-1; tl>0; --tl) { fdatas.AT(tl) = fdatas.AT(tl-1); @@ -260,8 +281,8 @@ void ggf::cycle_all (int const rl, int const ml) { void ggf::flip_all (int const rl, int const ml) { assert (rl>=0 and rl=0 and ml=0 and rl=0 and mlstorage(); - size_t const size = fdatas.AT(0)->size() * fdatas.AT(0)->elementsize(); - for (int tl=1; tlstorage(); - memcpy (dstptr, srcptr, size); - } + for (int lc=0; lc<(int)storage.AT(ml).AT(rl).size(); ++lc) { + fdata const & fdatas = storage.AT(ml).AT(rl).AT(lc); + void const * const srcptr = fdatas.AT(0)->storage(); + size_t const size = fdatas.AT(0)->size() * fdatas.AT(0)->elementsize(); + for (int tl=1; tlstorage(); + memcpy (dstptr, srcptr, size); } } } @@ -322,7 +341,9 @@ 
ref_bnd_prolongate_all (comm_state & state, { // Interpolate assert (rl>=1); - if (transport_operator == op_none or transport_operator == op_sync) return; + if (transport_operator == op_none or transport_operator == op_sync or + transport_operator == op_restrict) + return; vector tl2s; static Timer timer ("ref_bnd_prolongate_all"); timer.start (); @@ -435,7 +456,9 @@ ref_prolongate_all (comm_state & state, CCTK_REAL const time) { assert (rl>=1); - if (transport_operator == op_none or transport_operator == op_sync) return; + if (transport_operator == op_none or transport_operator == op_sync or + transport_operator == op_restrict) + return; static Timer timer ("ref_prolongate_all"); timer.start (); vector tl2s; @@ -467,8 +490,7 @@ transfer_from_all (comm_state & state, assert (ml1>=0 and ml1=0 and tl1=0 and rl2<(int)srcstorage.AT(ml2).size()); for (size_t i = 0; i < tl2s.size(); ++ i) { int const tl2 = tl2s.AT(i); - assert (tl2>=0 and tl2<(int)srcstorage.AT(ml2).AT(rl2).AT(0).size()); + assert (tl2>=0); + int const lc = 0; + if (lc < int(srcstorage.AT(ml2).AT(rl2).size())) { + assert (tl2<(int)srcstorage.AT(ml2).AT(rl2).AT(lc).size()); + } } // Set up source times @@ -514,14 +540,21 @@ transfer_from_all (comm_state & state, ibbox const & recv = precv.extent; int const c2 = psend.component; int const c1 = precv.component; + int const lc2 = h.get_local_component(rl2,c2); + int const lc1 = h.get_local_component(rl1,c1); + int const p2 = h.processor(rl2,c2); + int const p1 = h.processor(rl1,c1); // Source and destination data - gdata * const dst = storage.AT(ml1).AT(rl1).AT(c1).AT(tl1); + gdata * const dst = + lc1>=0 ? storage.AT(ml1).AT(rl1).AT(lc1).AT(tl1) : NULL; cdata const & srcs = srcstorage.AT(ml2).AT(rl2); for (int i=0; i<(int)gsrcs.size(); ++i) { - gsrcs.AT(i) = srcs.AT(c2).AT(tl2s.AT(i)); + gsrcs.AT(i) = lc2>=0 ? 
srcs.AT(lc2).AT(tl2s.AT(i)) : NULL; } - dst->transfer_from (state, gsrcs, times, recv, send, time, pos, pot); + + dst->transfer_from + (state, gsrcs, times, recv, send, p1, p2 , time, pos, pot); } total.stop (0); @@ -546,3 +579,16 @@ memory () memoryof (vectorleader) + memoryof (oldstorage); } + +size_t +ggf:: +allmemory () +{ + size_t mem = memoryof(allggf); + for (list::const_iterator + ggfi = allggf.begin(); ggfi != allggf.end(); ++ ggfi) + { + mem += memoryof(**ggfi); + } + return mem; +} diff --git a/Carpet/CarpetLib/src/ggf.hh b/Carpet/CarpetLib/src/ggf.hh index b2c86b8db..f89a73ee0 100644 --- a/Carpet/CarpetLib/src/ggf.hh +++ b/Carpet/CarpetLib/src/ggf.hh @@ -28,6 +28,9 @@ ostream& operator<< (ostream& os, const ggf& f); // A generic grid function without type information class ggf { + + static list allggf; + list::iterator allggfi; // Types typedef list iblist; @@ -38,7 +41,7 @@ class ggf { typedef gdata* tdata; // data ... typedef vector fdata; // ... for each time level - typedef vector cdata; // ... for each component + typedef vector cdata; // ... for each local component typedef vector rdata; // ... for each refinement level typedef vector mdata; // ... 
for each multigrid level @@ -53,6 +56,7 @@ public: // should be readonly const gh &h; // grid hierarchy dh &d; // data hierarchy + dh::ggf_handle dh_handle; protected: vector > timelevels_; // time levels [ml][rl] @@ -80,10 +84,10 @@ public: virtual ~ggf (); // Comparison - bool operator== (const ggf& f) const; + bool operator== (const ggf& f) const CCTK_ATTRIBUTE_PURE; // Querying - int timelevels (int const ml, int const rl) const + int timelevels (int const ml, int const rl) const CCTK_ATTRIBUTE_PURE { return timelevels_.AT(ml).AT(rl); } @@ -141,9 +145,7 @@ public: // Helpers -protected: - - virtual gdata* typed_data (int tl, int rl, int c, int ml) = 0; + virtual gdata* typed_data (int tl, int rl, int lc, int ml) const = 0; @@ -181,13 +183,14 @@ protected: public: // Access to the data - virtual const gdata* operator() (int tl, int rl, int c, int ml) const = 0; - virtual gdata* operator() (int tl, int rl, int c, int ml) = 0; + virtual const gdata* operator() (int tl, int rl, int lc, int ml) const CCTK_ATTRIBUTE_PURE = 0; + virtual gdata* operator() (int tl, int rl, int lc, int ml) CCTK_ATTRIBUTE_PURE = 0; // Output - virtual size_t memory () const; + virtual size_t memory () const CCTK_ATTRIBUTE_PURE = 0; + static size_t allmemory () CCTK_ATTRIBUTE_PURE; virtual ostream& output (ostream& os) const = 0; private: @@ -199,6 +202,7 @@ private: +inline size_t memoryof (ggf const & f) CCTK_ATTRIBUTE_PURE; inline size_t memoryof (ggf const & f) { return f.memory (); diff --git a/Carpet/CarpetLib/src/gh.cc b/Carpet/CarpetLib/src/gh.cc index 9ecdb925c..21cb399a6 100644 --- a/Carpet/CarpetLib/src/gh.cc +++ b/Carpet/CarpetLib/src/gh.cc @@ -6,6 +6,8 @@ #include "cctk.h" #include "cctk_Parameters.h" +#include "CarpetTimers.hh" + #include "defs.hh" #include "dh.hh" #include "th.hh" @@ -19,7 +21,11 @@ using namespace CarpetLib; - // Constructors +list gh::allgh; + + + +// Constructors gh:: gh (vector const & reffacts_, centering const refcent_, int const mgfact_, centering 
const mgcent_, @@ -65,12 +71,15 @@ gh (vector const & reffacts_, centering const refcent_, boundary_width[0] + boundary_width[1])); } } + + allghi = allgh.insert(allgh.end(), this); } // Destructors gh:: ~gh () { + allgh.erase(allghi); } @@ -78,14 +87,16 @@ gh:: // Modifiers void gh:: -regrid (rregs const & superregs, mregs const & regs) +regrid (rregs const & superregs, mregs const & regs, bool const do_init) { DECLARE_CCTK_PARAMETERS; + + static Carpet::Timer timer ("CarpetLib::gh::regrid"); + timer.start(); superregions = superregs; - // Save the grid hierarchy - oldregions.clear (); + assert (oldregions.empty()); swap (oldregions, regions); regions = regs; @@ -182,19 +193,70 @@ regrid (rregs const & superregs, mregs const & regs) } } + + + // Calculate global and local components + global_components_.resize(reflevels()); + local_components_.resize(reflevels()); + for (int rl=0; rl::iterator t=ths.begin(); t!=ths.end(); ++t) { (*t)->regrid(); } for (list::iterator d=dhs.begin(); d!=dhs.end(); ++d) { - (*d)->regrid(); + (*d)->regrid(do_init); + } + + timer.stop(); +} + + + +void +gh:: +regrid_free (bool const do_init) +{ + oldregions.clear(); + + for (list::iterator t=ths.begin(); t!=ths.end(); ++t) { + (*t)->regrid_free(); + } + + for (list::iterator d=dhs.begin(); d!=dhs.end(); ++d) { + (*d)->regrid_free(do_init); } } @@ -205,11 +267,6 @@ gh:: recompose (int const rl, bool const do_prolongate) { - // Handle changes in number of mglevels - if (oldregions.size() != regions.size()) { - oldregions.resize (regions.size()); - } - bool const do_recompose = level_did_change(rl); if (do_recompose) { @@ -219,12 +276,6 @@ recompose (int const rl, (*d)->recompose (rl, do_prolongate); } - // Overwrite old with new grid hierarchy - for (int ml=0; ml::const_iterator + ghi = allgh.begin(); ghi != allgh.end(); ++ ghi) + { + mem += memoryof(**ghi); + } + return mem; +} + // Output diff --git a/Carpet/CarpetLib/src/gh.hh b/Carpet/CarpetLib/src/gh.hh index b80d71ca3..1942ae38b 
100644 --- a/Carpet/CarpetLib/src/gh.hh +++ b/Carpet/CarpetLib/src/gh.hh @@ -28,6 +28,9 @@ class gh; // level. The extents do not include ghost zones. class gh { + static list allgh; + list::iterator allghi; + public: // Types @@ -47,14 +50,21 @@ public: // should be readonly vector > baseextents; // [ml][rl] const i2vect boundary_width; +private: + vector > global_components_; // [rl][lc] + vector > local_components_; // [rl][c] +public: + // Extents of the regions before distributing them over the // processors rregs superregions; mregs regions; // extents and properties of all grids - mregs oldregions; // a copy, used during regridding + mregs oldregions; // extents and properties of all grids + typedef list::iterator th_handle; list ths; // list of all time hierarchies + typedef list::iterator dh_handle; list dhs; // list of all data hierarchies public: @@ -69,64 +79,67 @@ public: ~gh (); // Modifiers - void regrid (rregs const & superregs, mregs const & regs); + void regrid (rregs const & superregs, mregs const & regs, bool do_init); + void regrid_free (bool do_init); bool recompose (int rl, bool do_prolongate); private: - bool level_did_change (int rl) const; + bool level_did_change (int rl) CCTK_ATTRIBUTE_PURE; // Accessors public: - ibbox const & extent (const int ml, const int rl, const int c) const + ibbox const & extent (const int ml, const int rl, const int c) const CCTK_ATTRIBUTE_PURE { return regions.AT(ml).AT(rl).AT(c).extent; } - ibbox const & baseextent (const int ml, const int rl) const + ibbox const & baseextent (const int ml, const int rl) const CCTK_ATTRIBUTE_PURE { return baseextents.AT(ml).AT(rl); } - b2vect const & outer_boundaries (const int rl, const int c) const + b2vect const & outer_boundaries (const int rl, const int c) const CCTK_ATTRIBUTE_PURE { return regions.AT(0).AT(rl).AT(c).outer_boundaries; } - int processor (const int rl, const int c) const + int processor (const int rl, const int c) const CCTK_ATTRIBUTE_PURE { return 
regions.AT(0).AT(rl).AT(c).processor; } - int old_processor (const int rl, const int c) const + int old_processor (const int rl, const int c) const CCTK_ATTRIBUTE_PURE { return oldregions.AT(0).AT(rl).AT(c).processor; } - int mglevels () const + int mglevels () const CCTK_ATTRIBUTE_PURE { return (int)regions.size(); } - int reflevels () const + int reflevels () const CCTK_ATTRIBUTE_PURE { if (mglevels() == 0) return 0; return (int)regions.AT(0).size(); } - int components (const int rl) const + int components (const int rl) const CCTK_ATTRIBUTE_PURE { return (int)regions.AT(0).AT(rl).size(); } - bool is_local (const int rl, const int c) const + bool is_local (const int rl, const int c) const CCTK_ATTRIBUTE_PURE { return processor(rl,c) == dist::rank(); } - int local_components (const int rl) const; + int local_components (int rl) const CCTK_ATTRIBUTE_PURE; + int get_component (int rl, int lc) const CCTK_ATTRIBUTE_PURE; + int get_local_component (int rl, int c) const CCTK_ATTRIBUTE_PURE; void locate_position (rvect const & rpos, int const ml, @@ -139,15 +152,16 @@ public: int & rl, int & c, ivect & aligned_ipos) const; // Time hierarchy management - void add (th * t); - void remove (th * t); + th_handle add (th * t); + void erase (th_handle ti); // Data hierarchy management - void add (dh * d); - void remove (dh * d); + dh_handle add (dh * d); + void erase (dh_handle di); // Output - size_t memory () const; + size_t memory () const CCTK_ATTRIBUTE_PURE; + static size_t allmemory () CCTK_ATTRIBUTE_PURE; ostream & output (ostream & os) const; private: @@ -158,6 +172,7 @@ private: +inline size_t memoryof (gh const & g) CCTK_ATTRIBUTE_PURE; inline size_t memoryof (gh const & g) { return g.memory (); diff --git a/Carpet/CarpetLib/src/interpolate_3d_2tl.cc b/Carpet/CarpetLib/src/interpolate_3d_2tl.cc index 0b984b142..9dad6a55d 100644 --- a/Carpet/CarpetLib/src/interpolate_3d_2tl.cc +++ b/Carpet/CarpetLib/src/interpolate_3d_2tl.cc @@ -6,7 +6,7 @@ #include #include -#include 
"operator_prototypes.hh" +#include "operator_prototypes_3d.hh" #include "typeprops.hh" using namespace std; diff --git a/Carpet/CarpetLib/src/interpolate_3d_3tl.cc b/Carpet/CarpetLib/src/interpolate_3d_3tl.cc index c0e8b44fd..6fdaa854d 100644 --- a/Carpet/CarpetLib/src/interpolate_3d_3tl.cc +++ b/Carpet/CarpetLib/src/interpolate_3d_3tl.cc @@ -6,7 +6,7 @@ #include #include -#include "operator_prototypes.hh" +#include "operator_prototypes_3d.hh" #include "typeprops.hh" using namespace std; diff --git a/Carpet/CarpetLib/src/interpolate_3d_4tl.cc b/Carpet/CarpetLib/src/interpolate_3d_4tl.cc index 7d4c7fe26..0a2f5c66c 100644 --- a/Carpet/CarpetLib/src/interpolate_3d_4tl.cc +++ b/Carpet/CarpetLib/src/interpolate_3d_4tl.cc @@ -6,7 +6,7 @@ #include #include -#include "operator_prototypes.hh" +#include "operator_prototypes_3d.hh" #include "typeprops.hh" using namespace std; diff --git a/Carpet/CarpetLib/src/interpolate_3d_5tl.cc b/Carpet/CarpetLib/src/interpolate_3d_5tl.cc index f4204ea68..848d04a7e 100644 --- a/Carpet/CarpetLib/src/interpolate_3d_5tl.cc +++ b/Carpet/CarpetLib/src/interpolate_3d_5tl.cc @@ -6,7 +6,7 @@ #include #include -#include "operator_prototypes.hh" +#include "operator_prototypes_3d.hh" #include "typeprops.hh" using namespace std; diff --git a/Carpet/CarpetLib/src/interpolate_eno_3d_3tl.cc b/Carpet/CarpetLib/src/interpolate_eno_3d_3tl.cc index f3693c220..729bb20b5 100644 --- a/Carpet/CarpetLib/src/interpolate_eno_3d_3tl.cc +++ b/Carpet/CarpetLib/src/interpolate_eno_3d_3tl.cc @@ -6,7 +6,7 @@ #include #include -#include "operator_prototypes.hh" +#include "operator_prototypes_3d.hh" #include "typeprops.hh" using namespace std; diff --git a/Carpet/CarpetLib/src/limits.cc b/Carpet/CarpetLib/src/limits.cc new file mode 100644 index 000000000..f3c5ed650 --- /dev/null +++ b/Carpet/CarpetLib/src/limits.cc @@ -0,0 +1,95 @@ +#include +#include + +#include +#include +#include +#include + +#include "defs.hh" + +#include "limits.hh" + +namespace CarpetLib { + + using 
namespace std; + + + + static + void + set_limit (int resource, char const * name, CCTK_INT value); + + static + ostream & + operator<< (ostream & s, struct rlimit const & limit); + + static + void + output (ostream & s, rlim_t const & value); + + + + void + set_system_limits () + { + DECLARE_CCTK_PARAMETERS; + set_limit (RLIMIT_CORE, "core file size", max_core_size_MB); + set_limit (RLIMIT_AS, "memory size", max_memory_size_MB); + } + + + + void + set_limit (int const resource, char const * const name, CCTK_INT const value) + { + struct rlimit limit; + check (not getrlimit (resource, & limit)); + + if (value == -2 ) { + // Only show limit + cout << "Current " << name << " limit: " << limit << endl; + return; + } + + cout << "Old " << name << " limit: " << limit << endl; + + if (value == -1) { + limit.rlim_cur = limit.rlim_max; + } else { + limit.rlim_cur = min ((rlim_t) value * 1024 * 1024, limit.rlim_max); + } + + check (not setrlimit (resource, & limit)); + check (not getrlimit (resource, & limit)); + + cout << "New " << name << " limit: " << limit << endl; + } + + + + static + ostream & + operator<< (ostream & s, struct rlimit const & limit) + { + s << "hard="; + output (s, limit.rlim_max); + s << ", soft="; + output (s, limit.rlim_cur); + return s; + } + + + + static + void + output (ostream & s, rlim_t const & value) + { + if (value == RLIM_INFINITY) { + s << "[unlimited]"; + } else { + s << (value / CCTK_REAL (1024*1024)) << " MB"; + } + } + +} // namespace CarpetLib diff --git a/Carpet/CarpetLib/src/limits.hh b/Carpet/CarpetLib/src/limits.hh new file mode 100644 index 000000000..910f02e26 --- /dev/null +++ b/Carpet/CarpetLib/src/limits.hh @@ -0,0 +1,8 @@ +#include + +namespace CarpetLib { + + void + set_system_limits (); + +} // namespace CarpetLib diff --git a/Carpet/CarpetLib/src/make.code.defn b/Carpet/CarpetLib/src/make.code.defn index 88f6261ce..81a0352ad 100644 --- a/Carpet/CarpetLib/src/make.code.defn +++ b/Carpet/CarpetLib/src/make.code.defn @@ -13,12
+13,16 @@ SRCS = bbox.cc \ gf.cc \ ggf.cc \ gh.cc \ + limits.cc \ mem.cc \ + mpi_string.cc \ region.cc \ + startup_time.cc \ th.cc \ timestat.cc \ vect.cc \ copy_3d.cc \ + copy_4d.cc \ interpolate_3d_2tl.cc \ interpolate_3d_3tl.cc \ interpolate_3d_4tl.cc \ @@ -26,6 +30,7 @@ SRCS = bbox.cc \ interpolate_eno_3d_3tl.cc \ restrict_3d_cc_rf2.cc \ restrict_3d_rf2.cc \ + restrict_4d_rf2.cc \ prolongate_3d_cc_rf2.cc \ prolongate_3d_o1_rf2.cc \ prolongate_3d_o3_rf2.cc \ @@ -33,8 +38,13 @@ SRCS = bbox.cc \ prolongate_3d_o7_rf2.cc \ prolongate_3d_o9_rf2.cc \ prolongate_3d_o11_rf2.cc \ + prolongate_3d_cc_o0_rf2.cc \ + prolongate_3d_cc_o1_rf2.cc \ + prolongate_3d_cc_o2_rf2.cc \ + prolongate_3d_o5_monotone_rf2.cc \ prolongate_3d_real8_eno.F90 \ - prolongate_3d_real8_weno.F90 + prolongate_3d_real8_weno.F90 \ + prolongate_4d_o1_rf2.cc # Subdirectories containing source files SUBDIRS = diff --git a/Carpet/CarpetLib/src/mem.cc b/Carpet/CarpetLib/src/mem.cc index 848988637..b3840a115 100644 --- a/Carpet/CarpetLib/src/mem.cc +++ b/Carpet/CarpetLib/src/mem.cc @@ -21,6 +21,11 @@ #include "defs.hh" #include "dist.hh" +#include "dh.hh" +#include "gdata.hh" +#include "ggf.hh" +#include "gh.hh" +#include "th.hh" #include "mem.hh" @@ -30,12 +35,18 @@ using namespace std; +double const MEGA = 1024*1024; + + + struct mstat { - // Carpet statistics + // Carpet object statistics double total_bytes; double total_objects; double max_bytes; double max_objects; + // Carpet administrative data structure statistics + double total_admin_bytes; // malloc statistics double malloc_used_bytes; double malloc_free_bytes; @@ -71,15 +82,15 @@ mem (size_t const vectorlength, size_t const nelems, if (memptr == NULL) { const double nbytes = vectorlength * nelems * sizeof (T); if (max_allowed_memory_MB > 0 - and (total_allocated_bytes + nbytes > 1.0e6 * max_allowed_memory_MB)) + and (total_allocated_bytes + nbytes > MEGA * max_allowed_memory_MB)) { T Tdummy; CCTK_VWarn (0, __LINE__, __FILE__, CCTK_THORNSTRING, 
"Refusing to allocate %.0f bytes (%.3f MB) of memory for type %s. %.0f bytes (%.3f MB) are currently allocated in %d objects. The parameter file specifies a maximum of %d MB", - double(nbytes), double(nbytes/1.0e6), + double(nbytes), double(nbytes/MEGA), typestring(Tdummy), double(total_allocated_bytes), - double(total_allocated_bytes/1.0e6), + double(total_allocated_bytes/MEGA), int(total_allocated_objects), int(max_allowed_memory_MB)); } @@ -90,10 +101,10 @@ mem (size_t const vectorlength, size_t const nelems, T Tdummy; CCTK_VWarn (0, __LINE__, __FILE__, CCTK_THORNSTRING, "Failed to allocate %.0f bytes (%.3f MB) of memory for type %s. %.0f bytes (%.3f MB) are currently allocated in %d objects", - double(nbytes), double(nbytes/1.0e6), + double(nbytes), double(nbytes/MEGA), typestring(Tdummy), double(total_allocated_bytes), - double(total_allocated_bytes/1.0e6), + double(total_allocated_bytes/MEGA), int(total_allocated_objects)); } total_allocated_bytes += nbytes; @@ -214,7 +225,7 @@ alloc (size_t nbytes) if (not freeptr) { CCTK_VWarn (CCTK_WARN_ABORT, __LINE__, __FILE__, CCTK_THORNSTRING, "Failed to allocate %.3f MB of memory", - double(freesize/1.0e6)); + double(freesize/MEGA)); } // Remember the pointer so that it can be freed chunks.push (freeptr); @@ -251,40 +262,6 @@ memory () -extern "C" void CarpetLib_setmemlimit (CCTK_ARGUMENTS); - -void CarpetLib_setmemlimit (CCTK_ARGUMENTS) -{ - DECLARE_CCTK_ARGUMENTS; - DECLARE_CCTK_PARAMETERS; - - // Set address space limit - struct rlimit aslimit; - { - check (not getrlimit (RLIMIT_AS, & aslimit)); - } - CCTK_VInfo (CCTK_THORNSTRING, - "Old address space size limit: hard=%lld, soft=%lld", - (long long) aslimit.rlim_max, (long long) aslimit.rlim_cur); - if (max_allowed_memory_MB > 0) { - aslimit.rlim_cur = max_allowed_memory_MB * 1000000LL; - } - { - check (not setrlimit (RLIMIT_AS, & aslimit)); - } - { - check (not getrlimit (RLIMIT_AS, & aslimit)); - } - CCTK_VInfo (CCTK_THORNSTRING, - "Old address space size limit: 
hard=%lld, soft=%lld", - (long long) aslimit.rlim_max, (long long) aslimit.rlim_cur); - CCTK_VInfo (CCTK_THORNSTRING, - "(Unlimited address space size indicated by %lld)", - (long long) RLIM_INFINITY); -} - - - extern "C" void CarpetLib_printmemstats (CCTK_ARGUMENTS); void CarpetLib_printmemstats (CCTK_ARGUMENTS) @@ -294,14 +271,17 @@ void CarpetLib_printmemstats (CCTK_ARGUMENTS) int const ioproc = 0; - if (print_memstats_every > 0 - and cctk_iteration % print_memstats_every == 0) + if ((print_memstats_every == 0 and cctk_iteration == 0) or + (print_memstats_every > 0 and cctk_iteration % print_memstats_every == 0)) { mstat mybuf; mybuf.total_bytes = total_allocated_bytes; mybuf.total_objects = total_allocated_objects; mybuf.max_bytes = max_allocated_bytes; mybuf.max_objects = max_allocated_objects; + mybuf.total_admin_bytes = + gh::allmemory() + dh::allmemory() + th::allmemory() + + ggf::allmemory() + gdata::allmemory(); #ifdef HAVE_MALLINFO // NOTE: struct mallinfo returns byte-counts as int, which can // overflow. In this case, the information is incorrect. 
@@ -316,14 +296,23 @@ void CarpetLib_printmemstats (CCTK_ARGUMENTS) cout << "Memory statistics from CarpetLib:" << eol << " Current number of objects: " << total_allocated_objects << eol << " Current allocated memory: " - << setprecision(3) << total_allocated_bytes / 1.0e6 << " MB" << eol + << setprecision(3) << total_allocated_bytes / MEGA << " MB" << eol << " Maximum number of objects: " << max_allocated_objects << eol << " Maximum allocated memory: " - << setprecision(3) << max_allocated_bytes / 1.0e6 << " MB" << eol + << setprecision(3) << max_allocated_bytes / MEGA << " MB" << eol + << " Current administrative memory: " + << setprecision(3) << mybuf.total_admin_bytes / MEGA << " MB" << eol << " Total allocated used system memory: " - << setprecision(3) << mybuf.malloc_used_bytes / 1.0e6 << " MB" << eol + << setprecision(3) << mybuf.malloc_used_bytes / MEGA << " MB" << eol << " Total allocated free system memory: " - << setprecision(3) << mybuf.malloc_free_bytes / 1.0e6 << " MB" << endl; + << setprecision(3) << mybuf.malloc_free_bytes / MEGA << " MB" << endl; + +#warning "TODO" + cout << " gh::allmemory: " << gh ::allmemory() << eol + << " dh::allmemory: " << dh ::allmemory() << eol + << " th::allmemory: " << th ::allmemory() << eol + << " ggf::allmemory: " << ggf ::allmemory() << eol + << " gdata::allmemory: " << gdata::allmemory() << endl; if (strcmp (memstat_file, "") != 0) { vector allbuf (dist::size()); @@ -333,9 +322,15 @@ void CarpetLib_printmemstats (CCTK_ARGUMENTS) if (dist::rank() == ioproc) { + double max_total_bytes = 0; + double avg_total_bytes = 0; + double cnt_total_bytes = 0; double max_max_bytes = 0; double avg_max_bytes = 0; double cnt_max_bytes = 0; + double max_admin_bytes = 0; + double avg_admin_bytes = 0; + double cnt_admin_bytes = 0; double max_used_bytes = 0; double avg_used_bytes = 0; double cnt_used_bytes = 0; @@ -343,9 +338,15 @@ void CarpetLib_printmemstats (CCTK_ARGUMENTS) double avg_free_bytes = 0; double cnt_free_bytes = 0; for 
(size_t n=0; n +inline size_t memoryof (mem const & m) CCTK_ATTRIBUTE_PURE; template inline size_t memoryof (mem const & m) { return m.memory(); } @@ -84,9 +86,10 @@ public: void * alloc (size_t nbytes); // Memory usage - size_t memory () const; + size_t memory () const CCTK_ATTRIBUTE_PURE; }; +inline size_t memoryof (mempool const & m) CCTK_ATTRIBUTE_PURE; inline size_t memoryof (mempool const & m) { return m.memory(); } #endif // ifndef MEM_HH diff --git a/Carpet/CarpetLib/src/mpi_string.cc b/Carpet/CarpetLib/src/mpi_string.cc new file mode 100644 index 000000000..063ed0bdf --- /dev/null +++ b/Carpet/CarpetLib/src/mpi_string.cc @@ -0,0 +1,480 @@ +#include +#include +#include +#include +#include + +#include + +#include "cctk.h" + +#include "dh.hh" +#include "mpi_string.hh" +#include "region.hh" + + + +namespace CarpetLib +{ + + using namespace std; + + + + vector + allgather_string (MPI_Comm const comm, + string const & data) + { + // Get the total number of processors + int num_procs; + MPI_Comm_size (comm, & num_procs); + + // Gather the lengths of the data strings + int const length = data.length(); + vector lengths (num_procs); + + MPI_Allgather (const_cast (& length), 1, MPI_INT, + & lengths.front(), 1, MPI_INT, + comm); + + // Allocate space for all data strings + vector offsets (num_procs + 1); + offsets.AT(0) = 0; + for (int n = 0; n < num_procs; ++ n) + { + offsets.AT(n + 1) = offsets.AT(n) + lengths.AT(n); + } + int const total_length = offsets.AT(num_procs); + vector alldata_buffer (total_length); + + // Gather all data strings + MPI_Allgatherv (const_cast (data.c_str()), length, MPI_CHAR, + & alldata_buffer.front(), + const_cast (& lengths.front()), + const_cast (& offsets.front()), + MPI_CHAR, + comm); + + // Convert data buffer with C strings to C++ strings + vector alldata (num_procs); + for (int n = 0; n < num_procs; ++ n) + { + alldata.AT(n) = + string (& alldata_buffer.AT (offsets.AT(n)), lengths.AT(n)); + } + + return alldata; + } + + + + vector 
+ alltoallv_string (MPI_Comm const comm, + vector const & data) + { + // Get the total number of processors + int num_procs; + MPI_Comm_size (comm, & num_procs); + + // Exchange the lengths of the data strings + vector lengths_in (num_procs); + for (int n = 0; n < num_procs; ++ n) + { + lengths_in.AT(n) = data.AT(n).length(); + } + vector lengths (num_procs); + MPI_Alltoall (& lengths_in.front(), 1, MPI_INT, + & lengths.front(), 1, MPI_INT, + comm); + + // Allocate space for all data strings + vector offsets_in (num_procs + 1); + offsets_in.AT(0) = 0; + for (int n = 0; n < num_procs; ++ n) + { + offsets_in.AT(n + 1) = offsets_in.AT(n) + lengths_in.AT(n); + } + int const total_length_in = offsets_in.AT(num_procs); + vector alldata_buffer_in (total_length_in); + + vector offsets (num_procs + 1); + offsets.AT(0) = 0; + for (int n = 0; n < num_procs; ++ n) + { + offsets.AT(n + 1) = offsets.AT(n) + lengths.AT(n); + } + int const total_length = offsets.AT(num_procs); + vector alldata_buffer (total_length); + + // Convert C++ strings to data buffer with C strings + for (int n = 0; n < num_procs; ++ n) + { + memcpy (& alldata_buffer_in.AT (offsets_in.AT(n)), + data.AT(n).c_str(), + lengths_in.AT(n)); + } + + // Exchange all data strings + MPI_Alltoallv (& alldata_buffer_in.front(), + & lengths_in.front(), & offsets_in.front(), MPI_CHAR, + & alldata_buffer.front(), + & lengths.front(), & offsets.front(), MPI_CHAR, + comm); + + // Convert data buffer with C strings to C++ strings + vector alldata (num_procs); + for (int n = 0; n < num_procs; ++ n) + { + alldata.AT(n) = + string (& alldata_buffer.AT (offsets.AT(n)), lengths.AT(n)); + } + + return alldata; + } + + + + string + broadcast_string (MPI_Comm const comm, + int const root, + string const & data) + { + // Get my rank + int rank; + MPI_Comm_rank (comm, & rank); + + if (rank == root) { + + // Broadcast the length of the data string + int const length = data.length(); + MPI_Bcast (const_cast (& length), 1, MPI_INT, root, 
comm); + + // Broadcast data string + char const * const buf = data.c_str(); + MPI_Bcast (const_cast (buf), length, MPI_CHAR, root, comm); + + // Return original string + return data; + + } else { + + // Broadcast the length of the data string + int length; + MPI_Bcast (& length, 1, MPI_INT, root, comm); + + // Allocate space for data string + vector data_buffer (length); + + // Broadcast data string + char * const buf = & data_buffer.front(); + MPI_Bcast (buf, length, MPI_CHAR, root, comm); + + // Convert data buffer with C strings to C++ strings + string const result = string (& data_buffer.front(), length); + + return result; + + } + } + + + + ////////////////////////////////////////////////////////////////////////////// + + + + template + vector > + allgatherv (MPI_Comm comm, + vector const & data) + { + // cerr << "QQQ: allgatherv[0]" << endl; + // Get the total number of processors + int num_procs; + MPI_Comm_size (comm, & num_procs); + + // Exchange the sizes of the data vectors + int const size_in = data.size(); + vector sizes_out (num_procs); + // cerr << "QQQ: allgatherv[1] size_in=" << size_in << endl; + MPI_Allgather (const_cast (& size_in), 1, MPI_INT, + & sizes_out.front(), 1, MPI_INT, + comm); + // cerr << "QQQ: allgatherv[2]" << endl; + + // Allocate space for all data vectors + vector offsets_out (num_procs + 1); + offsets_out.AT(0) = 0; + for (int n = 0; n < num_procs; ++ n) + { + offsets_out.AT(n + 1) = offsets_out.AT(n) + sizes_out.AT(n); + } + int const total_length_out = offsets_out.AT(num_procs); + vector alldata_buffer_out (total_length_out); + + // Exchange all data vectors + T const dummy; + MPI_Datatype const type = mpi_datatype (dummy); + int datatypesize; + MPI_Type_size (type, &datatypesize); + // cerr << "QQQ: allgatherv[3] total_length_out=" << total_length_out << " datatypesize=" << datatypesize << endl; +#if 0 + MPI_Allgatherv (const_cast (& data.front()), + size_in, type, + & alldata_buffer_out.front(), + & sizes_out.front(), & 
offsets_out.front(), type, + comm); +#else + int const typesize = sizeof(T); + for (int n = 0; n < num_procs; ++ n) + { + sizes_out.AT(n) *= typesize; + offsets_out.AT(n) *= typesize; + } + MPI_Allgatherv (const_cast (& data.front()), + size_in * typesize, MPI_CHAR, + & alldata_buffer_out.front(), + & sizes_out.front(), & offsets_out.front(), MPI_CHAR, + comm); + for (int n = 0; n < num_procs; ++ n) + { + sizes_out.AT(n) /= typesize; + offsets_out.AT(n) /= typesize; + } +#endif + // cerr << "QQQ: allgatherv[4]" << endl; + + // Convert data buffer to vectors + vector > alldata_out (num_procs); + { + typename vector ::const_iterator p = alldata_buffer_out.begin(); + for (int n = 0; n < num_procs; ++ n) + { + typename vector ::const_iterator const pold = p; + advance (p, sizes_out.AT(n)); + alldata_out.AT(n).assign (pold, p); + } + assert (p == alldata_buffer_out.end()); + } + + // cerr << "QQQ: allgatherv[5]" << endl; + return alldata_out; + } + + + + template + vector + alltoall (MPI_Comm const comm, + vector const & data) + { + // Get the total number of processors + int num_procs; + MPI_Comm_size (comm, & num_procs); + + // Allocate space for all data + vector alldata (num_procs); + + // Exchange all data vectors + T const dummy; + MPI_Datatype const type = mpi_datatype (dummy); + MPI_Alltoall (& data.front(), 1, type, + & alldata.front(), 1, type, + comm); + + return alldata; + } + + + + template + vector > + alltoallv (MPI_Comm const comm, + vector > const & data) + { + // Get the total number of processors + int num_procs; + MPI_Comm_size (comm, & num_procs); + + // Exchange the sizes of the data vectors + vector sizes_in (num_procs); + for (int n = 0; n < num_procs; ++ n) + { + sizes_in.AT(n) = data.AT(n).size(); + } + vector sizes_out (num_procs); + MPI_Alltoall (& sizes_in.front(), 1, MPI_INT, + & sizes_out.front(), 1, MPI_INT, + comm); + + // Copy vectors to data buffer + vector offsets_in (num_procs + 1); + offsets_in.AT(0) = 0; + for (int n = 0; n < 
num_procs; ++ n) + { + offsets_in.AT(n + 1) = offsets_in.AT(n) + sizes_in.AT(n); + } + int const total_length_in = offsets_in.AT(num_procs); + vector alldata_buffer_in; + alldata_buffer_in.reserve (total_length_in); + for (int n = 0; n < num_procs; ++ n) + { + alldata_buffer_in.insert (alldata_buffer_in.end(), + data.AT(n).begin(), data.AT(n).end()); + } + + // Allocate space for all data vectors + vector offsets_out (num_procs + 1); + offsets_out.AT(0) = 0; + for (int n = 0; n < num_procs; ++ n) + { + offsets_out.AT(n + 1) = offsets_out.AT(n) + sizes_out.AT(n); + } + int const total_length_out = offsets_out.AT(num_procs); + vector alldata_buffer_out (total_length_out); + + // Exchange all data vectors + T const dummy; + MPI_Datatype const type = mpi_datatype (dummy); + MPI_Alltoallv (& alldata_buffer_in.front(), + & sizes_in.front(), & offsets_in.front(), type, + & alldata_buffer_out.front(), + & sizes_out.front(), & offsets_out.front(), type, + comm); + + // Convert data buffer to vectors + vector > alldata_out (num_procs); + { + typename vector ::const_iterator p = alldata_buffer_out.begin(); + for (int n = 0; n < num_procs; ++ n) + { + typename vector ::const_iterator const pold = p; + advance (p, sizes_out.AT(n)); + alldata_out.AT(n).assign (pold, p); + } + } + + return alldata_out; + } + + + + template + vector + alltoallv1 (MPI_Comm const comm, + vector > const & data) + { + // Get the total number of processors + int num_procs; + MPI_Comm_size (comm, & num_procs); + + // Exchange the sizes of the data vectors + vector sizes_in (num_procs); + for (int n = 0; n < num_procs; ++ n) + { + sizes_in.AT(n) = data.AT(n).size(); + } + vector sizes_out (num_procs); + // cerr << "QQQ: alltoallv1[1]" << endl; + MPI_Alltoall (& sizes_in.front(), 1, MPI_INT, + & sizes_out.front(), 1, MPI_INT, + comm); + // cerr << "QQQ: alltoallv1[2]" << endl; + +#if 0 + // Copy vectors to data buffer + vector offsets_in (num_procs + 1); + offsets_in.AT(0) = 0; + for (int n = 0; n < 
num_procs; ++ n) + { + offsets_in.AT(n + 1) = offsets_in.AT(n) + sizes_in.AT(n); + } + int const total_length_in = offsets_in.AT(num_procs); + vector alldata_buffer_in; + alldata_buffer_in.reserve (total_length_in); + for (int n = 0; n < num_procs; ++ n) + { + alldata_buffer_in.insert (alldata_buffer_in.end(), + data.AT(n).begin(), data.AT(n).end()); + } + + // Allocate space for all data vectors + vector offsets_out (num_procs + 1); + offsets_out.AT(0) = 0; + for (int n = 0; n < num_procs; ++ n) + { + offsets_out.AT(n + 1) = offsets_out.AT(n) + sizes_out.AT(n); + } + int const total_length_out = offsets_out.AT(num_procs); + vector alldata_buffer_out (total_length_out); + + // Exchange all data vectors + T const dummy; + MPI_Datatype const type = mpi_datatype (dummy); + // cerr << "QQQ: alltoallv1[3]" << endl; + MPI_Alltoallv (& alldata_buffer_in.front(), + & sizes_in.front(), & offsets_in.front(), type, + & alldata_buffer_out.front(), + & sizes_out.front(), & offsets_out.front(), type, + comm); + // cerr << "QQQ: alltoallv1[4]" << endl; +#endif + + // Allocate space for all data vectors + vector offsets_out (num_procs + 1); + offsets_out.AT(0) = 0; + for (int n = 0; n < num_procs; ++ n) + { + offsets_out.AT(n + 1) = offsets_out.AT(n) + sizes_out.AT(n); + } + int const total_length_out = offsets_out.AT(num_procs); + vector alldata_buffer_out (total_length_out); + + // Exchange all data vectors + T const dummy; + MPI_Datatype const type = mpi_datatype (dummy); + int const tag = 4711; + vector reqs (2 * num_procs); + int nreqs = 0; + // cerr << "QQQ: alltoallv1[5]" << endl; + for (int n = 0; n < num_procs; ++ n) + { + if (sizes_out.AT(n) > 0) { + MPI_Irecv (& alldata_buffer_out.AT(offsets_out.AT(n)), + sizes_out.AT(n), + type, + n, tag, comm, & reqs.AT(nreqs)); + ++ nreqs; + } + } + // cerr << "QQQ: alltoallv1[6]" << endl; + for (int n = 0; n < num_procs; ++ n) + { + if (sizes_in.AT(n) > 0) { + MPI_Isend (const_cast (& data.AT(n).front()), + sizes_in.AT(n), + type, + 
n, tag, comm, & reqs.AT(nreqs)); + ++ nreqs; + } + } + // cerr << "QQQ: alltoallv1[7]" << endl; + MPI_Waitall (nreqs, & reqs.front(), MPI_STATUSES_IGNORE); + // cerr << "QQQ: alltoallv1[8]" << endl; + + return alldata_buffer_out; + } + + + + template + vector > + allgatherv (MPI_Comm comm, + vector const & data); + + template + vector + alltoallv1 (MPI_Comm comm, + vector > const & data); + +} // namespace CarpetLib diff --git a/Carpet/CarpetLib/src/mpi_string.hh b/Carpet/CarpetLib/src/mpi_string.hh new file mode 100644 index 000000000..6ed8d447d --- /dev/null +++ b/Carpet/CarpetLib/src/mpi_string.hh @@ -0,0 +1,55 @@ +#include +#include + +#include + +#include + + + +namespace CarpetLib +{ + + using namespace std; + + + + // String communication + + vector + allgather_string (MPI_Comm comm, + string const & data); + + vector + alltoallv_string (MPI_Comm comm, + vector const & data); + + string + broadcast_string (MPI_Comm comm, int root, + string const & data); + + + + // Arbitrary datatypes + + template + vector > + allgatherv (MPI_Comm comm, + vector const & data); + + template + vector + alltoall (MPI_Comm comm, + vector const & data); + + template + vector > + alltoallv (MPI_Comm comm, + vector > const & data); + + template + vector + alltoallv1 (MPI_Comm comm, + vector > const & data); + +} // namespace CarpetLib diff --git a/Carpet/CarpetLib/src/operator_prototypes.hh b/Carpet/CarpetLib/src/operator_prototypes.hh index 0eb990923..1feae8a90 100644 --- a/Carpet/CarpetLib/src/operator_prototypes.hh +++ b/Carpet/CarpetLib/src/operator_prototypes.hh @@ -1,253 +1,12 @@ #ifndef OPERATOR_PROTOTYPES #define OPERATOR_PROTOTYPES -#include - -#include - -#include -#include - namespace CarpetLib { - using namespace std; - - - - static inline - size_t - index3 (size_t const i, size_t const j, size_t const k, - size_t const exti, size_t const extj, size_t const extk) - { -#ifdef CARPET_DEBUG - assert (static_cast (i) >= 0 and i < exti); - assert (static_cast (j) >= 0 and j 
< extj); - assert (static_cast (k) >= 0 and k < extk); -#endif - - return i + exti * (j + extj * k); - } - - - - static int const dim3 = 3; - - typedef vect bvect3; - typedef vect ivect3; - typedef bbox ibbox3; - static int const reffact2 = 2; - - - template - void - copy_3d (T const * restrict const src, - ivect3 const & restrict srcext, - T * restrict const dst, - ivect3 const & restrict dstext, - ibbox3 const & restrict srcbbox, - ibbox3 const & restrict dstbbox, - ibbox3 const & restrict regbbox); - - - - template - void - prolongate_3d_o1_rf2 (T const * restrict const src, - ivect3 const & restrict srcext, - T * restrict const dst, - ivect3 const & restrict dstext, - ibbox3 const & restrict srcbbox, - ibbox3 const & restrict dstbbox, - ibbox3 const & restrict regbbox); - - template - void - prolongate_3d_o3_rf2 (T const * restrict const src, - ivect3 const & restrict srcext, - T * restrict const dst, - ivect3 const & restrict dstext, - ibbox3 const & restrict srcbbox, - ibbox3 const & restrict dstbbox, - ibbox3 const & restrict regbbox); - - template - void - prolongate_3d_o5_rf2 (T const * restrict const src, - ivect3 const & restrict srcext, - T * restrict const dst, - ivect3 const & restrict dstext, - ibbox3 const & restrict srcbbox, - ibbox3 const & restrict dstbbox, - ibbox3 const & restrict regbbox); - - template - void - prolongate_3d_o7_rf2 (T const * restrict const src, - ivect3 const & restrict srcext, - T * restrict const dst, - ivect3 const & restrict dstext, - ibbox3 const & restrict srcbbox, - ibbox3 const & restrict dstbbox, - ibbox3 const & restrict regbbox); - - template - void - prolongate_3d_o9_rf2 (T const * restrict const src, - ivect3 const & restrict srcext, - T * restrict const dst, - ivect3 const & restrict dstext, - ibbox3 const & restrict srcbbox, - ibbox3 const & restrict dstbbox, - ibbox3 const & restrict regbbox); - - template - void - prolongate_3d_o11_rf2 (T const * restrict const src, - ivect3 const & restrict srcext, - T * 
restrict const dst, - ivect3 const & restrict dstext, - ibbox3 const & restrict srcbbox, - ibbox3 const & restrict dstbbox, - ibbox3 const & restrict regbbox); - - - - template - void - restrict_3d_rf2 (T const * restrict const src, - ivect3 const & restrict srcext, - T * restrict const dst, - ivect3 const & restrict dstext, - ibbox3 const & restrict srcbbox, - ibbox3 const & restrict dstbbox, - ibbox3 const & restrict regbbox); - - - - template - void - interpolate_3d_2tl (T const * restrict const src1, - CCTK_REAL const t1, - T const * restrict const src2, - CCTK_REAL const t2, - ivect3 const & restrict srcext, - T * restrict const dst, - CCTK_REAL const t, - ivect3 const & restrict dstext, - ibbox3 const & restrict srcbbox, - ibbox3 const & restrict dstbbox, - ibbox3 const & restrict regbbox); - - template - void - interpolate_3d_3tl (T const * restrict const src1, - CCTK_REAL const t1, - T const * restrict const src2, - CCTK_REAL const t2, - T const * restrict const src3, - CCTK_REAL const t3, - ivect3 const & restrict srcext, - T * restrict const dst, - CCTK_REAL const t, - ivect3 const & restrict dstext, - ibbox3 const & restrict srcbbox, - ibbox3 const & restrict dstbbox, - ibbox3 const & restrict regbbox); - - template - void - interpolate_3d_4tl (T const * restrict const src1, - CCTK_REAL const t1, - T const * restrict const src2, - CCTK_REAL const t2, - T const * restrict const src3, - CCTK_REAL const t3, - T const * restrict const src4, - CCTK_REAL const t4, - ivect3 const & restrict srcext, - T * restrict const dst, - CCTK_REAL const t, - ivect3 const & restrict dstext, - ibbox3 const & restrict srcbbox, - ibbox3 const & restrict dstbbox, - ibbox3 const & restrict regbbox); - - template - void - interpolate_3d_5tl (T const * restrict const src1, - CCTK_REAL const t1, - T const * restrict const src2, - CCTK_REAL const t2, - T const * restrict const src3, - CCTK_REAL const t3, - T const * restrict const src4, - CCTK_REAL const t4, - T const * restrict 
const src5, - CCTK_REAL const t5, - ivect3 const & restrict srcext, - T * restrict const dst, - CCTK_REAL const t, - ivect3 const & restrict dstext, - ibbox3 const & restrict srcbbox, - ibbox3 const & restrict dstbbox, - ibbox3 const & restrict regbbox); - - template - void - interpolate_eno_3d_3tl (T const * restrict const src1, - CCTK_REAL const t1, - T const * restrict const src2, - CCTK_REAL const t2, - T const * restrict const src3, - CCTK_REAL const t3, - ivect3 const & restrict srcext, - T * restrict const dst, - CCTK_REAL const t, - ivect3 const & restrict dstext, - ibbox3 const & restrict srcbbox, - ibbox3 const & restrict dstbbox, - ibbox3 const & restrict regbbox); - - - - template - void - prolongate_3d_cc_rf2_std2prim (T const * restrict const src, - ivect3 const & restrict srcext, - T * restrict const dst, - ivect3 const & restrict dstext, - ibbox3 const & restrict srcbbox, - ibbox3 const & restrict dstbbox, - ibbox3 const & restrict regbbox); - - template - void - prolongate_3d_cc_rf2_prim2std (T const * restrict const src, - ivect3 const & restrict srcext, - T * restrict const dst, - ivect3 const & restrict dstext, - ibbox3 const & restrict srcbbox, - ibbox3 const & restrict dstbbox, - ibbox3 const & restrict regbbox); - - - - template - void - restrict_3d_cc_rf2 (T const * restrict const src, - ivect3 const & restrict srcext, - T * restrict const dst, - ivect3 const & restrict dstext, - ibbox3 const & restrict srcbbox, - ibbox3 const & restrict dstbbox, - ibbox3 const & restrict regbbox); - - - } // namespace CarpetLib diff --git a/Carpet/CarpetLib/src/operator_prototypes_3d.hh b/Carpet/CarpetLib/src/operator_prototypes_3d.hh new file mode 100644 index 000000000..19deccea8 --- /dev/null +++ b/Carpet/CarpetLib/src/operator_prototypes_3d.hh @@ -0,0 +1,305 @@ +#ifndef OPERATOR_PROTOTYPES_3D +#define OPERATOR_PROTOTYPES_3D + +#include + +#include + +#include "defs.hh" +#include "bbox.hh" +#include "vect.hh" + +#include "operator_prototypes.hh" + + + 
+namespace CarpetLib { + + using namespace std; + + + + static inline + size_t + index3 (size_t const i, size_t const j, size_t const k, + size_t const exti, size_t const extj, size_t const extk) + CCTK_ATTRIBUTE_CONST; + static inline + size_t + index3 (size_t const i, size_t const j, size_t const k, + size_t const exti, size_t const extj, size_t const extk) + { +#ifdef CARPET_DEBUG + assert (static_cast (i) >= 0 and i < exti); + assert (static_cast (j) >= 0 and j < extj); + assert (static_cast (k) >= 0 and k < extk); +#endif + + return i + exti * (j + extj * k); + } + + + + static int const dim3 = 3; + + typedef vect bvect3; + typedef vect ivect3; + typedef bbox ibbox3; + + + + template + void + copy_3d (T const * restrict const src, + ivect3 const & restrict srcext, + T * restrict const dst, + ivect3 const & restrict dstext, + ibbox3 const & restrict srcbbox, + ibbox3 const & restrict dstbbox, + ibbox3 const & restrict regbbox); + + + + template + void + prolongate_3d_o1_rf2 (T const * restrict const src, + ivect3 const & restrict srcext, + T * restrict const dst, + ivect3 const & restrict dstext, + ibbox3 const & restrict srcbbox, + ibbox3 const & restrict dstbbox, + ibbox3 const & restrict regbbox); + + template + void + prolongate_3d_o3_rf2 (T const * restrict const src, + ivect3 const & restrict srcext, + T * restrict const dst, + ivect3 const & restrict dstext, + ibbox3 const & restrict srcbbox, + ibbox3 const & restrict dstbbox, + ibbox3 const & restrict regbbox); + + template + void + prolongate_3d_o5_rf2 (T const * restrict const src, + ivect3 const & restrict srcext, + T * restrict const dst, + ivect3 const & restrict dstext, + ibbox3 const & restrict srcbbox, + ibbox3 const & restrict dstbbox, + ibbox3 const & restrict regbbox); + + template + void + prolongate_3d_o7_rf2 (T const * restrict const src, + ivect3 const & restrict srcext, + T * restrict const dst, + ivect3 const & restrict dstext, + ibbox3 const & restrict srcbbox, + ibbox3 const & 
restrict dstbbox, + ibbox3 const & restrict regbbox); + + template + void + prolongate_3d_o9_rf2 (T const * restrict const src, + ivect3 const & restrict srcext, + T * restrict const dst, + ivect3 const & restrict dstext, + ibbox3 const & restrict srcbbox, + ibbox3 const & restrict dstbbox, + ibbox3 const & restrict regbbox); + + template + void + prolongate_3d_o11_rf2 (T const * restrict const src, + ivect3 const & restrict srcext, + T * restrict const dst, + ivect3 const & restrict dstext, + ibbox3 const & restrict srcbbox, + ibbox3 const & restrict dstbbox, + ibbox3 const & restrict regbbox); + + + + template + void + prolongate_3d_o5_monotone_rf2 (T const * restrict const src, + ivect3 const & restrict srcext, + T * restrict const dst, + ivect3 const & restrict dstext, + ibbox3 const & restrict srcbbox, + ibbox3 const & restrict dstbbox, + ibbox3 const & restrict regbbox); + + + + template + void + prolongate_3d_cc_o0_rf2 (T const * restrict const src, + ivect3 const & restrict srcext, + T * restrict const dst, + ivect3 const & restrict dstext, + ibbox3 const & restrict srcbbox, + ibbox3 const & restrict dstbbox, + ibbox3 const & restrict regbbox); + + template + void + prolongate_3d_cc_o1_rf2 (T const * restrict const src, + ivect3 const & restrict srcext, + T * restrict const dst, + ivect3 const & restrict dstext, + ibbox3 const & restrict srcbbox, + ibbox3 const & restrict dstbbox, + ibbox3 const & restrict regbbox); + + template + void + prolongate_3d_cc_o2_rf2 (T const * restrict const src, + ivect3 const & restrict srcext, + T * restrict const dst, + ivect3 const & restrict dstext, + ibbox3 const & restrict srcbbox, + ibbox3 const & restrict dstbbox, + ibbox3 const & restrict regbbox); + + + + template + void + restrict_3d_rf2 (T const * restrict const src, + ivect3 const & restrict srcext, + T * restrict const dst, + ivect3 const & restrict dstext, + ibbox3 const & restrict srcbbox, + ibbox3 const & restrict dstbbox, + ibbox3 const & restrict regbbox); + 
+ + + template + void + interpolate_3d_2tl (T const * restrict const src1, + CCTK_REAL const t1, + T const * restrict const src2, + CCTK_REAL const t2, + ivect3 const & restrict srcext, + T * restrict const dst, + CCTK_REAL const t, + ivect3 const & restrict dstext, + ibbox3 const & restrict srcbbox, + ibbox3 const & restrict dstbbox, + ibbox3 const & restrict regbbox); + + template + void + interpolate_3d_3tl (T const * restrict const src1, + CCTK_REAL const t1, + T const * restrict const src2, + CCTK_REAL const t2, + T const * restrict const src3, + CCTK_REAL const t3, + ivect3 const & restrict srcext, + T * restrict const dst, + CCTK_REAL const t, + ivect3 const & restrict dstext, + ibbox3 const & restrict srcbbox, + ibbox3 const & restrict dstbbox, + ibbox3 const & restrict regbbox); + + template + void + interpolate_3d_4tl (T const * restrict const src1, + CCTK_REAL const t1, + T const * restrict const src2, + CCTK_REAL const t2, + T const * restrict const src3, + CCTK_REAL const t3, + T const * restrict const src4, + CCTK_REAL const t4, + ivect3 const & restrict srcext, + T * restrict const dst, + CCTK_REAL const t, + ivect3 const & restrict dstext, + ibbox3 const & restrict srcbbox, + ibbox3 const & restrict dstbbox, + ibbox3 const & restrict regbbox); + + template + void + interpolate_3d_5tl (T const * restrict const src1, + CCTK_REAL const t1, + T const * restrict const src2, + CCTK_REAL const t2, + T const * restrict const src3, + CCTK_REAL const t3, + T const * restrict const src4, + CCTK_REAL const t4, + T const * restrict const src5, + CCTK_REAL const t5, + ivect3 const & restrict srcext, + T * restrict const dst, + CCTK_REAL const t, + ivect3 const & restrict dstext, + ibbox3 const & restrict srcbbox, + ibbox3 const & restrict dstbbox, + ibbox3 const & restrict regbbox); + + template + void + interpolate_eno_3d_3tl (T const * restrict const src1, + CCTK_REAL const t1, + T const * restrict const src2, + CCTK_REAL const t2, + T const * restrict const 
src3, + CCTK_REAL const t3, + ivect3 const & restrict srcext, + T * restrict const dst, + CCTK_REAL const t, + ivect3 const & restrict dstext, + ibbox3 const & restrict srcbbox, + ibbox3 const & restrict dstbbox, + ibbox3 const & restrict regbbox); + + + + template + void + prolongate_3d_cc_rf2_std2prim (T const * restrict const src, + ivect3 const & restrict srcext, + T * restrict const dst, + ivect3 const & restrict dstext, + ibbox3 const & restrict srcbbox, + ibbox3 const & restrict dstbbox, + ibbox3 const & restrict regbbox); + + template + void + prolongate_3d_cc_rf2_prim2std (T const * restrict const src, + ivect3 const & restrict srcext, + T * restrict const dst, + ivect3 const & restrict dstext, + ibbox3 const & restrict srcbbox, + ibbox3 const & restrict dstbbox, + ibbox3 const & restrict regbbox); + + + + template + void + restrict_3d_cc_rf2 (T const * restrict const src, + ivect3 const & restrict srcext, + T * restrict const dst, + ivect3 const & restrict dstext, + ibbox3 const & restrict srcbbox, + ibbox3 const & restrict dstbbox, + ibbox3 const & restrict regbbox); + + + +} // namespace CarpetLib + + + +#endif // #ifndef OPERATOR_PROTOTYPES_3D diff --git a/Carpet/CarpetLib/src/operator_prototypes_4d.hh b/Carpet/CarpetLib/src/operator_prototypes_4d.hh new file mode 100644 index 000000000..afd75c655 --- /dev/null +++ b/Carpet/CarpetLib/src/operator_prototypes_4d.hh @@ -0,0 +1,92 @@ +#ifndef OPERATOR_PROTOTYPES_4D +#define OPERATOR_PROTOTYPES_4D + +#include + +#include + +#include "defs.hh" +#include "bbox.hh" +#include "vect.hh" + +#include "operator_prototypes.hh" + + + +namespace CarpetLib { + + using namespace std; + + + + static inline + size_t + index4 (size_t const i, size_t const j, size_t const k, size_t const l, + size_t const exti, size_t const extj, size_t const extk, size_t const extl) + CCTK_ATTRIBUTE_CONST; + static inline + size_t + index4 (size_t const i, size_t const j, size_t const k, size_t const l, + size_t const exti, size_t const 
extj, size_t const extk, size_t const extl) + { +#ifdef CARPET_DEBUG + assert (static_cast (i) >= 0 and i < exti); + assert (static_cast (j) >= 0 and j < extj); + assert (static_cast (k) >= 0 and k < extk); + assert (static_cast (l) >= 0 and l < extl); +#endif + + return i + exti * (j + extj * (k + extk * l)); + } + + + + static int const dim4 = 4; + + typedef vect bvect4; + typedef vect ivect4; + typedef bbox ibbox4; + + + + template + void + copy_4d (T const * restrict const src, + ivect4 const & restrict srcext, + T * restrict const dst, + ivect4 const & restrict dstext, + ibbox4 const & restrict srcbbox, + ibbox4 const & restrict dstbbox, + ibbox4 const & restrict regbbox); + + + + template + void + prolongate_4d_o1_rf2 (T const * restrict const src, + ivect4 const & restrict srcext, + T * restrict const dst, + ivect4 const & restrict dstext, + ibbox4 const & restrict srcbbox, + ibbox4 const & restrict dstbbox, + ibbox4 const & restrict regbbox); + + + + template + void + restrict_4d_rf2 (T const * restrict const src, + ivect4 const & restrict srcext, + T * restrict const dst, + ivect4 const & restrict dstext, + ibbox4 const & restrict srcbbox, + ibbox4 const & restrict dstbbox, + ibbox4 const & restrict regbbox); + + + +} // namespace CarpetLib + + + +#endif // #ifndef OPERATOR_PROTOTYPES_4D diff --git a/Carpet/CarpetLib/src/operators.hh b/Carpet/CarpetLib/src/operators.hh index 8ecd308a4..07e4e5616 100644 --- a/Carpet/CarpetLib/src/operators.hh +++ b/Carpet/CarpetLib/src/operators.hh @@ -9,11 +9,13 @@ enum operator_type op_none, // do not transport op_sync, // transport only on the same level // (error if called between levels) + op_restrict, // restrict only, do not prolongate op_copy, // use simple copying for prolongation // (needs only one time level) op_Lagrange, // Lagrange interpolation (standard) op_ENO, // use ENO stencils (for hydro) - op_WENO // use WENO stencils (for hydro) + op_WENO, // use WENO stencils (for hydro) + op_Lagrange_monotone // 
monotone Lagrange interpolation (for hydro) }; #endif // OPERATORS_HH diff --git a/Carpet/CarpetLib/src/prolongate_3d_cc_o0_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_cc_o0_rf2.cc new file mode 100644 index 000000000..352f4c380 --- /dev/null +++ b/Carpet/CarpetLib/src/prolongate_3d_cc_o0_rf2.cc @@ -0,0 +1,320 @@ +#include +#include +#include +#include + +#include +#include + +#include "operator_prototypes_3d.hh" +#include "typeprops.hh" + +using namespace std; + + + +namespace CarpetLib { + + + +#define SRCIND3(i,j,k) \ + index3 (i, j, k, \ + srciext, srcjext, srckext) +#define DSTIND3(i,j,k) \ + index3 (i, j, k, \ + dstiext, dstjext, dstkext) + + + + template + void + prolongate_3d_cc_o0_rf2 (T const * restrict const src, + ivect3 const & restrict srcext, + T * restrict const dst, + ivect3 const & restrict dstext, + ibbox3 const & restrict srcbbox, + ibbox3 const & restrict dstbbox, + ibbox3 const & restrict regbbox) + { + typedef typename typeprops::real RT; + + + + if (any (srcbbox.stride() <= regbbox.stride() or + dstbbox.stride() != regbbox.stride())) + { + CCTK_WARN (0, "Internal error: strides disagree"); + } + + if (any (srcbbox.stride() != reffact2 * dstbbox.stride())) { + CCTK_WARN (0, "Internal error: source strides are not twice the destination strides"); + } + + if (any (dstbbox.stride() % 2 != 0)) { + CCTK_WARN (0, "Internal error: destination strides are not even"); + } + + // This could be handled, but is likely to point to an error + // elsewhere + if (regbbox.empty()) { + CCTK_WARN (0, "Internal error: region extent is empty"); + } + + + + ivect3 const regext = regbbox.shape() / regbbox.stride(); + assert (all ((regbbox.lower() - srcbbox.lower() + regbbox.stride() / 2) % regbbox.stride() == 0)); + ivect3 const srcoff = (regbbox.lower() - srcbbox.lower() + regbbox.stride() / 2) / regbbox.stride(); + assert (all ((regbbox.lower() - dstbbox.lower()) % regbbox.stride() == 0)); + ivect3 const dstoff = (regbbox.lower() - dstbbox.lower()) / 
regbbox.stride(); + + + + bvect3 const needoffsetlo = srcoff % reffact2 != 0; + bvect3 const needoffsethi = (srcoff + regext - 1) % reffact2 != 0; + ivect3 const offsetlo = either (needoffsetlo, 1, 0); + ivect3 const offsethi = either (needoffsethi, 1, 0); + + + + if (not regbbox.expand(offsetlo, offsethi).is_contained_in(srcbbox) or + not regbbox .is_contained_in(dstbbox)) { CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); } + + if (any (srcext != srcbbox.shape() / srcbbox.stride() or + dstext != dstbbox.shape() / dstbbox.stride())) + { + CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); + } + + + + size_t const srciext = srcext[0]; + size_t const srcjext = srcext[1]; + size_t const srckext = srcext[2]; + + size_t const dstiext = dstext[0]; + size_t const dstjext = dstext[1]; + size_t const dstkext = dstext[2]; + + size_t const regiext = regext[0]; + size_t const regjext = regext[1]; + size_t const regkext = regext[2]; + + size_t const srcioff = srcoff[0]; + size_t const srcjoff = srcoff[1]; + size_t const srckoff = srcoff[2]; + + size_t const dstioff = dstoff[0]; + size_t const dstjoff = dstoff[1]; + size_t const dstkoff = dstoff[2]; + + + + size_t const fi = srcioff % 2; + size_t const fj = srcjoff % 2; + size_t const fk = srckoff % 2; + + size_t const i0 = srcioff / 2; + size_t const j0 = srcjoff / 2; + size_t const k0 = srckoff / 2; + + + + // Loop over fine region + // Label scheme: l 8 fk fj fi + + size_t is, js, ks; + size_t id, jd, kd; + size_t i, j, k; + + // begin k loop + k = 0; + ks = k0; + kd = dstkoff; + if (fk == 0) goto l80; + goto l81; + + // begin j loop + l80: + j = 0; + js = j0; + jd = dstjoff; + if (fj == 0) goto l800; + goto l801; + + // begin i loop + l800: + i = 0; + is = i0; + id = dstioff; + if (fi == 0) goto l8000; + goto l8001; + + // kernel + l8000: + dst[DSTIND3(id,jd,kd)] = src[SRCIND3(is,js,ks)]; + i = i+1; + id = id+1; + if (i < regiext) goto l8001; + goto l900; + + // kernel + l8001: + dst[DSTIND3(id,jd,kd)] = src[SRCIND3(is,js,ks)]; + i = i+1; + id = 
id+1; + is = is+1; + if (i < regiext) goto l8000; + goto l900; + + // end i loop + l900: + j = j+1; + jd = jd+1; + if (j < regjext) goto l801; + goto l90; + + // begin i loop + l801: + i = 0; + is = i0; + id = dstioff; + if (fi == 0) goto l8010; + goto l8011; + + // kernel + l8010: + dst[DSTIND3(id,jd,kd)] = src[SRCIND3(is,js,ks)]; + i = i+1; + id = id+1; + if (i < regiext) goto l8011; + goto l901; + + // kernel + l8011: + dst[DSTIND3(id,jd,kd)] = src[SRCIND3(is,js,ks)]; + i = i+1; + id = id+1; + is = is+1; + if (i < regiext) goto l8010; + goto l901; + + // end i loop + l901: + j = j+1; + jd = jd+1; + js = js+1; + if (j < regjext) goto l800; + goto l90; + + // end j loop + l90: + k = k+1; + kd = kd+1; + if (k < regkext) goto l81; + goto l9; + + // begin j loop + l81: + j = 0; + js = j0; + jd = dstjoff; + if (fj == 0) goto l810; + goto l811; + + // begin i loop + l810: + i = 0; + is = i0; + id = dstioff; + if (fi == 0) goto l8100; + goto l8101; + + // kernel + l8100: + dst[DSTIND3(id,jd,kd)] = src[SRCIND3(is,js,ks)]; + i = i+1; + id = id+1; + if (i < regiext) goto l8101; + goto l910; + + // kernel + l8101: + dst[DSTIND3(id,jd,kd)] = src[SRCIND3(is,js,ks)]; + i = i+1; + id = id+1; + is = is+1; + if (i < regiext) goto l8100; + goto l910; + + // end i loop + l910: + j = j+1; + jd = jd+1; + if (j < regjext) goto l811; + goto l91; + + // begin i loop + l811: + i = 0; + is = i0; + id = dstioff; + if (fi == 0) goto l8110; + goto l8111; + + // kernel + l8110: + dst[DSTIND3(id,jd,kd)] = src[SRCIND3(is,js,ks)]; + i = i+1; + id = id+1; + if (i < regiext) goto l8111; + goto l911; + + // kernel + l8111: + dst[DSTIND3(id,jd,kd)] = src[SRCIND3(is,js,ks)]; + i = i+1; + id = id+1; + is = is+1; + if (i < regiext) goto l8110; + goto l911; + + // end i loop + l911: + j = j+1; + jd = jd+1; + js = js+1; + if (j < regjext) goto l810; + goto l91; + + // end j loop + l91: + k = k+1; + kd = kd+1; + ks = ks+1; + if (k < regkext) goto l80; + goto l9; + + // end k loop + l9:; + + } + + + 
+#define INSTANTIATE(T) \ + template \ + void \ + prolongate_3d_cc_o0_rf2 (T const * restrict const src, \ + ivect3 const & restrict srcext, \ + T * restrict const dst, \ + ivect3 const & restrict dstext, \ + ibbox3 const & restrict srcbbox, \ + ibbox3 const & restrict dstbbox, \ + ibbox3 const & restrict regbbox); +#include "instantiate" +#undef INSTANTIATE + + + +} // CarpetLib diff --git a/Carpet/CarpetLib/src/prolongate_3d_cc_o1_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_cc_o1_rf2.cc new file mode 100644 index 000000000..42fc078ee --- /dev/null +++ b/Carpet/CarpetLib/src/prolongate_3d_cc_o1_rf2.cc @@ -0,0 +1,390 @@ +#include +#include +#include +#include + +#include +#include + +#include "operator_prototypes_3d.hh" +#include "typeprops.hh" + +using namespace std; + + + +namespace CarpetLib { + + + +#define SRCIND3(i,j,k) \ + index3 (i, j, k, \ + srciext, srcjext, srckext) +#define DSTIND3(i,j,k) \ + index3 (i, j, k, \ + dstiext, dstjext, dstkext) + + + + template + void + prolongate_3d_cc_o1_rf2 (T const * restrict const src, + ivect3 const & restrict srcext, + T * restrict const dst, + ivect3 const & restrict dstext, + ibbox3 const & restrict srcbbox, + ibbox3 const & restrict dstbbox, + ibbox3 const & restrict regbbox) + { + typedef typename typeprops::real RT; + + + + if (any (srcbbox.stride() <= regbbox.stride() or + dstbbox.stride() != regbbox.stride())) + { + CCTK_WARN (0, "Internal error: strides disagree"); + } + + if (any (srcbbox.stride() != reffact2 * dstbbox.stride())) { + CCTK_WARN (0, "Internal error: source strides are not twice the destination strides"); + } + + if (any (dstbbox.stride() % 2 != 0)) { + CCTK_WARN (0, "Internal error: destination strides are not even"); + } + + // This could be handled, but is likely to point to an error + // elsewhere + if (regbbox.empty()) { + CCTK_WARN (0, "Internal error: region extent is empty"); + } + + + + ivect3 const regext = regbbox.shape() / regbbox.stride(); + assert (all ((regbbox.lower() - 
srcbbox.lower() + regbbox.stride() / 2) % regbbox.stride() == 0)); + ivect3 const srcoff = (regbbox.lower() - srcbbox.lower() + regbbox.stride() / 2) / regbbox.stride(); + assert (all ((regbbox.lower() - dstbbox.lower()) % regbbox.stride() == 0)); + ivect3 const dstoff = (regbbox.lower() - dstbbox.lower()) / regbbox.stride(); + + + + ivect3 const offsetlo = 1; + ivect3 const offsethi = 1; + + + + if (not regbbox.expand(offsetlo, offsethi).is_contained_in(srcbbox) or + not regbbox .is_contained_in(dstbbox)) + { + CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); + } + + if (any (srcext != srcbbox.shape() / srcbbox.stride() or + dstext != dstbbox.shape() / dstbbox.stride())) + { + CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); + } + + + + size_t const srciext = srcext[0]; + size_t const srcjext = srcext[1]; + size_t const srckext = srcext[2]; + + size_t const dstiext = dstext[0]; + size_t const dstjext = dstext[1]; + size_t const dstkext = dstext[2]; + + size_t const regiext = regext[0]; + size_t const regjext = regext[1]; + size_t const regkext = regext[2]; + + size_t const srcioff = srcoff[0]; + size_t const srcjoff = srcoff[1]; + size_t const srckoff = srcoff[2]; + + size_t const dstioff = dstoff[0]; + size_t const dstjoff = dstoff[1]; + size_t const dstkoff = dstoff[2]; + + + + size_t const fi = srcioff % 2; + size_t const fj = srcjoff % 2; + size_t const fk = srckoff % 2; + + size_t const i0 = srcioff / 2; + size_t const j0 = srcjoff / 2; + size_t const k0 = srckoff / 2; + + RT const one = 1; + + RT const f1 = one/4; + RT const f2 = 3*one/4; + + + + // Loop over fine region + // Label scheme: l 8 fk fj fi + + size_t is, js, ks; + size_t id, jd, kd; + size_t i, j, k; + + // begin k loop + k = 0; + ks = k0; + kd = dstkoff; + if (fk == 0) goto l80; + goto l81; + + // begin j loop + l80: + j = 0; + js = j0; + jd = dstjoff; + if (fj == 0) goto l800; + goto l801; + + // begin i loop + l800: + i = 0; + is = 
i0; + id = dstioff; + if (fi == 0) goto l8000; + goto l8001; + + // kernel + l8000: + dst[DSTIND3(id,jd,kd)] = + + f1*f1*f1 * src[SRCIND3(is-1,js-1,ks-1)] + + f2*f1*f1 * src[SRCIND3(is ,js-1,ks-1)] + + f1*f2*f1 * src[SRCIND3(is-1,js ,ks-1)] + + f2*f2*f1 * src[SRCIND3(is ,js ,ks-1)] + + f1*f1*f2 * src[SRCIND3(is-1,js-1,ks )] + + f2*f1*f2 * src[SRCIND3(is ,js-1,ks )] + + f1*f2*f2 * src[SRCIND3(is-1,js ,ks )] + + f2*f2*f2 * src[SRCIND3(is ,js ,ks )]; + i = i+1; + id = id+1; + if (i < regiext) goto l8001; + goto l900; + + // kernel + l8001: + dst[DSTIND3(id,jd,kd)] = + + f2*f1*f1 * src[SRCIND3(is ,js-1,ks-1)] + + f1*f1*f1 * src[SRCIND3(is+1,js-1,ks-1)] + + f2*f2*f1 * src[SRCIND3(is ,js ,ks-1)] + + f1*f2*f1 * src[SRCIND3(is+1,js ,ks-1)] + + f2*f1*f2 * src[SRCIND3(is ,js-1,ks )] + + f1*f1*f2 * src[SRCIND3(is+1,js-1,ks )] + + f2*f2*f2 * src[SRCIND3(is ,js ,ks )] + + f1*f2*f2 * src[SRCIND3(is+1,js ,ks )]; + i = i+1; + id = id+1; + is = is+1; + if (i < regiext) goto l8000; + goto l900; + + // end i loop + l900: + j = j+1; + jd = jd+1; + if (j < regjext) goto l801; + goto l90; + + // begin i loop + l801: + i = 0; + is = i0; + id = dstioff; + if (fi == 0) goto l8010; + goto l8011; + + // kernel + l8010: + dst[DSTIND3(id,jd,kd)] = + + f1*f2*f1 * src[SRCIND3(is-1,js ,ks-1)] + + f2*f2*f1 * src[SRCIND3(is ,js ,ks-1)] + + f1*f1*f1 * src[SRCIND3(is-1,js+1,ks-1)] + + f2*f1*f1 * src[SRCIND3(is ,js+1,ks-1)] + + f1*f2*f2 * src[SRCIND3(is-1,js ,ks )] + + f2*f2*f2 * src[SRCIND3(is ,js ,ks )] + + f1*f1*f2 * src[SRCIND3(is-1,js+1,ks )] + + f2*f1*f2 * src[SRCIND3(is ,js+1,ks )]; + i = i+1; + id = id+1; + if (i < regiext) goto l8011; + goto l901; + + // kernel + l8011: + dst[DSTIND3(id,jd,kd)] = + + f2*f2*f1 * src[SRCIND3(is ,js ,ks-1)] + + f1*f2*f1 * src[SRCIND3(is+1,js ,ks-1)] + + f2*f1*f1 * src[SRCIND3(is ,js+1,ks-1)] + + f1*f1*f1 * src[SRCIND3(is+1,js+1,ks-1)] + + f2*f2*f2 * src[SRCIND3(is ,js ,ks )] + + f1*f2*f2 * src[SRCIND3(is+1,js ,ks )] + + f2*f1*f2 * src[SRCIND3(is ,js+1,ks )] + + 
f1*f1*f2 * src[SRCIND3(is+1,js+1,ks )]; + i = i+1; + id = id+1; + is = is+1; + if (i < regiext) goto l8010; + goto l901; + + // end i loop + l901: + j = j+1; + jd = jd+1; + js = js+1; + if (j < regjext) goto l800; + goto l90; + + // end j loop + l90: + k = k+1; + kd = kd+1; + if (k < regkext) goto l81; + goto l9; + + // begin j loop + l81: + j = 0; + js = j0; + jd = dstjoff; + if (fj == 0) goto l810; + goto l811; + + // begin i loop + l810: + i = 0; + is = i0; + id = dstioff; + if (fi == 0) goto l8100; + goto l8101; + + // kernel + l8100: + dst[DSTIND3(id,jd,kd)] = + + f1*f1*f2 * src[SRCIND3(is-1,js-1,ks )] + + f2*f1*f2 * src[SRCIND3(is ,js-1,ks )] + + f1*f2*f2 * src[SRCIND3(is-1,js ,ks )] + + f2*f2*f2 * src[SRCIND3(is ,js ,ks )] + + f1*f1*f1 * src[SRCIND3(is-1,js-1,ks+1)] + + f2*f1*f1 * src[SRCIND3(is ,js-1,ks+1)] + + f1*f2*f1 * src[SRCIND3(is-1,js ,ks+1)] + + f2*f2*f1 * src[SRCIND3(is ,js ,ks+1)]; + i = i+1; + id = id+1; + if (i < regiext) goto l8101; + goto l910; + + // kernel + l8101: + dst[DSTIND3(id,jd,kd)] = + + f2*f1*f2 * src[SRCIND3(is ,js-1,ks )] + + f1*f1*f2 * src[SRCIND3(is+1,js-1,ks )] + + f2*f2*f2 * src[SRCIND3(is ,js ,ks )] + + f1*f2*f2 * src[SRCIND3(is+1,js ,ks )] + + f2*f1*f1 * src[SRCIND3(is ,js-1,ks+1)] + + f1*f1*f1 * src[SRCIND3(is+1,js-1,ks+1)] + + f2*f2*f1 * src[SRCIND3(is ,js ,ks+1)] + + f1*f2*f1 * src[SRCIND3(is+1,js ,ks+1)]; + i = i+1; + id = id+1; + is = is+1; + if (i < regiext) goto l8100; + goto l910; + + // end i loop + l910: + j = j+1; + jd = jd+1; + if (j < regjext) goto l811; + goto l91; + + // begin i loop + l811: + i = 0; + is = i0; + id = dstioff; + if (fi == 0) goto l8110; + goto l8111; + + // kernel + l8110: + dst[DSTIND3(id,jd,kd)] = + + f1*f2*f2 * src[SRCIND3(is-1,js ,ks )] + + f2*f2*f2 * src[SRCIND3(is ,js ,ks )] + + f1*f1*f2 * src[SRCIND3(is-1,js+1,ks )] + + f2*f1*f2 * src[SRCIND3(is ,js+1,ks )] + + f1*f2*f1 * src[SRCIND3(is-1,js ,ks+1)] + + f2*f2*f1 * src[SRCIND3(is ,js ,ks+1)] + + f1*f1*f1 * src[SRCIND3(is-1,js+1,ks+1)] + 
+ f2*f1*f1 * src[SRCIND3(is ,js+1,ks+1)]; + i = i+1; + id = id+1; + if (i < regiext) goto l8111; + goto l911; + + // kernel + l8111: + dst[DSTIND3(id,jd,kd)] = + + f2*f2*f2 * src[SRCIND3(is ,js ,ks )] + + f1*f2*f2 * src[SRCIND3(is+1,js ,ks )] + + f2*f1*f2 * src[SRCIND3(is ,js+1,ks )] + + f1*f1*f2 * src[SRCIND3(is+1,js+1,ks )] + + f2*f2*f1 * src[SRCIND3(is ,js ,ks+1)] + + f1*f2*f1 * src[SRCIND3(is+1,js ,ks+1)] + + f2*f1*f1 * src[SRCIND3(is ,js+1,ks+1)] + + f1*f1*f1 * src[SRCIND3(is+1,js+1,ks+1)]; + i = i+1; + id = id+1; + is = is+1; + if (i < regiext) goto l8110; + goto l911; + + // end i loop + l911: + j = j+1; + jd = jd+1; + js = js+1; + if (j < regjext) goto l810; + goto l91; + + // end j loop + l91: + k = k+1; + kd = kd+1; + ks = ks+1; + if (k < regkext) goto l80; + goto l9; + + // end k loop + l9:; + + } + + + +#define INSTANTIATE(T) \ + template \ + void \ + prolongate_3d_cc_o1_rf2 (T const * restrict const src, \ + ivect3 const & restrict srcext, \ + T * restrict const dst, \ + ivect3 const & restrict dstext, \ + ibbox3 const & restrict srcbbox, \ + ibbox3 const & restrict dstbbox, \ + ibbox3 const & restrict regbbox); +#include "instantiate" +#undef INSTANTIATE + + + +} // CarpetLib diff --git a/Carpet/CarpetLib/src/prolongate_3d_cc_o2_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_cc_o2_rf2.cc new file mode 100644 index 000000000..f55dfe9fa --- /dev/null +++ b/Carpet/CarpetLib/src/prolongate_3d_cc_o2_rf2.cc @@ -0,0 +1,545 @@ +#include +#include +#include +#include + +#include +#include + +#include "operator_prototypes_3d.hh" +#include "typeprops.hh" + +using namespace std; + + + +namespace CarpetLib { + + + +#define SRCIND3(i,j,k) \ + index3 (i, j, k, \ + srciext, srcjext, srckext) +#define DSTIND3(i,j,k) \ + index3 (i, j, k, \ + dstiext, dstjext, dstkext) + + + + template + void + prolongate_3d_cc_o2_rf2 (T const * restrict const src, + ivect3 const & restrict srcext, + T * restrict const dst, + ivect3 const & restrict dstext, + ibbox3 const & restrict 
srcbbox, + ibbox3 const & restrict dstbbox, + ibbox3 const & restrict regbbox) + { + typedef typename typeprops::real RT; + + + + if (any (srcbbox.stride() <= regbbox.stride() or + dstbbox.stride() != regbbox.stride())) + { + CCTK_WARN (0, "Internal error: strides disagree"); + } + + if (any (srcbbox.stride() != reffact2 * dstbbox.stride())) { + CCTK_WARN (0, "Internal error: source strides are not twice the destination strides"); + } + + if (any (dstbbox.stride() % 2 != 0)) { + CCTK_WARN (0, "Internal error: destination strides are not even"); + } + + // This could be handled, but is likely to point to an error + // elsewhere + if (regbbox.empty()) { + CCTK_WARN (0, "Internal error: region extent is empty"); + } + + + + ivect3 const regext = regbbox.shape() / regbbox.stride(); + assert (all ((regbbox.lower() - srcbbox.lower() + regbbox.stride() / 2) % regbbox.stride() == 0)); + ivect3 const srcoff = (regbbox.lower() - srcbbox.lower() + regbbox.stride() / 2) / regbbox.stride(); + assert (all ((regbbox.lower() - dstbbox.lower()) % regbbox.stride() == 0)); + ivect3 const dstoff = (regbbox.lower() - dstbbox.lower()) / regbbox.stride(); + + + + bvect3 const needoffsetlo = srcoff % reffact2 != 0; + bvect3 const needoffsethi = (srcoff + regext - 1) % reffact2 != 0; + ivect3 const offsetlo = either (needoffsetlo, 2, 1); + ivect3 const offsethi = either (needoffsethi, 2, 1); + + + + if (not regbbox.expand(offsetlo, offsethi).is_contained_in(srcbbox) or + not regbbox .is_contained_in(dstbbox)) + { + CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); + } + + if (any (srcext != srcbbox.shape() / srcbbox.stride() or + dstext != dstbbox.shape() / dstbbox.stride())) + { + CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); + } + + + + size_t const srciext = srcext[0]; + size_t const srcjext = srcext[1]; + size_t const srckext = srcext[2]; + + size_t const dstiext = dstext[0]; + size_t const dstjext = dstext[1]; + size_t 
const dstkext = dstext[2]; + + size_t const regiext = regext[0]; + size_t const regjext = regext[1]; + size_t const regkext = regext[2]; + + size_t const srcioff = srcoff[0]; + size_t const srcjoff = srcoff[1]; + size_t const srckoff = srcoff[2]; + + size_t const dstioff = dstoff[0]; + size_t const dstjoff = dstoff[1]; + size_t const dstkoff = dstoff[2]; + + + + size_t const fi = srcioff % 2; + size_t const fj = srcjoff % 2; + size_t const fk = srckoff % 2; + + size_t const i0 = srcioff / 2; + size_t const j0 = srcjoff / 2; + size_t const k0 = srckoff / 2; + + RT const one = 1; + + RT const f1 = 5*one/32; + RT const f2 = 30*one/32; + RT const f3 = -3*one/32; + + + + // Loop over fine region + // Label scheme: l 8 fk fj fi + + size_t is, js, ks; + size_t id, jd, kd; + size_t i, j, k; + + // begin k loop + k = 0; + ks = k0; + kd = dstkoff; + if (fk == 0) goto l80; + goto l81; + + // begin j loop + l80: + j = 0; + js = j0; + jd = dstjoff; + if (fj == 0) goto l800; + goto l801; + + // begin i loop + l800: + i = 0; + is = i0; + id = dstioff; + if (fi == 0) goto l8000; + goto l8001; + + // kernel + l8000: + dst[DSTIND3(id,jd,kd)] = + + f1*f1*f1 * src[SRCIND3(is-1,js-1,ks-1)] + + f2*f1*f1 * src[SRCIND3(is ,js-1,ks-1)] + + f3*f1*f1 * src[SRCIND3(is+1,js-1,ks-1)] + + f1*f2*f1 * src[SRCIND3(is-1,js ,ks-1)] + + f2*f2*f1 * src[SRCIND3(is ,js ,ks-1)] + + f3*f2*f1 * src[SRCIND3(is+1,js ,ks-1)] + + f1*f3*f1 * src[SRCIND3(is-1,js+1,ks-1)] + + f2*f3*f1 * src[SRCIND3(is ,js+1,ks-1)] + + f3*f3*f1 * src[SRCIND3(is+1,js+1,ks-1)] + + f1*f1*f2 * src[SRCIND3(is-1,js-1,ks )] + + f2*f1*f2 * src[SRCIND3(is ,js-1,ks )] + + f3*f1*f2 * src[SRCIND3(is+1,js-1,ks )] + + f1*f2*f2 * src[SRCIND3(is-1,js ,ks )] + + f2*f2*f2 * src[SRCIND3(is ,js ,ks )] + + f3*f2*f2 * src[SRCIND3(is+1,js ,ks )] + + f1*f3*f2 * src[SRCIND3(is-1,js+1,ks )] + + f2*f3*f2 * src[SRCIND3(is ,js+1,ks )] + + f3*f3*f2 * src[SRCIND3(is+1,js+1,ks )] + + f1*f1*f3 * src[SRCIND3(is-1,js-1,ks+1)] + + f2*f1*f3 * src[SRCIND3(is 
,js-1,ks+1)] + + f3*f1*f3 * src[SRCIND3(is+1,js-1,ks+1)] + + f1*f2*f3 * src[SRCIND3(is-1,js ,ks+1)] + + f2*f2*f3 * src[SRCIND3(is ,js ,ks+1)] + + f3*f2*f3 * src[SRCIND3(is+1,js ,ks+1)] + + f1*f3*f3 * src[SRCIND3(is-1,js+1,ks+1)] + + f2*f3*f3 * src[SRCIND3(is ,js+1,ks+1)] + + f3*f3*f3 * src[SRCIND3(is+1,js+1,ks+1)]; + i = i+1; + id = id+1; + if (i < regiext) goto l8001; + goto l900; + + // kernel + l8001: + dst[DSTIND3(id,jd,kd)] = + + f3*f1*f1 * src[SRCIND3(is-1,js-1,ks-1)] + + f2*f1*f1 * src[SRCIND3(is ,js-1,ks-1)] + + f1*f1*f1 * src[SRCIND3(is+1,js-1,ks-1)] + + f3*f2*f1 * src[SRCIND3(is-1,js ,ks-1)] + + f2*f2*f1 * src[SRCIND3(is ,js ,ks-1)] + + f1*f2*f1 * src[SRCIND3(is+1,js ,ks-1)] + + f3*f3*f1 * src[SRCIND3(is-1,js+1,ks-1)] + + f2*f3*f1 * src[SRCIND3(is ,js+1,ks-1)] + + f1*f3*f1 * src[SRCIND3(is+1,js+1,ks-1)] + + f3*f1*f2 * src[SRCIND3(is-1,js-1,ks )] + + f2*f1*f2 * src[SRCIND3(is ,js-1,ks )] + + f1*f1*f2 * src[SRCIND3(is+1,js-1,ks )] + + f3*f2*f2 * src[SRCIND3(is-1,js ,ks )] + + f2*f2*f2 * src[SRCIND3(is ,js ,ks )] + + f1*f2*f2 * src[SRCIND3(is+1,js ,ks )] + + f3*f3*f2 * src[SRCIND3(is-1,js+1,ks )] + + f2*f3*f2 * src[SRCIND3(is ,js+1,ks )] + + f1*f3*f2 * src[SRCIND3(is+1,js+1,ks )] + + f3*f1*f3 * src[SRCIND3(is-1,js-1,ks+1)] + + f2*f1*f3 * src[SRCIND3(is ,js-1,ks+1)] + + f1*f1*f3 * src[SRCIND3(is+1,js-1,ks+1)] + + f3*f2*f3 * src[SRCIND3(is-1,js ,ks+1)] + + f2*f2*f3 * src[SRCIND3(is ,js ,ks+1)] + + f1*f2*f3 * src[SRCIND3(is+1,js ,ks+1)] + + f3*f3*f3 * src[SRCIND3(is-1,js+1,ks+1)] + + f2*f3*f3 * src[SRCIND3(is ,js+1,ks+1)] + + f1*f3*f3 * src[SRCIND3(is+1,js+1,ks+1)]; + i = i+1; + id = id+1; + is = is+1; + if (i < regiext) goto l8000; + goto l900; + + // end i loop + l900: + j = j+1; + jd = jd+1; + if (j < regjext) goto l801; + goto l90; + + // begin i loop + l801: + i = 0; + is = i0; + id = dstioff; + if (fi == 0) goto l8010; + goto l8011; + + // kernel + l8010: + dst[DSTIND3(id,jd,kd)] = + + f1*f3*f1 * src[SRCIND3(is-1,js-1,ks-1)] + + f2*f3*f1 * src[SRCIND3(is 
,js-1,ks-1)] + + f3*f3*f1 * src[SRCIND3(is+1,js-1,ks-1)] + + f1*f2*f1 * src[SRCIND3(is-1,js ,ks-1)] + + f2*f2*f1 * src[SRCIND3(is ,js ,ks-1)] + + f3*f2*f1 * src[SRCIND3(is+1,js ,ks-1)] + + f1*f1*f1 * src[SRCIND3(is-1,js+1,ks-1)] + + f2*f1*f1 * src[SRCIND3(is ,js+1,ks-1)] + + f3*f1*f1 * src[SRCIND3(is+1,js+1,ks-1)] + + f1*f3*f2 * src[SRCIND3(is-1,js-1,ks )] + + f2*f3*f2 * src[SRCIND3(is ,js-1,ks )] + + f3*f3*f2 * src[SRCIND3(is+1,js-1,ks )] + + f1*f2*f2 * src[SRCIND3(is-1,js ,ks )] + + f2*f2*f2 * src[SRCIND3(is ,js ,ks )] + + f3*f2*f2 * src[SRCIND3(is+1,js ,ks )] + + f1*f1*f2 * src[SRCIND3(is-1,js+1,ks )] + + f2*f1*f2 * src[SRCIND3(is ,js+1,ks )] + + f3*f1*f2 * src[SRCIND3(is+1,js+1,ks )] + + f1*f3*f3 * src[SRCIND3(is-1,js-1,ks+1)] + + f2*f3*f3 * src[SRCIND3(is ,js-1,ks+1)] + + f3*f3*f3 * src[SRCIND3(is+1,js-1,ks+1)] + + f1*f2*f3 * src[SRCIND3(is-1,js ,ks+1)] + + f2*f2*f3 * src[SRCIND3(is ,js ,ks+1)] + + f3*f2*f3 * src[SRCIND3(is+1,js ,ks+1)] + + f1*f1*f3 * src[SRCIND3(is-1,js+1,ks+1)] + + f2*f1*f3 * src[SRCIND3(is ,js+1,ks+1)] + + f3*f1*f3 * src[SRCIND3(is+1,js+1,ks+1)]; + i = i+1; + id = id+1; + if (i < regiext) goto l8011; + goto l901; + + // kernel + l8011: + dst[DSTIND3(id,jd,kd)] = + + f3*f3*f1 * src[SRCIND3(is-1,js-1,ks-1)] + + f2*f3*f1 * src[SRCIND3(is ,js-1,ks-1)] + + f1*f3*f1 * src[SRCIND3(is+1,js-1,ks-1)] + + f3*f2*f1 * src[SRCIND3(is-1,js ,ks-1)] + + f2*f2*f1 * src[SRCIND3(is ,js ,ks-1)] + + f1*f2*f1 * src[SRCIND3(is+1,js ,ks-1)] + + f3*f1*f1 * src[SRCIND3(is-1,js+1,ks-1)] + + f2*f1*f1 * src[SRCIND3(is ,js+1,ks-1)] + + f1*f1*f1 * src[SRCIND3(is+1,js+1,ks-1)] + + f3*f3*f2 * src[SRCIND3(is-1,js-1,ks )] + + f2*f3*f2 * src[SRCIND3(is ,js-1,ks )] + + f1*f3*f2 * src[SRCIND3(is+1,js-1,ks )] + + f3*f2*f2 * src[SRCIND3(is-1,js ,ks )] + + f2*f2*f2 * src[SRCIND3(is ,js ,ks )] + + f1*f2*f2 * src[SRCIND3(is+1,js ,ks )] + + f3*f1*f2 * src[SRCIND3(is-1,js+1,ks )] + + f2*f1*f2 * src[SRCIND3(is ,js+1,ks )] + + f1*f1*f2 * src[SRCIND3(is+1,js+1,ks )] + + f3*f3*f3 * 
src[SRCIND3(is-1,js-1,ks+1)] + + f2*f3*f3 * src[SRCIND3(is ,js-1,ks+1)] + + f1*f3*f3 * src[SRCIND3(is+1,js-1,ks+1)] + + f3*f2*f3 * src[SRCIND3(is-1,js ,ks+1)] + + f2*f2*f3 * src[SRCIND3(is ,js ,ks+1)] + + f1*f2*f3 * src[SRCIND3(is+1,js ,ks+1)] + + f3*f1*f3 * src[SRCIND3(is-1,js+1,ks+1)] + + f2*f1*f3 * src[SRCIND3(is ,js+1,ks+1)] + + f1*f1*f3 * src[SRCIND3(is+1,js+1,ks+1)]; + i = i+1; + id = id+1; + is = is+1; + if (i < regiext) goto l8010; + goto l901; + + // end i loop + l901: + j = j+1; + jd = jd+1; + js = js+1; + if (j < regjext) goto l800; + goto l90; + + // end j loop + l90: + k = k+1; + kd = kd+1; + if (k < regkext) goto l81; + goto l9; + + // begin j loop + l81: + j = 0; + js = j0; + jd = dstjoff; + if (fj == 0) goto l810; + goto l811; + + // begin i loop + l810: + i = 0; + is = i0; + id = dstioff; + if (fi == 0) goto l8100; + goto l8101; + + // kernel + l8100: + dst[DSTIND3(id,jd,kd)] = + + f1*f1*f3 * src[SRCIND3(is-1,js-1,ks-1)] + + f2*f1*f3 * src[SRCIND3(is ,js-1,ks-1)] + + f3*f1*f3 * src[SRCIND3(is+1,js-1,ks-1)] + + f1*f2*f3 * src[SRCIND3(is-1,js ,ks-1)] + + f2*f2*f3 * src[SRCIND3(is ,js ,ks-1)] + + f3*f2*f3 * src[SRCIND3(is+1,js ,ks-1)] + + f1*f3*f3 * src[SRCIND3(is-1,js+1,ks-1)] + + f2*f3*f3 * src[SRCIND3(is ,js+1,ks-1)] + + f3*f3*f3 * src[SRCIND3(is+1,js+1,ks-1)] + + f1*f1*f2 * src[SRCIND3(is-1,js-1,ks )] + + f2*f1*f2 * src[SRCIND3(is ,js-1,ks )] + + f3*f1*f2 * src[SRCIND3(is+1,js-1,ks )] + + f1*f2*f2 * src[SRCIND3(is-1,js ,ks )] + + f2*f2*f2 * src[SRCIND3(is ,js ,ks )] + + f3*f2*f2 * src[SRCIND3(is+1,js ,ks )] + + f1*f3*f2 * src[SRCIND3(is-1,js+1,ks )] + + f2*f3*f2 * src[SRCIND3(is ,js+1,ks )] + + f3*f3*f2 * src[SRCIND3(is+1,js+1,ks )] + + f1*f1*f1 * src[SRCIND3(is-1,js-1,ks+1)] + + f2*f1*f1 * src[SRCIND3(is ,js-1,ks+1)] + + f3*f1*f1 * src[SRCIND3(is+1,js-1,ks+1)] + + f1*f2*f1 * src[SRCIND3(is-1,js ,ks+1)] + + f2*f2*f1 * src[SRCIND3(is ,js ,ks+1)] + + f3*f2*f1 * src[SRCIND3(is+1,js ,ks+1)] + + f1*f3*f1 * src[SRCIND3(is-1,js+1,ks+1)] + + f2*f3*f1 * 
src[SRCIND3(is ,js+1,ks+1)] + + f3*f3*f1 * src[SRCIND3(is+1,js+1,ks+1)]; + i = i+1; + id = id+1; + if (i < regiext) goto l8101; + goto l910; + + // kernel + l8101: + dst[DSTIND3(id,jd,kd)] = + + f3*f1*f3 * src[SRCIND3(is-1,js-1,ks-1)] + + f2*f1*f3 * src[SRCIND3(is ,js-1,ks-1)] + + f1*f1*f3 * src[SRCIND3(is+1,js-1,ks-1)] + + f3*f2*f3 * src[SRCIND3(is-1,js ,ks-1)] + + f2*f2*f3 * src[SRCIND3(is ,js ,ks-1)] + + f1*f2*f3 * src[SRCIND3(is+1,js ,ks-1)] + + f3*f3*f3 * src[SRCIND3(is-1,js+1,ks-1)] + + f2*f3*f3 * src[SRCIND3(is ,js+1,ks-1)] + + f1*f3*f3 * src[SRCIND3(is+1,js+1,ks-1)] + + f3*f1*f2 * src[SRCIND3(is-1,js-1,ks )] + + f2*f1*f2 * src[SRCIND3(is ,js-1,ks )] + + f1*f1*f2 * src[SRCIND3(is+1,js-1,ks )] + + f3*f2*f2 * src[SRCIND3(is-1,js ,ks )] + + f2*f2*f2 * src[SRCIND3(is ,js ,ks )] + + f1*f2*f2 * src[SRCIND3(is+1,js ,ks )] + + f3*f3*f2 * src[SRCIND3(is-1,js+1,ks )] + + f2*f3*f2 * src[SRCIND3(is ,js+1,ks )] + + f1*f3*f2 * src[SRCIND3(is+1,js+1,ks )] + + f3*f1*f1 * src[SRCIND3(is-1,js-1,ks+1)] + + f2*f1*f1 * src[SRCIND3(is ,js-1,ks+1)] + + f1*f1*f1 * src[SRCIND3(is+1,js-1,ks+1)] + + f3*f2*f1 * src[SRCIND3(is-1,js ,ks+1)] + + f2*f2*f1 * src[SRCIND3(is ,js ,ks+1)] + + f1*f2*f1 * src[SRCIND3(is+1,js ,ks+1)] + + f3*f3*f1 * src[SRCIND3(is-1,js+1,ks+1)] + + f2*f3*f1 * src[SRCIND3(is ,js+1,ks+1)] + + f1*f3*f1 * src[SRCIND3(is+1,js+1,ks+1)]; + i = i+1; + id = id+1; + is = is+1; + if (i < regiext) goto l8100; + goto l910; + + // end i loop + l910: + j = j+1; + jd = jd+1; + if (j < regjext) goto l811; + goto l91; + + // begin i loop + l811: + i = 0; + is = i0; + id = dstioff; + if (fi == 0) goto l8110; + goto l8111; + + // kernel + l8110: + dst[DSTIND3(id,jd,kd)] = + + f1*f3*f3 * src[SRCIND3(is-1,js-1,ks-1)] + + f2*f3*f3 * src[SRCIND3(is ,js-1,ks-1)] + + f3*f3*f3 * src[SRCIND3(is+1,js-1,ks-1)] + + f1*f2*f3 * src[SRCIND3(is-1,js ,ks-1)] + + f2*f2*f3 * src[SRCIND3(is ,js ,ks-1)] + + f3*f2*f3 * src[SRCIND3(is+1,js ,ks-1)] + + f1*f1*f3 * src[SRCIND3(is-1,js+1,ks-1)] + + f2*f1*f3 * 
src[SRCIND3(is ,js+1,ks-1)] + + f3*f1*f3 * src[SRCIND3(is+1,js+1,ks-1)] + + f1*f3*f2 * src[SRCIND3(is-1,js-1,ks )] + + f2*f3*f2 * src[SRCIND3(is ,js-1,ks )] + + f3*f3*f2 * src[SRCIND3(is+1,js-1,ks )] + + f1*f2*f2 * src[SRCIND3(is-1,js ,ks )] + + f2*f2*f2 * src[SRCIND3(is ,js ,ks )] + + f3*f2*f2 * src[SRCIND3(is+1,js ,ks )] + + f1*f1*f2 * src[SRCIND3(is-1,js+1,ks )] + + f2*f1*f2 * src[SRCIND3(is ,js+1,ks )] + + f3*f1*f2 * src[SRCIND3(is+1,js+1,ks )] + + f1*f3*f1 * src[SRCIND3(is-1,js-1,ks+1)] + + f2*f3*f1 * src[SRCIND3(is ,js-1,ks+1)] + + f3*f3*f1 * src[SRCIND3(is+1,js-1,ks+1)] + + f1*f2*f1 * src[SRCIND3(is-1,js ,ks+1)] + + f2*f2*f1 * src[SRCIND3(is ,js ,ks+1)] + + f3*f2*f1 * src[SRCIND3(is+1,js ,ks+1)] + + f1*f1*f1 * src[SRCIND3(is-1,js+1,ks+1)] + + f2*f1*f1 * src[SRCIND3(is ,js+1,ks+1)] + + f3*f1*f1 * src[SRCIND3(is+1,js+1,ks+1)]; + i = i+1; + id = id+1; + if (i < regiext) goto l8111; + goto l911; + + // kernel + l8111: + dst[DSTIND3(id,jd,kd)] = + + f3*f3*f3 * src[SRCIND3(is-1,js-1,ks-1)] + + f2*f3*f3 * src[SRCIND3(is ,js-1,ks-1)] + + f1*f3*f3 * src[SRCIND3(is+1,js-1,ks-1)] + + f3*f2*f3 * src[SRCIND3(is-1,js ,ks-1)] + + f2*f2*f3 * src[SRCIND3(is ,js ,ks-1)] + + f1*f2*f3 * src[SRCIND3(is+1,js ,ks-1)] + + f3*f1*f3 * src[SRCIND3(is-1,js+1,ks-1)] + + f2*f1*f3 * src[SRCIND3(is ,js+1,ks-1)] + + f1*f1*f3 * src[SRCIND3(is+1,js+1,ks-1)] + + f3*f3*f2 * src[SRCIND3(is-1,js-1,ks )] + + f2*f3*f2 * src[SRCIND3(is ,js-1,ks )] + + f1*f3*f2 * src[SRCIND3(is+1,js-1,ks )] + + f3*f2*f2 * src[SRCIND3(is-1,js ,ks )] + + f2*f2*f2 * src[SRCIND3(is ,js ,ks )] + + f1*f2*f2 * src[SRCIND3(is+1,js ,ks )] + + f3*f1*f2 * src[SRCIND3(is-1,js+1,ks )] + + f2*f1*f2 * src[SRCIND3(is ,js+1,ks )] + + f1*f1*f2 * src[SRCIND3(is+1,js+1,ks )] + + f3*f3*f1 * src[SRCIND3(is-1,js-1,ks+1)] + + f2*f3*f1 * src[SRCIND3(is ,js-1,ks+1)] + + f1*f3*f1 * src[SRCIND3(is+1,js-1,ks+1)] + + f3*f2*f1 * src[SRCIND3(is-1,js ,ks+1)] + + f2*f2*f1 * src[SRCIND3(is ,js ,ks+1)] + + f1*f2*f1 * src[SRCIND3(is+1,js ,ks+1)] + + 
f3*f1*f1 * src[SRCIND3(is-1,js+1,ks+1)] + + f2*f1*f1 * src[SRCIND3(is ,js+1,ks+1)] + + f1*f1*f1 * src[SRCIND3(is+1,js+1,ks+1)]; + i = i+1; + id = id+1; + is = is+1; + if (i < regiext) goto l8110; + goto l911; + + // end i loop + l911: + j = j+1; + jd = jd+1; + js = js+1; + if (j < regjext) goto l810; + goto l91; + + // end j loop + l91: + k = k+1; + kd = kd+1; + ks = ks+1; + if (k < regkext) goto l80; + goto l9; + + // end k loop + l9:; + + } + + + +#define INSTANTIATE(T) \ + template \ + void \ + prolongate_3d_cc_o2_rf2 (T const * restrict const src, \ + ivect3 const & restrict srcext, \ + T * restrict const dst, \ + ivect3 const & restrict dstext, \ + ibbox3 const & restrict srcbbox, \ + ibbox3 const & restrict dstbbox, \ + ibbox3 const & restrict regbbox); +#include "instantiate" +#undef INSTANTIATE + + + +} // CarpetLib diff --git a/Carpet/CarpetLib/src/prolongate_3d_cc_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_cc_rf2.cc index f58295560..04e00b255 100644 --- a/Carpet/CarpetLib/src/prolongate_3d_cc_rf2.cc +++ b/Carpet/CarpetLib/src/prolongate_3d_cc_rf2.cc @@ -11,7 +11,7 @@ #include #include -#include "operator_prototypes.hh" +#include "operator_prototypes_3d.hh" #include "typeprops.hh" using namespace std; @@ -120,7 +120,7 @@ namespace CarpetLib { -#pragma omp parallel for + // NOTE: This loop is not parallel for (int k=0; k void prolongate_3d_cc_rf2_prim2std (T const * restrict const src, - ivect const & restrict srcext, + ivect3 const & restrict srcext, T * restrict const dst, - ivect const & restrict dstext, + ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, ibbox3 const & restrict regbbox) @@ -253,18 +253,19 @@ namespace CarpetLib { -#pragma omp parallel for + // NOTE: This loop is not parallel for (int k=0; k #include -#include "operator_prototypes.hh" +#include "operator_prototypes_3d.hh" #include "typeprops.hh" using namespace std; @@ -231,7 +231,7 @@ namespace CarpetLib { goto l81; // begin j loop - l80: 
+ l80: j = 0; js = j0; jd = dstjoff; @@ -239,7 +239,7 @@ namespace CarpetLib { goto l801; // begin i loop - l800: + l800: i = 0; is = i0; id = dstioff; @@ -247,7 +247,7 @@ namespace CarpetLib { goto l8001; // kernel - l8000: + l8000: dst[DSTIND3(id,jd,kd)] = interp0 (& src[SRCIND3(is,js,ks)]); i = i+1; id = id+1; @@ -255,7 +255,7 @@ namespace CarpetLib { goto l900; // kernel - l8001: + l8001: dst[DSTIND3(id,jd,kd)] = interp1 (& src[SRCIND3(is-5,js,ks)], srcdi); i = i+1; id = id+1; @@ -264,14 +264,14 @@ namespace CarpetLib { goto l900; // end i loop - l900: + l900: j = j+1; jd = jd+1; if (j < regjext) goto l801; goto l90; // begin i loop - l801: + l801: i = 0; is = i0; id = dstioff; @@ -279,7 +279,7 @@ namespace CarpetLib { goto l8011; // kernel - l8010: + l8010: dst[DSTIND3(id,jd,kd)] = interp1 (& src[SRCIND3(is,js-5,ks)], srcdj); i = i+1; id = id+1; @@ -287,7 +287,7 @@ namespace CarpetLib { goto l901; // kernel - l8011: + l8011: dst[DSTIND3(id,jd,kd)] = interp2 (& src[SRCIND3(is-5,js-5,ks)], srcdi, srcdj); i = i+1; @@ -297,7 +297,7 @@ namespace CarpetLib { goto l901; // end i loop - l901: + l901: j = j+1; jd = jd+1; js = js+1; @@ -305,14 +305,14 @@ namespace CarpetLib { goto l90; // end j loop - l90: + l90: k = k+1; kd = kd+1; if (k < regkext) goto l81; goto l9; // begin j loop - l81: + l81: j = 0; js = j0; jd = dstjoff; @@ -320,7 +320,7 @@ namespace CarpetLib { goto l811; // begin i loop - l810: + l810: i = 0; is = i0; id = dstioff; @@ -328,7 +328,7 @@ namespace CarpetLib { goto l8101; // kernel - l8100: + l8100: dst[DSTIND3(id,jd,kd)] = interp1 (& src[SRCIND3(is,js,ks-5)], srcdk); i = i+1; id = id+1; @@ -336,7 +336,7 @@ namespace CarpetLib { goto l910; // kernel - l8101: + l8101: dst[DSTIND3(id,jd,kd)] = interp2 (& src[SRCIND3(is-5,js,ks-5)], srcdi, srcdj); i = i+1; @@ -346,14 +346,14 @@ namespace CarpetLib { goto l910; // end i loop - l910: + l910: j = j+1; jd = jd+1; if (j < regjext) goto l811; goto l91; // begin i loop - l811: + l811: i = 0; is = i0; id = 
dstioff; @@ -361,7 +361,7 @@ namespace CarpetLib { goto l8111; // kernel - l8110: + l8110: dst[DSTIND3(id,jd,kd)] = interp2 (& src[SRCIND3(is,js-5,ks-5)], srcdj, srcdk); i = i+1; @@ -370,7 +370,7 @@ namespace CarpetLib { goto l911; // kernel - l8111: + l8111: { dst[DSTIND3(id,jd,kd)] = interp3 (& src[SRCIND3(is-5,js-5,ks-5)], srcdi, srcdj, srcdk); @@ -382,7 +382,7 @@ namespace CarpetLib { goto l911; // end i loop - l911: + l911: j = j+1; jd = jd+1; js = js+1; @@ -390,7 +390,7 @@ namespace CarpetLib { goto l91; // end j loop - l91: + l91: k = k+1; kd = kd+1; ks = ks+1; @@ -398,7 +398,7 @@ namespace CarpetLib { goto l9; // end k loop - l9:; + l9:; } diff --git a/Carpet/CarpetLib/src/prolongate_3d_o1_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_o1_rf2.cc index 72c52f05e..0b1ff0751 100644 --- a/Carpet/CarpetLib/src/prolongate_3d_o1_rf2.cc +++ b/Carpet/CarpetLib/src/prolongate_3d_o1_rf2.cc @@ -6,7 +6,7 @@ #include #include -#include "operator_prototypes.hh" +#include "operator_prototypes_3d.hh" #include "typeprops.hh" using namespace std; @@ -139,7 +139,7 @@ namespace CarpetLib { goto l81; // begin j loop - l80: + l80: j = 0; js = j0; jd = dstjoff; @@ -147,7 +147,7 @@ namespace CarpetLib { goto l801; // begin i loop - l800: + l800: i = 0; is = i0; id = dstioff; @@ -155,7 +155,7 @@ namespace CarpetLib { goto l8001; // kernel - l8000: + l8000: dst[DSTIND3(id,jd,kd)] = src[SRCIND3(is,js,ks)]; i = i+1; id = id+1; @@ -163,7 +163,7 @@ namespace CarpetLib { goto l900; // kernel - l8001: + l8001: dst[DSTIND3(id,jd,kd)] = + f1 * src[SRCIND3(is ,js,ks)] + f2 * src[SRCIND3(is+1,js,ks)]; @@ -174,14 +174,14 @@ namespace CarpetLib { goto l900; // end i loop - l900: + l900: j = j+1; jd = jd+1; if (j < regjext) goto l801; goto l90; // begin i loop - l801: + l801: i = 0; is = i0; id = dstioff; @@ -189,7 +189,7 @@ namespace CarpetLib { goto l8011; // kernel - l8010: + l8010: dst[DSTIND3(id,jd,kd)] = + f1 * src[SRCIND3(is,js ,ks)] + f2 * src[SRCIND3(is,js+1,ks)]; @@ -199,7 +199,7 @@ 
namespace CarpetLib { goto l901; // kernel - l8011: + l8011: dst[DSTIND3(id,jd,kd)] = + f1*f1 * src[SRCIND3(is ,js ,ks)] + f2*f1 * src[SRCIND3(is+1,js ,ks)] @@ -212,7 +212,7 @@ namespace CarpetLib { goto l901; // end i loop - l901: + l901: j = j+1; jd = jd+1; js = js+1; @@ -220,14 +220,14 @@ namespace CarpetLib { goto l90; // end j loop - l90: + l90: k = k+1; kd = kd+1; if (k < regkext) goto l81; goto l9; // begin j loop - l81: + l81: j = 0; js = j0; jd = dstjoff; @@ -235,7 +235,7 @@ namespace CarpetLib { goto l811; // begin i loop - l810: + l810: i = 0; is = i0; id = dstioff; @@ -243,7 +243,7 @@ namespace CarpetLib { goto l8101; // kernel - l8100: + l8100: dst[DSTIND3(id,jd,kd)] = + f1 * src[SRCIND3(is,js,ks )] + f2 * src[SRCIND3(is,js,ks+1)]; @@ -253,7 +253,7 @@ namespace CarpetLib { goto l910; // kernel - l8101: + l8101: dst[DSTIND3(id,jd,kd)] = + f1*f1 * src[SRCIND3(is ,js,ks )] + f2*f1 * src[SRCIND3(is+1,js,ks )] @@ -266,14 +266,14 @@ namespace CarpetLib { goto l910; // end i loop - l910: + l910: j = j+1; jd = jd+1; if (j < regjext) goto l811; goto l91; // begin i loop - l811: + l811: i = 0; is = i0; id = dstioff; @@ -281,7 +281,7 @@ namespace CarpetLib { goto l8111; // kernel - l8110: + l8110: dst[DSTIND3(id,jd,kd)] = + f1*f1 * src[SRCIND3(is,js ,ks )] + f2*f1 * src[SRCIND3(is,js+1,ks )] @@ -293,7 +293,7 @@ namespace CarpetLib { goto l911; // kernel - l8111: + l8111: { T const res1 = + f1*f1*f1 * src[SRCIND3(is ,js ,ks )] @@ -314,7 +314,7 @@ namespace CarpetLib { goto l911; // end i loop - l911: + l911: j = j+1; jd = jd+1; js = js+1; @@ -322,7 +322,7 @@ namespace CarpetLib { goto l91; // end j loop - l91: + l91: k = k+1; kd = kd+1; ks = ks+1; @@ -330,7 +330,7 @@ namespace CarpetLib { goto l9; // end k loop - l9:; + l9:; } diff --git a/Carpet/CarpetLib/src/prolongate_3d_o3_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_o3_rf2.cc index a55bd69da..91b3e5bc1 100644 --- a/Carpet/CarpetLib/src/prolongate_3d_o3_rf2.cc +++ b/Carpet/CarpetLib/src/prolongate_3d_o3_rf2.cc 
@@ -6,7 +6,7 @@ #include #include -#include "operator_prototypes.hh" +#include "operator_prototypes_3d.hh" #include "typeprops.hh" using namespace std; @@ -31,7 +31,7 @@ namespace CarpetLib { prolongate_3d_o3_rf2 (T const * restrict const src, ivect3 const & restrict srcext, T * restrict const dst, - ivect const & restrict dstext, + ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, ibbox3 const & restrict regbbox) @@ -143,7 +143,7 @@ namespace CarpetLib { goto l81; // begin j loop - l80: + l80: j = 0; js = j0; jd = dstjoff; @@ -151,7 +151,7 @@ namespace CarpetLib { goto l801; // begin i loop - l800: + l800: i = 0; is = i0; id = dstioff; @@ -159,7 +159,7 @@ namespace CarpetLib { goto l8001; // kernel - l8000: + l8000: dst[DSTIND3(id,jd,kd)] = src[SRCIND3(is,js,ks)]; i = i+1; id = id+1; @@ -167,7 +167,7 @@ namespace CarpetLib { goto l900; // kernel - l8001: + l8001: dst[DSTIND3(id,jd,kd)] = + f1 * src[SRCIND3(is-1,js,ks)] + f2 * src[SRCIND3(is ,js,ks)] @@ -180,14 +180,14 @@ namespace CarpetLib { goto l900; // end i loop - l900: + l900: j = j+1; jd = jd+1; if (j < regjext) goto l801; goto l90; // begin i loop - l801: + l801: i = 0; is = i0; id = dstioff; @@ -195,7 +195,7 @@ namespace CarpetLib { goto l8011; // kernel - l8010: + l8010: dst[DSTIND3(id,jd,kd)] = + f1 * src[SRCIND3(is,js-1,ks)] + f2 * src[SRCIND3(is,js ,ks)] @@ -207,7 +207,7 @@ namespace CarpetLib { goto l901; // kernel - l8011: + l8011: dst[DSTIND3(id,jd,kd)] = + f1*f1 * src[SRCIND3(is-1,js-1,ks)] + f2*f1 * src[SRCIND3(is ,js-1,ks)] @@ -232,7 +232,7 @@ namespace CarpetLib { goto l901; // end i loop - l901: + l901: j = j+1; jd = jd+1; js = js+1; @@ -240,14 +240,14 @@ namespace CarpetLib { goto l90; // end j loop - l90: + l90: k = k+1; kd = kd+1; if (k < regkext) goto l81; goto l9; // begin j loop - l81: + l81: j = 0; js = j0; jd = dstjoff; @@ -255,7 +255,7 @@ namespace CarpetLib { goto l811; // begin i loop - l810: + l810: i = 0; is = i0; id = dstioff; @@ 
-263,7 +263,7 @@ namespace CarpetLib { goto l8101; // kernel - l8100: + l8100: dst[DSTIND3(id,jd,kd)] = + f1 * src[SRCIND3(is,js,ks-1)] + f2 * src[SRCIND3(is,js,ks )] @@ -275,7 +275,7 @@ namespace CarpetLib { goto l910; // kernel - l8101: + l8101: dst[DSTIND3(id,jd,kd)] = + f1*f1 * src[SRCIND3(is-1,js,ks-1)] + f2*f1 * src[SRCIND3(is ,js,ks-1)] @@ -300,14 +300,14 @@ namespace CarpetLib { goto l910; // end i loop - l910: + l910: j = j+1; jd = jd+1; if (j < regjext) goto l811; goto l91; // begin i loop - l811: + l811: i = 0; is = i0; id = dstioff; @@ -315,7 +315,7 @@ namespace CarpetLib { goto l8111; // kernel - l8110: + l8110: dst[DSTIND3(id,jd,kd)] = + f1*f1 * src[SRCIND3(is,js-1,ks-1)] + f2*f1 * src[SRCIND3(is,js ,ks-1)] @@ -339,7 +339,7 @@ namespace CarpetLib { goto l911; // kernel - l8111: + l8111: { T const res1 = + f1*f1*f1 * src[SRCIND3(is-1,js-1,ks-1)] @@ -418,7 +418,7 @@ namespace CarpetLib { goto l911; // end i loop - l911: + l911: j = j+1; jd = jd+1; js = js+1; @@ -426,7 +426,7 @@ namespace CarpetLib { goto l91; // end j loop - l91: + l91: k = k+1; kd = kd+1; ks = ks+1; @@ -434,7 +434,7 @@ namespace CarpetLib { goto l9; // end k loop - l9:; + l9:; } diff --git a/Carpet/CarpetLib/src/prolongate_3d_o5_monotone_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_o5_monotone_rf2.cc new file mode 100644 index 000000000..9ac0b2f7e --- /dev/null +++ b/Carpet/CarpetLib/src/prolongate_3d_o5_monotone_rf2.cc @@ -0,0 +1,851 @@ +// This is meant to reproduce the prolongation algorithm used in the +// SACRA code (based on IH's interpretation of their papers and +// comments in talks, so it might be an idea for someone to talk to +// them! Of course, given that this is "general purpose" and SACRA is +// very specific in the variables converted, it probably won't be +// possible to get a perfect reproduction). +// +// The idea is to use fifth order Lagrange interpolation based on the +// nearest 6 points (in any one dimension). However, we must also +// ensure monotonicity. 
To do this we check that the result of the +// fifth order result (which is just copied from prolongate_3d_o5_rf2) +// is monotonic with respect to the relevant neighbours), and if not +// we impose linear interpolation instead (from prolongate_3d_o1_rf2). +// +// Note that this code does not work for complex GFs (due to the use +// of the max and min intrinsics). + +#include +#include +#include +#include + +#include +#include + +#include "operator_prototypes_3d.hh" +#include "typeprops.hh" + +using namespace std; + + + +namespace CarpetLib { + + + +#define SRCIND3(i,j,k) \ + index3 (i, j, k, \ + srciext, srcjext, srckext) +#define DSTIND3(i,j,k) \ + index3 (i, j, k, \ + dstiext, dstjext, dstkext) + + + template + inline + T + min4 (T const & x1, T const & x2, T const & x3, T const & x4) + { + return min (min(x1, x2), min (x3, x4)); + } + + template + inline + T + max4 (T const & x1, T const & x2, T const & x3, T const & x4) + { + return max (max(x1, x2), max (x3, x4)); + } + + template + inline + T + min8 (T const & x1, T const & x2, T const & x3, T const & x4, + T const & x5, T const & x6, T const & x7, T const & x8) + { + return min( min (min(x1, x2), min (x3, x4)), + min (min(x5, x6), min (x7, x8)) ); + } + + template + inline + T + max8 (T const & x1, T const & x2, T const & x3, T const & x4, + T const & x5, T const & x6, T const & x7, T const & x8) + { + return max( max (max(x1, x2), max (x3, x4)), + max (max(x5, x6), max (x7, x8)) ); + } + + + template + void + prolongate_3d_o5_monotone_rf2 (T const * restrict const src, + ivect3 const & restrict srcext, + T * restrict const dst, + ivect3 const & restrict dstext, + ibbox3 const & restrict srcbbox, + ibbox3 const & restrict dstbbox, + ibbox3 const & restrict regbbox) + { + typedef typename typeprops::real RT; + + + + if (any (srcbbox.stride() <= regbbox.stride() or + dstbbox.stride() != regbbox.stride())) + { + CCTK_WARN (0, "Internal error: strides disagree"); + } + + if (any (srcbbox.stride() != reffact2 * 
dstbbox.stride())) { + CCTK_WARN (0, "Internal error: source strides are not twice the destination strides"); + } + + // This could be handled, but is likely to point to an error + // elsewhere + if (regbbox.empty()) { + CCTK_WARN (0, "Internal error: region extent is empty"); + } + + + + ivect3 const regext = regbbox.shape() / regbbox.stride(); + assert (all ((regbbox.lower() - srcbbox.lower()) % regbbox.stride() == 0)); + ivect3 const srcoff = (regbbox.lower() - srcbbox.lower()) / regbbox.stride(); + assert (all ((regbbox.lower() - dstbbox.lower()) % regbbox.stride() == 0)); + ivect3 const dstoff = (regbbox.lower() - dstbbox.lower()) / regbbox.stride(); + + + + bvect3 const needoffsetlo = srcoff % reffact2 != 0 or regext > 1; + bvect3 const needoffsethi = (srcoff + regext - 1) % reffact2 != 0 or regext > 1; + ivect3 const offsetlo = either (needoffsetlo, 3, 0); + ivect3 const offsethi = either (needoffsethi, 3, 0); + + + + if (not regbbox.expand(offsetlo, offsethi).is_contained_in(srcbbox) or + not regbbox .is_contained_in(dstbbox)) + { + CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); + } + + if (any (srcext != srcbbox.shape() / srcbbox.stride() or + dstext != dstbbox.shape() / dstbbox.stride())) + { + CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); + } + + + + size_t const srciext = srcext[0]; + size_t const srcjext = srcext[1]; + size_t const srckext = srcext[2]; + + size_t const dstiext = dstext[0]; + size_t const dstjext = dstext[1]; + size_t const dstkext = dstext[2]; + + size_t const regiext = regext[0]; + size_t const regjext = regext[1]; + size_t const regkext = regext[2]; + + size_t const srcioff = srcoff[0]; + size_t const srcjoff = srcoff[1]; + size_t const srckoff = srcoff[2]; + + size_t const dstioff = dstoff[0]; + size_t const dstjoff = dstoff[1]; + size_t const dstkoff = dstoff[2]; + + + + size_t const fi = srcioff % 2; + size_t const fj = srcjoff % 2; + size_t const fk = srckoff % 2; 
+ + size_t const i0 = srcioff / 2; + size_t const j0 = srcjoff / 2; + size_t const k0 = srckoff / 2; + + RT const one = 1; + + RT const f1 = 3*one/256; + RT const f2 = - 25*one/256; + RT const f3 = 150*one/256; + RT const f4 = 150*one/256; + RT const f5 = - 25*one/256; + RT const f6 = 3*one/256; + + RT const o1_f1 = one/2; + RT const o1_f2 = one/2; + + + // Loop over fine region + // Label scheme: l 8 fk fj fi + + size_t is, js, ks; + size_t id, jd, kd; + size_t i, j, k; + + // begin k loop + k = 0; + ks = k0; + kd = dstkoff; + if (fk == 0) goto l80; + goto l81; + + // begin j loop + l80: + j = 0; + js = j0; + jd = dstjoff; + if (fj == 0) goto l800; + goto l801; + + // begin i loop + l800: + i = 0; + is = i0; + id = dstioff; + if (fi == 0) goto l8000; + goto l8001; + + // kernel + l8000: + dst[DSTIND3(id,jd,kd)] = src[SRCIND3(is,js,ks)]; + i = i+1; + id = id+1; + if (i < regiext) goto l8001; + goto l900; + + // kernel + l8001: + dst[DSTIND3(id,jd,kd)] = + + f1 * src[SRCIND3(is-2,js,ks)] + + f2 * src[SRCIND3(is-1,js,ks)] + + f3 * src[SRCIND3(is ,js,ks)] + + f4 * src[SRCIND3(is+1,js,ks)] + + f5 * src[SRCIND3(is+2,js,ks)] + + f6 * src[SRCIND3(is+3,js,ks)]; + // Monotonicity enforcement + if ((dst[DSTIND3(id,jd,kd)] > max(src[SRCIND3(is ,js ,ks )], + src[SRCIND3(is+1,js ,ks )]))|| + (dst[DSTIND3(id,jd,kd)] < min(src[SRCIND3(is ,js ,ks )], + src[SRCIND3(is+1,js ,ks )]))) { + dst[DSTIND3(id,jd,kd)] = + + o1_f1 * src[SRCIND3(is ,js,ks)] + + o1_f2 * src[SRCIND3(is+1,js,ks)]; + + } + + i = i+1; + id = id+1; + is = is+1; + if (i < regiext) goto l8000; + goto l900; + + // end i loop + l900: + j = j+1; + jd = jd+1; + if (j < regjext) goto l801; + goto l90; + + // begin i loop + l801: + i = 0; + is = i0; + id = dstioff; + if (fi == 0) goto l8010; + goto l8011; + + // kernel + l8010: + dst[DSTIND3(id,jd,kd)] = + + f1 * src[SRCIND3(is,js-2,ks)] + + f2 * src[SRCIND3(is,js-1,ks)] + + f3 * src[SRCIND3(is,js ,ks)] + + f4 * src[SRCIND3(is,js+1,ks)] + + f5 * src[SRCIND3(is,js+2,ks)] + 
+ f6 * src[SRCIND3(is,js+3,ks)]; + // Monotonicity enforcement + if ((dst[DSTIND3(id,jd,kd)] > max(src[SRCIND3(is ,js ,ks )], + src[SRCIND3(is ,js+1,ks )]))|| + (dst[DSTIND3(id,jd,kd)] < min(src[SRCIND3(is ,js ,ks )], + src[SRCIND3(is ,js+1,ks )]))) { + dst[DSTIND3(id,jd,kd)] = + + o1_f1 * src[SRCIND3(is,js ,ks)] + + o1_f2 * src[SRCIND3(is,js+1,ks)]; + + } + i = i+1; + id = id+1; + if (i < regiext) goto l8011; + goto l901; + + // kernel + l8011: + dst[DSTIND3(id,jd,kd)] = + + f1*f1 * src[SRCIND3(is-2,js-2,ks)] + + f2*f1 * src[SRCIND3(is-1,js-2,ks)] + + f3*f1 * src[SRCIND3(is ,js-2,ks)] + + f4*f1 * src[SRCIND3(is+1,js-2,ks)] + + f5*f1 * src[SRCIND3(is+2,js-2,ks)] + + f6*f1 * src[SRCIND3(is+3,js-2,ks)] + + f1*f2 * src[SRCIND3(is-2,js-1,ks)] + + f2*f2 * src[SRCIND3(is-1,js-1,ks)] + + f3*f2 * src[SRCIND3(is ,js-1,ks)] + + f4*f2 * src[SRCIND3(is+1,js-1,ks)] + + f5*f2 * src[SRCIND3(is+2,js-1,ks)] + + f6*f2 * src[SRCIND3(is+3,js-1,ks)] + + f1*f3 * src[SRCIND3(is-2,js ,ks)] + + f2*f3 * src[SRCIND3(is-1,js ,ks)] + + f3*f3 * src[SRCIND3(is ,js ,ks)] + + f4*f3 * src[SRCIND3(is+1,js ,ks)] + + f5*f3 * src[SRCIND3(is+2,js ,ks)] + + f6*f3 * src[SRCIND3(is+3,js ,ks)] + + f1*f4 * src[SRCIND3(is-2,js+1,ks)] + + f2*f4 * src[SRCIND3(is-1,js+1,ks)] + + f3*f4 * src[SRCIND3(is ,js+1,ks)] + + f4*f4 * src[SRCIND3(is+1,js+1,ks)] + + f5*f4 * src[SRCIND3(is+2,js+1,ks)] + + f6*f4 * src[SRCIND3(is+3,js+1,ks)] + + f1*f5 * src[SRCIND3(is-2,js+2,ks)] + + f2*f5 * src[SRCIND3(is-1,js+2,ks)] + + f3*f5 * src[SRCIND3(is ,js+2,ks)] + + f4*f5 * src[SRCIND3(is+1,js+2,ks)] + + f5*f5 * src[SRCIND3(is+2,js+2,ks)] + + f6*f5 * src[SRCIND3(is+3,js+2,ks)] + + f1*f6 * src[SRCIND3(is-2,js+3,ks)] + + f2*f6 * src[SRCIND3(is-1,js+3,ks)] + + f3*f6 * src[SRCIND3(is ,js+3,ks)] + + f4*f6 * src[SRCIND3(is+1,js+3,ks)] + + f5*f6 * src[SRCIND3(is+2,js+3,ks)] + + f6*f6 * src[SRCIND3(is+3,js+3,ks)]; + // Monotonicity enforcement + if ((dst[DSTIND3(id,jd,kd)] > max4(src[SRCIND3(is ,js ,ks )], + src[SRCIND3(is+1,js ,ks )], + 
src[SRCIND3(is ,js+1,ks )], + src[SRCIND3(is+1,js+1,ks )]))|| + (dst[DSTIND3(id,jd,kd)] < min4(src[SRCIND3(is ,js ,ks )], + src[SRCIND3(is+1,js ,ks )], + src[SRCIND3(is ,js+1,ks )], + src[SRCIND3(is+1,js+1,ks )]))) { + dst[DSTIND3(id,jd,kd)] = + + o1_f1*o1_f1 * src[SRCIND3(is ,js ,ks)] + + o1_f2*o1_f1 * src[SRCIND3(is+1,js ,ks)] + + o1_f1*o1_f2 * src[SRCIND3(is ,js+1,ks)] + + o1_f2*o1_f2 * src[SRCIND3(is+1,js+1,ks)]; + } + i = i+1; + id = id+1; + is = is+1; + if (i < regiext) goto l8010; + goto l901; + + // end i loop + l901: + j = j+1; + jd = jd+1; + js = js+1; + if (j < regjext) goto l800; + goto l90; + + // end j loop + l90: + k = k+1; + kd = kd+1; + if (k < regkext) goto l81; + goto l9; + + // begin j loop + l81: + j = 0; + js = j0; + jd = dstjoff; + if (fj == 0) goto l810; + goto l811; + + // begin i loop + l810: + i = 0; + is = i0; + id = dstioff; + if (fi == 0) goto l8100; + goto l8101; + + // kernel + l8100: + dst[DSTIND3(id,jd,kd)] = + + f1 * src[SRCIND3(is,js,ks-2)] + + f2 * src[SRCIND3(is,js,ks-1)] + + f3 * src[SRCIND3(is,js,ks )] + + f4 * src[SRCIND3(is,js,ks+1)] + + f5 * src[SRCIND3(is,js,ks+2)] + + f6 * src[SRCIND3(is,js,ks+3)]; + // Monotonicity enforcement + if ((dst[DSTIND3(id,jd,kd)] > max(src[SRCIND3(is ,js ,ks )], + src[SRCIND3(is ,js ,ks+1)]))|| + (dst[DSTIND3(id,jd,kd)] < min(src[SRCIND3(is ,js ,ks )], + src[SRCIND3(is ,js ,ks+1)]))) { + dst[DSTIND3(id,jd,kd)] = + + o1_f1 * src[SRCIND3(is,js,ks )] + + o1_f2 * src[SRCIND3(is,js,ks+1)]; + } + i = i+1; + id = id+1; + if (i < regiext) goto l8101; + goto l910; + + // kernel + l8101: + dst[DSTIND3(id,jd,kd)] = + + f1*f1 * src[SRCIND3(is-2,js,ks-2)] + + f2*f1 * src[SRCIND3(is-1,js,ks-2)] + + f3*f1 * src[SRCIND3(is ,js,ks-2)] + + f4*f1 * src[SRCIND3(is+1,js,ks-2)] + + f5*f1 * src[SRCIND3(is+2,js,ks-2)] + + f6*f1 * src[SRCIND3(is+3,js,ks-2)] + + f1*f2 * src[SRCIND3(is-2,js,ks-1)] + + f2*f2 * src[SRCIND3(is-1,js,ks-1)] + + f3*f2 * src[SRCIND3(is ,js,ks-1)] + + f4*f2 * src[SRCIND3(is+1,js,ks-1)] + + 
f5*f2 * src[SRCIND3(is+2,js,ks-1)] + + f6*f2 * src[SRCIND3(is+3,js,ks-1)] + + f1*f3 * src[SRCIND3(is-2,js,ks )] + + f2*f3 * src[SRCIND3(is-1,js,ks )] + + f3*f3 * src[SRCIND3(is ,js,ks )] + + f4*f3 * src[SRCIND3(is+1,js,ks )] + + f5*f3 * src[SRCIND3(is+2,js,ks )] + + f6*f3 * src[SRCIND3(is+3,js,ks )] + + f1*f4 * src[SRCIND3(is-2,js,ks+1)] + + f2*f4 * src[SRCIND3(is-1,js,ks+1)] + + f3*f4 * src[SRCIND3(is ,js,ks+1)] + + f4*f4 * src[SRCIND3(is+1,js,ks+1)] + + f5*f4 * src[SRCIND3(is+2,js,ks+1)] + + f6*f4 * src[SRCIND3(is+3,js,ks+1)] + + f1*f5 * src[SRCIND3(is-2,js,ks+2)] + + f2*f5 * src[SRCIND3(is-1,js,ks+2)] + + f3*f5 * src[SRCIND3(is ,js,ks+2)] + + f4*f5 * src[SRCIND3(is+1,js,ks+2)] + + f5*f5 * src[SRCIND3(is+2,js,ks+2)] + + f6*f5 * src[SRCIND3(is+3,js,ks+2)] + + f1*f6 * src[SRCIND3(is-2,js,ks+3)] + + f2*f6 * src[SRCIND3(is-1,js,ks+3)] + + f3*f6 * src[SRCIND3(is ,js,ks+3)] + + f4*f6 * src[SRCIND3(is+1,js,ks+3)] + + f5*f6 * src[SRCIND3(is+2,js,ks+3)] + + f6*f6 * src[SRCIND3(is+3,js,ks+3)]; + // Monotonicity enforcement + if ((dst[DSTIND3(id,jd,kd)] > max4(src[SRCIND3(is ,js ,ks )], + src[SRCIND3(is+1,js ,ks )], + src[SRCIND3(is ,js ,ks+1)], + src[SRCIND3(is+1,js ,ks+1)]))|| + (dst[DSTIND3(id,jd,kd)] < min4(src[SRCIND3(is ,js ,ks )], + src[SRCIND3(is+1,js ,ks )], + src[SRCIND3(is ,js ,ks+1)], + src[SRCIND3(is+1,js ,ks+1)]))) { + dst[DSTIND3(id,jd,kd)] = + + o1_f1*o1_f1 * src[SRCIND3(is ,js,ks )] + + o1_f2*o1_f1 * src[SRCIND3(is+1,js,ks )] + + o1_f1*o1_f2 * src[SRCIND3(is ,js,ks+1)] + + o1_f2*o1_f2 * src[SRCIND3(is+1,js,ks+1)]; + } + i = i+1; + id = id+1; + is = is+1; + if (i < regiext) goto l8100; + goto l910; + + // end i loop + l910: + j = j+1; + jd = jd+1; + if (j < regjext) goto l811; + goto l91; + + // begin i loop + l811: + i = 0; + is = i0; + id = dstioff; + if (fi == 0) goto l8110; + goto l8111; + + // kernel + l8110: + dst[DSTIND3(id,jd,kd)] = + + f1*f1 * src[SRCIND3(is,js-2,ks-2)] + + f2*f1 * src[SRCIND3(is,js-1,ks-2)] + + f3*f1 * src[SRCIND3(is,js ,ks-2)] + + 
f4*f1 * src[SRCIND3(is,js+1,ks-2)] + + f5*f1 * src[SRCIND3(is,js+2,ks-2)] + + f6*f1 * src[SRCIND3(is,js+3,ks-2)] + + f1*f2 * src[SRCIND3(is,js-2,ks-1)] + + f2*f2 * src[SRCIND3(is,js-1,ks-1)] + + f3*f2 * src[SRCIND3(is,js ,ks-1)] + + f4*f2 * src[SRCIND3(is,js+1,ks-1)] + + f5*f2 * src[SRCIND3(is,js+2,ks-1)] + + f6*f2 * src[SRCIND3(is,js+3,ks-1)] + + f1*f3 * src[SRCIND3(is,js-2,ks )] + + f2*f3 * src[SRCIND3(is,js-1,ks )] + + f3*f3 * src[SRCIND3(is,js ,ks )] + + f4*f3 * src[SRCIND3(is,js+1,ks )] + + f5*f3 * src[SRCIND3(is,js+2,ks )] + + f6*f3 * src[SRCIND3(is,js+3,ks )] + + f1*f4 * src[SRCIND3(is,js-2,ks+1)] + + f2*f4 * src[SRCIND3(is,js-1,ks+1)] + + f3*f4 * src[SRCIND3(is,js ,ks+1)] + + f4*f4 * src[SRCIND3(is,js+1,ks+1)] + + f5*f4 * src[SRCIND3(is,js+2,ks+1)] + + f6*f4 * src[SRCIND3(is,js+3,ks+1)] + + f1*f5 * src[SRCIND3(is,js-2,ks+2)] + + f2*f5 * src[SRCIND3(is,js-1,ks+2)] + + f3*f5 * src[SRCIND3(is,js ,ks+2)] + + f4*f5 * src[SRCIND3(is,js+1,ks+2)] + + f5*f5 * src[SRCIND3(is,js+2,ks+2)] + + f6*f5 * src[SRCIND3(is,js+3,ks+2)] + + f1*f6 * src[SRCIND3(is,js-2,ks+3)] + + f2*f6 * src[SRCIND3(is,js-1,ks+3)] + + f3*f6 * src[SRCIND3(is,js ,ks+3)] + + f4*f6 * src[SRCIND3(is,js+1,ks+3)] + + f5*f6 * src[SRCIND3(is,js+2,ks+3)] + + f6*f6 * src[SRCIND3(is,js+3,ks+3)]; + // Monotonicity enforcement + if ((dst[DSTIND3(id,jd,kd)] > max4(src[SRCIND3(is ,js ,ks )], + src[SRCIND3(is ,js+1,ks )], + src[SRCIND3(is ,js ,ks+1)], + src[SRCIND3(is ,js+1,ks+1)]))|| + (dst[DSTIND3(id,jd,kd)] < min4(src[SRCIND3(is ,js ,ks )], + src[SRCIND3(is ,js+1,ks )], + src[SRCIND3(is ,js ,ks+1)], + src[SRCIND3(is ,js+1,ks+1)]))) { + dst[DSTIND3(id,jd,kd)] = + + o1_f1*o1_f1 * src[SRCIND3(is,js ,ks )] + + o1_f2*o1_f1 * src[SRCIND3(is,js+1,ks )] + + o1_f1*o1_f2 * src[SRCIND3(is,js ,ks+1)] + + o1_f2*o1_f2 * src[SRCIND3(is,js+1,ks+1)]; + } + i = i+1; + id = id+1; + if (i < regiext) goto l8111; + goto l911; + + // kernel + l8111: + { + T const res1 = + + f1*f1*f1 * src[SRCIND3(is-2,js-2,ks-2)] + + f2*f1*f1 * 
src[SRCIND3(is-1,js-2,ks-2)] + + f3*f1*f1 * src[SRCIND3(is ,js-2,ks-2)] + + f4*f1*f1 * src[SRCIND3(is+1,js-2,ks-2)] + + f5*f1*f1 * src[SRCIND3(is+2,js-2,ks-2)] + + f6*f1*f1 * src[SRCIND3(is+3,js-2,ks-2)] + + f1*f2*f1 * src[SRCIND3(is-2,js-1,ks-2)] + + f2*f2*f1 * src[SRCIND3(is-1,js-1,ks-2)] + + f3*f2*f1 * src[SRCIND3(is ,js-1,ks-2)] + + f4*f2*f1 * src[SRCIND3(is+1,js-1,ks-2)] + + f5*f2*f1 * src[SRCIND3(is+2,js-1,ks-2)] + + f6*f2*f1 * src[SRCIND3(is+3,js-1,ks-2)] + + f1*f3*f1 * src[SRCIND3(is-2,js ,ks-2)] + + f2*f3*f1 * src[SRCIND3(is-1,js ,ks-2)] + + f3*f3*f1 * src[SRCIND3(is ,js ,ks-2)] + + f4*f3*f1 * src[SRCIND3(is+1,js ,ks-2)] + + f5*f3*f1 * src[SRCIND3(is+2,js ,ks-2)] + + f6*f3*f1 * src[SRCIND3(is+3,js ,ks-2)] + + f1*f4*f1 * src[SRCIND3(is-2,js+1,ks-2)] + + f2*f4*f1 * src[SRCIND3(is-1,js+1,ks-2)] + + f3*f4*f1 * src[SRCIND3(is ,js+1,ks-2)] + + f4*f4*f1 * src[SRCIND3(is+1,js+1,ks-2)] + + f5*f4*f1 * src[SRCIND3(is+2,js+1,ks-2)] + + f6*f4*f1 * src[SRCIND3(is+3,js+1,ks-2)] + + f1*f5*f1 * src[SRCIND3(is-2,js+2,ks-2)] + + f2*f5*f1 * src[SRCIND3(is-1,js+2,ks-2)] + + f3*f5*f1 * src[SRCIND3(is ,js+2,ks-2)] + + f4*f5*f1 * src[SRCIND3(is+1,js+2,ks-2)] + + f5*f5*f1 * src[SRCIND3(is+2,js+2,ks-2)] + + f6*f5*f1 * src[SRCIND3(is+3,js+2,ks-2)] + + f1*f6*f1 * src[SRCIND3(is-2,js+3,ks-2)] + + f2*f6*f1 * src[SRCIND3(is-1,js+3,ks-2)] + + f3*f6*f1 * src[SRCIND3(is ,js+3,ks-2)] + + f4*f6*f1 * src[SRCIND3(is+1,js+3,ks-2)] + + f5*f6*f1 * src[SRCIND3(is+2,js+3,ks-2)] + + f6*f6*f1 * src[SRCIND3(is+3,js+3,ks-2)]; + T const res2 = + + f1*f1*f2 * src[SRCIND3(is-2,js-2,ks-1)] + + f2*f1*f2 * src[SRCIND3(is-1,js-2,ks-1)] + + f3*f1*f2 * src[SRCIND3(is ,js-2,ks-1)] + + f4*f1*f2 * src[SRCIND3(is+1,js-2,ks-1)] + + f5*f1*f2 * src[SRCIND3(is+2,js-2,ks-1)] + + f6*f1*f2 * src[SRCIND3(is+3,js-2,ks-1)] + + f1*f2*f2 * src[SRCIND3(is-2,js-1,ks-1)] + + f2*f2*f2 * src[SRCIND3(is-1,js-1,ks-1)] + + f3*f2*f2 * src[SRCIND3(is ,js-1,ks-1)] + + f4*f2*f2 * src[SRCIND3(is+1,js-1,ks-1)] + + f5*f2*f2 * 
src[SRCIND3(is+2,js-1,ks-1)] + + f6*f2*f2 * src[SRCIND3(is+3,js-1,ks-1)] + + f1*f3*f2 * src[SRCIND3(is-2,js ,ks-1)] + + f2*f3*f2 * src[SRCIND3(is-1,js ,ks-1)] + + f3*f3*f2 * src[SRCIND3(is ,js ,ks-1)] + + f4*f3*f2 * src[SRCIND3(is+1,js ,ks-1)] + + f5*f3*f2 * src[SRCIND3(is+2,js ,ks-1)] + + f6*f3*f2 * src[SRCIND3(is+3,js ,ks-1)] + + f1*f4*f2 * src[SRCIND3(is-2,js+1,ks-1)] + + f2*f4*f2 * src[SRCIND3(is-1,js+1,ks-1)] + + f3*f4*f2 * src[SRCIND3(is ,js+1,ks-1)] + + f4*f4*f2 * src[SRCIND3(is+1,js+1,ks-1)] + + f5*f4*f2 * src[SRCIND3(is+2,js+1,ks-1)] + + f6*f4*f2 * src[SRCIND3(is+3,js+1,ks-1)] + + f1*f5*f2 * src[SRCIND3(is-2,js+2,ks-1)] + + f2*f5*f2 * src[SRCIND3(is-1,js+2,ks-1)] + + f3*f5*f2 * src[SRCIND3(is ,js+2,ks-1)] + + f4*f5*f2 * src[SRCIND3(is+1,js+2,ks-1)] + + f5*f5*f2 * src[SRCIND3(is+2,js+2,ks-1)] + + f6*f5*f2 * src[SRCIND3(is+3,js+2,ks-1)] + + f1*f6*f2 * src[SRCIND3(is-2,js+3,ks-1)] + + f2*f6*f2 * src[SRCIND3(is-1,js+3,ks-1)] + + f3*f6*f2 * src[SRCIND3(is ,js+3,ks-1)] + + f4*f6*f2 * src[SRCIND3(is+1,js+3,ks-1)] + + f5*f6*f2 * src[SRCIND3(is+2,js+3,ks-1)] + + f6*f6*f2 * src[SRCIND3(is+3,js+3,ks-1)]; + T const res3 = + + f1*f1*f3 * src[SRCIND3(is-2,js-2,ks )] + + f2*f1*f3 * src[SRCIND3(is-1,js-2,ks )] + + f3*f1*f3 * src[SRCIND3(is ,js-2,ks )] + + f4*f1*f3 * src[SRCIND3(is+1,js-2,ks )] + + f5*f1*f3 * src[SRCIND3(is+2,js-2,ks )] + + f6*f1*f3 * src[SRCIND3(is+3,js-2,ks )] + + f1*f2*f3 * src[SRCIND3(is-2,js-1,ks )] + + f2*f2*f3 * src[SRCIND3(is-1,js-1,ks )] + + f3*f2*f3 * src[SRCIND3(is ,js-1,ks )] + + f4*f2*f3 * src[SRCIND3(is+1,js-1,ks )] + + f5*f2*f3 * src[SRCIND3(is+2,js-1,ks )] + + f6*f2*f3 * src[SRCIND3(is+3,js-1,ks )] + + f1*f3*f3 * src[SRCIND3(is-2,js ,ks )] + + f2*f3*f3 * src[SRCIND3(is-1,js ,ks )] + + f3*f3*f3 * src[SRCIND3(is ,js ,ks )] + + f4*f3*f3 * src[SRCIND3(is+1,js ,ks )] + + f5*f3*f3 * src[SRCIND3(is+2,js ,ks )] + + f6*f3*f3 * src[SRCIND3(is+3,js ,ks )] + + f1*f4*f3 * src[SRCIND3(is-2,js+1,ks )] + + f2*f4*f3 * src[SRCIND3(is-1,js+1,ks )] + + 
f3*f4*f3 * src[SRCIND3(is ,js+1,ks )] + + f4*f4*f3 * src[SRCIND3(is+1,js+1,ks )] + + f5*f4*f3 * src[SRCIND3(is+2,js+1,ks )] + + f6*f4*f3 * src[SRCIND3(is+3,js+1,ks )] + + f1*f5*f3 * src[SRCIND3(is-2,js+2,ks )] + + f2*f5*f3 * src[SRCIND3(is-1,js+2,ks )] + + f3*f5*f3 * src[SRCIND3(is ,js+2,ks )] + + f4*f5*f3 * src[SRCIND3(is+1,js+2,ks )] + + f5*f5*f3 * src[SRCIND3(is+2,js+2,ks )] + + f6*f5*f3 * src[SRCIND3(is+3,js+2,ks )] + + f1*f6*f3 * src[SRCIND3(is-2,js+3,ks )] + + f2*f6*f3 * src[SRCIND3(is-1,js+3,ks )] + + f3*f6*f3 * src[SRCIND3(is ,js+3,ks )] + + f4*f6*f3 * src[SRCIND3(is+1,js+3,ks )] + + f5*f6*f3 * src[SRCIND3(is+2,js+3,ks )] + + f6*f6*f3 * src[SRCIND3(is+3,js+3,ks )]; + T const res4 = + + f1*f1*f4 * src[SRCIND3(is-2,js-2,ks+1)] + + f2*f1*f4 * src[SRCIND3(is-1,js-2,ks+1)] + + f3*f1*f4 * src[SRCIND3(is ,js-2,ks+1)] + + f4*f1*f4 * src[SRCIND3(is+1,js-2,ks+1)] + + f5*f1*f4 * src[SRCIND3(is+2,js-2,ks+1)] + + f6*f1*f4 * src[SRCIND3(is+3,js-2,ks+1)] + + f1*f2*f4 * src[SRCIND3(is-2,js-1,ks+1)] + + f2*f2*f4 * src[SRCIND3(is-1,js-1,ks+1)] + + f3*f2*f4 * src[SRCIND3(is ,js-1,ks+1)] + + f4*f2*f4 * src[SRCIND3(is+1,js-1,ks+1)] + + f5*f2*f4 * src[SRCIND3(is+2,js-1,ks+1)] + + f6*f2*f4 * src[SRCIND3(is+3,js-1,ks+1)] + + f1*f3*f4 * src[SRCIND3(is-2,js ,ks+1)] + + f2*f3*f4 * src[SRCIND3(is-1,js ,ks+1)] + + f3*f3*f4 * src[SRCIND3(is ,js ,ks+1)] + + f4*f3*f4 * src[SRCIND3(is+1,js ,ks+1)] + + f5*f3*f4 * src[SRCIND3(is+2,js ,ks+1)] + + f6*f3*f4 * src[SRCIND3(is+3,js ,ks+1)] + + f1*f4*f4 * src[SRCIND3(is-2,js+1,ks+1)] + + f2*f4*f4 * src[SRCIND3(is-1,js+1,ks+1)] + + f3*f4*f4 * src[SRCIND3(is ,js+1,ks+1)] + + f4*f4*f4 * src[SRCIND3(is+1,js+1,ks+1)] + + f5*f4*f4 * src[SRCIND3(is+2,js+1,ks+1)] + + f6*f4*f4 * src[SRCIND3(is+3,js+1,ks+1)] + + f1*f5*f4 * src[SRCIND3(is-2,js+2,ks+1)] + + f2*f5*f4 * src[SRCIND3(is-1,js+2,ks+1)] + + f3*f5*f4 * src[SRCIND3(is ,js+2,ks+1)] + + f4*f5*f4 * src[SRCIND3(is+1,js+2,ks+1)] + + f5*f5*f4 * src[SRCIND3(is+2,js+2,ks+1)] + + f6*f5*f4 * 
src[SRCIND3(is+3,js+2,ks+1)] + + f1*f6*f4 * src[SRCIND3(is-2,js+3,ks+1)] + + f2*f6*f4 * src[SRCIND3(is-1,js+3,ks+1)] + + f3*f6*f4 * src[SRCIND3(is ,js+3,ks+1)] + + f4*f6*f4 * src[SRCIND3(is+1,js+3,ks+1)] + + f5*f6*f4 * src[SRCIND3(is+2,js+3,ks+1)] + + f6*f6*f4 * src[SRCIND3(is+3,js+3,ks+1)]; + T const res5 = + + f1*f1*f5 * src[SRCIND3(is-2,js-2,ks+2)] + + f2*f1*f5 * src[SRCIND3(is-1,js-2,ks+2)] + + f3*f1*f5 * src[SRCIND3(is ,js-2,ks+2)] + + f4*f1*f5 * src[SRCIND3(is+1,js-2,ks+2)] + + f5*f1*f5 * src[SRCIND3(is+2,js-2,ks+2)] + + f6*f1*f5 * src[SRCIND3(is+3,js-2,ks+2)] + + f1*f2*f5 * src[SRCIND3(is-2,js-1,ks+2)] + + f2*f2*f5 * src[SRCIND3(is-1,js-1,ks+2)] + + f3*f2*f5 * src[SRCIND3(is ,js-1,ks+2)] + + f4*f2*f5 * src[SRCIND3(is+1,js-1,ks+2)] + + f5*f2*f5 * src[SRCIND3(is+2,js-1,ks+2)] + + f6*f2*f5 * src[SRCIND3(is+3,js-1,ks+2)] + + f1*f3*f5 * src[SRCIND3(is-2,js ,ks+2)] + + f2*f3*f5 * src[SRCIND3(is-1,js ,ks+2)] + + f3*f3*f5 * src[SRCIND3(is ,js ,ks+2)] + + f4*f3*f5 * src[SRCIND3(is+1,js ,ks+2)] + + f5*f3*f5 * src[SRCIND3(is+2,js ,ks+2)] + + f6*f3*f5 * src[SRCIND3(is+3,js ,ks+2)] + + f1*f4*f5 * src[SRCIND3(is-2,js+1,ks+2)] + + f2*f4*f5 * src[SRCIND3(is-1,js+1,ks+2)] + + f3*f4*f5 * src[SRCIND3(is ,js+1,ks+2)] + + f4*f4*f5 * src[SRCIND3(is+1,js+1,ks+2)] + + f5*f4*f5 * src[SRCIND3(is+2,js+1,ks+2)] + + f6*f4*f5 * src[SRCIND3(is+3,js+1,ks+2)] + + f1*f5*f5 * src[SRCIND3(is-2,js+2,ks+2)] + + f2*f5*f5 * src[SRCIND3(is-1,js+2,ks+2)] + + f3*f5*f5 * src[SRCIND3(is ,js+2,ks+2)] + + f4*f5*f5 * src[SRCIND3(is+1,js+2,ks+2)] + + f5*f5*f5 * src[SRCIND3(is+2,js+2,ks+2)] + + f6*f5*f5 * src[SRCIND3(is+3,js+2,ks+2)] + + f1*f6*f5 * src[SRCIND3(is-2,js+3,ks+2)] + + f2*f6*f5 * src[SRCIND3(is-1,js+3,ks+2)] + + f3*f6*f5 * src[SRCIND3(is ,js+3,ks+2)] + + f4*f6*f5 * src[SRCIND3(is+1,js+3,ks+2)] + + f5*f6*f5 * src[SRCIND3(is+2,js+3,ks+2)] + + f6*f6*f5 * src[SRCIND3(is+3,js+3,ks+2)]; + T const res6 = + + f1*f1*f6 * src[SRCIND3(is-2,js-2,ks+3)] + + f2*f1*f6 * src[SRCIND3(is-1,js-2,ks+3)] + + 
f3*f1*f6 * src[SRCIND3(is ,js-2,ks+3)] + + f4*f1*f6 * src[SRCIND3(is+1,js-2,ks+3)] + + f5*f1*f6 * src[SRCIND3(is+2,js-2,ks+3)] + + f6*f1*f6 * src[SRCIND3(is+3,js-2,ks+3)] + + f1*f2*f6 * src[SRCIND3(is-2,js-1,ks+3)] + + f2*f2*f6 * src[SRCIND3(is-1,js-1,ks+3)] + + f3*f2*f6 * src[SRCIND3(is ,js-1,ks+3)] + + f4*f2*f6 * src[SRCIND3(is+1,js-1,ks+3)] + + f5*f2*f6 * src[SRCIND3(is+2,js-1,ks+3)] + + f6*f2*f6 * src[SRCIND3(is+3,js-1,ks+3)] + + f1*f3*f6 * src[SRCIND3(is-2,js ,ks+3)] + + f2*f3*f6 * src[SRCIND3(is-1,js ,ks+3)] + + f3*f3*f6 * src[SRCIND3(is ,js ,ks+3)] + + f4*f3*f6 * src[SRCIND3(is+1,js ,ks+3)] + + f5*f3*f6 * src[SRCIND3(is+2,js ,ks+3)] + + f6*f3*f6 * src[SRCIND3(is+3,js ,ks+3)] + + f1*f4*f6 * src[SRCIND3(is-2,js+1,ks+3)] + + f2*f4*f6 * src[SRCIND3(is-1,js+1,ks+3)] + + f3*f4*f6 * src[SRCIND3(is ,js+1,ks+3)] + + f4*f4*f6 * src[SRCIND3(is+1,js+1,ks+3)] + + f5*f4*f6 * src[SRCIND3(is+2,js+1,ks+3)] + + f6*f4*f6 * src[SRCIND3(is+3,js+1,ks+3)] + + f1*f5*f6 * src[SRCIND3(is-2,js+2,ks+3)] + + f2*f5*f6 * src[SRCIND3(is-1,js+2,ks+3)] + + f3*f5*f6 * src[SRCIND3(is ,js+2,ks+3)] + + f4*f5*f6 * src[SRCIND3(is+1,js+2,ks+3)] + + f5*f5*f6 * src[SRCIND3(is+2,js+2,ks+3)] + + f6*f5*f6 * src[SRCIND3(is+3,js+2,ks+3)] + + f1*f6*f6 * src[SRCIND3(is-2,js+3,ks+3)] + + f2*f6*f6 * src[SRCIND3(is-1,js+3,ks+3)] + + f3*f6*f6 * src[SRCIND3(is ,js+3,ks+3)] + + f4*f6*f6 * src[SRCIND3(is+1,js+3,ks+3)] + + f5*f6*f6 * src[SRCIND3(is+2,js+3,ks+3)] + + f6*f6*f6 * src[SRCIND3(is+3,js+3,ks+3)]; + dst[DSTIND3(id,jd,kd)] = res1 + res2 + res3 + res4 + res5 + res6; + // Monotonicity enforcement + if ((dst[DSTIND3(id,jd,kd)] > max8(src[SRCIND3(is ,js ,ks )], + src[SRCIND3(is+1,js ,ks )], + src[SRCIND3(is ,js+1,ks )], + src[SRCIND3(is ,js ,ks+1)], + src[SRCIND3(is+1,js+1,ks )], + src[SRCIND3(is+1,js ,ks+1)], + src[SRCIND3(is ,js+1,ks+1)], + src[SRCIND3(is+1,js+1,ks+1)]))|| + (dst[DSTIND3(id,jd,kd)] < min8(src[SRCIND3(is ,js ,ks )], + src[SRCIND3(is+1,js ,ks )], + src[SRCIND3(is ,js+1,ks )], + src[SRCIND3(is 
,js ,ks+1)], + src[SRCIND3(is+1,js+1,ks )], + src[SRCIND3(is+1,js ,ks+1)], + src[SRCIND3(is ,js+1,ks+1)], + src[SRCIND3(is+1,js+1,ks+1)]))) { + T const res1 = + + o1_f1*o1_f1*o1_f1 * src[SRCIND3(is ,js ,ks )] + + o1_f2*o1_f1*o1_f1 * src[SRCIND3(is+1,js ,ks )] + + o1_f1*o1_f2*o1_f1 * src[SRCIND3(is ,js+1,ks )] + + o1_f2*o1_f2*o1_f1 * src[SRCIND3(is+1,js+1,ks )]; + T const res2 = + + o1_f1*o1_f1*o1_f2 * src[SRCIND3(is ,js ,ks+1)] + + o1_f2*o1_f1*o1_f2 * src[SRCIND3(is+1,js ,ks+1)] + + o1_f1*o1_f2*o1_f2 * src[SRCIND3(is ,js+1,ks+1)] + + o1_f2*o1_f2*o1_f2 * src[SRCIND3(is+1,js+1,ks+1)]; + dst[DSTIND3(id,jd,kd)] = res1 + res2; + } + } + i = i+1; + id = id+1; + is = is+1; + if (i < regiext) goto l8110; + goto l911; + + // end i loop + l911: + j = j+1; + jd = jd+1; + js = js+1; + if (j < regjext) goto l810; + goto l91; + + // end j loop + l91: + k = k+1; + kd = kd+1; + ks = ks+1; + if (k < regkext) goto l80; + goto l9; + + // end k loop + l9:; + + } + + + // INSTANTIATE(T) expands to an explicit instantiation of prolongate_3d_o5_monotone_rf2 for element type T; NOTE(review): the instantiate include presumably invokes INSTANTIATE once per supported type and CARPET_NO_COMPLEX presumably leaves the complex types out -- confirm against that file. +#define INSTANTIATE(T) \ + template \ + void \ + prolongate_3d_o5_monotone_rf2 (T const * restrict const src, \ + ivect3 const & restrict srcext, \ + T * restrict const dst, \ + ivect3 const & restrict dstext, \ + ibbox3 const & restrict srcbbox, \ + ibbox3 const & restrict dstbbox, \ + ibbox3 const & restrict regbbox); +#define CARPET_NO_COMPLEX +#include "instantiate" +#undef CARPET_NO_COMPLEX +#undef INSTANTIATE + // Explicit specialization for CCTK_COMPLEX: its body only issues a level-0 CCTK_WARN and never reads src or writes dst; NOTE(review): level 0 is presumably fatal in Cactus, which would make this a hard stop for complex data -- confirm. + template <> + void + prolongate_3d_o5_monotone_rf2 (CCTK_COMPLEX const * restrict const src, + ivect3 const & restrict srcext, + CCTK_COMPLEX * restrict const dst, + ivect3 const & restrict dstext, + ibbox3 const & restrict srcbbox, + ibbox3 const & restrict dstbbox, + ibbox3 const & restrict regbbox) + { + CCTK_WARN(0, "This should never be called!"); + } + + +} // namespace CarpetLib diff --git a/Carpet/CarpetLib/src/prolongate_3d_o5_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_o5_rf2.cc index 71133b004..6cbde8cde 100644 --- a/Carpet/CarpetLib/src/prolongate_3d_o5_rf2.cc +++
b/Carpet/CarpetLib/src/prolongate_3d_o5_rf2.cc @@ -6,7 +6,7 @@ #include #include -#include "operator_prototypes.hh" +#include "operator_prototypes_3d.hh" #include "typeprops.hh" using namespace std; @@ -143,7 +143,7 @@ namespace CarpetLib { goto l81; // begin j loop - l80: + l80: j = 0; js = j0; jd = dstjoff; @@ -151,7 +151,7 @@ namespace CarpetLib { goto l801; // begin i loop - l800: + l800: i = 0; is = i0; id = dstioff; @@ -159,7 +159,7 @@ namespace CarpetLib { goto l8001; // kernel - l8000: + l8000: dst[DSTIND3(id,jd,kd)] = src[SRCIND3(is,js,ks)]; i = i+1; id = id+1; @@ -167,7 +167,7 @@ namespace CarpetLib { goto l900; // kernel - l8001: + l8001: dst[DSTIND3(id,jd,kd)] = + f1 * src[SRCIND3(is-2,js,ks)] + f2 * src[SRCIND3(is-1,js,ks)] @@ -182,14 +182,14 @@ namespace CarpetLib { goto l900; // end i loop - l900: + l900: j = j+1; jd = jd+1; if (j < regjext) goto l801; goto l90; // begin i loop - l801: + l801: i = 0; is = i0; id = dstioff; @@ -197,7 +197,7 @@ namespace CarpetLib { goto l8011; // kernel - l8010: + l8010: dst[DSTIND3(id,jd,kd)] = + f1 * src[SRCIND3(is,js-2,ks)] + f2 * src[SRCIND3(is,js-1,ks)] @@ -211,7 +211,7 @@ namespace CarpetLib { goto l901; // kernel - l8011: + l8011: dst[DSTIND3(id,jd,kd)] = + f1*f1 * src[SRCIND3(is-2,js-2,ks)] + f2*f1 * src[SRCIND3(is-1,js-2,ks)] @@ -256,7 +256,7 @@ namespace CarpetLib { goto l901; // end i loop - l901: + l901: j = j+1; jd = jd+1; js = js+1; @@ -264,14 +264,14 @@ namespace CarpetLib { goto l90; // end j loop - l90: + l90: k = k+1; kd = kd+1; if (k < regkext) goto l81; goto l9; // begin j loop - l81: + l81: j = 0; js = j0; jd = dstjoff; @@ -279,7 +279,7 @@ namespace CarpetLib { goto l811; // begin i loop - l810: + l810: i = 0; is = i0; id = dstioff; @@ -287,7 +287,7 @@ namespace CarpetLib { goto l8101; // kernel - l8100: + l8100: dst[DSTIND3(id,jd,kd)] = + f1 * src[SRCIND3(is,js,ks-2)] + f2 * src[SRCIND3(is,js,ks-1)] @@ -301,7 +301,7 @@ namespace CarpetLib { goto l910; // kernel - l8101: + l8101: 
dst[DSTIND3(id,jd,kd)] = + f1*f1 * src[SRCIND3(is-2,js,ks-2)] + f2*f1 * src[SRCIND3(is-1,js,ks-2)] @@ -346,14 +346,14 @@ namespace CarpetLib { goto l910; // end i loop - l910: + l910: j = j+1; jd = jd+1; if (j < regjext) goto l811; goto l91; // begin i loop - l811: + l811: i = 0; is = i0; id = dstioff; @@ -361,7 +361,7 @@ namespace CarpetLib { goto l8111; // kernel - l8110: + l8110: dst[DSTIND3(id,jd,kd)] = + f1*f1 * src[SRCIND3(is,js-2,ks-2)] + f2*f1 * src[SRCIND3(is,js-1,ks-2)] @@ -405,7 +405,7 @@ namespace CarpetLib { goto l911; // kernel - l8111: + l8111: { T const res1 = + f1*f1*f1 * src[SRCIND3(is-2,js-2,ks-2)] @@ -638,7 +638,7 @@ namespace CarpetLib { goto l911; // end i loop - l911: + l911: j = j+1; jd = jd+1; js = js+1; @@ -646,7 +646,7 @@ namespace CarpetLib { goto l91; // end j loop - l91: + l91: k = k+1; kd = kd+1; ks = ks+1; @@ -654,7 +654,7 @@ namespace CarpetLib { goto l9; // end k loop - l9:; + l9:; } diff --git a/Carpet/CarpetLib/src/prolongate_3d_o7_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_o7_rf2.cc index b65f1632d..44ee78d01 100644 --- a/Carpet/CarpetLib/src/prolongate_3d_o7_rf2.cc +++ b/Carpet/CarpetLib/src/prolongate_3d_o7_rf2.cc @@ -6,7 +6,7 @@ #include #include -#include "operator_prototypes.hh" +#include "operator_prototypes_3d.hh" #include "typeprops.hh" using namespace std; @@ -227,7 +227,7 @@ namespace CarpetLib { goto l81; // begin j loop - l80: + l80: j = 0; js = j0; jd = dstjoff; @@ -235,7 +235,7 @@ namespace CarpetLib { goto l801; // begin i loop - l800: + l800: i = 0; is = i0; id = dstioff; @@ -243,7 +243,7 @@ namespace CarpetLib { goto l8001; // kernel - l8000: + l8000: dst[DSTIND3(id,jd,kd)] = interp0 (& src[SRCIND3(is,js,ks)]); i = i+1; id = id+1; @@ -251,7 +251,7 @@ namespace CarpetLib { goto l900; // kernel - l8001: + l8001: dst[DSTIND3(id,jd,kd)] = interp1 (& src[SRCIND3(is-3,js,ks)], srcdi); i = i+1; id = id+1; @@ -260,14 +260,14 @@ namespace CarpetLib { goto l900; // end i loop - l900: + l900: j = j+1; jd = jd+1; if (j < 
regjext) goto l801; goto l90; // begin i loop - l801: + l801: i = 0; is = i0; id = dstioff; @@ -275,7 +275,7 @@ namespace CarpetLib { goto l8011; // kernel - l8010: + l8010: dst[DSTIND3(id,jd,kd)] = interp1 (& src[SRCIND3(is,js-3,ks)], srcdj); i = i+1; id = id+1; @@ -283,7 +283,7 @@ namespace CarpetLib { goto l901; // kernel - l8011: + l8011: dst[DSTIND3(id,jd,kd)] = interp2 (& src[SRCIND3(is-3,js-3,ks)], srcdi, srcdj); i = i+1; @@ -293,7 +293,7 @@ namespace CarpetLib { goto l901; // end i loop - l901: + l901: j = j+1; jd = jd+1; js = js+1; @@ -301,14 +301,14 @@ namespace CarpetLib { goto l90; // end j loop - l90: + l90: k = k+1; kd = kd+1; if (k < regkext) goto l81; goto l9; // begin j loop - l81: + l81: j = 0; js = j0; jd = dstjoff; @@ -316,7 +316,7 @@ namespace CarpetLib { goto l811; // begin i loop - l810: + l810: i = 0; is = i0; id = dstioff; @@ -324,7 +324,7 @@ namespace CarpetLib { goto l8101; // kernel - l8100: + l8100: dst[DSTIND3(id,jd,kd)] = interp1 (& src[SRCIND3(is,js,ks-3)], srcdk); i = i+1; id = id+1; @@ -332,7 +332,7 @@ namespace CarpetLib { goto l910; // kernel - l8101: + l8101: dst[DSTIND3(id,jd,kd)] = interp2 (& src[SRCIND3(is-3,js,ks-3)], srcdi, srcdj); i = i+1; @@ -342,14 +342,14 @@ namespace CarpetLib { goto l910; // end i loop - l910: + l910: j = j+1; jd = jd+1; if (j < regjext) goto l811; goto l91; // begin i loop - l811: + l811: i = 0; is = i0; id = dstioff; @@ -357,7 +357,7 @@ namespace CarpetLib { goto l8111; // kernel - l8110: + l8110: dst[DSTIND3(id,jd,kd)] = interp2 (& src[SRCIND3(is,js-3,ks-3)], srcdj, srcdk); i = i+1; @@ -366,7 +366,7 @@ namespace CarpetLib { goto l911; // kernel - l8111: + l8111: { dst[DSTIND3(id,jd,kd)] = interp3 (& src[SRCIND3(is-3,js-3,ks-3)], srcdi, srcdj, srcdk); @@ -378,7 +378,7 @@ namespace CarpetLib { goto l911; // end i loop - l911: + l911: j = j+1; jd = jd+1; js = js+1; @@ -386,7 +386,7 @@ namespace CarpetLib { goto l91; // end j loop - l91: + l91: k = k+1; kd = kd+1; ks = ks+1; @@ -394,7 +394,7 @@ 
namespace CarpetLib { goto l9; // end k loop - l9:; + l9:; } diff --git a/Carpet/CarpetLib/src/prolongate_3d_o9_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_o9_rf2.cc index a01a3bd35..04e9a97b8 100644 --- a/Carpet/CarpetLib/src/prolongate_3d_o9_rf2.cc +++ b/Carpet/CarpetLib/src/prolongate_3d_o9_rf2.cc @@ -6,7 +6,7 @@ #include #include -#include "operator_prototypes.hh" +#include "operator_prototypes_3d.hh" #include "typeprops.hh" using namespace std; @@ -229,7 +229,7 @@ namespace CarpetLib { goto l81; // begin j loop - l80: + l80: j = 0; js = j0; jd = dstjoff; @@ -237,7 +237,7 @@ namespace CarpetLib { goto l801; // begin i loop - l800: + l800: i = 0; is = i0; id = dstioff; @@ -245,7 +245,7 @@ namespace CarpetLib { goto l8001; // kernel - l8000: + l8000: dst[DSTIND3(id,jd,kd)] = interp0 (& src[SRCIND3(is,js,ks)]); i = i+1; id = id+1; @@ -253,7 +253,7 @@ namespace CarpetLib { goto l900; // kernel - l8001: + l8001: dst[DSTIND3(id,jd,kd)] = interp1 (& src[SRCIND3(is-4,js,ks)], srcdi); i = i+1; id = id+1; @@ -262,14 +262,14 @@ namespace CarpetLib { goto l900; // end i loop - l900: + l900: j = j+1; jd = jd+1; if (j < regjext) goto l801; goto l90; // begin i loop - l801: + l801: i = 0; is = i0; id = dstioff; @@ -277,7 +277,7 @@ namespace CarpetLib { goto l8011; // kernel - l8010: + l8010: dst[DSTIND3(id,jd,kd)] = interp1 (& src[SRCIND3(is,js-4,ks)], srcdj); i = i+1; id = id+1; @@ -285,7 +285,7 @@ namespace CarpetLib { goto l901; // kernel - l8011: + l8011: dst[DSTIND3(id,jd,kd)] = interp2 (& src[SRCIND3(is-4,js-4,ks)], srcdi, srcdj); i = i+1; @@ -295,7 +295,7 @@ namespace CarpetLib { goto l901; // end i loop - l901: + l901: j = j+1; jd = jd+1; js = js+1; @@ -303,14 +303,14 @@ namespace CarpetLib { goto l90; // end j loop - l90: + l90: k = k+1; kd = kd+1; if (k < regkext) goto l81; goto l9; // begin j loop - l81: + l81: j = 0; js = j0; jd = dstjoff; @@ -318,7 +318,7 @@ namespace CarpetLib { goto l811; // begin i loop - l810: + l810: i = 0; is = i0; id = dstioff; @@ -326,7 
+326,7 @@ namespace CarpetLib { goto l8101; // kernel - l8100: + l8100: dst[DSTIND3(id,jd,kd)] = interp1 (& src[SRCIND3(is,js,ks-4)], srcdk); i = i+1; id = id+1; @@ -334,7 +334,7 @@ namespace CarpetLib { goto l910; // kernel NOTE(review): the base index below is offset in i and k (is-4, ks-4) but the strides passed are srcdi and srcdj; the sibling kernels pair js-4 with srcdj and ks-4 with srcdk, so srcdk looks intended here -- confirm (the o7 hunk in this patch shows the same pairing). - l8101: + l8101: dst[DSTIND3(id,jd,kd)] = interp2 (& src[SRCIND3(is-4,js,ks-4)], srcdi, srcdj); i = i+1; @@ -344,14 +344,14 @@ namespace CarpetLib { goto l910; // end i loop - l910: + l910: j = j+1; jd = jd+1; if (j < regjext) goto l811; goto l91; // begin i loop - l811: + l811: i = 0; is = i0; id = dstioff; @@ -359,7 +359,7 @@ namespace CarpetLib { goto l8111; // kernel - l8110: + l8110: dst[DSTIND3(id,jd,kd)] = interp2 (& src[SRCIND3(is,js-4,ks-4)], srcdj, srcdk); i = i+1; @@ -368,7 +368,7 @@ namespace CarpetLib { goto l911; // kernel - l8111: + l8111: { dst[DSTIND3(id,jd,kd)] = interp3 (& src[SRCIND3(is-4,js-4,ks-4)], srcdi, srcdj, srcdk); @@ -380,7 +380,7 @@ namespace CarpetLib { goto l911; // end i loop - l911: + l911: j = j+1; jd = jd+1; js = js+1; @@ -388,7 +388,7 @@ namespace CarpetLib { goto l91; // end j loop - l91: + l91: k = k+1; kd = kd+1; ks = ks+1; @@ -396,7 +396,7 @@ namespace CarpetLib { goto l9; // end k loop - l9:; + l9:; } diff --git a/Carpet/CarpetLib/src/prolongate_4d_o1_rf2.cc b/Carpet/CarpetLib/src/prolongate_4d_o1_rf2.cc new file mode 100644 index 000000000..4c8022916 --- /dev/null +++ b/Carpet/CarpetLib/src/prolongate_4d_o1_rf2.cc @@ -0,0 +1,602 @@ +#include +#include +#include +#include + +#include +#include + +#include "operator_prototypes_4d.hh" +#include "typeprops.hh" + +using namespace std; + + + +namespace CarpetLib { + + + // SRCIND4 and DSTIND4 forward (i,j,k,l) together with the four source or destination extents to index4; the extent names (srciext through dstlext) are not macro parameters and must be in scope wherever the macros are expanded. NOTE(review): index4 presumably returns the flattened array offset -- confirm against its definition. +#define SRCIND4(i,j,k,l) \ + index4 (i, j, k, l, \ + srciext, srcjext, srckext, srclext) +#define DSTIND4(i,j,k,l) \ + index4 (i, j, k, l, \ + dstiext, dstjext, dstkext, dstlext) + + + + template + void + prolongate_4d_o1_rf2 (T const * restrict const src, + ivect4 const & restrict srcext, + T * restrict const dst, + ivect4 const & restrict dstext, + ibbox4 const & restrict srcbbox, +
ibbox4 const & restrict dstbbox, + ibbox4 const & restrict regbbox) + { + typedef typename typeprops::real RT; + + + + if (any (srcbbox.stride() <= regbbox.stride() or + dstbbox.stride() != regbbox.stride())) + { + CCTK_WARN (0, "Internal error: strides disagree"); + } + + if (any (srcbbox.stride() != reffact2 * dstbbox.stride())) { + CCTK_WARN (0, "Internal error: source strides are not twice the destination strides"); + } + + // This could be handled, but is likely to point to an error + // elsewhere + if (regbbox.empty()) { + CCTK_WARN (0, "Internal error: region extent is empty"); + } + + + + ivect4 const regext = regbbox.shape() / regbbox.stride(); + assert (all ((regbbox.lower() - srcbbox.lower()) % regbbox.stride() == 0)); + ivect4 const srcoff = (regbbox.lower() - srcbbox.lower()) / regbbox.stride(); + assert (all ((regbbox.lower() - dstbbox.lower()) % regbbox.stride() == 0)); + ivect4 const dstoff = (regbbox.lower() - dstbbox.lower()) / regbbox.stride(); + + + + bvect4 const needoffsetlo = srcoff % reffact2 != 0 or regext > 1; + bvect4 const needoffsethi = (srcoff + regext - 1) % reffact2 != 0 or regext > 1; + ivect4 const offsetlo = either (needoffsetlo, 1, 0); + ivect4 const offsethi = either (needoffsethi, 1, 0); + + + + if (not regbbox.expand(offsetlo, offsethi).is_contained_in(srcbbox) or + not regbbox .is_contained_in(dstbbox)) + { + CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); + } + + if (any (srcext != srcbbox.shape() / srcbbox.stride() or + dstext != dstbbox.shape() / dstbbox.stride())) + { + CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); + } + + + + size_t const srciext = srcext[0]; + size_t const srcjext = srcext[1]; + size_t const srckext = srcext[2]; + size_t const srclext = srcext[3]; + + size_t const dstiext = dstext[0]; + size_t const dstjext = dstext[1]; + size_t const dstkext = dstext[2]; + size_t const dstlext = dstext[3]; + + size_t const regiext = regext[0]; + size_t 
const regjext = regext[1]; + size_t const regkext = regext[2]; + size_t const reglext = regext[3]; + + size_t const srcioff = srcoff[0]; + size_t const srcjoff = srcoff[1]; + size_t const srckoff = srcoff[2]; + size_t const srcloff = srcoff[3]; + + size_t const dstioff = dstoff[0]; + size_t const dstjoff = dstoff[1]; + size_t const dstkoff = dstoff[2]; + size_t const dstloff = dstoff[3]; + + + + size_t const fi = srcioff % 2; + size_t const fj = srcjoff % 2; + size_t const fk = srckoff % 2; + size_t const fl = srcloff % 2; + + size_t const i0 = srcioff / 2; + size_t const j0 = srcjoff / 2; + size_t const k0 = srckoff / 2; + size_t const l0 = srcloff / 2; + + RT const one = 1; + + RT const f1 = one/2; + RT const f2 = one/2; + + + + // Loop over fine region + // Label scheme: l 8 fl fk fj fi + + size_t is, js, ks, ls; + size_t id, jd, kd, ld; + size_t i, j, k, l; + + // begin l loop + l = 0; + ls = l0; + ld = dstloff; + if (fl == 0) goto l80; + goto l81; + + // begin k loop + l80: + k = 0; + ks = k0; + kd = dstkoff; + if (fk == 0) goto l800; + goto l801; + + // begin j loop + l800: + j = 0; + js = j0; + jd = dstjoff; + if (fj == 0) goto l8000; + goto l8001; + + // begin i loop + l8000: + i = 0; + is = i0; + id = dstioff; + if (fi == 0) goto l80000; + goto l80001; + + // kernel + l80000: + dst[DSTIND4(id,jd,kd,ld)] = + + src[SRCIND4(is,js,ks,ls)]; + i = i+1; + id = id+1; + if (i < regiext) goto l80001; + goto l9000; + + // kernel + l80001: + dst[DSTIND4(id,jd,kd,ld)] = + + f1 * src[SRCIND4(is ,js,ks,ls)] + + f2 * src[SRCIND4(is+1,js,ks,ls)]; + i = i+1; + id = id+1; + is = is+1; + if (i < regiext) goto l80000; + goto l9000; + + // end i loop + l9000: + j = j+1; + jd = jd+1; + if (j < regjext) goto l8001; + goto l900; + + // begin i loop + l8001: + i = 0; + is = i0; + id = dstioff; + if (fi == 0) goto l80010; + goto l80011; + + // kernel + l80010: + dst[DSTIND4(id,jd,kd,ld)] = + + f1 * src[SRCIND4(is,js ,ks,ls)] + + f2 * src[SRCIND4(is,js+1,ks,ls)]; + i = i+1; + id = 
id+1; + if (i < regiext) goto l80011; + goto l9001; + + // kernel + l80011: + dst[DSTIND4(id,jd,kd,ld)] = + + f1*f1 * src[SRCIND4(is ,js ,ks,ls)] + + f2*f1 * src[SRCIND4(is+1,js ,ks,ls)] + + f1*f2 * src[SRCIND4(is ,js+1,ks,ls)] + + f2*f2 * src[SRCIND4(is+1,js+1,ks,ls)]; + i = i+1; + id = id+1; + is = is+1; + if (i < regiext) goto l80010; + goto l9001; + + // end i loop + l9001: + j = j+1; + jd = jd+1; + js = js+1; + if (j < regjext) goto l8000; + goto l900; + + // end j loop + l900: + k = k+1; + kd = kd+1; + if (k < regkext) goto l800; + goto l90; + + // begin j loop + l801: + j = 0; + js = j0; + jd = dstjoff; + if (fj == 0) goto l8010; + goto l8011; + + // begin i loop + l8010: + i = 0; + is = i0; + id = dstioff; + if (fi == 0) goto l80100; + goto l80101; + + // kernel + l80100: + dst[DSTIND4(id,jd,kd,ld)] = + + f1 * src[SRCIND4(is,js,ks ,ls)] + + f2 * src[SRCIND4(is,js,ks+1,ls)]; + i = i+1; + id = id+1; + if (i < regiext) goto l80101; + goto l9010; + + // kernel + l80101: + dst[DSTIND4(id,jd,kd,ld)] = + + f1*f1 * src[SRCIND4(is ,js,ks ,ls)] + + f2*f1 * src[SRCIND4(is+1,js,ks ,ls)] + + f1*f2 * src[SRCIND4(is ,js,ks+1,ls)] + + f2*f2 * src[SRCIND4(is+1,js,ks+1,ls)]; + i = i+1; + id = id+1; + is = is+1; + if (i < regiext) goto l80100; + goto l9010; + + // end i loop + l9010: + j = j+1; + jd = jd+1; + if (j < regjext) goto l8011; + goto l901; + + // begin i loop + l8011: + i = 0; + is = i0; + id = dstioff; + if (fi == 0) goto l80110; + goto l80111; + + // kernel + l80110: + dst[DSTIND4(id,jd,kd,ld)] = + + f1*f1 * src[SRCIND4(is,js ,ks ,ls)] + + f2*f1 * src[SRCIND4(is,js+1,ks ,ls)] + + f1*f2 * src[SRCIND4(is,js ,ks+1,ls)] + + f2*f2 * src[SRCIND4(is,js+1,ks+1,ls)]; + i = i+1; + id = id+1; + if (i < regiext) goto l80111; + goto l9011; + + // kernel + l80111: + dst[DSTIND4(id,jd,kd,ld)] = + + f1*f1*f1 * src[SRCIND4(is ,js ,ks ,ls)] + + f2*f1*f1 * src[SRCIND4(is+1,js ,ks ,ls)] + + f1*f2*f1 * src[SRCIND4(is ,js+1,ks ,ls)] + + f2*f2*f1 * src[SRCIND4(is+1,js+1,ks ,ls)] + + 
f1*f1*f2 * src[SRCIND4(is ,js ,ks+1,ls)] + + f2*f1*f2 * src[SRCIND4(is+1,js ,ks+1,ls)] + + f1*f2*f2 * src[SRCIND4(is ,js+1,ks+1,ls)] + + f2*f2*f2 * src[SRCIND4(is+1,js+1,ks+1,ls)]; + i = i+1; + id = id+1; + is = is+1; + if (i < regiext) goto l80110; + goto l9011; + + // end i loop + l9011: + j = j+1; + jd = jd+1; + js = js+1; + if (j < regjext) goto l8010; + goto l901; + + // end j loop + l901: + k = k+1; + kd = kd+1; + ks = ks+1; + if (k < regkext) goto l800; + goto l90; + + // end k loop + l90: + l = l+1; + ld = ld+1; + ls = ls+1; + if (l < reglext) goto l81; + goto l80; + + // begin k loop + l81: + k = 0; + ks = k0; + kd = dstkoff; + if (fk == 0) goto l810; + goto l811; + + // begin j loop + l810: + j = 0; + js = j0; + jd = dstjoff; + if (fj == 0) goto l8100; + goto l8101; + + // begin i loop + l8100: + i = 0; + is = i0; + id = dstioff; + if (fi == 0) goto l81000; + goto l81001; + + // kernel + l81000: + dst[DSTIND4(id,jd,kd,ld)] = + + f1 * src[SRCIND4(is,js,ks,ls )] + + f2 * src[SRCIND4(is,js,ks,ls+1)]; + i = i+1; + id = id+1; + if (i < regiext) goto l81001; + goto l9100; + + // kernel + l81001: + dst[DSTIND4(id,jd,kd,ld)] = + + f1*f1 * src[SRCIND4(is ,js,ks,ls )] + + f2*f1 * src[SRCIND4(is+1,js,ks,ls )] + + f1*f2 * src[SRCIND4(is ,js,ks,ls+1)] + + f2*f2 * src[SRCIND4(is+1,js,ks,ls+1)]; + i = i+1; + id = id+1; + is = is+1; + if (i < regiext) goto l81000; + goto l9100; + + // end i loop + l9100: + j = j+1; + jd = jd+1; + if (j < regjext) goto l8101; + goto l910; + + // begin i loop + l8101: + i = 0; + is = i0; + id = dstioff; + if (fi == 0) goto l81010; + goto l81011; + + // kernel + l81010: + dst[DSTIND4(id,jd,kd,ld)] = + + f1*f1 * src[SRCIND4(is,js ,ks,ls )] + + f2*f1 * src[SRCIND4(is,js+1,ks,ls )] + + f1*f2 * src[SRCIND4(is,js ,ks,ls+1)] + + f2*f2 * src[SRCIND4(is,js+1,ks,ls+1)]; + i = i+1; + id = id+1; + if (i < regiext) goto l81011; + goto l9101; + + // kernel + l81011: + dst[DSTIND4(id,jd,kd,ld)] = + + f1*f1*f1 * src[SRCIND4(is ,js ,ks,ls )] + + f2*f1*f1 * 
src[SRCIND4(is+1,js ,ks,ls )] + + f1*f2*f1 * src[SRCIND4(is ,js+1,ks,ls )] + + f2*f2*f1 * src[SRCIND4(is+1,js+1,ks,ls )] + + f1*f1*f2 * src[SRCIND4(is ,js ,ks,ls+1)] + + f2*f1*f2 * src[SRCIND4(is+1,js ,ks,ls+1)] + + f1*f2*f2 * src[SRCIND4(is ,js+1,ks,ls+1)] + + f2*f2*f2 * src[SRCIND4(is+1,js+1,ks,ls+1)]; + i = i+1; + id = id+1; + is = is+1; + if (i < regiext) goto l81010; + goto l9101; + + // end i loop + l9101: + j = j+1; + jd = jd+1; + js = js+1; + if (j < regjext) goto l8100; + goto l910; + + // end j loop + l910: + k = k+1; + kd = kd+1; + if (k < regkext) goto l810; + goto l91; + + // begin j loop + l811: + j = 0; + js = j0; + jd = dstjoff; + if (fj == 0) goto l8110; + goto l8111; + + // begin i loop + l8110: + i = 0; + is = i0; + id = dstioff; + if (fi == 0) goto l81100; + goto l81101; + + // kernel + l81100: + dst[DSTIND4(id,jd,kd,ld)] = + + f1*f1 * src[SRCIND4(is,js,ks ,ls )] + + f2*f1 * src[SRCIND4(is,js,ks+1,ls )] + + f1*f2 * src[SRCIND4(is,js,ks ,ls+1)] + + f2*f2 * src[SRCIND4(is,js,ks+1,ls+1)]; + i = i+1; + id = id+1; + if (i < regiext) goto l81101; + goto l9110; + + // kernel + l81101: + dst[DSTIND4(id,jd,kd,ld)] = + + f1*f1*f1 * src[SRCIND4(is ,js,ks ,ls )] + + f2*f1*f1 * src[SRCIND4(is+1,js,ks ,ls )] + + f1*f2*f1 * src[SRCIND4(is ,js,ks+1,ls )] + + f2*f2*f1 * src[SRCIND4(is+1,js,ks+1,ls )] + + f1*f1*f2 * src[SRCIND4(is ,js,ks ,ls+1)] + + f2*f1*f2 * src[SRCIND4(is+1,js,ks ,ls+1)] + + f1*f2*f2 * src[SRCIND4(is ,js,ks+1,ls+1)] + + f2*f2*f2 * src[SRCIND4(is+1,js,ks+1,ls+1)]; + i = i+1; + id = id+1; + is = is+1; + if (i < regiext) goto l81100; + goto l9110; + + // end i loop + l9110: + j = j+1; + jd = jd+1; + if (j < regjext) goto l8111; + goto l911; + + // begin i loop + l8111: + i = 0; + is = i0; + id = dstioff; + if (fi == 0) goto l81110; + goto l81111; + + // kernel + l81110: + dst[DSTIND4(id,jd,kd,ld)] = + + f1*f1*f1*f1 * src[SRCIND4(is,js ,ks ,ls )] + + f2*f1*f1*f1 * src[SRCIND4(is,js+1,ks ,ls )] + + f1*f2*f2*f1 * src[SRCIND4(is,js ,ks+1,ls )] + + 
f2*f2*f2*f1 * src[SRCIND4(is,js+1,ks+1,ls )] + + f1*f1*f1*f2 * src[SRCIND4(is,js ,ks ,ls+1)] + + f2*f1*f1*f2 * src[SRCIND4(is,js+1,ks ,ls+1)] + + f1*f2*f2*f2 * src[SRCIND4(is,js ,ks+1,ls+1)] + + f2*f2*f2*f2 * src[SRCIND4(is,js+1,ks+1,ls+1)]; + i = i+1; + id = id+1; + if (i < regiext) goto l81111; + goto l9111; + + // kernel + l81111: + dst[DSTIND4(id,jd,kd,ld)] = + + f1*f1*f1*f1 * src[SRCIND4(is ,js ,ks ,ls )] + + f2*f1*f1*f1 * src[SRCIND4(is+1,js ,ks ,ls )] + + f1*f2*f1*f1 * src[SRCIND4(is ,js+1,ks ,ls )] + + f2*f2*f1*f1 * src[SRCIND4(is+1,js+1,ks ,ls )] + + f1*f1*f2*f1 * src[SRCIND4(is ,js ,ks+1,ls )] + + f2*f1*f2*f1 * src[SRCIND4(is+1,js ,ks+1,ls )] + + f1*f2*f2*f1 * src[SRCIND4(is ,js+1,ks+1,ls )] + + f2*f2*f2*f1 * src[SRCIND4(is+1,js+1,ks+1,ls )] + + f1*f1*f1*f2 * src[SRCIND4(is ,js ,ks ,ls+1)] + + f2*f1*f1*f2 * src[SRCIND4(is+1,js ,ks ,ls+1)] + + f1*f2*f1*f2 * src[SRCIND4(is ,js+1,ks ,ls+1)] + + f2*f2*f1*f2 * src[SRCIND4(is+1,js+1,ks ,ls+1)] + + f1*f1*f2*f2 * src[SRCIND4(is ,js ,ks+1,ls+1)] + + f2*f1*f2*f2 * src[SRCIND4(is+1,js ,ks+1,ls+1)] + + f1*f2*f2*f2 * src[SRCIND4(is ,js+1,ks+1,ls+1)] + + f2*f2*f2*f2 * src[SRCIND4(is+1,js+1,ks+1,ls+1)]; + i = i+1; + id = id+1; + is = is+1; + if (i < regiext) goto l81110; + goto l9111; + + // end i loop + l9111: + j = j+1; + jd = jd+1; + js = js+1; + if (j < regjext) goto l8110; + goto l911; + + // end j loop + l911: + k = k+1; + kd = kd+1; + ks = ks+1; + if (k < regkext) goto l810; + goto l91; + + // end k loop + l91: + l = l+1; + ld = ld+1; + ls = ls+1; + if (l < reglext) goto l81; + goto l81; + + } + + + +#define INSTANTIATE(T) \ + template \ + void \ + prolongate_4d_o1_rf2 (T const * restrict const src, \ + ivect4 const & restrict srcext, \ + T * restrict const dst, \ + ivect4 const & restrict dstext, \ + ibbox4 const & restrict srcbbox, \ + ibbox4 const & restrict dstbbox, \ + ibbox4 const & restrict regbbox); +#include "instantiate" +#undef INSTANTIATE + + + +} // CarpetLib diff --git 
a/Carpet/CarpetLib/src/region.cc b/Carpet/CarpetLib/src/region.cc index 0230d373d..860d24c6b 100644 --- a/Carpet/CarpetLib/src/region.cc +++ b/Carpet/CarpetLib/src/region.cc @@ -251,6 +251,67 @@ operator<< (ostream & os, region_t const & reg)
+// Create an MPI datatype for a pseudoregion (built on the first call, then reused)
+MPI_Datatype
+mpi_datatype (pseudoregion_t const &)
+{
+  static bool initialised = false;
+  static MPI_Datatype newtype;  // holds the result once initialised is true
+  if (not initialised) {
+    static pseudoregion_t s;    // only used to take sizes and member offsets
+#define ENTRY(type, name)                                               \
+    {                                                                   \
+      sizeof s.name / sizeof(type), /* count elements */                \
+      (char*)&s.name - (char*)&s, /* offsetof doesn't work (why?) */    \
+      dist::mpi_datatype(), /* find MPI datatype */                     \
+      STRINGIFY(name), /* field name */                                 \
+      STRINGIFY(type), /* type name */                                  \
+    }
+    dist::mpi_struct_descr_t const descr[] = {
+      ENTRY(int, extent),
+      ENTRY(int, component),
+      {1, sizeof s, MPI_UB, "MPI_UB", "MPI_UB"} // NOTE(review): presumably marks the total extent; MPI_UB was removed in MPI-3.0 -- confirm
+    };
+#undef ENTRY
+    newtype =
+      dist::create_mpi_datatype (sizeof descr / sizeof descr[0], descr,
+                                 "pseudoregion_t", sizeof s);
+    initialised = true;
+  }
+  return newtype;
+}
+// Create an MPI datatype for a sendrecv_pseudoregion_t (built on the first call, then reused)
+MPI_Datatype
+mpi_datatype (sendrecv_pseudoregion_t const &)
+{
+  static bool initialised = false;
+  static MPI_Datatype newtype;  // holds the result once initialised is true
+  if (not initialised) {
+    static sendrecv_pseudoregion_t s; // only used to take sizes and member offsets
+#define ENTRY(type, name)                                               \
+    {                                                                   \
+      sizeof s.name / sizeof(type), /* count elements */                \
+      (char*)&s.name - (char*)&s, /* offsetof doesn't work (why?) */    \
+      dist::mpi_datatype(), /* find MPI datatype */                     \
+      STRINGIFY(name), /* field name */                                 \
+      STRINGIFY(type), /* type name */                                  \
+    }
+    dist::mpi_struct_descr_t const descr[] = {
+      ENTRY(pseudoregion_t, send),
+      ENTRY(pseudoregion_t, recv),
+      {1, sizeof s, MPI_UB, "MPI_UB", "MPI_UB"} // NOTE(review): presumably marks the total extent; MPI_UB was removed in MPI-3.0 -- confirm
+    };
+#undef ENTRY
+    newtype =
+      dist::create_mpi_datatype (sizeof descr / sizeof descr[0], descr,
+                                 "sendrecv_pseudoregion_t", sizeof s);
+    initialised = true;
+  }
+  return newtype;
+}
+
+
+
 // Compare two pseudoregions for equality.
bool operator== (pseudoregion_t const & a, pseudoregion_t const & b) @@ -262,9 +323,45 @@ operator== (pseudoregion_t const & a, pseudoregion_t const & b) +istream & operator>> (istream & is, pseudoregion_t & p) +{ + try { + skipws (is); + consume (is, "(ext:"); + is >> p.extent; + skipws (is); + consume (is, ",c:"); + is >> p.component; + skipws (is); + consume (is, ")"); + } catch (input_error & err) { + cout << "Input error while reading a pseudoregion_t" << endl; + throw err; + } + return is; +} + +istream & operator>> (istream & is, sendrecv_pseudoregion_t & srp) +{ + try { + skipws (is); + consume (is, "(send:"); + is >> srp.send; + consume (is, ",recv:"); + is >> srp.recv; + consume (is, ")"); + } catch (input_error & err) { + cout << "Input error while reading a sendrecv_pseudoregion_t" << endl; + throw err; + } + return is; +} + + + ostream & operator<< (ostream & os, pseudoregion_t const & p) { - return os << p.extent << "/c:" << p.component; + return os << "(ext:" << p.extent << ",c:" << p.component << ")"; } ostream & operator<< (ostream & os, sendrecv_pseudoregion_t const & srp) diff --git a/Carpet/CarpetLib/src/region.hh b/Carpet/CarpetLib/src/region.hh index 66037bdc7..ed14b024d 100644 --- a/Carpet/CarpetLib/src/region.hh +++ b/Carpet/CarpetLib/src/region.hh @@ -5,6 +5,7 @@ #include #include "defs.hh" +#include "dist.hh" #include "bbox.hh" #include "fulltree.hh" #include "vect.hh" @@ -24,12 +25,16 @@ struct region_t { region_t & operator= (region_t const & a); ~region_t (); - bool invariant () const; + bool invariant () const CCTK_ATTRIBUTE_PURE; }; -bool operator== (region_t const & a, region_t const & b); +bool operator== (region_t const & a, region_t const & b) + CCTK_ATTRIBUTE_PURE; +inline +bool operator!= (region_t const & a, region_t const & b) + CCTK_ATTRIBUTE_PURE; inline bool operator!= (region_t const & a, region_t const & b) { @@ -44,7 +49,7 @@ combine_regions (vector const & oldregs, -size_t memoryof (region_t const & reg); +size_t 
memoryof (region_t const & reg) CCTK_ATTRIBUTE_PURE; istream & operator>> (istream & is, region_t & reg); ostream & operator<< (ostream & os, region_t const & reg); @@ -61,19 +66,37 @@ struct pseudoregion_t { pseudoregion_t () { } + pseudoregion_t (pseudoregion_t const & p) + : extent (p.extent), component (p.component) + { + } pseudoregion_t (ibbox const & extent_, int const component_) : extent (extent_), component (component_) { } }; -bool operator== (pseudoregion_t const & a, pseudoregion_t const & b); +MPI_Datatype mpi_datatype (pseudoregion_t const &) + CCTK_ATTRIBUTE_CONST; +namespace dist { + template<> inline MPI_Datatype mpi_datatype () + CCTK_ATTRIBUTE_CONST; + template<> inline MPI_Datatype mpi_datatype () + { pseudoregion_t dummy; return mpi_datatype(dummy); } +} + +bool operator== (pseudoregion_t const & a, pseudoregion_t const & b) + CCTK_ATTRIBUTE_PURE; +inline +bool operator!= (pseudoregion_t const & a, pseudoregion_t const & b) + CCTK_ATTRIBUTE_PURE; inline bool operator!= (pseudoregion_t const & a, pseudoregion_t const & b) { return not (a == b); } +inline size_t memoryof (pseudoregion_t const & p) CCTK_ATTRIBUTE_PURE; inline size_t memoryof (pseudoregion_t const & p) { return @@ -81,6 +104,7 @@ inline size_t memoryof (pseudoregion_t const & p) memoryof (p.component); } +istream & operator>> (istream & is, pseudoregion_t & p); ostream & operator<< (ostream & os, pseudoregion_t const & p); @@ -90,19 +114,35 @@ struct sendrecv_pseudoregion_t { sendrecv_pseudoregion_t () { } + sendrecv_pseudoregion_t (sendrecv_pseudoregion_t const & srp) + : send (srp.send), recv (srp.recv) + { + } sendrecv_pseudoregion_t (ibbox const & send_extent, int const send_component, - ibbox const & recv_extent, int const recv_component) + ibbox const & recv_extent, int const recv_component) : send (pseudoregion_t (send_extent, send_component)), recv (pseudoregion_t (recv_extent, recv_component)) { } }; +MPI_Datatype mpi_datatype (sendrecv_pseudoregion_t const &) + 
CCTK_ATTRIBUTE_CONST; +namespace dist { + template<> inline MPI_Datatype mpi_datatype () + CCTK_ATTRIBUTE_CONST; + template<> inline MPI_Datatype mpi_datatype () + { sendrecv_pseudoregion_t dummy; return mpi_datatype(dummy); } +} + +inline size_t memoryof (sendrecv_pseudoregion_t const & srp) + CCTK_ATTRIBUTE_PURE; inline size_t memoryof (sendrecv_pseudoregion_t const & srp) { return memoryof (srp.send) + memoryof (srp.recv); } +istream & operator>> (istream & os, sendrecv_pseudoregion_t & srp); ostream & operator<< (ostream & os, sendrecv_pseudoregion_t const & srp); diff --git a/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc index 7eebd36b0..c2f89ae4d 100644 --- a/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc +++ b/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc @@ -5,7 +5,7 @@ #include #include -#include "operator_prototypes.hh" +#include "operator_prototypes_3d.hh" #include "typeprops.hh" using namespace std; diff --git a/Carpet/CarpetLib/src/restrict_3d_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_rf2.cc index 26031f304..1e0cc6ec4 100644 --- a/Carpet/CarpetLib/src/restrict_3d_rf2.cc +++ b/Carpet/CarpetLib/src/restrict_3d_rf2.cc @@ -7,7 +7,7 @@ #include #include -#include "operator_prototypes.hh" +#include "operator_prototypes_3d.hh" #include "typeprops.hh" using namespace std; diff --git a/Carpet/CarpetLib/src/restrict_4d_rf2.cc b/Carpet/CarpetLib/src/restrict_4d_rf2.cc new file mode 100644 index 000000000..77bf2d28b --- /dev/null +++ b/Carpet/CarpetLib/src/restrict_4d_rf2.cc @@ -0,0 +1,141 @@ +#include +#include +#include +#include +#include + +#include +#include + +#include "operator_prototypes_4d.hh" +#include "typeprops.hh" + +using namespace std; + + + +namespace CarpetLib { + + + +#define SRCIND4(i,j,k,l) \ + index4 (srcioff + (i), srcjoff + (j), srckoff + (k), srcloff + (l), \ + srciext, srcjext, srckext, srclext) +#define DSTIND4(i,j,k,l) \ + index4 (dstioff + (i), dstjoff + (j), dstkoff + (k), dstloff + (l), \ + dstiext, 
dstjext, dstkext, dstlext) + + + + template + void + restrict_4d_rf2 (T const * restrict const src, + ivect4 const & restrict srcext, + T * restrict const dst, + ivect4 const & restrict dstext, + ibbox4 const & restrict srcbbox, + ibbox4 const & restrict dstbbox, + ibbox4 const & restrict regbbox) + { + if (any (srcbbox.stride() >= regbbox.stride() or + dstbbox.stride() != regbbox.stride())) + { + CCTK_WARN (0, "Internal error: strides disagree"); + } + + if (any (reffact2 * srcbbox.stride() != dstbbox.stride())) { + CCTK_WARN (0, "Internal error: destination strides are not twice the source strides"); + } + + // This could be handled, but is likely to point to an error + // elsewhere + if (regbbox.empty()) { + CCTK_WARN (0, "Internal error: region extent is empty"); + } + + if (not regbbox.is_contained_in(srcbbox) or + not regbbox.is_contained_in(dstbbox)) + { + cerr << "srcbbox: " << srcbbox << endl + << "dstbbox: " << dstbbox << endl + << "regbbox: " << regbbox << endl; + CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); + } + + if (any (srcext != srcbbox.shape() / srcbbox.stride() or + dstext != dstbbox.shape() / dstbbox.stride())) + { + CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); + } + + + + ivect4 const regext = regbbox.shape() / regbbox.stride(); + assert (all ((regbbox.lower() - srcbbox.lower()) % srcbbox.stride() == 0)); + ivect4 const srcoff = (regbbox.lower() - srcbbox.lower()) / srcbbox.stride(); + assert (all ((regbbox.lower() - dstbbox.lower()) % dstbbox.stride() == 0)); + ivect4 const dstoff = (regbbox.lower() - dstbbox.lower()) / dstbbox.stride(); + + + + ptrdiff_t const srciext = srcext[0]; + ptrdiff_t const srcjext = srcext[1]; + ptrdiff_t const srckext = srcext[2]; + ptrdiff_t const srclext = srcext[3]; + + ptrdiff_t const dstiext = dstext[0]; + ptrdiff_t const dstjext = dstext[1]; + ptrdiff_t const dstkext = dstext[2]; + ptrdiff_t const dstlext = dstext[3]; + + ptrdiff_t const 
regiext = regext[0]; + ptrdiff_t const regjext = regext[1]; + ptrdiff_t const regkext = regext[2]; + ptrdiff_t const reglext = regext[3]; + + ptrdiff_t const srcioff = srcoff[0]; + ptrdiff_t const srcjoff = srcoff[1]; + ptrdiff_t const srckoff = srcoff[2]; + ptrdiff_t const srcloff = srcoff[3]; + + ptrdiff_t const dstioff = dstoff[0]; + ptrdiff_t const dstjoff = dstoff[1]; + ptrdiff_t const dstkoff = dstoff[2]; + ptrdiff_t const dstloff = dstoff[3]; + + + + // Loop over coarse region +#pragma omp parallel for + for (int l=0; l +#include +#include + +#include + +// IRIX wants this before +#if HAVE_SYS_TYPES_H +# include +#endif + +#if TIME_WITH_SYS_TIME +# include +# include +#else +# if HAVE_SYS_TIME_H +# include +# elif HAVE_TIME_H +# include +# endif +#endif + +#if HAVE_UNISTD_H +# include +#endif + +#include "startup_time.hh" + + + +namespace CarpetLib { + + using namespace std; + + + + // Return the current wall time + static + double + get_walltime () + { +#ifdef HAVE_TIME_GETTIMEOFDAY + // get the current time + struct timeval tv; + gettimeofday (& tv, 0); + return tv.tv_sec + tv.tv_usec / 1.0e6; +#else + return 0.0; +#endif + } + + + + void + output_startup_time () + { + char * const cactus_starttime = getenv ("CACTUS_STARTTIME"); + if (not cactus_starttime) { + CCTK_VWarn (CCTK_WARN_PICKY, + __LINE__, __FILE__, CCTK_THORNSTRING, + "Could not determine Cactus startup time (environment variable CACTUS_STARTTIME is not set; it should be set to the output of \"date +%%s\")"); + return; + } + + double starttime; + int const iret = sscanf (cactus_starttime, "%lf", &starttime); + if (iret != 1) { + CCTK_VWarn (CCTK_WARN_COMPLAIN, + __LINE__, __FILE__, CCTK_THORNSTRING, + "Could not determine Cactus startup time (environment variable CACTUS_STARTTIME has illegal value \"%s\"; it should instead be set to the output of \"date +%%s\", which is a single number)", + cactus_starttime); + return; + } + + double const currenttime = get_walltime (); + double const 
startuptime = currenttime - starttime; + + CCTK_VInfo (CCTK_THORNSTRING, + "Process startup time was %.3g seconds", startuptime); + } + +} // namespace CarpetLib diff --git a/Carpet/CarpetLib/src/startup_time.hh b/Carpet/CarpetLib/src/startup_time.hh new file mode 100644 index 000000000..f92b3df4b --- /dev/null +++ b/Carpet/CarpetLib/src/startup_time.hh @@ -0,0 +1,6 @@ +namespace CarpetLib { + + void + output_startup_time (); + +} // namespace CarpetLib diff --git a/Carpet/CarpetLib/src/th.cc b/Carpet/CarpetLib/src/th.cc index a10dcb289..dd441e455 100644 --- a/Carpet/CarpetLib/src/th.cc +++ b/Carpet/CarpetLib/src/th.cc @@ -14,6 +14,10 @@ using namespace std; +list th::allth; + + + // Constructors th::th (gh& h_, const vector & reffacts_, const CCTK_REAL basedelta) : h(h_), reffacts(reffacts_), delta(basedelta) @@ -24,13 +28,15 @@ th::th (gh& h_, const vector & reffacts_, const CCTK_REAL basedelta) assert (reffacts.AT(n) >= reffacts.AT(n-1)); assert (reffacts.AT(n) % reffacts.AT(n-1) == 0); } - h.add(this); + allthi = allth.insert(allth.end(), this); + gh_handle = h.add(this); } // Destructors th::~th () { - h.remove(this); + h.erase(gh_handle); + allth.erase(allthi); } // Modifiers @@ -60,6 +66,10 @@ void th::regrid () } } +void th::regrid_free () +{ +} + // Memory usage @@ -75,6 +85,19 @@ memory () memoryof (deltas); } +size_t +th:: +allmemory () +{ + size_t mem = memoryof(allth); + for (list::const_iterator + thi = allth.begin(); thi != allth.end(); ++ thi) + { + mem += memoryof(**thi); + } + return mem; +} + // Output diff --git a/Carpet/CarpetLib/src/th.hh b/Carpet/CarpetLib/src/th.hh index 34e9af999..f1a7b72b7 100644 --- a/Carpet/CarpetLib/src/th.hh +++ b/Carpet/CarpetLib/src/th.hh @@ -25,10 +25,14 @@ ostream& operator<< (ostream& os, const th& t); // The time hierarchy (information about the current time) class th { + static list allth; + list::iterator allthi; + public: // should be readonly // Fields gh& h; // hierarchy + gh::th_handle gh_handle; private: 
@@ -48,9 +52,10 @@ public: // Modifiers void regrid (); + void regrid_free (); // Time management - CCTK_REAL get_time (const int rl, const int ml) const + CCTK_REAL get_time (const int rl, const int ml) const CCTK_ATTRIBUTE_PURE { assert (rl>=0 and rl=0 and ml=0 and rl=0 and ml=0 and rl=0 and ml #include #include +#include #include #include @@ -32,6 +33,7 @@ namespace CarpetLib { static bool have_cputick = false; + // CPU tick time in seconds static double cputick = 0.0; @@ -266,11 +268,11 @@ namespace CarpetLib { os << timername << ":" << " cnt: " << count - << " time: sum: " << wtime - << " avg: " << avg - << " stddev: " << stddev - << " min: " << wmin - << " max: " << wmax + << " time: sum: " << cputick * wtime + << " avg: " << cputick * avg + << " stddev: " << cputick * stddev + << " min: " << cputick * wmin + << " max: " << cputick * wmax << " bytes: sum: " << bytes << " avg: " << bavg << " stddev: " << bstddev @@ -324,11 +326,11 @@ namespace CarpetLib { file << "Build ID: " << build_id << eol; } if (CCTK_IsFunctionAliased ("UniqueSimulationID")) { - char const * const job_id = + char const * const sim_id = static_cast (UniqueSimulationID (cctkGH)); - file << "Simulation ID: " << job_id << eol; + file << "Simulation ID: " << sim_id << eol; } - file << "Running on " << dist::size() << " processors" << eol; + file << "Running with " << dist::size() << " processes and " << dist::total_num_threads() << " threads" << eol; } // if do_print_info file << "********************************************************************************" << eol @@ -341,4 +343,184 @@ namespace CarpetLib { }
+
+
+  // Accumulated statistics of a clock that is based on the CPU cycle
+  // counter.  All intervals are stored as returned by elapsed();
+  // NOTE(review): presumably CPU ticks, since the clock's get
+  // function multiplies them with cputick -- confirm.
+  struct t_cycleclock {
+    double total;               // sum of all measured intervals
+    double total_squared;       // sum of their squares
+    double min_total;           // shortest measured interval
+    double max_total;           // longest measured interval
+    double count;               // number of measured intervals
+    ticks last;                 // counter value at the last start()
+
+    t_cycleclock ()
+    {
+      reset();
+    }
+
+    ~t_cycleclock ()
+    {
+    }
+
+    // Begin a measurement interval
+    void start ()
+    {
+      last = getticks();
+    }
+
+    // End the current measurement interval and add it to the statistics
+    void stop ()
+    {
+      ticks const current = getticks();
+      double const difference = elapsed (current, last);
+      total += difference;
+      total_squared += difference * difference;
+      // The first interval initialises the minimum; comparing against
+      // the reset value 0.0 would mistake a measured 0.0 for "unset"
+      min_total = count == 0.0 ? difference : min (min_total, difference);
+      max_total = max (max_total, difference);
+      count += 1;
+    }
+
+    // Forget all measurements
+    void reset ()
+    {
+      total = 0.0;
+      total_squared = 0.0;
+      min_total = 0.0; // numeric_limits<double>::max();
+      max_total = 0.0;
+      count = 0.0;
+      // last = 0.0;
+    }
+
+  };
+
+
+
+  // Callbacks for the Cactus clock interface; they are registered in
+  // CarpetLib_registercycleclock below.  timernum is not used; data
+  // is the t_cycleclock object returned by cycleclock_create.
+
+  void * cycleclock_create (int const timernum)
+  {
+    return new t_cycleclock;
+  }
+
+  void cycleclock_destroy (int const timernum, void * const data)
+  {
+    if (data) {
+      delete static_cast <t_cycleclock *> (data);
+    }
+  }
+
+  void cycleclock_start (int const timernum, void * const data)
+  {
+    static_cast <t_cycleclock *> (data) -> start();
+  }
+
+  void cycleclock_stop (int const timernum, void * const data)
+  {
+    static_cast <t_cycleclock *> (data) -> stop();
+  }
+
+  void cycleclock_reset (int const timernum, void * const data)
+  {
+    static_cast <t_cycleclock *> (data) -> reset();
+  }
+
+  // Report total, average, standard deviation, minimum, and maximum
+  // of the measured intervals in vals[0] to vals[4].  val.d holds the
+  // accumulated value, seconds the same value multiplied by cputick.
+  void cycleclock_get (int const timernum, void * const data_,
+                       cTimerVal * const vals)
+  {
+    t_cycleclock const & data = * static_cast <t_cycleclock const *> (data_);
+
+    // Total time
+    vals[0].type = val_double;
+    vals[0].heading = "cycle";
+    vals[0].units = "secs";
+    vals[0].val.d = data.total;
+    vals[0].seconds = cputick * vals[0].val.d;
+    vals[0].resolution = cputick;
+
+    // Average
+    vals[1].type = val_double;
+    vals[1].heading = "cycle[avg]";
+    vals[1].units = "secs";
+    vals[1].val.d = data.count == 0.0 ? 0.0 : data.total / data.count;
+    vals[1].seconds = cputick * vals[1].val.d;
+    vals[1].resolution = cputick;
+
+    // Standard deviation
+    // sqrt(<x^2> - <x>^2) = sqrt(count * sum(x^2) - sum(x)^2) / count;
+    // abs() guards against a slightly negative argument -- presumably
+    // from round-off
+    vals[2].type = val_double;
+    vals[2].heading = "cycle[stddev]";
+    vals[2].units = "secs";
+    vals[2].val.d = (data.count == 0.0
+                     ? 0.0
+                     : sqrt (abs (data.total_squared * data.count -
+                                  data.total * data.total)) / data.count);
+    vals[2].seconds = cputick * vals[2].val.d;
+    vals[2].resolution = cputick;
+
+    // Minimum
+    vals[3].type = val_double;
+    vals[3].heading = "cycle[min]";
+    vals[3].units = "secs";
+    vals[3].val.d = data.min_total;
+    vals[3].seconds = cputick * vals[3].val.d;
+    vals[3].resolution = cputick;
+
+    // Maximum
+    vals[4].type = val_double;
+    vals[4].heading = "cycle[max]";
+    vals[4].units = "secs";
+    vals[4].val.d = data.max_total;
+    vals[4].seconds = cputick * vals[4].val.d;
+    vals[4].resolution = cputick;
+  }
+
+  // Set the clock: only the total (vals[0]) is taken over, all other
+  // statistics are reset
+  void cycleclock_set (int const timernum, void * const data_,
+                       cTimerVal * const vals)
+  {
+    t_cycleclock & data = * static_cast <t_cycleclock *> (data_);
+
+    data.reset(); // punt
+    data.total = vals[0].val.d;
+  }
+
+  extern "C" {
+    int CarpetLib_registercycleclock (void);
+  }
+
+  // Register the clock "cycle" with the callbacks above.  Always
+  // returns 0.
+  int CarpetLib_registercycleclock (void)
+  {
+    if (not have_cputick) calculate_cputick ();
+
+    cClockFuncs functions;
+    functions.n_vals = 5;
+    functions.create = cycleclock_create;
+    functions.destroy = cycleclock_destroy;
+    functions.start = cycleclock_start;
+    functions.stop = cycleclock_stop;
+    functions.reset = cycleclock_reset;
+    functions.get = cycleclock_get;
+    functions.set = cycleclock_set;
+
+    CCTK_ClockRegister("cycle", &functions);
+
+    return 0;
+  }
+
 } // namespace CarpetLib diff --git a/Carpet/CarpetLib/src/vect.cc b/Carpet/CarpetLib/src/vect.cc index 28eff14dc..e38ed745a 100644 --- a/Carpet/CarpetLib/src/vect.cc +++ b/Carpet/CarpetLib/src/vect.cc @@ -43,21 +43,42 @@ void vect::output (ostream& os) const { +// Specialise some constructors for lower dimensions +// These functions are declared, but must not be used.
+ +template<> vect::vect (const int& x, const int& y) { assert(0); } +template<> vect::vect (const int& x, const int& y) { assert(0); } +template<> vect::vect (const int& x, const int& y) { assert(0); } +template<> vect::vect (const int& x, const int& y) { assert(0); } + +template<> vect::vect (const int& x, const int& y, const int& z) { assert(0); } +template<> vect::vect (const int& x, const int& y, const int& z) { assert(0); } +template<> vect::vect (const int& x, const int& y, const int& z) { assert(0); } +template<> vect::vect (const int& x, const int& y, const int& z) { assert(0); } + +template<> vect::vect (const int& x, const int& y, const int& z, const int& t) { assert(0); } +template<> vect::vect (const int& x, const int& y, const int& z, const int& t) { assert(0); } +template<> vect::vect (const int& x, const int& y, const int& z, const int& t) { assert(0); } +template<> vect::vect (const int& x, const int& y, const int& z, const int& t) { assert(0); } + + + // Note: We need all dimensions all the time. 
template class vect; template class vect; template class vect; template class vect; +template class vect; -template void vect::input (istream& is); -template void vect,3>::input (istream& is); -template void vect,2>::input (istream& is); +template void vect::input (istream& is); +template void vect,dim>::input (istream& is); +template void vect,2>::input (istream& is); template void vect::output (ostream& os) const; -template void vect::output (ostream& os) const; +template void vect::output (ostream& os) const; template void vect::output (ostream& os) const; -template void vect::output (ostream& os) const; -template void vect,3>::output (ostream& os) const; -template void vect,3>::output (ostream& os) const; -template void vect,2>::output (ostream& os) const; -template void vect,2>::output (ostream& os) const; +template void vect::output (ostream& os) const; +template void vect,dim>::output (ostream& os) const; +template void vect,dim>::output (ostream& os) const; +template void vect,2>::output (ostream& os) const; +template void vect,2>::output (ostream& os) const; diff --git a/Carpet/CarpetLib/src/vect.hh b/Carpet/CarpetLib/src/vect.hh index 56898d970..03e62aba6 100644 --- a/Carpet/CarpetLib/src/vect.hh +++ b/Carpet/CarpetLib/src/vect.hh @@ -8,39 +8,17 @@ #include "cctk.h" +#include "defs.hh" #include "vect_helpers.hh" using namespace std; -#if 0 - -// A pure function returns a value that depends only on the function -// arguments and on global variables, and the function has no side -// effects. -#ifdef HAVE_CCTK_CXX_ATTRIBUTE_PURE -# define PURE __attribute__((pure)) -#else -# define PURE -#endif - -// A const function returns a value that depends only on the function -// arguments, and the function has no side effects. This is even more -// strict than pure functions. Const functions cannot dereference -// pointers or references (or this). 
-#ifdef HAVE_CCTK_CXX_ATTRIBUTE_CONST -# define CONST __attribute__((const)) +#ifdef CARPET_DEBUG +# define ASSERT_VECT(x) assert(x) #else -# define CONST -#endif - -#else - -// Don't take any risks -# define PURE -# define CONST - +# define ASSERT_VECT(x) #endif @@ -72,38 +50,44 @@ public: // Constructors /** Explicit empty constructor. */ - explicit vect () CONST { } + explicit vect () CCTK_MEMBER_ATTRIBUTE_CONST { } /** Copy constructor. */ - vect (const vect& a) PURE { + vect (const vect& a) CCTK_MEMBER_ATTRIBUTE_PURE + { for (int d=0; d - operator vect,E> () CONST { + operator vect,E> () CCTK_MEMBER_ATTRIBUTE_CONST + { vect,E> r; for (int e=0; e - /*explicit*/ vect (const vect& a) /*PURE*/ { + /*explicit*/ vect (const vect& a) /*CCTK_MEMBER_ATTRIBUTE_PURE*/ + { for (int d=0; d=0 && d=0 && d=0 && d=0 && d - vect operator[] (const vect& a) const /*PURE*/ { + vect operator[] (const vect& a) + const /*CCTK_MEMBER_ATTRIBUTE_PURE*/ + { vect r; // (*this)[] performs index checking for (int d=0; d=0 && d=0 && d - vect ifthen (const vect& a, const vect& b) const /*PURE*/ { + vect ifthen (const vect& a, const vect& b) + const /*CCTK_MEMBER_ATTRIBUTE_PURE*/ + { vect r; for (int d=0; d inline vect either (const vect& a, - const vect& b, const vect& c) PURE; + const vect& b, const vect& c) + CCTK_ATTRIBUTE_PURE; template inline vect either (const vect& a, const vect& b, const vect& c) @@ -322,7 +333,8 @@ inline vect either (const vect& a, template inline vect either (const vect& a, - const T& b, const T& c) PURE; + const T& b, const T& c) + CCTK_ATTRIBUTE_PURE; template inline vect either (const vect& a, const T& b, const T& c) @@ -332,9 +344,11 @@ inline vect either (const vect& a, /** Transpose a vector of a vector */ template -inline vect,DD> xpose (vect,D> const & a) PURE; +inline vect,DD> xpose (vect,D> const & a) + CCTK_ATTRIBUTE_PURE; template -inline vect,DD> xpose (vect,D> const & a) { +inline vect,DD> xpose (vect,D> const & a) +{ vect,DD> r; for (int dd=0; dd,DD> 
xpose (vect,D> const & a) { /** Return the element-wise integer power of two vectors. */ template -inline vect ipow (const vect& a, const vect& b) PURE; +inline vect ipow (const vect& a, const vect& b) + CCTK_ATTRIBUTE_PURE; template -inline vect ipow (const vect& a, const vect& b) { +inline vect ipow (const vect& a, const vect& b) +{ vect r; for (int d=0; d -inline int count (const vect& a) PURE; +inline int count (const vect& a) CCTK_ATTRIBUTE_PURE; template -inline int count (const vect& a) { +inline int count (const vect& a) +{ return D; } /** Return the size (number of elements) of the vector. */ template -inline int size (const vect& a) CONST; +inline int size (const vect& a) CCTK_ATTRIBUTE_CONST; template -inline int size (const vect& a) { +inline int size (const vect& a) +{ return D; } /** Return the index of the first maximum element. */ template -inline int maxloc (const vect& a) PURE; +inline int maxloc (const vect& a) CCTK_ATTRIBUTE_PURE; template -inline int maxloc (const vect& a) { - assert (D>0); +inline int maxloc (const vect& a) +{ + ASSERT_VECT (D>0); int r(0); for (int d=1; da[r]) r=d; return r; @@ -425,10 +444,11 @@ inline int maxloc (const vect& a) { /** Return the index of the first minimum element. */ template -inline int minloc (const vect& a) PURE; +inline int minloc (const vect& a) CCTK_ATTRIBUTE_PURE; template -inline int minloc (const vect& a) { - assert (D>0); +inline int minloc (const vect& a) +{ + ASSERT_VECT (D>0); int r(0); for (int d=1; d& a) { /** Return the n-dimensional linear array index. 
*/ template -inline T index (const vect& lsh, const vect& ind) PURE; +inline T index (const vect& lsh, const vect& ind) CCTK_ATTRIBUTE_PURE; template -inline T index (const vect& lsh, const vect& ind) { +inline T index (const vect& lsh, const vect& ind) +{ T r(0); for (int d=D-1; d>=0; --d) { - assert (lsh[d]>=0); + ASSERT_VECT (lsh[d]>=0); // Be generous, and allow relative indices which may be negtive - // assert (ind[d]>=0 and ind[d]=0 and ind[d]& lsh, const vect& ind) { /** Return a new vector where the function func() has been applied to all elements. */ template -inline vect map (U (* const func)(T x), const vect& a) { +inline vect map (U (* const func)(T x), const vect& a) + CCTK_ATTRIBUTE_PURE; +template +inline vect map (U (* const func)(T x), const vect& a) +{ vect r; for (int d=0; d map (U (* const func)(T x), const vect& a) { /** Return a new vector where the function func() has been used element-wise to combine a and b. */ template +inline vect zip (U (* const func)(S x, T y), + const vect& a, const vect& b) + CCTK_ATTRIBUTE_PURE; +template inline vect zip (U (* const func)(S x, T y), const vect& a, const vect& b) { @@ -480,6 +509,9 @@ inline vect zip (U (* const func)(S x, T y), /** Return a scalar where the function func() has been used to reduce the vector a, starting with the scalar value val. */ template +inline U fold (U (* const func)(U val, T x), U val, const vect& a) + CCTK_ATTRIBUTE_PURE; +template inline U fold (U (* const func)(U val, T x), U val, const vect& a) { for (int d=0; d& a) /** Return a scalar where the function func() has been used to reduce the vector a, starting with element 0. 
*/ template +inline U fold1 (U (* const func)(U val, T x), const vect& a) + CCTK_ATTRIBUTE_PURE; +template inline U fold1 (U (* const func)(U val, T x), const vect& a) { - assert (D>=1); + ASSERT_VECT (D>=1); U val = a[0]; for (int d=1; d& a) /** Return a vector where the function func() has been used to scan the vector a, starting with the scalar value val. */ template +inline vect scan0 (U (* const func)(U val, T x), U val, + const vect& a) + CCTK_ATTRIBUTE_PURE; +template inline vect scan0 (U (* const func)(U val, T x), U val, const vect& a) { @@ -514,6 +553,10 @@ inline vect scan0 (U (* const func)(U val, T x), U val, /** Return a vector where the function func() has been used to scan the vector a, starting with element 0. */ template +inline vect scan1 (U (* const func)(U val, T x), U val, + const vect& a) + CCTK_ATTRIBUTE_PURE; +template inline vect scan1 (U (* const func)(U val, T x), U val, const vect& a) { @@ -531,7 +574,10 @@ inline vect scan1 (U (* const func)(U val, T x), U val, // Memory usage template -inline size_t memoryof (vect const & a) { return a.memory(); } +inline size_t memoryof (vect const & a) CCTK_ATTRIBUTE_CONST; +template +inline size_t memoryof (vect const & a) +{ return a.memory(); } @@ -562,23 +608,26 @@ inline ostream& operator<< (ostream& os, const vect& a) { /** Constructor for 2-element vectors from 2 elements. */ template -inline vect::vect (const T& x, const T& y) PURE; +inline vect::vect (const T& x, const T& y) CCTK_ATTRIBUTE_PURE; template -inline vect::vect (const T& x, const T& y) { +inline vect::vect (const T& x, const T& y) +{ elt[0]=x; elt[1]=y; } /** Constructor for 3-element vectors from 3 elements. 
*/ -vect (const T& x, const T& y, const T& z) PURE; -vect (const T& x, const T& y, const T& z) { - assert (D==3); +vect (const T& x, const T& y, const T& z) CCTK_ATTRIBUTE_PURE; +vect (const T& x, const T& y, const T& z) +{ + ASSERT_VECT (D==3); elt[0]=x; elt[1]=y; elt[2]=z; } /** Constructor for 4-element vectors from 4 elements. */ -vect (const T& x, const T& y, const T& z, const T& t) PURE; -vect (const T& x, const T& y, const T& z, const T& t) { - assert (D==4); +vect (const T& x, const T& y, const T& z, const T& t) CCTK_ATTRIBUTE_PURE; +vect (const T& x, const T& y, const T& z, const T& t) +{ + ASSERT_VECT (D==4); elt[0]=x; elt[1]=y; elt[2]=z; elt[3]=t; } #endif @@ -590,29 +639,30 @@ vect (const T& x, const T& y, const T& z, const T& t) { // Specialise some constructors for lower dimensions -// These functions are declared, but never defined, so that using them -// will result in a linker error - -template<> inline vect::vect (const int& x, const int& y) { assert(0); } -template<> inline vect::vect (const int& x, const int& y) { assert(0); } +// These functions are declared, but must not be used. 
-template<> inline vect::vect (const int& x, const int& y, const int& z) { assert(0); } -template<> inline vect::vect (const int& x, const int& y, const int& z) { assert(0); } -template<> inline vect::vect (const int& x, const int& y, const int& z) { assert(0); } +template<> vect::vect (const int& x, const int& y); +template<> vect::vect (const int& x, const int& y); +template<> vect::vect (const int& x, const int& y); +template<> vect::vect (const int& x, const int& y); -template<> inline vect::vect (const int& x, const int& y, const int& z, const int& t) { assert(0); } -template<> inline vect::vect (const int& x, const int& y, const int& z, const int& t) { assert(0); } -template<> inline vect::vect (const int& x, const int& y, const int& z, const int& t) { assert(0); } -template<> inline vect::vect (const int& x, const int& y, const int& z, const int& t) { assert(0); } +template<> vect::vect (const int& x, const int& y, const int& z); +template<> vect::vect (const int& x, const int& y, const int& z); +template<> vect::vect (const int& x, const int& y, const int& z); +template<> vect::vect (const int& x, const int& y, const int& z); +template<> vect::vect (const int& x, const int& y, const int& z, const int& t); +template<> vect::vect (const int& x, const int& y, const int& z, const int& t); +template<> vect::vect (const int& x, const int& y, const int& z, const int& t); +template<> vect::vect (const int& x, const int& y, const int& z, const int& t); // Specialise for CCTK_REAL template<> -inline vect& vect::operator%=(const vect& a) { - for (int d=0; d<3; ++d) { +inline vect& vect::operator%=(const vect& a) { + for (int d=0; da[d]*(CCTK_REAL)(1.0-1.0e-10)) elt[d]=(CCTK_REAL)0; if (elt[d]& vect::operator%=(const vect& } template<> -inline vect operator%(const vect& a, const vect& b) { - vect r; - for (int d=0; d<3; ++d) { +inline vect operator%(const vect& a, const vect& b) { + vect r; + for (int d=0; db[d]*(CCTK_REAL)(1.0-1.0e-10)) r[d]=(CCTK_REAL)0; if (r[d] fn 
() const PURE { \ + vect fn () const CCTK_ATTRIBUTE_PURE \ + { \ vect r; \ for (int d=0; d \ - inline vect fn (const vect& a) PURE; \ - template \ - inline vect fn (const vect& a) { \ - vect r; \ - for (int d=0; d \ - inline vect fn (const vect,E>& a) PURE; \ - template \ - inline vect fn (const vect,E>& a) \ - { \ - vect r; \ - for (int e=0; e \ + inline vect fn (const vect& a) CCTK_ATTRIBUTE_PURE; \ + template \ + inline vect fn (const vect& a) \ + { \ + vect r; \ + for (int d=0; d \ + inline vect fn (const vect,E>& a) CCTK_ATTRIBUTE_PURE; \ + template \ + inline vect fn (const vect,E>& a) \ + { \ + vect r; \ + for (int e=0; e \ - inline vect fn (const vect& a, const vect& b) PURE; \ + inline vect fn (const vect& a, const vect& b) \ + CCTK_ATTRIBUTE_PURE; \ template \ - inline vect fn (const vect& a, const vect& b) { \ + inline vect fn (const vect& a, const vect& b) \ + { \ vect r; \ for (int d=0; d \ - inline vect fn (const T& a, const vect& b) PURE; \ + inline vect fn (const T& a, const vect& b) \ + CCTK_ATTRIBUTE_PURE; \ template \ - inline vect fn (const T& a, const vect& b) { \ + inline vect fn (const T& a, const vect& b) \ + { \ vect r; \ for (int d=0; d \ - inline vect fn (const vect& a, const T& b) PURE; \ + inline vect fn (const vect& a, const T& b) \ + CCTK_ATTRIBUTE_PURE; \ template \ - inline vect fn (const vect& a, const T& b) { \ + inline vect fn (const vect& a, const T& b) \ + { \ vect r; \ for (int d=0; d \ - inline vect,E> fn (const vect,E>& a, const vect,E>& b) PURE; \ + inline vect,E> fn (const vect,E>& a, \ + const vect,E>& b) \ + CCTK_ATTRIBUTE_PURE; \ template \ - inline vect,E> fn (const vect,E>& a, const vect,E>& b) \ + inline vect,E> fn (const vect,E>& a, \ + const vect,E>& b) \ { \ vect,E> r; \ for (int e=0; e \ - inline vect,E> fn (const T& a, const vect,E>& b) PURE; \ + inline vect,E> fn (const T& a, const vect,E>& b) \ + CCTK_ATTRIBUTE_PURE; \ template \ inline vect,E> fn (const T& a, const vect,E>& b) \ { \ @@ -128,7 +143,8 @@ } \ \ 
template \ - inline vect,E> fn (const vect,E>& a, const T& b) PURE; \ + inline vect,E> fn (const vect,E>& a, const T& b) \ + CCTK_ATTRIBUTE_PURE; \ template \ inline vect,E> fn (const vect,E>& a, const T& b) \ { \ @@ -150,25 +166,28 @@ #define DECLARE_OPERATOR_1_RET(fn,op,R) \ \ template \ - inline vect fn (const vect& a) PURE; \ + inline vect fn (const vect& a) CCTK_ATTRIBUTE_PURE; \ template \ - inline vect fn (const vect& a) { \ + inline vect fn (const vect& a) \ + { \ vect r; \ for (int d=0; d \ - inline vect fn (const T& a) PURE; \ + inline vect fn (const T& a) CCTK_ATTRIBUTE_PURE; \ template \ - inline vect fn (const T& a) { \ + inline vect fn (const T& a) \ + { \ vect r; \ for (int d=0; d \ - inline vect,E> fn (const vect,E>& a) PURE; \ + inline vect,E> fn (const vect,E>& a) \ + CCTK_ATTRIBUTE_PURE; \ template \ inline vect,E> fn (const vect,E>& a) \ { \ @@ -178,7 +197,7 @@ } \ \ template \ - inline vect,E> fn (const T& a) PURE; \ + inline vect,E> fn (const T& a) CCTK_ATTRIBUTE_PURE; \ template \ inline vect,E> fn (const T& a) \ { \ @@ -194,36 +213,45 @@ #define DECLARE_OPERATOR_2_RET(fn,op,R) \ \ template \ - inline vect fn (const vect& a, const vect& b) PURE; \ + inline vect fn (const vect& a, const vect& b) \ + CCTK_ATTRIBUTE_PURE; \ template \ - inline vect fn (const vect& a, const vect& b) { \ + inline vect fn (const vect& a, const vect& b) \ + { \ vect r; \ for (int d=0; d \ - inline vect fn (const T& a, const vect& b) PURE; \ + inline vect fn (const T& a, const vect& b) \ + CCTK_ATTRIBUTE_PURE; \ template \ - inline vect fn (const T& a, const vect& b) { \ + inline vect fn (const T& a, const vect& b) \ + { \ vect r; \ for (int d=0; d \ - inline vect fn (const vect& a, const T& b) PURE; \ + inline vect fn (const vect& a, const T& b) \ + CCTK_ATTRIBUTE_PURE; \ template \ - inline vect fn (const vect& a, const T& b) { \ + inline vect fn (const vect& a, const T& b) \ + { \ vect r; \ for (int d=0; d \ - inline vect,E> fn (const vect,E>& a, const vect,E>& b) 
PURE; \ + inline vect,E> fn (const vect,E>& a, \ + const vect,E>& b) \ + CCTK_ATTRIBUTE_PURE; \ template \ - inline vect,E> fn (const vect,E>& a, const vect,E>& b) \ + inline vect,E> fn (const vect,E>& a, \ + const vect,E>& b) \ { \ vect,E> r; \ for (int e=0; e \ - inline vect,E> fn (const T& a, const vect,E>& b) PURE; \ + inline vect,E> fn (const T& a, const vect,E>& b) \ + CCTK_ATTRIBUTE_PURE; \ template \ inline vect,E> fn (const T& a, const vect,E>& b) \ { \ @@ -241,14 +270,15 @@ } \ \ template \ - inline vect,E> fn (const vect,E>& a, const T& b) PURE; \ + inline vect,E> fn (const vect,E>& a, const T& b) \ + CCTK_ATTRIBUTE_PURE; \ template \ inline vect,E> fn (const vect,E>& a, const T& b) \ { \ vect,E> r; \ for (int e=0; e \ - inline vect fn (const vect& a) PURE; \ + inline vect fn (const vect& a) CCTK_ATTRIBUTE_PURE; \ template \ - inline vect fn (const vect& a) { \ + inline vect fn (const vect& a) \ + { \ vect r; \ for (int d=0; d \ - inline R fn (const vect& a) PURE; \ + inline R fn (const vect& a) CCTK_ATTRIBUTE_PURE; \ template \ - inline R fn (const vect& a) { \ + inline R fn (const vect& a) \ + { \ R r(init); \ for (int d=0; d \ - inline T fn (const vect& a) PURE; \ - template \ - inline T fn (const vect& a) { \ - T r(init); \ - for (int d=0; d \ + inline T fn (const vect& a) CCTK_ATTRIBUTE_PURE; \ + template \ + inline T fn (const vect& a) \ + { \ + T r(init); \ + for (int d=0; d \ - inline T fn (const vect& a) PURE; \ + inline T fn (const vect& a) CCTK_ATTRIBUTE_PURE; \ template \ - inline T fn (const vect& a) { \ + inline T fn (const vect& a) \ + { \ T r(init); \ for (int d=0; d \ - inline T fn (const vect& a, const vect& b) PURE; \ - template \ - inline T fn (const vect& a, const vect& b) { \ - T r(init); \ - for (int d=0; d \ + inline T fn (const vect& a, const vect& b) \ + CCTK_ATTRIBUTE_PURE; \ + template \ + inline T fn (const vect& a, const vect& b) \ + { \ + T r(init); \ + for (int d=0; d