about | summary | refs | log | tree | commit | diff
path: root/Carpet/CarpetLib
diff options
context:
space:
mode:
Diffstat (limited to 'Carpet/CarpetLib')
-rw-r--r-- Carpet/CarpetLib/README | 6
-rw-r--r-- Carpet/CarpetLib/interface.ccl | 3
-rw-r--r-- Carpet/CarpetLib/param.ccl | 61
-rw-r--r-- Carpet/CarpetLib/schedule.ccl | 4
-rw-r--r-- Carpet/CarpetLib/src/bbox.cc | 58
-rw-r--r-- Carpet/CarpetLib/src/bbox.hh | 19
-rw-r--r-- Carpet/CarpetLib/src/bboxset.cc | 51
-rw-r--r-- Carpet/CarpetLib/src/bboxset.hh | 102
-rw-r--r-- Carpet/CarpetLib/src/commstate.cc | 710
-rw-r--r-- Carpet/CarpetLib/src/commstate.hh | 135
-rw-r--r-- Carpet/CarpetLib/src/copy_3d.cc | 2
-rw-r--r-- Carpet/CarpetLib/src/copy_4d.cc | 142
-rw-r--r-- Carpet/CarpetLib/src/data.cc | 418
-rw-r--r-- Carpet/CarpetLib/src/data.hh | 30
-rw-r--r-- Carpet/CarpetLib/src/defs.cc | 163
-rw-r--r-- Carpet/CarpetLib/src/defs.hh | 302
-rw-r--r-- Carpet/CarpetLib/src/dh.cc | 1232
-rw-r--r-- Carpet/CarpetLib/src/dh.hh | 123
-rw-r--r-- Carpet/CarpetLib/src/dist.cc | 212
-rw-r--r-- Carpet/CarpetLib/src/dist.hh | 295
-rw-r--r-- Carpet/CarpetLib/src/fulltree.cc | 46
-rw-r--r-- Carpet/CarpetLib/src/fulltree.hh | 4
-rw-r--r-- Carpet/CarpetLib/src/gdata.cc | 198
-rw-r--r-- Carpet/CarpetLib/src/gdata.hh | 43
-rw-r--r-- Carpet/CarpetLib/src/gf.cc | 12
-rw-r--r-- Carpet/CarpetLib/src/gf.hh | 12
-rw-r--r-- Carpet/CarpetLib/src/ggf.cc | 174
-rw-r--r-- Carpet/CarpetLib/src/ggf.hh | 22
-rw-r--r-- Carpet/CarpetLib/src/gh.cc | 136
-rw-r--r-- Carpet/CarpetLib/src/gh.hh | 51
-rw-r--r-- Carpet/CarpetLib/src/interpolate_3d_2tl.cc | 2
-rw-r--r-- Carpet/CarpetLib/src/interpolate_3d_3tl.cc | 2
-rw-r--r-- Carpet/CarpetLib/src/interpolate_3d_4tl.cc | 2
-rw-r--r-- Carpet/CarpetLib/src/interpolate_3d_5tl.cc | 2
-rw-r--r-- Carpet/CarpetLib/src/interpolate_eno_3d_3tl.cc | 2
-rw-r--r-- Carpet/CarpetLib/src/limits.cc | 95
-rw-r--r-- Carpet/CarpetLib/src/limits.hh | 8
-rw-r--r-- Carpet/CarpetLib/src/make.code.defn | 12
-rw-r--r-- Carpet/CarpetLib/src/mem.cc | 101
-rw-r--r-- Carpet/CarpetLib/src/mem.hh | 11
-rw-r--r-- Carpet/CarpetLib/src/mpi_string.cc | 480
-rw-r--r-- Carpet/CarpetLib/src/mpi_string.hh | 55
-rw-r--r-- Carpet/CarpetLib/src/operator_prototypes.hh | 241
-rw-r--r-- Carpet/CarpetLib/src/operator_prototypes_3d.hh | 305
-rw-r--r-- Carpet/CarpetLib/src/operator_prototypes_4d.hh | 92
-rw-r--r-- Carpet/CarpetLib/src/operators.hh | 4
-rw-r--r-- Carpet/CarpetLib/src/prolongate_3d_cc_o0_rf2.cc | 320
-rw-r--r-- Carpet/CarpetLib/src/prolongate_3d_cc_o1_rf2.cc | 390
-rw-r--r-- Carpet/CarpetLib/src/prolongate_3d_cc_o2_rf2.cc | 545
-rw-r--r-- Carpet/CarpetLib/src/prolongate_3d_cc_rf2.cc | 45
-rw-r--r-- Carpet/CarpetLib/src/prolongate_3d_o11_rf2.cc | 44
-rw-r--r-- Carpet/CarpetLib/src/prolongate_3d_o1_rf2.cc | 44
-rw-r--r-- Carpet/CarpetLib/src/prolongate_3d_o3_rf2.cc | 46
-rw-r--r-- Carpet/CarpetLib/src/prolongate_3d_o5_monotone_rf2.cc | 851
-rw-r--r-- Carpet/CarpetLib/src/prolongate_3d_o5_rf2.cc | 44
-rw-r--r-- Carpet/CarpetLib/src/prolongate_3d_o7_rf2.cc | 44
-rw-r--r-- Carpet/CarpetLib/src/prolongate_3d_o9_rf2.cc | 44
-rw-r--r-- Carpet/CarpetLib/src/prolongate_4d_o1_rf2.cc | 602
-rw-r--r-- Carpet/CarpetLib/src/region.cc | 99
-rw-r--r-- Carpet/CarpetLib/src/region.hh | 50
-rw-r--r-- Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc | 2
-rw-r--r-- Carpet/CarpetLib/src/restrict_3d_rf2.cc | 2
-rw-r--r-- Carpet/CarpetLib/src/restrict_4d_rf2.cc | 141
-rw-r--r-- Carpet/CarpetLib/src/startup_time.cc | 82
-rw-r--r-- Carpet/CarpetLib/src/startup_time.hh | 6
-rw-r--r-- Carpet/CarpetLib/src/th.cc | 27
-rw-r--r-- Carpet/CarpetLib/src/th.hh | 15
-rw-r--r-- Carpet/CarpetLib/src/timestat.cc | 175
-rw-r--r-- Carpet/CarpetLib/src/vect.cc | 39
-rw-r--r-- Carpet/CarpetLib/src/vect.hh | 286
-rw-r--r-- Carpet/CarpetLib/src/vect_helpers.hh | 184
71 files changed, 8382 insertions, 1925 deletions
diff --git a/Carpet/CarpetLib/README b/Carpet/CarpetLib/README
index 82e529534..6e2e17abc 100644
--- a/Carpet/CarpetLib/README
+++ b/Carpet/CarpetLib/README
@@ -1,7 +1,9 @@
Cactus Code Thorn CarpetLib
-Authors : Erik Schnetter <schnetter@uni-tuebingen.de>
+Author(s) : Erik Schnetter <schnetter@cct.lsu.edu>
+Maintainer(s): Erik Schnetter <schnetter@cct.lsu.edu>
+Licence : GPLv2+
--------------------------------------------------------------------------
-Purpose of the thorn:
+1. Purpose
This thorn contains the backend library that provides mesh refinement.
diff --git a/Carpet/CarpetLib/interface.ccl b/Carpet/CarpetLib/interface.ccl
index 6e7462678..c88cf032d 100644
--- a/Carpet/CarpetLib/interface.ccl
+++ b/Carpet/CarpetLib/interface.ccl
@@ -2,6 +2,8 @@
IMPLEMENTS: CarpetLib
+includes header: mpi_string.hh in mpi_string.hh
+
includes header: defs.hh in defs.hh
includes header: dist.hh in dist.hh
includes header: typeprops.hh in typeprops.hh
@@ -28,6 +30,7 @@ includes header: th.hh in th.hh
includes header: operators.hh in operators.hh
uses include header: carpet_typecase.hh
+uses include header: CarpetTimers.hh
diff --git a/Carpet/CarpetLib/param.ccl b/Carpet/CarpetLib/param.ccl
index 45724f25d..31a43a135 100644
--- a/Carpet/CarpetLib/param.ccl
+++ b/Carpet/CarpetLib/param.ccl
@@ -41,19 +41,48 @@ BOOLEAN poison_new_memory "Try to catch uninitialised data by setting newly allo
{
} "no"
+RESTRICTED:
+
CCTK_INT poison_value "Integer value (0..255) used to poison new timelevels (with memset)" STEERABLE=always
{
0:255 :: "Must fit into a byte. Use 0 for zero, 255 for nan, and e.g. 113 for a large value."
} 255
+CCTK_INT deadbeef "A strange integer value that indicates that something has gone wrong; the integer equivalent of a nan" STEERABLE=always
+{
+ *:* :: "should be large and positive"
+} 666 # 7353315
+
+PRIVATE:
+
+# System limits
+
+INT max_core_size_MB "Maximum size of a core file, set via setrlimit" STEERABLE=recover
+{
+ -2 :: "unchanged"
+ -1 :: "unlimited"
+ 0:* :: "limited"
+} -2
+
+INT max_memory_size_MB "Maximum amount of memory per MPI process, set via setrlimit" STEERABLE=recover
+{
+ -2 :: "unchanged"
+ -1 :: "unlimited"
+ 0:* :: "limited"
+} -2
+
+
+
+# Statistics
+
INT print_timestats_every "Print timing statistics periodically" STEERABLE=always
{
-1 :: "don't report"
- 0 :: "don't report"
+ 0 :: "report after initialisation"
1:* :: "report every so many iterations"
-} 0
+} -1
STRING timestat_file "File name in which timestat output is collected (because stdout from the root node may not be enough)" STEERABLE=always
{
@@ -70,7 +99,7 @@ INT print_memstats_every "Report periodically how much memory is used per proces
1:* :: "report every so many iterations"
} 0
-INT max_allowed_memory_MB "Maximum allowed amount of memory per process (in Megabytes)" STEERABLE=always
+INT max_allowed_memory_MB "Maximum allowed amount of memory per process that can be allocated for grid variables (in Megabytes)" STEERABLE=always
{
-1 :: "no maximum"
0 :: "no maximum"
@@ -85,21 +114,29 @@ STRING memstat_file "File name in which memstat output is collected (because std
-# Experimental recomposing parameters
-
BOOLEAN combine_recompose "Recompose all grid functions of one refinement levels at once" STEERABLE=always
{
-} "no"
+} "yes"
-# Experimental communication parameters
+# Communication experiment parameters
-BOOLEAN interleave_communications "Try to interleave communications with each other; each processor begins to communicate with its 'right neighbour' in rank, instead of with the root processor" STEERABLE=always
+INT message_size_multiplier "Enlarge size of transmitted messages by this factor" STEERABLE=always
{
-} "no"
+ 1:* :: ""
+} 1
+
+INT message_count_multiplier "Transmit messages this many times" STEERABLE=always
+{
+ 1:* :: ""
+} 1
-BOOLEAN vary_tags "Use different tags for each communication" STEERABLE=always
+
+
+# Experimental communication parameters
+
+BOOLEAN interleave_communications "Try to interleave communications with each other; each processor begins to communicate with its 'right neighbour' in rank, instead of with the root processor" STEERABLE=always
{
} "no"
@@ -107,11 +144,11 @@ BOOLEAN barrier_between_stages "Add a barrier between the communication stages (
{
} "no"
-BOOLEAN combine_sends "Send data together and in order of processor ranks" STEERABLE=always
+BOOLEAN check_communication_schedule "Check the communication schedule at run time (expensive)" STEERABLE=always
{
} "no"
-BOOLEAN reduce_mpi_waitall "Call MPI_Waitall only for requests that are not null" STEERABLE=always
+BOOLEAN combine_sends "Send data together and in order of processor ranks" STEERABLE=always
{
} "no"
diff --git a/Carpet/CarpetLib/schedule.ccl b/Carpet/CarpetLib/schedule.ccl
index d371bc7a5..a208c97ca 100644
--- a/Carpet/CarpetLib/schedule.ccl
+++ b/Carpet/CarpetLib/schedule.ccl
@@ -1,10 +1,10 @@
# Schedule definitions for thorn CarpetLib
-SCHEDULE CarpetLib_setmemlimit AT WRAGH
+SCHEDULE CarpetLib_registercycleclock AT startup BEFORE Driver_Startup
{
LANG: C
OPTIONS: global
-} "Set operating system memory limit"
+} "Register cycle based timer"
SCHEDULE CarpetLib_printtimestats AT analysis
{
diff --git a/Carpet/CarpetLib/src/bbox.cc b/Carpet/CarpetLib/src/bbox.cc
index a14e1e9a3..164d63884 100644
--- a/Carpet/CarpetLib/src/bbox.cc
+++ b/Carpet/CarpetLib/src/bbox.cc
@@ -1,4 +1,4 @@
-
+#include <algorithm>
#include <cassert>
#include <iostream>
#include <limits>
@@ -6,7 +6,8 @@
#include <string>
#include <typeinfo>
-#include "cctk.h"
+#include <cctk.h>
+#include <cctk_Parameters.h>
#include "defs.hh"
#include "vect.hh"
@@ -21,8 +22,8 @@ using namespace std;
template<class T, int D>
void bbox<T,D>::assert_bbox_limits () const
{
- assert (all(_stride>T(0)));
- assert (all((_upper-_lower)%_stride == T(0)));
+ ASSERT_BBOX (all(_stride>T(0)));
+ ASSERT_BBOX (all((_upper-_lower)%_stride == T(0)));
if (numeric_limits<T>::is_integer) {
// prevent accidental wrap-around
if (any (_lower >= numeric_limits<T>::max() / 2) or
@@ -30,29 +31,34 @@ void bbox<T,D>::assert_bbox_limits () const
any (_upper >= numeric_limits<T>::max() / 2) or
any (_upper <= numeric_limits<T>::min() / 2))
{
- ostringstream lbuf, ubuf, sbuf;
- lbuf << _lower;
- ubuf << _upper;
- sbuf << _stride;
- string const lstr = lbuf.str();
- string const ustr = ubuf.str();
- string const sstr = sbuf.str();
- CCTK_VWarn (CCTK_WARN_ABORT, __LINE__, __FILE__, CCTK_THORNSTRING,
- "Tried to create a very large bbox [%s,%s,%s] of type %s -- it is likely that this would lead to an integer overflow",
- lstr.c_str(), ustr.c_str(), sstr.c_str(),
- typeid(*this).name());
+ ostringstream buf;
+ T dummy;
+ buf << "Tried to create a very large bbox [" << _lower << "," << _upper << "," << _stride << "] for the type " << typeid(dummy).name() << " -- it is likely that this would lead to an integer overflow";
+ CCTK_WARN (CCTK_WARN_ABORT, buf.str().c_str());
}
}
}
+// Poison
+template<class T, int D>
+bbox<T,D> bbox<T,D>::poison ()
+{
+ DECLARE_CCTK_PARAMETERS;
+
+ vect<T,D> const v (deadbeef);
+ return bbox (v, v, v);
+}
+
+
+
// Accessors
template<class T, int D>
typename bbox<T,D>::size_type bbox<T,D>::size () const {
if (empty()) return 0;
const vect<T,D> sh(shape()/stride());
-#ifdef NDEBUG
+#ifndef CARPET_DEBUG
return prod(vect<size_type,D>(sh));
#else
size_type sz = 1, max = numeric_limits<size_type>::max();
@@ -103,7 +109,7 @@ bool bbox<T,D>::is_aligned_with (const bbox& b) const {
template<class T, int D>
bool bbox<T,D>::operator== (const bbox& b) const {
if (empty() and b.empty()) return true;
- assert (all(stride()==b.stride()));
+ ASSERT_BBOX (all(stride()==b.stride()));
return all(lower()==b.lower() and upper()==b.upper());
}
@@ -160,8 +166,8 @@ bool bbox<T,D>::operator> (const bbox& b) const {
template<class T, int D>
bbox<T,D> bbox<T,D>::expand (const vect<T,D>& lo, const vect<T,D>& hi) const {
// Allow expansion only into directions where the extent is not negative
- // assert (all(lower()<=upper() or (lo==T(0) and hi==T(0))));
- assert (all(shape()>=vect<T,D>(0) or (lo==T(0) and hi==T(0))));
+ // ASSERT_BBOX (all(lower()<=upper() or (lo==T(0) and hi==T(0))));
+ ASSERT_BBOX (all(shape()>=vect<T,D>(0) or (lo==T(0) and hi==T(0))));
const vect<T,D> str = stride();
const vect<T,D> lb = lower() - lo * str;
const vect<T,D> ub = upper() + hi * str;
@@ -197,7 +203,7 @@ template<class T, int D>
bbox<T,D> bbox<T,D>::expanded_containing (const bbox& b) const {
if (empty()) return b;
if (b.empty()) return *this;
- assert (is_aligned_with(b));
+ ASSERT_BBOX (is_aligned_with(b));
const vect<T,D> lo = min(lower(), b.lower());
const vect<T,D> up = max(upper(), b.upper());
const vect<T,D> str = min(stride(), b.stride());
@@ -268,12 +274,13 @@ void bbox<T,D>::input (istream& is) {
consume (is, '/');
size_type size_dummy;
is >> size_dummy;
- assert (is.good());
+ ASSERT_BBOX (is.good());
skipws (is);
}
consume (is, ')');
} catch (input_error &err) {
- cout << "Input error while reading a bbox" << endl;
+ T Tdummy;
+ cout << "Input error while reading a bbox<" << typestring(Tdummy) << "," << D << ">" << endl;
throw err;
}
if (any(_stride<=T(0))) {
@@ -286,8 +293,8 @@ void bbox<T,D>::input (istream& is) {
<< " The stride does not evenly divide the extent." << endl;
throw input_error();
}
- assert (all(_stride>T(0)));
- assert (all((_upper-_lower)%_stride == T(0)));
+ ASSERT_BBOX (all(_stride>T(0)));
+ ASSERT_BBOX (all((_upper-_lower)%_stride == T(0)));
}
@@ -308,4 +315,5 @@ template class bbox<int,0>;
template class bbox<int,1>;
template class bbox<int,2>;
template class bbox<int,3>;
-template class bbox<CCTK_REAL,3>;
+template class bbox<int,4>;
+template class bbox<CCTK_REAL,dim>;
diff --git a/Carpet/CarpetLib/src/bbox.hh b/Carpet/CarpetLib/src/bbox.hh
index a79fbe87b..8386f58a2 100644
--- a/Carpet/CarpetLib/src/bbox.hh
+++ b/Carpet/CarpetLib/src/bbox.hh
@@ -13,6 +13,14 @@ using namespace std;
+#ifdef CARPET_DEBUG
+# define ASSERT_BBOX(x) assert(x)
+#else
+# define ASSERT_BBOX(x)
+#endif
+
+
+
// Forward declaration
template<class T, int D> class bbox;
@@ -69,11 +77,14 @@ public:
const vect<T,D>& stride_)
: _lower(lower_), _upper(upper_), _stride(stride_)
{
-#ifndef NDEBUG
+#ifndef CARPET_DEBUG
assert_bbox_limits();
#endif
}
+ // Poison
+ static bbox poison ();
+
// Accessors
// (Don't return references; *this might be a temporary)
@@ -131,7 +142,7 @@ public:
bbox b. */
bbox operator& (const bbox& b) const
{
- assert (all(stride()==b.stride()));
+ ASSERT_BBOX (all(stride()==b.stride()));
vect<T,D> lo = max(lower(),b.lower());
vect<T,D> up = min(upper(),b.upper());
return bbox(lo,up,stride());
@@ -179,7 +190,7 @@ public:
iterator end () const;
// Memory usage
- size_t memory () const
+ size_t memory () const CCTK_ATTRIBUTE_CONST
{
return memoryof (_lower) + memoryof (_upper) + memoryof (_stride);
}
@@ -194,6 +205,8 @@ public:
// Memory usage
template<class T, int D>
+inline size_t memoryof (bbox<T,D> const & b) CCTK_ATTRIBUTE_CONST;
+template<class T, int D>
inline size_t memoryof (bbox<T,D> const & b) { return b.memory(); }
diff --git a/Carpet/CarpetLib/src/bboxset.cc b/Carpet/CarpetLib/src/bboxset.cc
index eceb264d8..a5748a7c6 100644
--- a/Carpet/CarpetLib/src/bboxset.cc
+++ b/Carpet/CarpetLib/src/bboxset.cc
@@ -51,6 +51,11 @@ bboxset<T,D>::bboxset (const vector<list<box> >& vlb) {
normalize();
}
+template<class T, int D>
+bboxset<T,D> bboxset<T,D>::poison () {
+ return bboxset (bbox<T,D>::poison());
+}
+
// Invariant
@@ -455,16 +460,54 @@ bool bboxset<T,D>::operator!= (const bboxset<T,D>& s) const {
+// Input
+template<class T,int D>
+istream& bboxset<T,D>::input (istream& is) {
+ T Tdummy;
+ try {
+ skipws (is);
+ consume (is, "bboxset<");
+ consume (is, typestring(Tdummy));
+ consume (is, ",");
+ int D_;
+ is >> D_;
+ if (D_ != D) {
+ cout << "Input error: Wrong bboxset dimension " << D_ << ", expected " << D << endl;
+ throw input_error();
+ }
+ consume (is, ">:{");
+ consume (is, "size=");
+ size_type size_;
+ is >> size_;
+ consume (is, ",");
+ consume (is, "setsize=");
+ int setsize_;
+ is >> setsize_;
+ consume (is, ",");
+ consume (is, "set=");
+ is >> bs;
+ consume (is, "}");
+ } catch (input_error & err) {
+ cout << "Input error while reading a bboxset<" << typestring(Tdummy) << "," << D << ">" << endl;
+ throw err;
+ }
+ return is;
+}
+
+
+
// Output
template<class T,int D>
-void bboxset<T,D>::output (ostream& os) const {
+ostream& bboxset<T,D>::output (ostream& os) const {
T Tdummy;
- os << "bboxset<" << typestring(Tdummy) << "," << D << ">:"
+ os << "bboxset<" << typestring(Tdummy) << "," << D << ">:{"
<< "size=" << size() << ","
<< "setsize=" << setsize() << ","
- << "set=" << bs;
+ << "set=" << bs
+ << "}";
+ return os;
}
-template class bboxset<int,3>;
+template class bboxset<int,dim>;
diff --git a/Carpet/CarpetLib/src/bboxset.hh b/Carpet/CarpetLib/src/bboxset.hh
index 45e0dc0f2..5d206da33 100644
--- a/Carpet/CarpetLib/src/bboxset.hh
+++ b/Carpet/CarpetLib/src/bboxset.hh
@@ -28,6 +28,10 @@ template<class T, int D> class bboxset;
// template<class T,int D>
// bboxset<T,D> operator- (const bbox<T,D>& b, const bboxset<T,D>& s);
+// Input
+template<class T,int D>
+istream& operator>> (istream& is, bboxset<T,D>& s);
+
// Output
template<class T,int D>
ostream& operator<< (ostream& os, const bboxset<T,D>& s);
@@ -60,6 +64,8 @@ public:
bboxset (const list<box>& lb);
bboxset (const vector<list<box> >& vlb);
+ static bboxset poison ();
+
// Invariant
bool invariant () const;
@@ -143,10 +149,13 @@ public:
// iterator end () const { return bs.end(); }
// Memory usage
- size_t memory () const { return memoryof (bs); }
+ size_t memory () const CCTK_ATTRIBUTE_PURE { return memoryof (bs); }
+
+ // Input
+ istream& input (istream& is);
// Output
- void output (ostream& os) const;
+ ostream& output (ostream& os) const;
};
@@ -186,64 +195,112 @@ inline bboxset<T,D> operator& (const bbox<T,D>& b, const bboxset<T,D>& s) {
template<class T,int D>
-inline bool operator== (const bbox<T,D>& b, const bboxset<T,D>& s) {
+inline bool operator== (const bbox<T,D>& b, const bboxset<T,D>& s)
+ CCTK_ATTRIBUTE_PURE;
+template<class T,int D>
+inline bool operator== (const bbox<T,D>& b, const bboxset<T,D>& s)
+{
return bboxset<T,D>(b) == s;
}
template<class T,int D>
-inline bool operator!= (const bbox<T,D>& b, const bboxset<T,D>& s) {
+inline bool operator!= (const bbox<T,D>& b, const bboxset<T,D>& s)
+ CCTK_ATTRIBUTE_PURE;
+template<class T,int D>
+inline bool operator!= (const bbox<T,D>& b, const bboxset<T,D>& s)
+{
return bboxset<T,D>(b) != s;
}
template<class T,int D>
-inline bool operator< (const bbox<T,D>& b, const bboxset<T,D>& s) {
+inline bool operator< (const bbox<T,D>& b, const bboxset<T,D>& s)
+ CCTK_ATTRIBUTE_PURE;
+template<class T,int D>
+inline bool operator< (const bbox<T,D>& b, const bboxset<T,D>& s)
+{
return bboxset<T,D>(b) < s;
}
template<class T,int D>
-inline bool operator<= (const bbox<T,D>& b, const bboxset<T,D>& s) {
+inline bool operator<= (const bbox<T,D>& b, const bboxset<T,D>& s)
+ CCTK_ATTRIBUTE_PURE;
+template<class T,int D>
+inline bool operator<= (const bbox<T,D>& b, const bboxset<T,D>& s)
+{
return bboxset<T,D>(b) <= s;
}
template<class T,int D>
-inline bool operator> (const bbox<T,D>& b, const bboxset<T,D>& s) {
+inline bool operator> (const bbox<T,D>& b, const bboxset<T,D>& s)
+ CCTK_ATTRIBUTE_PURE;
+template<class T,int D>
+inline bool operator> (const bbox<T,D>& b, const bboxset<T,D>& s)
+{
return bboxset<T,D>(b) > s;
}
template<class T,int D>
-inline bool operator>= (const bbox<T,D>& b, const bboxset<T,D>& s) {
+inline bool operator>= (const bbox<T,D>& b, const bboxset<T,D>& s)
+ CCTK_ATTRIBUTE_PURE;
+template<class T,int D>
+inline bool operator>= (const bbox<T,D>& b, const bboxset<T,D>& s)
+{
return bboxset<T,D>(b) >= s;
}
template<class T,int D>
-inline bool operator== (const bboxset<T,D>& s, const bbox<T,D>& b) {
+inline bool operator== (const bboxset<T,D>& s, const bbox<T,D>& b)
+ CCTK_ATTRIBUTE_PURE;
+template<class T,int D>
+inline bool operator== (const bboxset<T,D>& s, const bbox<T,D>& b)
+{
return s == bboxset<T,D>(b);
}
template<class T,int D>
-inline bool operator!= (const bboxset<T,D>& s, const bbox<T,D>& b) {
+inline bool operator!= (const bboxset<T,D>& s, const bbox<T,D>& b)
+ CCTK_ATTRIBUTE_PURE;
+template<class T,int D>
+inline bool operator!= (const bboxset<T,D>& s, const bbox<T,D>& b)
+{
return s != bboxset<T,D>(b);
}
template<class T,int D>
-inline bool operator< (const bboxset<T,D>& s, const bbox<T,D>& b) {
+inline bool operator< (const bboxset<T,D>& s, const bbox<T,D>& b)
+ CCTK_ATTRIBUTE_PURE;
+template<class T,int D>
+inline bool operator< (const bboxset<T,D>& s, const bbox<T,D>& b)
+{
return s < bboxset<T,D>(b);
}
template<class T,int D>
-inline bool operator<= (const bboxset<T,D>& s, const bbox<T,D>& b) {
+inline bool operator<= (const bboxset<T,D>& s, const bbox<T,D>& b)
+ CCTK_ATTRIBUTE_PURE;
+template<class T,int D>
+inline bool operator<= (const bboxset<T,D>& s, const bbox<T,D>& b)
+{
return s <= bboxset<T,D>(b);
}
template<class T,int D>
-inline bool operator> (const bboxset<T,D>& s, const bbox<T,D>& b) {
+inline bool operator> (const bboxset<T,D>& s, const bbox<T,D>& b)
+ CCTK_ATTRIBUTE_PURE;
+template<class T,int D>
+inline bool operator> (const bboxset<T,D>& s, const bbox<T,D>& b)
+{
return s > bboxset<T,D>(b);
}
template<class T,int D>
-inline bool operator>= (const bboxset<T,D>& s, const bbox<T,D>& b) {
+inline bool operator>= (const bboxset<T,D>& s, const bbox<T,D>& b)
+ CCTK_ATTRIBUTE_PURE;
+template<class T,int D>
+inline bool operator>= (const bboxset<T,D>& s, const bbox<T,D>& b)
+{
return s >= bboxset<T,D>(b);
}
@@ -251,15 +308,26 @@ inline bool operator>= (const bboxset<T,D>& s, const bbox<T,D>& b) {
// Memory usage
template<class T, int D>
-inline size_t memoryof (bboxset<T,D> const & s) { return s.memory(); }
+inline size_t memoryof (bboxset<T,D> const & s)
+ CCTK_ATTRIBUTE_PURE;
+template<class T, int D>
+inline size_t memoryof (bboxset<T,D> const & s)
+{ return s.memory(); }
+
+
+
+// Input
+template<class T,int D>
+inline istream& operator>> (istream& is, bboxset<T,D>& s) {
+ return s.input(is);
+}
// Output
template<class T,int D>
inline ostream& operator<< (ostream& os, const bboxset<T,D>& s) {
- s.output(os);
- return os;
+ return s.output(os);
}
diff --git a/Carpet/CarpetLib/src/commstate.cc b/Carpet/CarpetLib/src/commstate.cc
index 7a00157c2..ef1b64cdb 100644
--- a/Carpet/CarpetLib/src/commstate.cc
+++ b/Carpet/CarpetLib/src/commstate.cc
@@ -21,241 +21,463 @@ using namespace CarpetLib;
+char const * tostring (astate const & thestate)
+{
+ switch (thestate) {
+ case state_get_buffer_sizes: return "state_get_buffer_sizes";
+ case state_fill_send_buffers: return "state_fill_send_buffers";
+ case state_do_some_work: return "state_do_some_work";
+ case state_empty_recv_buffers: return "state_empty_recv_buffers";
+ case state_done: return "state_done";
+ default: assert(0); abort();
+ }
+ return NULL;
+}
+
+
+
// Communication state control
comm_state::comm_state ()
{
- // A comm_state object will step through
- // state_get_buffer_sizes
- // state_fill_send_buffers
- // state_empty_recv_buffers
-
DECLARE_CCTK_PARAMETERS;
-
+
static Timer timer ("commstate::create");
timer.start ();
thestate = state_get_buffer_sizes;
-
+
typebufs.resize (dist::c_ndatatypes());
-#define INSTANTIATE(T) \
- { \
- T dummy; \
- int const type = dist::c_datatype (dummy); \
- assert (typebufs.AT(type).datatypesize == 0); \
- typebufs.AT(type).datatypesize = sizeof dummy; \
- typebufs.AT(type).mpi_datatype = dist::datatype (dummy); \
- typebufs.AT(type).procbufs.resize (dist::size()); \
+#define INSTANTIATE(T) \
+ { \
+ T dummy; \
+ unsigned const type = dist::c_datatype (dummy); \
+ typebufs.AT(type).mpi_datatype = dist::mpi_datatype (dummy); \
+ typebufs.AT(type).datatypesize = sizeof dummy; \
}
#include "instantiate"
#undef INSTANTIATE
-
- srequests.resize (dist::c_ndatatypes() * dist::size(), MPI_REQUEST_NULL);
- rrequests.resize (dist::c_ndatatypes() * dist::size(), MPI_REQUEST_NULL);
+
+ srequests.reserve (dist::c_ndatatypes() * dist::size());
+ rrequests.reserve (dist::c_ndatatypes() * dist::size());
timer.stop (0);
}
+
void comm_state::step ()
{
DECLARE_CCTK_PARAMETERS;
static Timer total ("commstate::step");
total.start ();
- assert (thestate != state_done);
+
+ if (barrier_between_stages) {
+ // Add a barrier, ensuring e.g. that all Irecvs are posted before
+ // the first Isends are made
+ if (commstate_verbose) {
+ CCTK_VInfo (CCTK_THORNSTRING,
+ "before MPI_Barrier; state=%s", tostring(thestate));
+ }
+ MPI_Barrier (dist::comm());
+ if (commstate_verbose) {
+ CCTK_INFO ("after MPI_Barrier");
+ }
+ }
+
switch (thestate) {
- case state_get_buffer_sizes:
+
+
+ case state_get_buffer_sizes: {
+
+ if (check_communication_schedule) {
+ vector<int> sendcount(dist::size() * dist::c_ndatatypes());
+ for (unsigned type = 0; type < dist::c_ndatatypes(); ++ type) {
+ for (int proc = 0; proc < dist::size(); ++ proc) {
+ sendcount.AT(proc * dist::c_ndatatypes() + type) =
+ typebufs.AT(type).in_use ?
+ typebufs.AT(type).procbufs.AT(proc).sendbufsize :
+ 0;
+ }
+ assert (sendcount.AT(dist::rank() * dist::c_ndatatypes() + type) == 0);
+ }
+ vector<int> recvcount(dist::size() * dist::c_ndatatypes());
+ if (commstate_verbose) {
+ CCTK_INFO ("before MPI_Alltoall");
+ }
+ MPI_Alltoall (&sendcount.front(), dist::c_ndatatypes(), MPI_INT,
+ &recvcount.front(), dist::c_ndatatypes(), MPI_INT,
+ dist::comm());
+ if (commstate_verbose) {
+ CCTK_INFO ("after MPI_Alltoall");
+ }
+ for (unsigned type = 0; type < dist::c_ndatatypes(); ++ type) {
+ for (int proc = 0; proc < dist::size(); ++ proc) {
+ assert (recvcount.AT(proc * dist::c_ndatatypes() + type) ==
+ (typebufs.AT(type).in_use ?
+ int (typebufs.AT(type).procbufs.AT(proc).recvbufsize) :
+ 0));
+ }
+ assert (recvcount.AT(dist::rank() * dist::c_ndatatypes() + type) == 0);
+ }
+ }
+
// The sizes of the collective communication buffers are known so
// now allocate them.
// The receive operations are also posted here already (a clever
// MPI layer may take advantage of such early posting).
- num_posted_recvs = num_completed_recvs = 0;
- for (int proc1 = 0; proc1 < dist::size(); ++ proc1) {
- size_t const proc =
- interleave_communications
- ? (proc1 + dist::rank()) % dist::size()
- : proc1;
-
- for (size_t type = 0; type < typebufs.size(); type++) {
+ for (unsigned type = 0; type < dist::c_ndatatypes(); ++ type) {
+ if (typebufs.AT(type).in_use) {
- // skip unused datatype buffers
- if (not typebufs.AT(type).in_use) continue;
-
- int datatypesize = typebufs.AT(type).datatypesize;
- procbufdesc& procbuf = typebufs.AT(type).procbufs.AT(proc);
-
- assert (procbuf.sendbufbase.empty());
- assert (procbuf.recvbufbase.empty());
- procbuf.sendbufbase.resize (procbuf.sendbufsize*datatypesize);
- procbuf.recvbufbase.resize (procbuf.recvbufsize*datatypesize);
- // TODO: this may be a bit extreme, and it is only for
- // internal consistency checking
- if (poison_new_memory) {
- memset (&procbuf.sendbufbase.front(), poison_value,
- procbuf.sendbufsize*datatypesize);
- memset (&procbuf.recvbufbase.front(), poison_value,
- procbuf.recvbufsize*datatypesize);
- }
- procbuf.sendbuf = &procbuf.sendbufbase.front();
- procbuf.recvbuf = &procbuf.recvbufbase.front();
-
- if (procbuf.recvbufsize > 0) {
- static Timer timer ("commstate_sizes_irecv");
- timer.start ();
- int const tag =
- vary_tags
- ? (dist::rank() + dist::size() * (proc + dist::size() * type)) % 32768
- : type;
- if (commstate_verbose) {
- CCTK_VInfo (CCTK_THORNSTRING,
- "About to MPI_Irecv from %d", (int)proc);
+ for (int proc1 = 0; proc1 < dist::size(); ++ proc1) {
+ int const proc =
+ interleave_communications ?
+ (proc1 + dist::rank()) % dist::size() :
+ proc1;
+
+ int const datatypesize = typebufs.AT(type).datatypesize;
+ procbufdesc & procbuf = typebufs.AT(type).procbufs.AT(proc);
+
+ assert (procbuf.sendbufbase.empty());
+ assert (procbuf.recvbufbase.empty());
+ procbuf.sendbufbase.resize
+ (procbuf.sendbufsize * datatypesize * message_size_multiplier);
+ procbuf.recvbufbase.resize
+ (procbuf.recvbufsize * datatypesize * message_size_multiplier);
+ // TODO: this may be a bit extreme, and it is only for
+ // internal consistency checking
+ if (poison_new_memory) {
+ memset (&procbuf.sendbufbase.front(), poison_value,
+ procbuf.sendbufsize * datatypesize * message_size_multiplier);
+ memset (&procbuf.recvbufbase.front(), poison_value,
+ procbuf.recvbufsize * datatypesize * message_size_multiplier);
}
- MPI_Irecv (&procbuf.recvbufbase.front(), procbuf.recvbufsize,
- typebufs.AT(type).mpi_datatype, proc, tag,
- dist::comm(), &rrequests.AT(dist::size()*type + proc));
- if (commstate_verbose) {
- CCTK_INFO ("Finished MPI_Irecv");
+ procbuf.sendbuf = &procbuf.sendbufbase.front();
+ procbuf.recvbuf = &procbuf.recvbufbase.front();
+
+ if (procbuf.recvbufsize > 0) {
+ static Timer timer ("commstate::sizes_irecv");
+ timer.start ();
+ int const tag = type;
+ if (commstate_verbose) {
+ CCTK_VInfo (CCTK_THORNSTRING,
+ "About to MPI_Irecv from processor %d for type %s",
+ proc, dist::c_datatype_name(type));
+ }
+ MPI_Irecv (&procbuf.recvbufbase.front(),
+ procbuf.recvbufsize * message_size_multiplier,
+ typebufs.AT(type).mpi_datatype, proc, tag,
+ dist::comm(), &push_back(rrequests));
+ if (commstate_verbose) {
+ CCTK_INFO ("Finished MPI_Irecv");
+ }
+ assert (not procbuf.did_post_recv);
+ procbuf.did_post_recv = true;
+ timer.stop (procbuf.recvbufsize * datatypesize);
}
- timer.stop (procbuf.recvbufsize * datatypesize);
- num_posted_recvs++;
- }
+
+ } // for proc
+
}
- }
+ } // for type
- if (barrier_between_stages) {
- // Add a barrier, to try to ensure that all Irecvs are posted
- // before the first Isends are made
- // (Alternative: Use MPI_Alltoallv instead)
- MPI_Barrier (dist::comm());
+ if (check_communication_schedule) {
+ for (unsigned type = 0; type < dist::c_ndatatypes(); ++ type) {
+ if (typebufs.AT(type).in_use) {
+ for (int proc = 0; proc < dist::size(); ++ proc) {
+ procbufdesc const & procbuf = typebufs.AT(type).procbufs.AT(proc);
+ assert (procbuf.did_post_recv == (procbuf.recvbufsize > 0));
+ }
+ }
+ }
}
- // Now go and get the send buffers filled with data.
- // Once a buffer is full it will be posted right away
- // (see gdata::copy_into_sendbuffer() and
- // gdata::interpolate_into_sendbuffer()).
thestate = state_fill_send_buffers;
break;
+ }
+
- case state_fill_send_buffers:
+
+ case state_fill_send_buffers: {
if (combine_sends) {
- // Send the data. Do not send them sequentially, but try to
- // intersperse the communications
- for (int proc1 = 0; proc1 < dist::size(); ++ proc1) {
- int const proc =
- interleave_communications
- ? (proc1 + dist::size() - dist::rank()) % dist::size()
- : proc1;
-
- for (size_t type = 0; type < typebufs.size(); type++) {
- // skip unused datatype buffers
- if (not typebufs.AT(type).in_use) continue;
+ for (unsigned type = 0; type < dist::c_ndatatypes(); ++ type) {
+ if (typebufs.AT(type).in_use) {
- int const datatypesize = typebufs.AT(type).datatypesize;
- procbufdesc const & procbuf = typebufs.AT(type).procbufs.AT(proc);
-
- size_t const fillstate =
- procbuf.sendbuf - &procbuf.sendbufbase.front();
- assert (fillstate == procbuf.sendbufsize * datatypesize);
-
- if (procbuf.sendbufsize > 0) {
- int const tag =
- vary_tags
- ? (proc + dist::size() * (dist::rank() + dist::size() * type)) % 32768
- : type;
- if (use_mpi_send) {
- // use MPI_Send
- static Timer timer ("commstate_send");
- timer.start ();
- if (commstate_verbose) {
- CCTK_VInfo (CCTK_THORNSTRING,
- "About to MPI_Send to %d", (int)proc);
+ for (int proc1 = 0; proc1 < dist::size(); ++ proc1) {
+ int const proc =
+ interleave_communications
+ ? (proc1 + dist::size() - dist::rank()) % dist::size()
+ : proc1;
+
+ procbufdesc & procbuf = typebufs.AT(type).procbufs.AT(proc);
+ if (procbuf.sendbufsize > 0) {
+
+ int const datatypesize = typebufs.AT(type).datatypesize;
+
+ size_t const fillstate =
+ procbuf.sendbuf - &procbuf.sendbufbase.front();
+ assert (fillstate == procbuf.sendbufsize * datatypesize);
+
+ // Enlarge messages for performance testing
+ if (message_size_multiplier > 1) {
+ size_t const nbytes =
+ procbuf.sendbufsize * datatypesize *
+ (message_size_multiplier - 1);
+#warning "TODO"
+ // memset (procbuf.sendbuf, poison_value, nbytes);
+ memset (procbuf.sendbuf, 0, nbytes);
}
- MPI_Send (const_cast<char*>(&procbuf.sendbufbase.front()),
- procbuf.sendbufsize,
- typebufs.AT(type).mpi_datatype, proc, tag,
- dist::comm());
- if (commstate_verbose) {
- CCTK_INFO ("Finished MPI_Send");
+
+ int const tag = type;
+ if (use_mpi_send) {
+ // use MPI_Send
+ static Timer timer ("commstate::send");
+ timer.start ();
+ if (commstate_verbose) {
+ CCTK_VInfo (CCTK_THORNSTRING,
+ "About to MPI_Send to processor %d for type %s",
+ proc, dist::c_datatype_name(type));
+ }
+ MPI_Send (const_cast<char*>(&procbuf.sendbufbase.front()),
+ procbuf.sendbufsize * message_size_multiplier,
+ typebufs.AT(type).mpi_datatype, proc, tag,
+ dist::comm());
+ assert (not procbuf.did_post_send);
+ procbuf.did_post_send = true;
+ if (commstate_verbose) {
+ CCTK_INFO ("Finished MPI_Send");
+ }
+ timer.stop (procbuf.sendbufsize * datatypesize);
+ } else if (use_mpi_ssend) {
+ // use MPI_Ssend
+ static Timer timer ("commstate::ssend");
+ timer.start ();
+ if (commstate_verbose) {
+ CCTK_VInfo (CCTK_THORNSTRING,
+ "About to MPI_Ssend to processor %d for type %s",
+ proc, dist::c_datatype_name(type));
+ }
+ MPI_Ssend (const_cast<char*>(&procbuf.sendbufbase.front()),
+ procbuf.sendbufsize * message_size_multiplier,
+ typebufs.AT(type).mpi_datatype, proc, tag,
+ dist::comm());
+ assert (not procbuf.did_post_send);
+ procbuf.did_post_send = true;
+ if (commstate_verbose) {
+ CCTK_INFO ("Finished MPI_Ssend");
+ }
+ timer.stop (procbuf.sendbufsize * datatypesize);
+ } else {
+ // use MPI_Isend
+ static Timer timer ("commstate::isend");
+ timer.start ();
+ if (commstate_verbose) {
+ CCTK_VInfo (CCTK_THORNSTRING,
+ "About to MPI_Isend to processor %d for type %s",
+ proc, dist::c_datatype_name(type));
+ }
+ MPI_Isend (const_cast<char*>(&procbuf.sendbufbase.front()),
+ procbuf.sendbufsize * message_size_multiplier,
+ typebufs.AT(type).mpi_datatype, proc, tag,
+ dist::comm(), &push_back(srequests));
+ assert (not procbuf.did_post_send);
+ procbuf.did_post_send = true;
+ if (commstate_verbose) {
+ CCTK_INFO ("Finished MPI_Isend");
+ }
+ timer.stop (procbuf.sendbufsize * datatypesize);
}
- srequests.AT(dist::size()*type + proc) = MPI_REQUEST_NULL;
- timer.stop (procbuf.sendbufsize * datatypesize);
- } else if (use_mpi_ssend) {
- // use MPI_Ssend
- static Timer timer ("commstate_ssend");
+
+ }
+ } // for proc
+
+ }
+ } // for type
+ } // if combine_sends
+
+ if (check_communication_schedule) {
+ for (unsigned type = 0; type < dist::c_ndatatypes(); ++ type) {
+ if (typebufs.AT(type).in_use) {
+ for (int proc = 0; proc < dist::size(); ++ proc) {
+ procbufdesc const & procbuf = typebufs.AT(type).procbufs.AT(proc);
+ assert (procbuf.did_post_send == (procbuf.sendbufsize > 0));
+ }
+ }
+ }
+ }
+
+ thestate = state_do_some_work;
+ break;
+ }
+
+
+
+ case state_do_some_work: {
+ static Timer timer ("commstate::do_some_work::waitall");
+ timer.start ();
+ if (commstate_verbose) {
+ CCTK_INFO ("About to MPI_Waitall");
+ }
+ MPI_Waitall (rrequests.size(), &rrequests.front(), MPI_STATUSES_IGNORE);
+ if (commstate_verbose) {
+ CCTK_INFO ("Finished MPI_Waitall");
+ }
+ timer.stop (0);
+
+ thestate = state_empty_recv_buffers;
+ break;
+ }
+
+
+
+ case state_empty_recv_buffers: {
+ static Timer timer ("commstate::empty_recv_buffers::waitall");
+ timer.start ();
+ if (commstate_verbose) {
+ CCTK_INFO ("About to MPI_Waitall");
+ }
+ MPI_Waitall (srequests.size(), &srequests.front(), MPI_STATUSES_IGNORE);
+ if (commstate_verbose) {
+ CCTK_INFO ("Finished MPI_Waitall");
+ }
+ timer.stop (0);
+
+ // Transfer messages again for performance testing
+ for (int n = 1; n < message_count_multiplier; ++ n) {
+
+ srequests.clear();
+ srequests.reserve (dist::c_ndatatypes() * dist::size());
+ rrequests.clear();
+ rrequests.reserve (dist::c_ndatatypes() * dist::size());
+
+ // Irecv
+ for (unsigned type = 0; type < dist::c_ndatatypes(); ++ type) {
+ if (typebufs.AT(type).in_use) {
+
+ for (int proc1 = 0; proc1 < dist::size(); ++ proc1) {
+ int const proc =
+ interleave_communications ?
+ (proc1 + dist::rank()) % dist::size() :
+ proc1;
+
+ procbufdesc & procbuf = typebufs.AT(type).procbufs.AT(proc);
+
+ if (procbuf.recvbufsize > 0) {
+ static Timer timer ("commstate::message_count_multiplier::irecv");
timer.start ();
+ int const tag = type;
if (commstate_verbose) {
CCTK_VInfo (CCTK_THORNSTRING,
- "About to MPI_Ssend to %d", (int)proc);
+ "About to MPI_Irecv from processor %d for type %s",
+ proc, dist::c_datatype_name(type));
}
- MPI_Ssend (const_cast<char*>(&procbuf.sendbufbase.front()),
- procbuf.sendbufsize,
+ MPI_Irecv (&procbuf.recvbufbase.front(),
+ procbuf.recvbufsize * message_size_multiplier,
typebufs.AT(type).mpi_datatype, proc, tag,
- dist::comm());
+ dist::comm(), &push_back(rrequests));
if (commstate_verbose) {
- CCTK_INFO ("Finished MPI_Ssend");
+ CCTK_INFO ("Finished MPI_Irecv");
}
- srequests.AT(dist::size()*type + proc) = MPI_REQUEST_NULL;
- timer.stop (procbuf.sendbufsize * datatypesize);
- } else {
- // use MPI_Isend
- static Timer timer ("commstate_isend");
+ timer.stop (procbuf.recvbufsize * typebufs.AT(type).datatypesize);
+ }
+
+ } // for proc
+
+ }
+ } // for type
+
+ // Isend
+ for (unsigned type = 0; type < dist::c_ndatatypes(); ++ type) {
+ if (typebufs.AT(type).in_use) {
+
+ for (int proc1 = 0; proc1 < dist::size(); ++ proc1) {
+ int const proc =
+ interleave_communications
+ ? (proc1 + dist::size() - dist::rank()) % dist::size()
+ : proc1;
+
+ procbufdesc & procbuf = typebufs.AT(type).procbufs.AT(proc);
+
+ if (procbuf.sendbufsize > 0) {
+ int const tag = type;
+ assert (not use_mpi_send);
+ assert (not use_mpi_ssend);
+ static Timer timer ("commstate::message_count_multiplier::isend");
timer.start ();
if (commstate_verbose) {
- CCTK_VWarn (3, __LINE__, __FILE__, CCTK_THORNSTRING,
- "About to MPI_Isend to %d", (int)proc);
+ CCTK_VInfo (CCTK_THORNSTRING,
+ "About to MPI_Isend to processor %d for type %s",
+ proc, dist::c_datatype_name(type));
}
MPI_Isend (const_cast<char*>(&procbuf.sendbufbase.front()),
- procbuf.sendbufsize,
+ procbuf.sendbufsize * message_size_multiplier,
typebufs.AT(type).mpi_datatype, proc, tag,
- dist::comm(), &srequests.AT(dist::size()*type + proc));
+ dist::comm(), &push_back(srequests));
if (commstate_verbose) {
CCTK_INFO ("Finished MPI_Isend");
}
- timer.stop (procbuf.sendbufsize * datatypesize);
+ timer.stop (procbuf.sendbufsize * typebufs.AT(type).datatypesize);
}
- }
+
+ } // for proc
- } // for type
-
- } // for proc
- }
+ }
+ } // for type
+
+ // Waitall
+ {
+ static Timer timer ("commstate::message_count_multiplier::waitall(irecv)");
+ timer.start ();
+ if (commstate_verbose) {
+ CCTK_INFO ("About to MPI_Waitall");
+ }
+ MPI_Waitall (rrequests.size(), &rrequests.front(), MPI_STATUSES_IGNORE);
+ if (commstate_verbose) {
+ CCTK_INFO ("Finished MPI_Waitall");
+ }
+ timer.stop (0);
+ }
+
+ // Waitall
+ {
+ static Timer timer ("commstate::message_count_multiplier::waitall(isend)");
+ timer.start ();
+ if (commstate_verbose) {
+ CCTK_INFO ("About to MPI_Waitall");
+ }
+ MPI_Waitall (srequests.size(), &srequests.front(), MPI_STATUSES_IGNORE);
+ if (commstate_verbose) {
+ CCTK_INFO ("Finished MPI_Waitall");
+ }
+ timer.stop (0);
+ }
+
+ } // for n
- // Now fall through to the next state in which the recv buffers
- // are emptied as soon as data has arrived.
- thestate = state_do_some_work;
+ thestate = state_done;
break;
+ }
+
+
+
+ case state_done: {
+ assert (0); abort();
+ }
- case state_do_some_work:
- // Now fall through to the next state in which the recv buffers
- // are emptied as soon as data has arrived.
- thestate = state_empty_recv_buffers;
- case state_empty_recv_buffers:
- // Finish (at least one of) the posted communications
- if (not AllPostedCommunicationsFinished ()) {
- // No state change if there are still outstanding
- // communications; do another comm_state loop iteration.
- } else {
- // Everything is done so release the collective communication buffers.
- for (size_t type = 0; type < typebufs.size(); type++) {
- for (size_t proc = 0; proc < typebufs.AT(type).procbufs.size(); proc++) {
- typebufs.AT(type).procbufs.AT(proc).sendbufbase.clear();
- typebufs.AT(type).procbufs.AT(proc).recvbufbase.clear();
- }
- }
- thestate = state_done;
- }
- break;
default:
- assert (0 && "invalid state");
+ assert (0); abort();
}
+
+
+
total.stop (0);
}
-bool comm_state::done ()
+bool comm_state::done () const
{
return thestate == state_done;
}
@@ -264,116 +486,16 @@ bool comm_state::done ()
comm_state::~comm_state ()
{
DECLARE_CCTK_PARAMETERS;
-
+
assert (thestate == state_done or
thestate == state_get_buffer_sizes);
}
-// wait for completion of posted collective buffer sends/receives
-//
-// This function will wait for all of the posted receive operations to
-// finish.
-//
-// It returns true if all posted communications have been completed.
-bool comm_state::AllPostedCommunicationsFinished ()
-{
- DECLARE_CCTK_PARAMETERS;
-
- // check if all outstanding receives have been completed already
- if (num_posted_recvs == num_completed_recvs) {
- // finalize the outstanding sends in one go
- if (reduce_mpi_waitall) {
- size_t nreqs = 0;
- for (size_t i=0; i<srequests.size(); ++i) {
- if (srequests.AT(i) != MPI_REQUEST_NULL) {
- ++nreqs;
- }
- }
- vector<MPI_Request> reqs(nreqs);
- nreqs = 0;
- for (size_t i=0; i<srequests.size(); ++i) {
- if (srequests.AT(i) != MPI_REQUEST_NULL) {
- reqs.AT(nreqs) = srequests.AT(i);
- ++nreqs;
- }
- }
- assert (nreqs == reqs.size());
- static Timer timer ("commstate_waitall_final");
- timer.start ();
- if (commstate_verbose) {
- CCTK_INFO ("About to MPI_Waitall");
- }
- MPI_Waitall (reqs.size(), &reqs.front(), MPI_STATUSES_IGNORE);
- if (commstate_verbose) {
- CCTK_INFO ("Finished MPI_Waitall");
- }
- timer.stop (0);
- } else {
- static Timer timer ("commstate_waitall_final");
- timer.start ();
- if (commstate_verbose) {
- CCTK_INFO ("About to MPI_Waitall");
- }
- MPI_Waitall (srequests.size(), &srequests.front(), MPI_STATUSES_IGNORE);
- if (commstate_verbose) {
- CCTK_INFO ("Finished MPI_Waitall");
- }
- timer.stop (0);
- }
-
- return true;
- }
-
- // wait for completion of all posted receive operations
- if (reduce_mpi_waitall) {
- size_t nreqs = 0;
- for (size_t i=0; i<rrequests.size(); ++i) {
- if (rrequests.AT(i) != MPI_REQUEST_NULL) {
- ++nreqs;
- }
- }
- vector<MPI_Request> reqs(nreqs);
- nreqs = 0;
- for (size_t i=0; i<rrequests.size(); ++i) {
- if (rrequests.AT(i) != MPI_REQUEST_NULL) {
- reqs.AT(nreqs) = rrequests.AT(i);
- ++nreqs;
- }
- }
- assert (nreqs == reqs.size());
- static Timer timer ("commstate_waitall");
- timer.start ();
- if (commstate_verbose) {
- CCTK_INFO ("About to MPI_Waitall");
- }
- MPI_Waitall (reqs.size(), &reqs.front(), MPI_STATUSES_IGNORE);
- if (commstate_verbose) {
- CCTK_INFO ("Finished MPI_Waitall");
- }
- timer.stop (0);
- } else {
- static Timer timer ("commstate_waitall");
- timer.start ();
- if (commstate_verbose) {
- CCTK_INFO ("About to MPI_Waitall");
- }
- MPI_Waitall (rrequests.size(), &rrequests.front(), MPI_STATUSES_IGNORE);
- if (commstate_verbose) {
- CCTK_INFO ("Finished MPI_Waitall");
- }
- timer.stop (0);
- }
- num_completed_recvs = num_posted_recvs;
-
- return false;
-}
-
-
void
comm_state::
-reserve_send_space (unsigned int const type,
+reserve_send_space (unsigned const type,
int const proc,
int const npoints)
{
@@ -381,14 +503,19 @@ reserve_send_space (unsigned int const type,
assert (proc >= 0 and proc < dist::size());
assert (npoints >= 0);
typebufdesc & typebuf = typebufs.AT(type);
+ if (not typebuf.in_use) {
+ typebuf.procbufs.resize (dist::size());
+ typebuf.in_use = true;
+ }
procbufdesc & procbuf = typebuf.procbufs.AT(proc);
procbuf.sendbufsize += npoints;
- typebuf.in_use = true;
}
+
+
void
comm_state::
-reserve_recv_space (unsigned int const type,
+reserve_recv_space (unsigned const type,
int const proc,
int const npoints)
{
@@ -396,19 +523,25 @@ reserve_recv_space (unsigned int const type,
assert (proc >= 0 and proc < dist::size());
assert (npoints >= 0);
typebufdesc & typebuf = typebufs.AT(type);
+ if (not typebuf.in_use) {
+ typebuf.procbufs.resize (dist::size());
+ typebuf.in_use = true;
+ }
procbufdesc & procbuf = typebuf.procbufs.AT(proc);
procbuf.recvbufsize += npoints;
- typebuf.in_use = true;
}
+
+
void *
comm_state::
-send_buffer (unsigned int const type,
+send_buffer (unsigned const type,
int const proc,
int const npoints)
{
assert (type < dist::c_ndatatypes());
assert (proc >= 0 and proc < dist::size());
+ assert (npoints > 0);
typebufdesc const & typebuf = typebufs.AT(type);
procbufdesc const & procbuf = typebuf.procbufs.AT(proc);
@@ -419,14 +552,17 @@ send_buffer (unsigned int const type,
return procbuf.sendbuf;
}
+
+
void *
comm_state::
-recv_buffer (unsigned int const type,
+recv_buffer (unsigned const type,
int const proc,
int const npoints)
{
assert (type < dist::c_ndatatypes());
assert (proc >= 0 and proc < dist::size());
+ assert (npoints > 0);
typebufdesc const & typebuf = typebufs.AT(type);
procbufdesc const & procbuf = typebuf.procbufs.AT(proc);
@@ -437,9 +573,11 @@ recv_buffer (unsigned int const type,
return procbuf.recvbuf;
}
+
+
void
comm_state::
-commit_send_space (unsigned int const type,
+commit_send_space (unsigned const type,
int const proc,
int const npoints)
{
@@ -448,6 +586,7 @@ commit_send_space (unsigned int const type,
assert (type < dist::c_ndatatypes());
assert (proc >= 0 and proc < dist::size());
assert (npoints >= 0);
+ assert (npoints > 0);
typebufdesc & typebuf = typebufs.AT(type);
procbufdesc & procbuf = typebuf.procbufs.AT(proc);
procbuf.sendbuf += npoints * typebuf.datatypesize;
@@ -461,16 +600,30 @@ commit_send_space (unsigned int const type,
&procbuf.sendbufbase.front() +
procbuf.sendbufsize * typebuf.datatypesize)
{
+ if (message_size_multiplier > 1) {
+ size_t const nbytes =
+ procbuf.sendbufsize * typebuf.datatypesize *
+ (message_size_multiplier - 1);
+ memset (procbuf.sendbuf, poison_value, nbytes);
+ }
+
static Timer timer ("commit_send_space::isend");
timer.start ();
if (commstate_verbose) {
CCTK_VInfo (CCTK_THORNSTRING,
- "About to MPI_Isend to %d", (int)proc);
+ "About to MPI_Isend to processor %d for type %s",
+ proc, dist::c_datatype_name(type));
}
+ int const tag = type;
+ assert (procbuf.sendbufsize > 0);
+ assert (not use_mpi_send);
+ assert (not use_mpi_ssend);
MPI_Isend (&procbuf.sendbufbase.front(),
- procbuf.sendbufsize, typebuf.mpi_datatype,
- proc, type, dist::comm(),
- & srequests.AT(type * dist::size() + proc));
+ procbuf.sendbufsize * message_size_multiplier,
+ typebuf.mpi_datatype, proc, tag,
+ dist::comm(), &push_back(srequests));
+ assert (not procbuf.did_post_send);
+ procbuf.did_post_send = true;
if (commstate_verbose) {
CCTK_INFO ("Finished MPI_Isend");
}
@@ -479,15 +632,18 @@ commit_send_space (unsigned int const type,
}
}
+
+
void
comm_state::
-commit_recv_space (unsigned int const type,
+commit_recv_space (unsigned const type,
int const proc,
int const npoints)
{
assert (type < dist::c_ndatatypes());
assert (proc >= 0 and proc < dist::size());
assert (npoints >= 0);
+ assert (npoints > 0);
typebufdesc & typebuf = typebufs.AT(type);
procbufdesc & procbuf = typebuf.procbufs.AT(proc);
procbuf.recvbuf += npoints * typebuf.datatypesize;
diff --git a/Carpet/CarpetLib/src/commstate.hh b/Carpet/CarpetLib/src/commstate.hh
index c01f732da..e59ce7cb4 100644
--- a/Carpet/CarpetLib/src/commstate.hh
+++ b/Carpet/CarpetLib/src/commstate.hh
@@ -2,7 +2,7 @@
#define COMMSTATE_HH
#include <cstdlib>
-#include <queue>
+#include <iostream>
#include <vector>
#include <mpi.h>
@@ -29,115 +29,132 @@ enum astate {
state_done
};
+char const * tostring (astate const & thestate); // textual form of a state, returned as a C string
+ // Stream an astate by writing the text returned by tostring()
+inline ostream& operator<< (ostream& os, astate const & thestate)
+{
+ return os << tostring(thestate);
+}
+
+
+
struct comm_state {
astate thestate;
-
+
comm_state ();
void step ();
- bool done ();
+ bool done () const;
~comm_state ();
-
+
private:
// Forbid copying and passing by value
comm_state (comm_state const &);
comm_state& operator= (comm_state const &);
-
-public:
-
- //////////////////////////////////////////////////////////////////////////
- // the following members are used for collective communications
- //////////////////////////////////////////////////////////////////////////
-
-public:
- // structure describing a per-processor buffer for collective communications
+
+
+
+ // structure describing a per-processor buffer
struct procbufdesc {
- // the allocated communication buffers
+ // allocated communication buffers
vector<char> sendbufbase;
vector<char> recvbufbase;
-
- // the sizes of communication buffers (in elements of type <datatype>)
+
+ // sizes of the communication buffers (in elements of type <datatype>)
size_t sendbufsize;
size_t recvbufsize;
-
+
// pointers to step through the communication buffers
// (these get advanced by the routines which fill/empty the buffers)
char* sendbuf;
char* recvbuf;
-
+
+ bool did_post_send;
+ bool did_post_recv;
+
// constructor for an instance of this structure
- procbufdesc() : sendbufsize(0), recvbufsize(0),
- sendbuf(NULL), recvbuf(NULL)
+ procbufdesc() :
+ sendbufsize(0), recvbufsize(0),
+ sendbuf(NULL), recvbuf(NULL),
+ did_post_send(false), did_post_recv(false)
{
}
};
-
+
+
+
// structure describing a collective communications buffer for a C datatype
struct typebufdesc {
// flag indicating whether this buffer is in use
bool in_use;
-
+
+ // the MPI datatype
+ MPI_Datatype mpi_datatype;
+
// the size of this datatype (in bytes)
int datatypesize;
-
- // the corresponding MPI datatype
- MPI_Datatype mpi_datatype;
-
+
// per-processor buffers
- vector<procbufdesc> procbufs; // [dist::size()]
-
+ vector<procbufdesc> procbufs; // [dist::size()]
+
// constructor for an instance of this structure
- typebufdesc() : in_use(false), datatypesize(0),
- mpi_datatype(MPI_DATATYPE_NULL)
+ typebufdesc() :
+ in_use(false),
+ mpi_datatype(MPI_DATATYPE_NULL), datatypesize(0)
{
}
};
-
- // list of datatype buffers
- vector<typebufdesc> typebufs; // [dist::c_ndatatypes()]
-
+
+
+
+ // datatype buffers
+ vector<typebufdesc> typebufs; // [type]
+
+
+
+ // outstanding requests for posted send/recv communications
+ vector<MPI_Request> srequests;
+ vector<MPI_Request> rrequests;
+
+ static inline
+ MPI_Request & push_back (vector<MPI_Request> & reqs)
+ {
+ reqs.push_back (MPI_REQUEST_NULL);
+ return reqs.back();
+ }
+
+
+
+public:
+
void
- reserve_send_space (unsigned int type,
+ reserve_send_space (unsigned type,
int proc,
int npoints);
-
+
void
- reserve_recv_space (unsigned int type,
+ reserve_recv_space (unsigned type,
int proc,
int npoints);
-
+
void *
- send_buffer (unsigned int type,
+ send_buffer (unsigned type,
int proc,
int npoints);
-
+
void *
- recv_buffer (unsigned int type,
+ recv_buffer (unsigned type,
int proc,
int npoints);
-
+
void
- commit_send_space (unsigned int type,
+ commit_send_space (unsigned type,
int proc,
int npoints);
-
+
void
- commit_recv_space (unsigned int type,
+ commit_recv_space (unsigned type,
int proc,
int npoints);
-
-private:
- // lists of outstanding requests for posted send/recv communications
- vector<MPI_Request> srequests; // [dist::size() * dist::c_ndatatypes()]
- vector<MPI_Request> rrequests; // [dist::size() * dist::c_ndatatypes()]
-
- // number of posted and already completed receive communications
- int num_posted_recvs;
- int num_completed_recvs;
-
- // wait for completion of posted collective buffer sends/receives
- bool AllPostedCommunicationsFinished();
};
-
-
#endif // COMMSTATE_HH
diff --git a/Carpet/CarpetLib/src/copy_3d.cc b/Carpet/CarpetLib/src/copy_3d.cc
index 36a48df40..06adb0276 100644
--- a/Carpet/CarpetLib/src/copy_3d.cc
+++ b/Carpet/CarpetLib/src/copy_3d.cc
@@ -7,7 +7,7 @@
#include <cctk.h>
#include <cctk_Parameters.h>
-#include "operator_prototypes.hh"
+#include "operator_prototypes_3d.hh"
#include "typeprops.hh"
using namespace std;
diff --git a/Carpet/CarpetLib/src/copy_4d.cc b/Carpet/CarpetLib/src/copy_4d.cc
new file mode 100644
index 000000000..c5ba95371
--- /dev/null
+++ b/Carpet/CarpetLib/src/copy_4d.cc
@@ -0,0 +1,142 @@
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <cstdlib>
+#include <iostream>
+
+#include <cctk.h>
+#include <cctk_Parameters.h>
+
+#include "operator_prototypes_4d.hh"
+#include "typeprops.hh"
+
+using namespace std;
+
+
+
+namespace CarpetLib {
+
+ // Index helpers for copy_4d: shift a region-relative point (i,j,k,l) by the
+ // source/destination offsets and hand it, with the array extents, to index4.
+#define SRCIND4(i,j,k,l) \
+ index4 (srcioff + (i), srcjoff + (j), srckoff + (k), srcloff + (l), \
+ srciext, srcjext, srckext, srclext)
+#define DSTIND4(i,j,k,l) \
+ index4 (dstioff + (i), dstjoff + (j), dstkoff + (k), dstloff + (l), \
+ dstiext, dstjext, dstkext, dstlext)
+ // Copy the points of regbbox from src (described by srcbbox/srcext) to dst
+ // (described by dstbbox/dstext). The three boxes must share one stride and
+ // regbbox must be non-empty and lie inside both srcbbox and dstbbox.
+ template <typename T>
+ void
+ copy_4d (T const * restrict const src,
+ ivect4 const & restrict srcext,
+ T * restrict const dst,
+ ivect4 const & restrict dstext,
+ ibbox4 const & restrict srcbbox,
+ ibbox4 const & restrict dstbbox,
+ ibbox4 const & restrict regbbox)
+ { // every failed check below is reported via CCTK_WARN at level 0 (NOTE(review): presumably fatal -- confirm)
+ if (any (srcbbox.stride() != regbbox.stride() or
+ dstbbox.stride() != regbbox.stride()))
+ {
+ cout << "copy_4d.cc:" << endl
+ << "srcbbox=" << srcbbox << endl
+ << "dstbbox=" << dstbbox << endl
+ << "regbbox=" << regbbox << endl;
+ CCTK_WARN (0, "Internal error: strides disagree");
+ }
+ // NOTE(review): seems implied by the check above (src == reg and dst == reg) -- confirm
+ if (any (srcbbox.stride() != dstbbox.stride())) {
+ CCTK_WARN (0, "Internal error: strides disagree");
+ }
+
+ // This could be handled, but is likely to point to an error
+ // elsewhere
+ if (regbbox.empty()) {
+ CCTK_WARN (0, "Internal error: region extent is empty");
+ }
+
+ if (not regbbox.is_contained_in(srcbbox) or
+ not regbbox.is_contained_in(dstbbox))
+ {
+ CCTK_WARN (0, "Internal error: region extent is not contained in array extent");
+ }
+ // The array extents must equal the box shape divided by the stride
+ if (any (srcext != srcbbox.shape() / srcbbox.stride() or
+ dstext != dstbbox.shape() / dstbbox.stride()))
+ {
+ CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes");
+ }
+
+
+ // Region size, and where the region starts inside each array (all divided by the stride)
+ ivect4 const regext = regbbox.shape() / regbbox.stride();
+ assert (all ((regbbox.lower() - srcbbox.lower()) % srcbbox.stride() == 0));
+ ivect4 const srcoff = (regbbox.lower() - srcbbox.lower()) / srcbbox.stride();
+ assert (all ((regbbox.lower() - dstbbox.lower()) % dstbbox.stride() == 0));
+ ivect4 const dstoff = (regbbox.lower() - dstbbox.lower()) / dstbbox.stride();
+
+
+ // Scalar copies of the vector components; the macros and loops below use these names
+ ptrdiff_t const srciext = srcext[0];
+ ptrdiff_t const srcjext = srcext[1];
+ ptrdiff_t const srckext = srcext[2];
+ ptrdiff_t const srclext = srcext[3];
+
+ ptrdiff_t const dstiext = dstext[0];
+ ptrdiff_t const dstjext = dstext[1];
+ ptrdiff_t const dstkext = dstext[2];
+ ptrdiff_t const dstlext = dstext[3];
+
+ ptrdiff_t const regiext = regext[0];
+ ptrdiff_t const regjext = regext[1];
+ ptrdiff_t const regkext = regext[2];
+ ptrdiff_t const reglext = regext[3];
+
+ ptrdiff_t const srcioff = srcoff[0];
+ ptrdiff_t const srcjoff = srcoff[1];
+ ptrdiff_t const srckoff = srcoff[2];
+ ptrdiff_t const srcloff = srcoff[3];
+
+ ptrdiff_t const dstioff = dstoff[0];
+ ptrdiff_t const dstjoff = dstoff[1];
+ ptrdiff_t const dstkoff = dstoff[2];
+ ptrdiff_t const dstloff = dstoff[3];
+
+
+ // The pragma has no collapse clause, so only the outermost (l) loop is shared among threads
+ // Loop over region
+#pragma omp parallel for
+ for (int l=0; l<reglext; ++l) {
+ for (int k=0; k<regkext; ++k) {
+ for (int j=0; j<regjext; ++j) {
+ for (int i=0; i<regiext; ++i) {
+
+ dst [DSTIND4(i, j, k, l)] = src [SRCIND4(i, j, k, l)];
+
+ }
+ }
+ }
+ }
+
+ }
+
+ // Explicit instantiation: "instantiate" is included while INSTANTIATE(T) is defined
+ // (presumably it invokes the macro once per supported type -- confirm).
+#define INSTANTIATE(T) \
+ template \
+ void \
+ copy_4d (T const * restrict const src, \
+ ivect4 const & restrict srcext, \
+ T * restrict const dst, \
+ ivect4 const & restrict dstext, \
+ ibbox4 const & restrict srcbbox, \
+ ibbox4 const & restrict dstbbox, \
+ ibbox4 const & restrict regbbox);
+#include "instantiate"
+#undef INSTANTIATE
+
+
+
+} // namespace CarpetLib
diff --git a/Carpet/CarpetLib/src/data.cc b/Carpet/CarpetLib/src/data.cc
index d19df3788..da32ee4ea 100644
--- a/Carpet/CarpetLib/src/data.cc
+++ b/Carpet/CarpetLib/src/data.cc
@@ -25,7 +25,8 @@
#include "vect.hh"
#include "data.hh"
-#include "operator_prototypes.hh"
+#include "operator_prototypes_3d.hh"
+#include "operator_prototypes_4d.hh"
using namespace std;
@@ -100,6 +101,73 @@ call_operator (void
#endif
}
+template <typename T> // 4D overload: calls the_operator on regbbox, or (with OpenMP) once per thread on that thread's part of regbbox
+static
+void
+call_operator (void
+ (* the_operator) (T const * restrict const src,
+ ivect4 const & restrict srcext,
+ T * restrict const dst,
+ ivect4 const & restrict dstext,
+ ibbox4 const & restrict srcbbox,
+ ibbox4 const & restrict dstbbox,
+ ibbox4 const & restrict regbbox),
+ T const * restrict const src,
+ ivect4 const & restrict srcext,
+ T * restrict const dst,
+ ivect4 const & restrict dstext,
+ ibbox4 const & restrict srcbbox,
+ ibbox4 const & restrict dstbbox,
+ ibbox4 const & restrict regbbox)
+{
+#ifndef _OPENMP
+ (* the_operator) (src, srcext, dst, dstext, srcbbox, dstbbox, regbbox); // without OpenMP: one call over the whole region
+#else
+# if ! defined (NDEBUG) && ! defined (CARPET_OPTIMISE)
+ ibset allregbboxes; // debug builds only: collects the per-thread regions for the check after the parallel section
+# endif
+#pragma omp parallel
+ {
+ int const num_threads = omp_get_num_threads();
+ int const thread_num = omp_get_thread_num();
+ // Parallelise in z direction
+ // TODO: parallelise along longest extent
+ int const dir = 2;
+ int const stride = regbbox.stride()[dir];
+ int const first_point = regbbox.lower()[dir];
+ int const last_point = regbbox.upper()[dir] + stride; // one stride past the (inclusive) upper bound
+ int const num_points = last_point - first_point; // extent along dir in index units; asserted to be a multiple of stride
+ assert (num_points >= 0);
+ assert (num_points % stride == 0);
+ int const my_num_points =
+ (num_points / stride + num_threads - 1) / num_threads * stride; // per-thread share: point count divided by thread count, rounded up, times stride
+ int const my_first_point =
+ min (last_point, first_point + thread_num * my_num_points); // clamped to last_point for threads beyond the end of the range
+ int const my_last_point =
+ max (my_first_point, min (last_point, my_first_point + my_num_points)); // never below my_first_point, never past last_point
+ ibbox4 const myregbbox
+ (regbbox.lower().replace (dir, my_first_point),
+ regbbox.upper().replace (dir, my_last_point - stride), // back to an inclusive upper bound
+ regbbox.stride());
+ if (not myregbbox.empty()) { // threads whose share is empty skip the call
+ (* the_operator) (src, srcext, dst, dstext, srcbbox, dstbbox, myregbbox);
+# if ! defined (NDEBUG) && ! defined (CARPET_OPTIMISE)
+#pragma omp critical
+ allregbboxes += myregbbox; // guarded by the critical pragma above
+# endif
+ }
+ }
+# if ! defined (NDEBUG) && ! defined (CARPET_OPTIMISE)
+ if (not (allregbboxes == ibset (regbbox))) { // print diagnostics; the assert below then re-checks the same comparison
+ allregbboxes.normalize();
+ cout << "allregbboxes=" << allregbboxes << endl
+ << "regbbox=" << regbbox << endl;
+ }
+ assert (allregbboxes == ibset (regbbox));
+# endif
+#endif
+}
+
// Fortran wrappers
@@ -200,16 +268,13 @@ prolongate_3d_weno (CCTK_REAL8 const * restrict const src,
-static const CCTK_REAL eps = 1.0e-10;
-
// Constructors
template<typename T>
data<T>::data (const int varindex_,
const centering cent_, const operator_type transport_operator_,
const int vectorlength_, const int vectorindex_,
- data* const vectorleader_,
- const int tag_)
- : gdata(varindex_, cent_, transport_operator_, tag_),
+ data* const vectorleader_)
+ : gdata(varindex_, cent_, transport_operator_),
_memory(NULL),
vectorlength(vectorlength_), vectorindex(vectorindex_),
vectorleader(vectorleader_)
@@ -249,11 +314,10 @@ data<T>::~data ()
// Pseudo constructor: returns a newly allocated data<T> for the given
// variable index, centering, and transport operator.  The object is
// created as a single-element vector (vectorlength 1, vectorindex 0)
// without a vector leader (NULL).
template<typename T>
data<T>* data<T>::make_typed (const int varindex_,
const centering cent_,
- const operator_type transport_operator_,
- const int tag_)
+ const operator_type transport_operator_)
const
{
- return new data(varindex_, cent_, transport_operator_, 1, 0, NULL, tag_);
+ return new data(varindex_, cent_, transport_operator_, 1, 0, NULL); // allocated with new; presumably the caller owns and deletes it — TODO confirm
}
@@ -335,6 +399,7 @@ copy_from_innerloop (gdata const * const gsrc,
assert (proc() == src->proc());
assert (dist::rank() == proc());
+#if CARPET_DIM == 3
copy_3d (static_cast <T const *> (src->storage()),
src->shape(),
static_cast <T *> (this->storage()),
@@ -342,6 +407,17 @@ copy_from_innerloop (gdata const * const gsrc,
src->extent(),
this->extent(),
box);
+#elif CARPET_DIM == 4
+ copy_4d (static_cast <T const *> (src->storage()),
+ src->shape(),
+ static_cast <T *> (this->storage()),
+ this->shape(),
+ src->extent(),
+ this->extent(),
+ box);
+#else
+# error "Value for CARPET_DIM not supported"
+#endif
}
@@ -382,7 +458,8 @@ transfer_time (vector <gdata const *> const & gsrcs,
{
// Use this timelevel, or interpolate in time if set to -1
int timelevel0, ntimelevels;
- find_source_timelevel (times, time, order_time, timelevel0, ntimelevels);
+ find_source_timelevel
+ (times, time, order_time, transport_operator, timelevel0, ntimelevels);
if (ntimelevels > 1) {
// Time interpolation is necessary
@@ -437,7 +514,8 @@ transfer_p_r (data const * const src,
copy_from_innerloop (src, box);
} else if (all (src->extent().stride() > this->extent().stride())) {
// Prolongate
- assert (transport_operator != op_sync);
+ assert (transport_operator != op_sync and
+ transport_operator != op_restrict);
transfer_p_vc_cc (src, box, order_space);
} else if (all (src->extent().stride() < this->extent().stride())) {
// Restrict
@@ -457,6 +535,9 @@ transfer_p_vc_cc (data const * const src,
ibbox const & box,
int const order_space)
{
+ transfer_prolongate (src, box, order_space);
+
+#if 0
if (cent == vertex_centered) {
// Vertex centred
@@ -501,6 +582,8 @@ transfer_p_vc_cc (data const * const src,
newdstbox .contracted_for (tmpsrcbox) .expand (offsetlo, offsethi);
// Allocate temporary storage
+ // TODO: This may not be necessary if the source is already a
+ // temporary
data * const newsrc =
new data (src->varindex, vertex_centered, src->transport_operator);
newsrc->allocate (newsrcbox, src->proc());
@@ -538,6 +621,7 @@ transfer_p_vc_cc (data const * const src,
} else {
assert (0);
}
+#endif
}
template <>
@@ -562,25 +646,139 @@ transfer_prolongate (data const * const src,
static Timer total ("prolongate");
total.start ();
+#if CARPET_DIM == 3
+
switch (transport_operator) {
case op_copy:
case op_Lagrange: {
static Timer timer ("prolongate_Lagrange");
timer.start ();
+ // enum centering { vertex_centered, cell_centered };
+ switch (cent) {
+ case vertex_centered:
+ switch (order_space) {
+ case 1:
+ call_operator<T> (& prolongate_3d_o1_rf2,
+ static_cast <T const *> (src->storage()),
+ src->shape(),
+ static_cast <T *> (this->storage()),
+ this->shape(),
+ src->extent(),
+ this->extent(),
+ box);
+ break;
+ case 3:
+ call_operator<T> (& prolongate_3d_o3_rf2,
+ static_cast <T const *> (src->storage()),
+ src->shape(),
+ static_cast <T *> (this->storage()),
+ this->shape(),
+ src->extent(),
+ this->extent(),
+ box);
+ break;
+ case 5:
+ call_operator<T> (& prolongate_3d_o5_rf2,
+ static_cast <T const *> (src->storage()),
+ src->shape(),
+ static_cast <T *> (this->storage()),
+ this->shape(),
+ src->extent(),
+ this->extent(),
+ box);
+ break;
+ case 7:
+ call_operator<T> (& prolongate_3d_o7_rf2,
+ static_cast <T const *> (src->storage()),
+ src->shape(),
+ static_cast <T *> (this->storage()),
+ this->shape(),
+ src->extent(),
+ this->extent(),
+ box);
+ break;
+ case 9:
+ call_operator<T> (& prolongate_3d_o9_rf2,
+ static_cast <T const *> (src->storage()),
+ src->shape(),
+ static_cast <T *> (this->storage()),
+ this->shape(),
+ src->extent(),
+ this->extent(),
+ box);
+ break;
+ case 11:
+ call_operator<T> (& prolongate_3d_o11_rf2,
+ static_cast <T const *> (src->storage()),
+ src->shape(),
+ static_cast <T *> (this->storage()),
+ this->shape(),
+ src->extent(),
+ this->extent(),
+ box);
+ break;
+ default:
+ CCTK_WARN (CCTK_WARN_ABORT,
+ "There is no vertex-centred stencil for op=\"LAGRANGE\" with order_space not in {1, 3, 5, 7, 9, 11}");
+ break;
+ }
+ break;
+ case cell_centered:
+ switch (order_space) {
+ case 0:
+ call_operator<T> (& prolongate_3d_cc_o0_rf2,
+ static_cast <T const *> (src->storage()),
+ src->shape(),
+ static_cast <T *> (this->storage()),
+ this->shape(),
+ src->extent(),
+ this->extent(),
+ box);
+ break;
+ case 1:
+ call_operator<T> (& prolongate_3d_cc_o1_rf2,
+ static_cast <T const *> (src->storage()),
+ src->shape(),
+ static_cast <T *> (this->storage()),
+ this->shape(),
+ src->extent(),
+ this->extent(),
+ box);
+ break;
+ case 2:
+ call_operator<T> (& prolongate_3d_cc_o2_rf2,
+ static_cast <T const *> (src->storage()),
+ src->shape(),
+ static_cast <T *> (this->storage()),
+ this->shape(),
+ src->extent(),
+ this->extent(),
+ box);
+ break;
+ default:
+ CCTK_WARN (CCTK_WARN_ABORT,
+ "There is no cell-centred stencil for op=\"LAGRANGE\" with order_space not in {0, 1, 2}");
+ break;
+ }
+ break;
+ default:
+ assert (0);
+ }
+ timer.stop (0);
+ break;
+ }
+
+ case op_ENO: {
+ static Timer timer ("prolongate_ENO");
+ timer.start ();
switch (order_space) {
case 1:
- call_operator<T> (& prolongate_3d_o1_rf2,
- static_cast <T const *> (src->storage()),
- src->shape(),
- static_cast <T *> (this->storage()),
- this->shape(),
- src->extent(),
- this->extent(),
- box);
+ CCTK_WARN (CCTK_WARN_ABORT,
+ "There is no stencil for op=\"ENO\" with order_space=1");
break;
case 3:
- call_operator<T> (& prolongate_3d_o3_rf2,
+ call_operator<T> (& prolongate_3d_eno,
static_cast <T const *> (src->storage()),
src->shape(),
static_cast <T *> (this->storage()),
@@ -590,37 +788,10 @@ transfer_prolongate (data const * const src,
box);
break;
case 5:
- call_operator<T> (& prolongate_3d_o5_rf2,
- static_cast <T const *> (src->storage()),
- src->shape(),
- static_cast <T *> (this->storage()),
- this->shape(),
- src->extent(),
- this->extent(),
- box);
- break;
- case 7:
- call_operator<T> (& prolongate_3d_o7_rf2,
- static_cast <T const *> (src->storage()),
- src->shape(),
- static_cast <T *> (this->storage()),
- this->shape(),
- src->extent(),
- this->extent(),
- box);
- break;
- case 9:
- call_operator<T> (& prolongate_3d_o9_rf2,
- static_cast <T const *> (src->storage()),
- src->shape(),
- static_cast <T *> (this->storage()),
- this->shape(),
- src->extent(),
- this->extent(),
- box);
- break;
- case 11:
- call_operator<T> (& prolongate_3d_o11_rf2,
+ // There is only one parameter for the prolongation order, but
+ // Whisky may want 5th order for spacetime and 3rd order for
+ // hydro, so we cheat here.
+ call_operator<T> (& prolongate_3d_eno,
static_cast <T const *> (src->storage()),
src->shape(),
static_cast <T *> (this->storage()),
@@ -631,22 +802,26 @@ transfer_prolongate (data const * const src,
break;
default:
CCTK_WARN (CCTK_WARN_ABORT,
- "There is no stencil for op=\"LAGRANGE\" with order_space not in {1, 3, 5, 7, 9, 11}");
+ "There is no stencil for op=\"ENO\" with order_space!=3");
break;
}
timer.stop (0);
break;
}
- case op_ENO: {
- static Timer timer ("prolongate_ENO");
+ case op_WENO: {
+ static Timer timer ("prolongate_WENO");
timer.start ();
switch (order_space) {
case 1:
CCTK_WARN (CCTK_WARN_ABORT,
- "There is no stencil for op=\"ENO\" with order_space=1");
+ "There is no stencil for op=\"WENO\" with order_space=1");
break;
case 3:
+ CCTK_WARN (CCTK_WARN_ABORT,
+ "There is no stencil for op=\"WENO\" with order_space=3");
+ break;
+ case 5:
call_operator<T> (& prolongate_3d_eno,
static_cast <T const *> (src->storage()),
src->shape(),
@@ -656,40 +831,29 @@ transfer_prolongate (data const * const src,
this->extent(),
box);
break;
- case 5:
- // there is only a parameter for the prolongation order, but Whisky may want 5th order for spacetime and 3rd order for hydro; so this is a trick.
- call_operator<T> (& prolongate_3d_eno,
- static_cast <T const *> (src->storage()),
- src->shape(),
- static_cast <T *> (this->storage()),
- this->shape(),
- src->extent(),
- this->extent(),
- box);
- break;
default:
CCTK_WARN (CCTK_WARN_ABORT,
- "There is no stencil for op=\"ENO\" with order_space!=3");
+ "There is no stencil for op=\"WENO\" with order_space!=5");
break;
}
timer.stop (0);
break;
}
- case op_WENO: {
- static Timer timer ("prolongate_WENO");
+ case op_Lagrange_monotone: {
+ static Timer timer ("prolongate_Lagrange_monotone");
timer.start ();
switch (order_space) {
case 1:
CCTK_WARN (CCTK_WARN_ABORT,
- "There is no stencil for op=\"WENO\" with order_space=1");
+ "There is no stencil for op=\"Lagrange_monotone\" with order_space=1");
break;
case 3:
CCTK_WARN (CCTK_WARN_ABORT,
- "There is no stencil for op=\"WENO\" with order_space=3");
+ "There is no stencil for op=\"Lagrange_monotone\" with order_space=3");
break;
case 5:
- call_operator<T> (& prolongate_3d_eno,
+ call_operator<T> (& prolongate_3d_o5_monotone_rf2,
static_cast <T const *> (src->storage()),
src->shape(),
static_cast <T *> (this->storage()),
@@ -700,7 +864,7 @@ transfer_prolongate (data const * const src,
break;
default:
CCTK_WARN (CCTK_WARN_ABORT,
- "There is no stencil for op=\"WENO\" with order_space!=5");
+ "There is no stencil for op=\"Lagrange_monotone\" with order_space!=5");
break;
}
timer.stop (0);
@@ -711,6 +875,48 @@ transfer_prolongate (data const * const src,
assert (0);
} // switch (transport_operator)
+#elif CARPET_DIM == 4
+
+ switch (transport_operator) {
+
+ case op_copy:
+ case op_Lagrange: {
+ static Timer timer ("prolongate_Lagrange");
+ timer.start ();
+ // enum centering { vertex_centered, cell_centered };
+ switch (cent) {
+ case vertex_centered:
+ switch (order_space) {
+ case 1:
+ call_operator<T> (& prolongate_4d_o1_rf2,
+ static_cast <T const *> (src->storage()),
+ src->shape(),
+ static_cast <T *> (this->storage()),
+ this->shape(),
+ src->extent(),
+ this->extent(),
+ box);
+ break;
+ default:
+ CCTK_WARN (CCTK_WARN_ABORT,
+ "There is no vertex-centred stencil for op=\"LAGRANGE\" with order_space not in {1}");
+ break;
+ }
+ break;
+ default:
+ assert (0);
+ }
+ timer.stop (0);
+ break;
+ }
+ default:
+ assert (0);
+ } // switch (transport_operator)
+
+#else
+# error "Value for CARPET_DIM not supported"
+#endif
+
total.stop (0);
}
@@ -736,12 +942,15 @@ transfer_restrict (data const * const src,
static Timer total ("restrict");
total.start ();
+#if CARPET_DIM == 3
+
switch (transport_operator) {
case op_copy:
case op_Lagrange:
case op_ENO:
case op_WENO:
+ case op_Lagrange_monotone:
// enum centering { vertex_centered, cell_centered };
switch (cent) {
case vertex_centered:
@@ -771,6 +980,36 @@ transfer_restrict (data const * const src,
assert (0);
}
+#elif CARPET_DIM == 4
+
+ switch (transport_operator) {
+
+ case op_copy:
+ case op_Lagrange:
+ // enum centering { vertex_centered, cell_centered };
+ switch (cent) {
+ case vertex_centered:
+ restrict_4d_rf2 (static_cast <T const *> (src->storage()),
+ src->shape(),
+ static_cast <T *> (this->storage()),
+ this->shape(),
+ src->extent(),
+ this->extent(),
+ box);
+ break;
+ default:
+ assert (0);
+ }
+ break;
+
+ default:
+ assert (0);
+ }
+
+#else
+# error "Value for CARPET_DIM not supported"
+#endif
+
total.stop (0);
}
@@ -797,7 +1036,9 @@ time_interpolate (vector <data *> const & srcs,
{
static Timer total ("time_interpolate");
total.start ();
-
+
+#if CARPET_DIM == 3
+
switch (transport_operator) {
case op_copy:
@@ -886,8 +1127,10 @@ time_interpolate (vector <data *> const & srcs,
}
case op_ENO:
- case op_WENO: {
- // ENO and WENO timer interpolation is the same for order_time <= 2
+ case op_WENO:
+ case op_Lagrange_monotone: {
+ // ENO, WENO, and Lagrange_monotone time interpolation is the same
+ // for order_time <= 2
static Timer timer ("time_interpolate_ENO");
timer.start ();
switch (order_time) {
@@ -935,6 +1178,14 @@ time_interpolate (vector <data *> const & srcs,
assert (0);
} // switch (transport_operator)
+#elif CARPET_DIM == 4
+
+ assert (0);
+
+#else
+# error "Value for CARPET_DIM not supported"
+#endif
+
total.stop (0);
}
@@ -982,22 +1233,9 @@ output (ostream & os)
return os;
}
-template<typename T>
-ostream &
-operator << (ostream & os, data<T> const & d)
-{
- char const * space = "";
- for (int i = 0; i < d.vectorlength; i++) {
- os << space << d[i];
- space = " ";
- }
- return os;
-}
-
#define INSTANTIATE(T) \
-template class data<T>; \
-template ostream & operator << <T> (ostream & os, data<T> const & d);
+template class data<T>;
#include "instantiate"
#undef INSTANTIATE
diff --git a/Carpet/CarpetLib/src/data.hh b/Carpet/CarpetLib/src/data.hh
index 6b7a774e1..d321e46c1 100644
--- a/Carpet/CarpetLib/src/data.hh
+++ b/Carpet/CarpetLib/src/data.hh
@@ -20,9 +20,6 @@ using namespace std;
template<typename T>
class data;
-template<typename T>
-ostream & operator << ( ostream & os, const data<T> & d );
-
// A distributed multi-dimensional array
template<typename T>
class data: public gdata
@@ -35,7 +32,7 @@ class data: public gdata
int vectorlength; // number of vector elements
int vectorindex; // index of this vector element
data* vectorleader; // if index!=0: first vector element
-
+
private:
// Forbid copying and passing by value
data (data const &);
@@ -48,23 +45,21 @@ public:
const centering cent = error_centered,
const operator_type transport_operator = op_error,
const int vectorlength = 1, const int vectorindex = 0,
- data* const vectorleader = NULL,
- const int tag = -1);
+ data* const vectorleader = NULL);
data (const int varindex,
const centering cent, const operator_type transport_operator,
const int vectorlength, const int vectorindex,
data* const vectorleader,
const ibbox& extent, const int proc);
-
+
// Destructors
virtual ~data ();
-
+
// Pseudo constructors
virtual data* make_typed (const int varindex,
const centering cent,
- const operator_type transport_operator,
- const int tag) const;
-
+ const operator_type transport_operator) const;
+
// Storage management
virtual void allocate (const ibbox& extent, const int proc,
void* const memptr = NULL, size_t const memsize = 0);
@@ -156,19 +151,10 @@ private:
public:
// Memory usage
- size_t memory () const;
+ virtual size_t memory () const CCTK_ATTRIBUTE_PURE;
// Output
- ostream & output (ostream& os) const;
-
- friend ostream & operator<< <T> (ostream & os, data<T> const & d);
+ virtual ostream & output (ostream& os) const;
};
-
-// Memory usage
-template<typename T>
-inline size_t memoryof (data<T> const & d)
-{
- return d.memory();
-}
#endif // DATA_HH
diff --git a/Carpet/CarpetLib/src/defs.cc b/Carpet/CarpetLib/src/defs.cc
index 357061ee2..ab3b021d8 100644
--- a/Carpet/CarpetLib/src/defs.cc
+++ b/Carpet/CarpetLib/src/defs.cc
@@ -2,11 +2,13 @@
#include <cctype>
#include <iostream>
#include <list>
+#include <map>
#include <set>
#include <stack>
#include <vector>
#include "cctk.h"
+#include "cctk_Parameters.h"
#include "bbox.hh"
#include "defs.hh"
@@ -19,6 +21,8 @@ using namespace std;
template <typename T>
+inline T ipow_helper (T x, unsigned int y) CCTK_ATTRIBUTE_CONST;
+template <typename T>
inline T ipow_helper (T x, unsigned int y)
{
T z = y&1 ? x : 1;
@@ -31,7 +35,7 @@ inline T ipow_helper (T x, unsigned int y)
}
template<class T>
-T ipow (T x, int y)
+T ipow (T const x, int const y)
{
if (y < 0)
return T(1) / ipow_helper(x, -y);
@@ -41,8 +45,23 @@ T ipow (T x, int y)
+// Access to CarpetLib parameters: get_poison_value() returns the value of the poison_value parameter
+CCTK_INT get_poison_value()
+{
+ DECLARE_CCTK_PARAMETERS; // presumably brings the thorn's parameters (including poison_value) into scope — TODO confirm
+ return poison_value;
+}
+
+CCTK_INT get_deadbeef() // returns the value of the deadbeef parameter
+{
+ DECLARE_CCTK_PARAMETERS; // presumably brings the thorn's parameters (including deadbeef) into scope — TODO confirm
+ return deadbeef;
+}
+
+
+
// Discard consecutive whitespace characters at the front of the stream.
// Stops at the first non-whitespace character, or as soon as the stream
// is no longer good.
void skipws (istream& is) {
- while (is.good() && isspace(is.peek())) {
+ while (is.good() and isspace(is.peek())) {
is.get();
}
}
@@ -138,6 +157,60 @@ memoryof (vector<T> const & c)
+// List input: replaces the contents of l with the elements of a bracketed, comma-separated list such as "[a,b,c]"; whitespace before delimiters and elements is skipped
+template<class T>
+istream& input (istream& is, list<T>& l) {
+ l.clear(); // previous contents are discarded before parsing starts
+ try {
+ skipws (is);
+ consume (is, '[');
+ skipws (is);
+ while (is.good() and is.peek() != ']') {
+ T elem;
+ is >> elem; // elements are read with T's own stream extraction operator
+ l.push_back (elem);
+ skipws (is);
+ if (is.peek() != ',') break; // no separator: this was the last element
+ is.get(); // discard the ','
+ skipws (is);
+ }
+ skipws (is);
+ consume (is, ']');
+ } catch (input_error &err) { // report what was parsed so far on cout, then pass the error on to the caller
+ cout << "Input error while reading a list<>" << endl
+ << " The following elements have been read so far: " << l << endl;
+ throw err;
+ }
+ return is;
+}
+
+// Set input: replaces the contents of s with the elements of a
+// brace-delimited, comma-separated set such as "{a,b,c}".  Whitespace
+// before delimiters and elements is skipped.  Returns the stream.  An
+// input_error raised while parsing is reported on cout, together with
+// the elements read so far, and then thrown on to the caller.
+template<class T>
+istream& input (istream& is, set<T>& s) {
+ s.clear();
+ try {
+ skipws (is);
+ consume (is, '{');
+ skipws (is);
+ while (is.good() and is.peek() != '}') {
+ T elem;
+ is >> elem;
+ s.insert (elem);
+ skipws (is);
+ if (is.peek() != ',') break; // no separator: this was the last element
+ is.get(); // discard the ','
+ skipws (is);
+ }
+ skipws (is);
+ // The closing delimiter must pair with the '{' consumed above (the
+ // loop also stops at '}'); expecting ']' here rejected every
+ // well-formed set
+ consume (is, '}');
+ } catch (input_error &err) {
+ cout << "Input error while reading a set<>" << endl
+ << " The following elements have been read so far: " << s << endl;
+ throw err;
+ }
+ return is;
+}
+
// Vector input
template<class T>
istream& input (istream& is, vector<T>& v) {
@@ -146,7 +219,7 @@ istream& input (istream& is, vector<T>& v) {
skipws (is);
consume (is, '[');
skipws (is);
- while (is.good() && is.peek() != ']') {
+ while (is.good() and is.peek() != ']') {
T elem;
is >> elem;
v.push_back (elem);
@@ -179,6 +252,25 @@ ostream& output (ostream& os, const list<T>& l) {
return os;
}
+// Map output: writes the entries in iteration order as "{key:value,key:value,...}" and returns the stream
+template<class S, class T>
+ostream& output (ostream& os, const map<S,T>& m) {
+ os << "{";
+ for (typename map<S,T>::const_iterator ti=m.begin(); ti!=m.end(); ++ti) {
+ if (ti!=m.begin()) os << ","; // separator before every entry except the first
+ os << ti->first << ":" << ti->second;
+ }
+ os << "}";
+ return os;
+}
+
+// Pair output: writes the pair as "(first,second)" and returns the stream
+template<class S, class T>
+ostream& output (ostream& os, const pair<S,T>& p) {
+ os << "(" << p.first << "," << p.second << ")";
+ return os;
+}
+
// Set output
template<class T>
ostream& output (ostream& os, const set<T>& s) {
@@ -196,7 +288,7 @@ template<class T>
ostream& output (ostream& os, const stack<T>& s) {
stack<T> s2 (s);
list<T> l;
- while (! s2.empty()) {
+ while (not s2.empty()) {
l.insert (l.begin(), s2.top());
s2.pop();
}
@@ -231,28 +323,32 @@ ostream& output (ostream& os, const vector<T>& v) {
#include "th.hh"
#include "vect.hh"
+#include "CarpetTimers.hh"
+
template int ipow (int x, int y);
template CCTK_REAL ipow (CCTK_REAL x, int y);
-template vect<int,3> ipow (vect<int,3> x, int y);
+template vect<int,dim> ipow (vect<int,dim> x, int y);
-template size_t memoryof (list<bbox<int,3> > const & l);
-template size_t memoryof (list<vect<int,3> > const & l);
+template size_t memoryof (list<bbox<int,dim> > const & l);
+template size_t memoryof (list<vect<int,dim> > const & l);
template size_t memoryof (list<dh*> const & l);
+template size_t memoryof (list<gh*> const & l);
+template size_t memoryof (list<gdata*> const & l);
template size_t memoryof (list<ggf*> const & l);
template size_t memoryof (list<th*> const & l);
template size_t memoryof (stack<void*> const & s);
template size_t memoryof (vector<bool> const & v);
template size_t memoryof (vector<int> const & v);
template size_t memoryof (vector<CCTK_REAL> const & v);
-template size_t memoryof (vector<bbox<int,3> > const & v);
-template size_t memoryof (vector<vect<int,3> > const & v);
-template size_t memoryof (vector<fulltree <int,3,pseudoregion_t> *> const & f);
+template size_t memoryof (vector<bbox<int,dim> > const & v);
+template size_t memoryof (vector<vect<int,dim> > const & v);
+template size_t memoryof (vector<fulltree <int,dim,pseudoregion_t> *> const & f);
template size_t memoryof (vector<pseudoregion_t> const & v);
template size_t memoryof (vector<region_t> const & v);
template size_t memoryof (vector<sendrecv_pseudoregion_t> const & v);
template size_t memoryof (vector<vector<int> > const & v);
template size_t memoryof (vector<vector<CCTK_REAL> > const & v);
-template size_t memoryof (vector<vector<bbox<int,3> > > const & v);
+template size_t memoryof (vector<vector<bbox<int,dim> > > const & v);
template size_t memoryof (vector<vector<dh::dboxes> > const & v);
template size_t memoryof (vector<vector<dh::fast_dboxes> > const & v);
template size_t memoryof (vector<vector<region_t> > const & v);
@@ -262,49 +358,54 @@ template size_t memoryof (vector<vector<vector<region_t> > > const & v);
template size_t memoryof (vector<vector<vector<gdata*> > > const & v);
template size_t memoryof (vector<vector<vector<vector<gdata*> > > > const & v);
+template istream& input (istream& os, list<bbox<int,dim> >& l);
+template istream& input (istream& os, set<bbox<int,dim> >& s);
template istream& input (istream& os, vector<int>& v);
template istream& input (istream& os, vector<CCTK_REAL>& v);
-template istream& input (istream& os, vector<bbox<int,3> >& v);
-template istream& input (istream& os, vector<bbox<CCTK_REAL,3> >& v);
-template istream& input (istream& os, vector<vect<int,3> >& v);
-template istream& input (istream& os, vector<vect<vect<bool,2>,3> >& v);
+template istream& input (istream& os, vector<bbox<int,dim> >& v);
+template istream& input (istream& os, vector<bbox<CCTK_REAL,dim> >& v);
+template istream& input (istream& os, vector<vect<int,dim> >& v);
+template istream& input (istream& os, vector<vect<vect<bool,2>,dim> >& v);
template istream& input (istream& os, vector<region_t>& v);
+template istream& input (istream& os, vector<pseudoregion_t>& v);
+template istream& input (istream& os, vector<sendrecv_pseudoregion_t>& v);
template istream& input (istream& os, vector<vector<CCTK_REAL> >& v);
-template istream& input (istream& os, vector<vector<bbox<int,3> > >& v);
-template istream& input (istream& os, vector<vector<bbox<CCTK_REAL,3> > >& v);
-template istream& input (istream& os, vector<vector<vect<vect<bool,2>,3> > >& v);
+template istream& input (istream& os, vector<vector<bbox<int,dim> > >& v);
+template istream& input (istream& os, vector<vector<bbox<CCTK_REAL,dim> > >& v);
+template istream& input (istream& os, vector<vector<vect<vect<bool,2>,dim> > >& v);
template istream& input (istream& os, vector<vector<region_t> >& v);
template istream& input (istream& os, vector<vector<vector<CCTK_REAL> > >& v);
template istream& input (istream& os, vector<vector<vector<region_t> > >& v);
-template ostream& output (ostream& os, const list<bbox<int,3> >& l);
+template ostream& output (ostream& os, const list<bbox<int,dim> >& l);
template ostream& output (ostream& os, const list<region_t>& l);
-template ostream& output (ostream& os, const set<bbox<int,3> >& s);
-template ostream& output (ostream& os, const set<bboxset<int,3> >& s);
-template ostream& output (ostream& os, const stack<bbox<int,3> >& s);
+template ostream& output (ostream& os, const map<string,Carpet::Timer*>& m);
+template ostream& output (ostream& os, const set<bbox<int,dim> >& s);
+template ostream& output (ostream& os, const set<bboxset<int,dim> >& s);
+template ostream& output (ostream& os, const stack<bbox<int,dim> >& s);
template ostream& output (ostream& os, const vector<bool>& v);
template ostream& output (ostream& os, const vector<int>& v);
template ostream& output (ostream& os, const vector<CCTK_REAL>& v);
-template ostream& output (ostream& os, const vector<bbox<int,3> >& v);
-template ostream& output (ostream& os, const vector<bbox<CCTK_REAL,3> >& v);
-template ostream& output (ostream& os, const vector<vect<int,3> >& v);
-template ostream& output (ostream& os, const vector<vect<vect<bool,2>,3> >& v);
+template ostream& output (ostream& os, const vector<bbox<int,dim> >& v);
+template ostream& output (ostream& os, const vector<bbox<CCTK_REAL,dim> >& v);
+template ostream& output (ostream& os, const vector<vect<int,dim> >& v);
+template ostream& output (ostream& os, const vector<vect<vect<bool,2>,dim> >& v);
template ostream& output (ostream& os, const vector<dh::dboxes> & v);
template ostream& output (ostream& os, const vector<dh::fast_dboxes> & v);
template ostream& output (ostream& os, const vector<region_t>& v);
template ostream& output (ostream& os, const vector<pseudoregion_t>& v);
template ostream& output (ostream& os, const vector<sendrecv_pseudoregion_t>& v);
-template ostream& output (ostream& os, const vector<list<bbox<int,3> > >& v);
+template ostream& output (ostream& os, const vector<list<bbox<int,dim> > >& v);
template ostream& output (ostream& os, const vector<vector<int> >& v);
template ostream& output (ostream& os, const vector<vector<CCTK_REAL> >& v);
-template ostream& output (ostream& os, const vector<vector<bbox<int,3> > >& v);
-template ostream& output (ostream& os, const vector<vector<bbox<CCTK_REAL,3> > >& v);
-template ostream& output (ostream& os, const vector<vector<vect<vect<bool,2>,3> > >& v);
+template ostream& output (ostream& os, const vector<vector<bbox<int,dim> > >& v);
+template ostream& output (ostream& os, const vector<vector<bbox<CCTK_REAL,dim> > >& v);
+template ostream& output (ostream& os, const vector<vector<vect<vect<bool,2>,dim> > >& v);
template ostream& output (ostream& os, const vector<vector<dh::dboxes> > & b);
template ostream& output (ostream& os, const vector<vector<dh::fast_dboxes> > & b);
template ostream& output (ostream& os, const vector<vector<region_t> >& v);
template ostream& output (ostream& os, const vector<vector<vector<CCTK_REAL> > >& v);
-template ostream& output (ostream& os, const vector<vector<vector<bbox<int,3> > > >& v);
+template ostream& output (ostream& os, const vector<vector<vector<bbox<int,dim> > > >& v);
template ostream& output (ostream& os, const vector<vector<vector<dh::dboxes> > > & b);
template ostream& output (ostream& os, const vector<vector<vector<dh::fast_dboxes> > > & b);
template ostream& output (ostream& os, const vector<vector<vector<region_t> > >& v);
diff --git a/Carpet/CarpetLib/src/defs.hh b/Carpet/CarpetLib/src/defs.hh
index a44d20ef0..3a1188d21 100644
--- a/Carpet/CarpetLib/src/defs.hh
+++ b/Carpet/CarpetLib/src/defs.hh
@@ -11,25 +11,24 @@
#include <cstdlib>
#include <iostream>
#include <list>
+#include <map>
#include <set>
#include <stack>
#include <vector>
#include "cctk.h"
+#include "typeprops.hh"
-using namespace std;
-
-
-// A compile time pseudo assert statement
-#define static_assert(_x, _msg) do { typedef int ai[(_x) ? 1 : -1]; } while(0)
+using namespace std;
-// Check a return value
-#define check(_expr) do { bool const _val = (_expr); assert(_val); } while(0)
+// Stringify: STRINGIFY(x) macro-expands x before turning it into a string literal; STRINGIFY1(x) stringifies x exactly as written
+#define STRINGIFY1(x) #x
+#define STRINGIFY(x) STRINGIFY1(x)
@@ -40,14 +39,11 @@ using namespace std;
-// Use this macro AT instead of vector's operator[] or at().
-// Depending on the macro NDEBUG, this macro AT either checks for
-// valid indices or not.
-#ifndef CARPET_OPTIMISE
-# define AT(index) at(index)
-#else
-# define AT(index) operator[](index)
+// Number of dimensions: CARPET_DIM defaults to 3 unless it is already defined when this header is read; dim mirrors it as a typed constant
+#ifndef CARPET_DIM
+# define CARPET_DIM 3
#endif
+const int dim = CARPET_DIM;
@@ -55,9 +51,25 @@ using namespace std;
char const * const eol = "\n";
+
+// A compile time pseudo assert statement: a false _x gives the typedef'd array a negative size, which does not compile; _msg is not used.  NOTE(review): static_assert is a keyword since C++11, so this macro would shadow it when building as C++11 or later — confirm the language standard in use
+#define static_assert(_x, _msg) do { typedef int ai[(_x) ? 1 : -1]; } while(0)
-// Number of dimensions
-const int dim = 3;
+
+
+// Check a return value: _expr is evaluated exactly once into a local, outside the assert, so its side effects still happen if assert is compiled out; the assert then checks the stored result
+#define check(_expr) do { bool const _val = (_expr); assert(_val); } while(0)
+
+
+
+// Use this macro AT instead of vector's operator[] or at().
+// Depending on the macro CARPET_OPTIMISE, this macro AT either checks
+// for valid indices or not.
+#if ! defined(CARPET_OPTIMISE)
+# define AT(index) at(index) // default build: index-checked access
+#else
+# define AT(index) operator[](index) // CARPET_OPTIMISE build: unchecked access
+#endif
@@ -100,11 +112,19 @@ enum centering { error_centered, vertex_centered, cell_centered };
// Useful helper: returns x*x
template<class T>
+inline T square (const T& x) CCTK_ATTRIBUTE_CONST;
+template<class T>
inline T square (const T& x) { return x*x; }
// Another useful helper
template<class T>
-T ipow (T x, int y);
+T ipow (T x, int y) CCTK_ATTRIBUTE_CONST;
+
+
+
+// Access to CarpetLib parameters.  NOTE(review): both accessors are declared CCTK_ATTRIBUTE_CONST although their definitions in defs.cc read parameter values; if that macro maps to a compiler "const" attribute, confirm these parameters can never change at run time
+CCTK_INT get_poison_value() CCTK_ATTRIBUTE_CONST;
+CCTK_INT get_deadbeef() CCTK_ATTRIBUTE_CONST;
@@ -120,52 +140,62 @@ void consume (istream& is, char const * c);
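// Names for types: each typestring overload returns the name of its argument's CCTK type as a string literal; the argument's value is not used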
#ifdef HAVE_CCTK_INT1
-inline const char * typestring (const CCTK_INT1& dummy)
+inline const char * typestring (const CCTK_INT1&) CCTK_ATTRIBUTE_CONST;
+inline const char * typestring (const CCTK_INT1&)
{ return "CCTK_INT1"; }
#endif
#ifdef HAVE_CCTK_INT2
-inline const char * typestring (const CCTK_INT2& dummy)
+inline const char * typestring (const CCTK_INT2&) CCTK_ATTRIBUTE_CONST;
+inline const char * typestring (const CCTK_INT2&)
{ return "CCTK_INT2"; }
#endif
#ifdef HAVE_CCTK_INT4
-inline const char * typestring (const CCTK_INT4& dummy)
+inline const char * typestring (const CCTK_INT4&) CCTK_ATTRIBUTE_CONST;
+inline const char * typestring (const CCTK_INT4&)
{ return "CCTK_INT4"; }
#endif
#ifdef HAVE_CCTK_INT8
-inline const char * typestring (const CCTK_INT8& dummy)
+inline const char * typestring (const CCTK_INT8&) CCTK_ATTRIBUTE_CONST;
+inline const char * typestring (const CCTK_INT8&)
{ return "CCTK_INT8"; }
#endif
#ifdef HAVE_CCTK_REAL4
-inline const char * typestring (const CCTK_REAL4& dummy)
+inline const char * typestring (const CCTK_REAL4&) CCTK_ATTRIBUTE_CONST;
+inline const char * typestring (const CCTK_REAL4&)
{ return "CCTK_REAL4"; }
#endif
#ifdef HAVE_CCTK_REAL8
-inline const char * typestring (const CCTK_REAL8& dummy)
+inline const char * typestring (const CCTK_REAL8&) CCTK_ATTRIBUTE_CONST;
+inline const char * typestring (const CCTK_REAL8&)
{ return "CCTK_REAL8"; }
#endif
#ifdef HAVE_CCTK_REAL16
-inline const char * typestring (const CCTK_REAL16& dummy)
+inline const char * typestring (const CCTK_REAL16&) CCTK_ATTRIBUTE_CONST;
+inline const char * typestring (const CCTK_REAL16&)
{ return "CCTK_REAL16"; }
#endif
#ifdef HAVE_CCTK_REAL4
-inline const char * typestring (const CCTK_COMPLEX8& dummy)
+inline const char * typestring (const CCTK_COMPLEX8&) CCTK_ATTRIBUTE_CONST;
+inline const char * typestring (const CCTK_COMPLEX8&)
{ return "CCTK_COMPLEX8"; }
#endif
#ifdef HAVE_CCTK_REAL8
-inline const char * typestring (const CCTK_COMPLEX16& dummy)
+inline const char * typestring (const CCTK_COMPLEX16&) CCTK_ATTRIBUTE_CONST;
+inline const char * typestring (const CCTK_COMPLEX16&)
{ return "CCTK_COMPLEX16"; }
#endif
#ifdef HAVE_CCTK_REAL16
-inline const char * typestring (const CCTK_COMPLEX32& dummy)
+inline const char * typestring (const CCTK_COMPLEX32&) CCTK_ATTRIBUTE_CONST;
+inline const char * typestring (const CCTK_COMPLEX32&)
{ return "CCTK_COMPLEX32"; }
#endif
@@ -174,78 +204,105 @@ inline const char * typestring (const CCTK_COMPLEX32& dummy)
namespace CarpetLib {
namespace good {
- // Explicitly overload abs for all types in the same namespace, to
- // circumvent confusion among some compilers
+ // Explicitly overload some functions for all types in the same
+ // namespace CarpetLib::good, to circumvent confusion among some
+ // compilers
- // CCTK_BYTE is unsigned
- inline CCTK_BYTE abs (CCTK_BYTE const & x) { return x; }
+ //
+ // abs
+ //
-#if 0
- // This does not work on AIX, which does not have long long abs
- // (long long)
-# ifdef HAVE_CCTK_INT1
- inline CCTK_INT1 abs (CCTK_INT1 const & x) { return std::abs (x); }
-# endif
-# ifdef HAVE_CCTK_INT2
- inline CCTK_INT2 abs (CCTK_INT2 const & x) { return std::abs (x); }
-# endif
-# ifdef HAVE_CCTK_INT4
- inline CCTK_INT4 abs (CCTK_INT4 const & x) { return std::abs (x); }
-# endif
-# ifdef HAVE_CCTK_INT8
- inline CCTK_INT8 abs (CCTK_INT8 const & x) { return std::abs (x); }
-# endif
-#endif
+ template <typename T>
+ inline typename typeprops<T>::real abs (T const & x) CCTK_ATTRIBUTE_CONST;
+ template <typename T>
+ inline typename typeprops<T>::real abs (T const & x)
+ { return std::abs (x); }
-#if 0
- // This does not work on Linux with Intel compilers, which do not
- // always have long long llabs (long long)
- inline signed char abs (signed char const & x) { return ::abs (x); }
- inline unsigned char abs (unsigned char const & x) { return ::abs (x); }
- inline short abs (short const & x) { return ::abs (x); }
- inline int abs (int const & x) { return ::abs (x); }
- inline long abs (long const & x) { return ::labs (x); }
-# ifdef SIZEOF_LONG_LONG
- inline long long abs (long long const & x) { return ::llabs (x); }
-# endif
-#endif
+// // This does not work on Linux with Intel compilers, which do not
+// // always have long long llabs (long long)
+// template<> inline signed char abs<signed char> (signed char const & x) CCTK_ATTRIBUTE_CONST { return ::abs (x); }
+// template<> inline unsigned char abs<unsigned char> (unsigned char const & x) CCTK_ATTRIBUTE_CONST { return ::abs (x); }
+// template<> inline short abs<short> (short const & x) { return ::abs (x); }
+// template<> inline int abs<int> (int const & x) CCTK_ATTRIBUTE_CONST { return ::abs (x); }
+// template<> inline long abs<long> (long const & x) CCTK_ATTRIBUTE_CONST { return ::labs (x); }
+// #ifdef SIZEOF_LONG_LONG
+// inline long long abs<long long> (long long const & x) CCTK_ATTRIBUTE_CONST { return ::llabs (x); }
+// #endif
-#if 1
-# ifdef HAVE_CCTK_INT1
- inline CCTK_INT1 abs (CCTK_INT1 const & x) { return x < 0 ? - x : x; }
-# endif
-# ifdef HAVE_CCTK_INT2
- inline CCTK_INT2 abs (CCTK_INT2 const & x) { return x < 0 ? - x : x; }
-# endif
-# ifdef HAVE_CCTK_INT4
- inline CCTK_INT4 abs (CCTK_INT4 const & x) { return x < 0 ? - x : x; }
-# endif
-# ifdef HAVE_CCTK_INT8
- inline CCTK_INT8 abs (CCTK_INT8 const & x) { return x < 0 ? - x : x; }
-# endif
+// // This template does not work on AIX, which does not have long
+// // long abs (long long)
+// #ifdef HAVE_CCTK_INT1
+// template<> inline CCTK_INT1 abs<CCTK_INT1> (CCTK_INT1 const & x) CCTK_ATTRIBUTE_CONST { return x < 0 ? - x : x; }
+// #endif
+// #ifdef HAVE_CCTK_INT2
+// template<> inline CCTK_INT2 abs<CCTK_INT2> (CCTK_INT2 const & x) CCTK_ATTRIBUTE_CONST { return x < 0 ? - x : x; }
+// #endif
+// #ifdef HAVE_CCTK_INT4
+// template<> inline CCTK_INT4 abs<CCTK_INT4> (CCTK_INT4 const & x) CCTK_ATTRIBUTE_CONST { return x < 0 ? - x : x; }
+// #endif
+// #ifdef HAVE_CCTK_INT8
+// template<> inline CCTK_INT8 abs<CCTK_INT8> (CCTK_INT8 const & x) CCTK_ATTRIBUTE_CONST { return x < 0 ? - x : x; }
+// #endif
+
+#ifdef HAVE_CCTK_COMPLEX8
+ template<> inline CCTK_REAL4 abs<CCTK_COMPLEX8> (CCTK_COMPLEX8 const & x) CCTK_ATTRIBUTE_CONST;
+ template<> inline CCTK_REAL4 abs<CCTK_COMPLEX8> (CCTK_COMPLEX8 const & x)
+ { return CCTK_Cmplx8Abs (x); }
+#endif
+#ifdef HAVE_CCTK_COMPLEX16
+ template<> inline CCTK_REAL8 abs<CCTK_COMPLEX16> (CCTK_COMPLEX16 const & x) CCTK_ATTRIBUTE_CONST;
+ template<> inline CCTK_REAL8 abs<CCTK_COMPLEX16> (CCTK_COMPLEX16 const & x)
+ { return CCTK_Cmplx16Abs (x); }
+#endif
+#ifdef HAVE_CCTK_COMPLEX32
+ template<> inline CCTK_REAL16 abs<CCTK_COMPLEX32> (CCTK_COMPLEX32 const & x) CCTK_ATTRIBUTE_CONST;
+ template<> inline CCTK_REAL16 abs<CCTK_COMPLEX32> (CCTK_COMPLEX32 const & x)
+ { return CCTK_Cmplx32Abs (x); }
#endif
+ //
+ // isnan
+ //
+
+#undef isnan
+
+  // Default implementation, only correct for integer types (which are never NaN)
+  template <typename T>
+  inline int isnan (T const & x) CCTK_ATTRIBUTE_CONST;
+  template <typename T>
+  inline int isnan (T const &)
+  { return 0; }
+
#ifdef HAVE_CCTK_REAL4
- inline CCTK_REAL4 abs (CCTK_REAL4 const & x) { return std::abs (x); }
+ template<> inline int isnan (CCTK_REAL4 const & x) CCTK_ATTRIBUTE_CONST;
+ template<> inline int isnan (CCTK_REAL4 const & x)
+ { return ::isnan (x); }
#endif
#ifdef HAVE_CCTK_REAL8
- inline CCTK_REAL8 abs (CCTK_REAL8 const & x) { return std::abs (x); }
+ template<> inline int isnan (CCTK_REAL8 const & x) CCTK_ATTRIBUTE_CONST;
+ template<> inline int isnan (CCTK_REAL8 const & x)
+ { return ::isnan (x); }
#endif
#ifdef HAVE_CCTK_REAL16
- inline CCTK_REAL16 abs (CCTK_REAL16 const & x) { return std::abs (x); }
+ template<> inline int isnan (CCTK_REAL16 const & x) CCTK_ATTRIBUTE_CONST;
+ template<> inline int isnan (CCTK_REAL16 const & x)
+ { return ::isnan (x); }
#endif
#ifdef HAVE_CCTK_COMPLEX8
- inline CCTK_REAL4 abs (CCTK_COMPLEX8 const & x)
- { return CCTK_Cmplx8Abs (x); }
+ template<> inline int isnan (CCTK_COMPLEX8 const & x) CCTK_ATTRIBUTE_CONST;
+ template<> inline int isnan (CCTK_COMPLEX8 const & x)
+ { return ::isnan (CCTK_Cmplx8Real (x)) or ::isnan (CCTK_Cmplx8Imag (x)); }
#endif
#ifdef HAVE_CCTK_COMPLEX16
- inline CCTK_REAL8 abs (CCTK_COMPLEX16 const & x)
- { return CCTK_Cmplx16Abs (x); }
+ template<> inline int isnan (CCTK_COMPLEX16 const & x) CCTK_ATTRIBUTE_CONST;
+ template<> inline int isnan (CCTK_COMPLEX16 const & x)
+ { return ::isnan (CCTK_Cmplx16Real (x)) or ::isnan (CCTK_Cmplx16Imag (x)); }
#endif
#ifdef HAVE_CCTK_COMPLEX32
- inline CCTK_REAL16 abs (CCTK_COMPLEX32 const & x)
- { return CCTK_Cmplx32Abs (x); }
+  template<> inline int isnan (CCTK_COMPLEX32 const & x) CCTK_ATTRIBUTE_CONST;
+  template<> inline int isnan (CCTK_COMPLEX32 const & x) // NaN if either component is NaN
+  { return ::isnan (CCTK_Cmplx32Real (x)) or ::isnan (CCTK_Cmplx32Imag (x)); }
#endif
} // namespace good
@@ -254,33 +311,69 @@ namespace CarpetLib {
// Container memory usage
-inline size_t memoryof (char e) { return sizeof e; }
-inline size_t memoryof (short e) { return sizeof e; }
-inline size_t memoryof (int e) { return sizeof e; }
-inline size_t memoryof (long e) { return sizeof e; }
-inline size_t memoryof (long long e) { return sizeof e; }
-inline size_t memoryof (unsigned char e) { return sizeof e; }
-inline size_t memoryof (unsigned short e) { return sizeof e; }
-inline size_t memoryof (unsigned int e) { return sizeof e; }
-inline size_t memoryof (unsigned long e) { return sizeof e; }
-inline size_t memoryof (unsigned long long e) { return sizeof e; }
-inline size_t memoryof (float e) { return sizeof e; }
-inline size_t memoryof (double e) { return sizeof e; }
-inline size_t memoryof (long double e) { return sizeof e; }
-inline size_t memoryof (void * e) { return sizeof e; }
-template<class T> inline size_t memoryof (T * e) { return sizeof e; }
-template<class T> inline size_t memoryof (T const * e) { return sizeof e; }
-template<class T> size_t memoryof (list<T> const & c);
-template<class T> size_t memoryof (set<T> const & c);
-template<class T> size_t memoryof (stack<T> const & c);
-template<class T> size_t memoryof (vector<T> const & c);
+inline size_t memoryof (char const e) CCTK_ATTRIBUTE_CONST;
+inline size_t memoryof (short const e) CCTK_ATTRIBUTE_CONST;
+inline size_t memoryof (int const e) CCTK_ATTRIBUTE_CONST;
+inline size_t memoryof (long const e) CCTK_ATTRIBUTE_CONST;
+inline size_t memoryof (long long const e) CCTK_ATTRIBUTE_CONST;
+inline size_t memoryof (unsigned char const e) CCTK_ATTRIBUTE_CONST;
+inline size_t memoryof (unsigned short const e) CCTK_ATTRIBUTE_CONST;
+inline size_t memoryof (unsigned int const e) CCTK_ATTRIBUTE_CONST;
+inline size_t memoryof (unsigned long const e) CCTK_ATTRIBUTE_CONST;
+inline size_t memoryof (unsigned long long const e) CCTK_ATTRIBUTE_CONST;
+inline size_t memoryof (float const e) CCTK_ATTRIBUTE_CONST;
+inline size_t memoryof (double const e) CCTK_ATTRIBUTE_CONST;
+inline size_t memoryof (long double const e) CCTK_ATTRIBUTE_CONST;
+inline size_t memoryof (void * const e) CCTK_ATTRIBUTE_CONST;
+inline size_t memoryof (void const * const e) CCTK_ATTRIBUTE_CONST;
+template<class T> inline size_t memoryof (T * const e) CCTK_ATTRIBUTE_CONST;
+template<class T> inline size_t memoryof (T const * const e) CCTK_ATTRIBUTE_CONST;
+template<class T> inline size_t memoryof (typename list<T>::iterator const & i) CCTK_ATTRIBUTE_CONST;
+template<class T> inline size_t memoryof (typename list<T>::const_iterator const & i) CCTK_ATTRIBUTE_CONST;
+
+inline size_t memoryof (char const e) { return sizeof e; }
+inline size_t memoryof (short const e) { return sizeof e; }
+inline size_t memoryof (int const e) { return sizeof e; }
+inline size_t memoryof (long const e) { return sizeof e; }
+inline size_t memoryof (long long const e) { return sizeof e; }
+inline size_t memoryof (unsigned char const e) { return sizeof e; }
+inline size_t memoryof (unsigned short const e) { return sizeof e; }
+inline size_t memoryof (unsigned int const e) { return sizeof e; }
+inline size_t memoryof (unsigned long const e) { return sizeof e; }
+inline size_t memoryof (unsigned long long const e) { return sizeof e; }
+inline size_t memoryof (float const e) { return sizeof e; }
+inline size_t memoryof (double const e) { return sizeof e; }
+inline size_t memoryof (long double const e) { return sizeof e; }
+inline size_t memoryof (void * const e) { return sizeof e; }
+inline size_t memoryof (void const * const e) { return sizeof e; }
+template<class T> inline size_t memoryof (T * const e) { return sizeof e; }
+template<class T> inline size_t memoryof (T const * const e) { return sizeof e; }
+template<class T> inline size_t memoryof (typename list<T>::iterator const & i) { return sizeof i; } // T is in a non-deduced context: call as memoryof<T>(i)
+template<class T> inline size_t memoryof (typename list<T>::const_iterator const & i) { return sizeof i; } // T is in a non-deduced context: call as memoryof<T>(i)
+
+template<class T> size_t memoryof (list<T> const & c) CCTK_ATTRIBUTE_PURE;
+template<class T> size_t memoryof (set<T> const & c) CCTK_ATTRIBUTE_PURE;
+template<class T> size_t memoryof (stack<T> const & c) CCTK_ATTRIBUTE_PURE;
+template<class T> size_t memoryof (vector<T> const & c) CCTK_ATTRIBUTE_PURE;
// Container input
+template<class T> istream& input (istream& is, list<T>& l);
+template<class T> istream& input (istream& is, set<T>& s);
template<class T> istream& input (istream& is, vector<T>& v);
template<class T>
+inline istream& operator>> (istream& is, list<T>& l) {
+ return input(is,l);
+}
+
+template<class T>
+inline istream& operator>> (istream& is, set<T>& s) {
+ return input(is,s);
+}
+
+template<class T>
inline istream& operator>> (istream& is, vector<T>& v) {
return input(is,v);
}
@@ -289,6 +382,8 @@ inline istream& operator>> (istream& is, vector<T>& v) {
// Container output
template<class T> ostream& output (ostream& os, const list<T>& l);
+template<class S, class T> ostream& output (ostream& os, const map<S,T>& m);
+template<class S, class T> ostream& output (ostream& os, const pair<S,T>& p);
template<class T> ostream& output (ostream& os, const set<T>& s);
template<class T> ostream& output (ostream& os, const stack<T>& s);
template<class T> ostream& output (ostream& os, const vector<T>& v);
@@ -298,6 +393,11 @@ inline ostream& operator<< (ostream& os, const list<T>& l) {
return output(os,l);
}
+template<class S, class T>
+inline ostream& operator<< (ostream& os, const map<S,T>& m) {
+ return output(os,m);
+}
+
template<class T>
inline ostream& operator<< (ostream& os, const set<T>& s) {
return output(os,s);
diff --git a/Carpet/CarpetLib/src/dh.cc b/Carpet/CarpetLib/src/dh.cc
index 046600c6b..36b3c310f 100644
--- a/Carpet/CarpetLib/src/dh.cc
+++ b/Carpet/CarpetLib/src/dh.cc
@@ -3,6 +3,9 @@
#include "cctk.h"
#include "cctk_Parameters.h"
+#include "CarpetTimers.hh"
+
+#include "mpi_string.hh"
#include "bbox.hh"
#include "bboxset.hh"
#include "defs.hh"
@@ -19,6 +22,10 @@ using namespace CarpetLib;
+list<dh*> dh::alldh;
+
+
+
// Constructors
dh::
dh (gh & h_,
@@ -31,12 +38,14 @@ dh (gh & h_,
assert (all (all (ghost_width >= 0)));
assert (all (all (buffer_width >= 0)));
assert (prolongation_order_space >= 0);
- h.add (this);
+ alldhi = alldh.insert(alldh.end(), this);
+ gh_handle = h.add (this);
CHECKPOINT;
- regrid ();
+ regrid (false);
for (int rl = 0; rl < h.reflevels(); ++ rl) {
recompose (rl, false);
}
+ regrid_free (false);
}
@@ -46,7 +55,8 @@ dh::
~dh ()
{
CHECKPOINT;
- h.remove (this);
+ h.erase (gh_handle);
+ alldh.erase(alldhi);
}
@@ -135,6 +145,15 @@ assert_error (char const * restrict const checkstring,
there_was_an_error = true;
}
+#ifdef CARPET_OPTIMISE
+
+// For highest efficiency, omit all self-checks
+#define ASSERT_rl(check, message)
+#define ASSERT_c(check, message)
+#define ASSERT_cc(check, message)
+
+#else
+
#define ASSERT_rl(check, message) \
do { \
if (not (check)) { \
@@ -156,41 +175,60 @@ assert_error (char const * restrict const checkstring,
} \
} while (false)
+#endif
+
void
dh::
-regrid ()
+regrid (bool const do_init)
{
DECLARE_CCTK_PARAMETERS;
+
+ static Carpet::Timer timer ("CarpetLib::dh::regrid");
+ timer.start();
CHECKPOINT;
- static Timer total ("dh::regrid");
+ static Timer total ("CarpetLib::dh::regrid");
total.start ();
- oldboxes.clear();
+ mboxes oldboxes;
swap (boxes, oldboxes);
- fast_oldboxes.clear();
- swap (fast_boxes, fast_oldboxes);
+
+ full_mboxes full_boxes;
+
+ fast_boxes.clear();
+ // cerr << "QQQ: regrid[1]" << endl;
boxes.resize (h.mglevels());
+ full_boxes.resize (h.mglevels());
fast_boxes.resize (h.mglevels());
for (int ml = 0; ml < h.mglevels(); ++ ml) {
+ // cerr << "QQQ: regrid[2] ml=" << ml << endl;
boxes.AT(ml).resize (h.reflevels());
+ full_boxes.AT(ml).resize (h.reflevels());
fast_boxes.AT(ml).resize (h.reflevels());
for (int rl = 0; rl < h.reflevels(); ++ rl) {
+ // cerr << "QQQ: regrid[3] rl=" << rl << endl;
boxes.AT(ml).AT(rl).resize (h.components(rl));
- fast_boxes.AT(ml).AT(rl).resize (dist::size());
+ full_boxes.AT(ml).AT(rl).resize (h.components(rl));
cboxes & level = boxes.AT(ml).AT(rl);
- fast_cboxes & fast_level = fast_boxes.AT(ml).AT(rl);
+ full_cboxes & full_level = full_boxes.AT(ml).AT(rl);
+ fast_dboxes & fast_level = fast_boxes.AT(ml).AT(rl);
+
+ vector<fast_dboxes> fast_level_otherprocs (dist::size());
// Domain:
+ // cerr << "QQQ: regrid[a]" << endl;
+
+ static Carpet::Timer timer_domain ("CarpetLib::dh::regrid::domain");
+ timer_domain.start();
ibbox const & domain_exterior = h.baseextent(ml,rl);
// Variables may have size zero
@@ -211,17 +249,24 @@ regrid ()
ibset domain_boundary = domain_exterior - domain_active;
domain_boundary.normalize();
+ timer_domain.stop();
+
+ static Carpet::Timer timer_region ("CarpetLib::dh::regrid::region");
+ timer_region.start();
+
+ // cerr << "QQQ: regrid[b]" << endl;
for (int c = 0; c < h.components(rl); ++ c) {
- dboxes & box = boxes.AT(ml).AT(rl).AT(c);
+ full_dboxes & box = full_level.AT(c);
// Interior:
ibbox & intr = box.interior;
+ intr = ibbox::poison();
// The interior of the grid has the extent as specified by the
// regridding thorn
@@ -237,10 +282,12 @@ regrid ()
"The interior must be contained in the domain");
// All interiors must be disjunct
+#ifdef CARPET_DEBUG
for (int cc = 0; cc < c; ++ cc) {
- ASSERT_cc (not intr.intersects (level.AT(cc).interior),
+ ASSERT_cc (not intr.intersects (full_level.AT(cc).interior),
"All interiors must be disjunct");
}
+#endif
@@ -261,6 +308,7 @@ regrid ()
// Exterior:
ibbox & extr = box.exterior;
+ extr = ibbox::poison();
ASSERT_c (all (all (ghost_width >= 0)),
"The gh ghost widths must not be negative");
@@ -280,6 +328,7 @@ regrid ()
// Cactus ghost zones (which include outer boundaries):
ibset & ghosts = box.ghosts;
+ ghosts = ibset::poison();
ghosts = extr - intr;
ghosts.normalize();
@@ -295,6 +344,7 @@ regrid ()
// Communicated region:
ibbox & comm = box.communicated;
+ comm = ibbox::poison();
comm = extr.expand (i2vect (is_outer_boundary) * (- boundary_width));
@@ -313,6 +363,7 @@ regrid ()
// Outer boundary:
ibset & outer_boundaries = box.outer_boundaries;
+ outer_boundaries = ibset::poison();
outer_boundaries = extr - comm;
outer_boundaries.normalize();
@@ -327,6 +378,7 @@ regrid ()
// Owned region:
ibbox & owned = box.owned;
+ owned = ibbox::poison();
owned = intr.expand (i2vect (is_outer_boundary) * (- boundary_width));
@@ -341,10 +393,12 @@ regrid ()
"The owned region must be contained in the active part of the domain");
// All owned regions must be disjunct
+#ifdef CARPET_DEBUG
for (int cc = 0; cc < c; ++ cc) {
- ASSERT_cc (not owned.intersects (level.AT(cc).owned),
+ ASSERT_cc (not owned.intersects (full_level.AT(cc).owned),
"All owned regions must be disjunct");
}
+#endif
@@ -352,6 +406,7 @@ regrid ()
// boundaries):
ibset & boundaries = box.boundaries;
+ boundaries = ibset::poison();
boundaries = comm - owned;
boundaries.normalize();
@@ -365,9 +420,15 @@ regrid ()
} // for c
+ timer_region.stop();
+
// Conjunction of all buffer zones:
+ // cerr << "QQQ: regrid[c]" << endl;
+
+ static Carpet::Timer timer_buffers ("CarpetLib::dh::regrid::buffers");
+ timer_buffers.start();
// Enlarge active part of domain
i2vect const safedist = i2vect (0);
@@ -376,7 +437,7 @@ regrid ()
// All owned regions
ibset allowned;
for (int c = 0; c < h.components(rl); ++ c) {
- dboxes const & box = boxes.AT(ml).AT(rl).AT(c);
+ full_dboxes const & box = full_level.AT(c);
allowned += box.owned;
}
allowned.normalize();
@@ -409,20 +470,13 @@ regrid ()
for (int c = 0; c < h.components(rl); ++ c) {
-
- dboxes & box = boxes.AT(ml).AT(rl).AT(c);
-
-
+ full_dboxes & box = full_level.AT(c);
// Buffer zones:
-
box.buffers = box.owned & allbuffers;
box.buffers.normalize();
-
-
// Active region:
-
box.active = box.owned - box.buffers;
box.active.normalize();
@@ -431,22 +485,29 @@ regrid ()
// The conjunction of all buffer zones must equal allbuffers
+ // cerr << "QQQ: regrid[d]" << endl;
ibset allbuffers1;
for (int c = 0; c < h.components(rl); ++ c) {
- dboxes const & box = boxes.AT(ml).AT(rl).AT(c);
+ full_dboxes const & box = full_level.AT(c);
allbuffers1 += box.buffers;
}
allbuffers1.normalize();
ASSERT_rl (allbuffers1 == allbuffers,
"Buffer zone consistency check");
+ timer_buffers.stop();
+
// Test constituency relations:
+ // cerr << "QQQ: regrid[e]" << endl;
+
+ static Carpet::Timer timer_test ("CarpetLib::dh::regrid::test");
+ timer_test.start();
for (int c = 0; c < h.components(rl); ++ c) {
- dboxes const & box = boxes.AT(ml).AT(rl).AT(c);
+ full_dboxes const & box = full_level.AT(c);
ASSERT_c ((box.active & box.buffers).empty(),
"Consistency check");
@@ -473,24 +534,36 @@ regrid ()
} // for c
+ timer_test.stop();
+
// Communication schedule:
+ // cerr << "QQQ: regrid[4]" << endl;
- for (int c = 0; c < h.components(rl); ++ c) {
+ static Carpet::Timer timer_comm ("CarpetLib::dh::regrid::comm");
+ timer_comm.start();
+
+ for (int lc = 0; lc < h.local_components(rl); ++ lc) {
+ int const c = h.get_component (rl, lc);
+ // cerr << "QQQ: regrid[4a] lc=" << lc << " c=" << c << endl;
- dboxes & box = boxes.AT(ml).AT(rl).AT(c);
+ full_dboxes & box = full_level.AT(c);
// Multigrid restriction:
+ static Carpet::Timer timer_comm_mgrest
+ ("CarpetLib::dh::regrid::comm::mgrest");
+ timer_comm_mgrest.start();
+
if (ml > 0) {
int const oml = ml - 1;
// Multigrid restriction must fill all active points
- dboxes const & obox = boxes.AT(oml).AT(rl).AT(c);
+ full_dboxes const & obox = full_boxes.AT(oml).AT(rl).AT(c);
ibset needrecv = box.active;
@@ -513,11 +586,8 @@ regrid ()
ibbox const send = recv.expanded_for (obox.interior);
ASSERT_c (send <= obox.exterior,
"Multigrid restriction: Send region must be contained in exterior");
- if (on_this_proc (rl, c)) {
- int const p = dist::rank();
- fast_level.AT(p).fast_mg_rest_sendrecv.push_back
- (sendrecv_pseudoregion_t (send, c, recv, c));
- }
+ fast_level.fast_mg_rest_sendrecv.push_back
+ (sendrecv_pseudoregion_t (send, c, recv, c));
}
needrecv -= ovlp;
@@ -529,9 +599,16 @@ regrid ()
} // if ml > 0
+ timer_comm_mgrest.stop();
+
// Multigrid prolongation:
+ // cerr << "QQQ: regrid[f]" << endl;
+
+ static Carpet::Timer timer_comm_mgprol
+ ("CarpetLib::dh::regrid::comm::mprol");
+ timer_comm_mgprol.start();
if (ml > 0) {
int const oml = ml - 1;
@@ -539,7 +616,7 @@ regrid ()
// Multigrid prolongation must fill all active points
// (this could probably be relaxed)
- dboxes const & obox = boxes.AT(oml).AT(rl).AT(c);
+ full_dboxes const & obox = full_boxes.AT(oml).AT(rl).AT(c);
ibset oneedrecv = obox.active;
@@ -565,11 +642,8 @@ regrid ()
recv.expanded_for (box.interior).expand (stencil_size);
ASSERT_c (send <= box.exterior,
"Multigrid prolongation: Send region must be contained in exterior");
- if (on_this_proc (rl, c)) {
- int const p = dist::rank();
- fast_level.AT(p).fast_mg_prol_sendrecv.push_back
- (sendrecv_pseudoregion_t (send, c, recv, c));
- }
+ fast_level.fast_mg_prol_sendrecv.push_back
+ (sendrecv_pseudoregion_t (send, c, recv, c));
}
oneedrecv -= ovlp;
@@ -581,9 +655,16 @@ regrid ()
} // if ml > 0
+ timer_comm_mgprol.stop();
+
// Refinement prolongation:
+ // cerr << "QQQ: regrid[g]" << endl;
+
+ static Carpet::Timer timer_comm_refprol
+ ("CarpetLib::dh::regrid::comm::refprol");
+ timer_comm_refprol.start();
if (rl > 0) {
int const orl = rl - 1;
@@ -600,7 +681,7 @@ regrid ()
i2vect (h.reffacts.at(rl) / h.reffacts.at(orl));
for (int cc = 0; cc < h.components(orl); ++ cc) {
- dboxes const & obox = boxes.AT(ml).AT(orl).AT(cc);
+ full_dboxes const & obox = full_boxes.AT(ml).AT(orl).AT(cc);
ibset contracted_oactive;
for (ibset::const_iterator
@@ -617,16 +698,19 @@ regrid ()
ovlp.normalize();
for (ibset::const_iterator
- ri =ovlp.begin(); ri != ovlp.end(); ++ ri)
+ ri = ovlp.begin(); ri != ovlp.end(); ++ ri)
{
ibbox const & recv = * ri;
ibbox const send =
recv.expanded_for (obox.interior).expand (stencil_size);
ASSERT_c (send <= obox.exterior,
"Refinement prolongation: Send region must be contained in exterior");
- if (on_this_proc (rl, c) or on_this_proc (orl, cc)) {
- int const p = dist::rank();
- fast_level.AT(p).fast_ref_prol_sendrecv.push_back
+ fast_level.fast_ref_prol_sendrecv.push_back
+ (sendrecv_pseudoregion_t (send, cc, recv, c));
+ if (not on_this_proc (orl, cc)) {
+ fast_dboxes & fast_level_otherproc =
+ fast_level_otherprocs.AT(this_proc(orl, cc));
+ fast_level_otherproc.fast_ref_prol_sendrecv.push_back
(sendrecv_pseudoregion_t (send, cc, recv, c));
}
}
@@ -635,83 +719,120 @@ regrid ()
} // for cc
- needrecv.normalize();
-
// All points must have been received
+ needrecv.normalize();
ASSERT_c (needrecv.empty(),
"Refinement prolongation: All points must have been received");
} // if rl > 0
+ timer_comm_refprol.stop();
+
// Synchronisation:
+ // cerr << "QQQ: regrid[h]" << endl;
- // Synchronisation should fill as many boundary points as
- // possible
+ static Carpet::Timer timer_comm_sync
+ ("CarpetLib::dh::regrid::comm::sync");
+ timer_comm_sync.start();
+ {
+
+ // Synchronisation should fill as many boundary points as
+ // possible
+
#if 0
- // Outer boundaries are not synchronised, since they cannot be
- // filled by boundary prolongation either, and therefore the
- // user code must set them anyway.
- ibset needrecv = box.boundaries;
+ // Outer boundaries are not synchronised, since they cannot
+ // be filled by boundary prolongation either, and therefore
+ // the user code must set them anyway.
+ ibset needrecv = box.boundaries;
#else
- // Outer boundaries are synchronised for backward
- // compatibility.
- ibset needrecv = box.ghosts;
+ // Outer boundaries are synchronised for backward
+ // compatibility.
+ ibset needrecv = box.ghosts;
#endif
-
- ibset & sync = box.sync;
-
- for (int cc = 0; cc < h.components(rl); ++ cc) {
- dboxes const & obox = boxes.AT(ml).AT(rl).AT(cc);
+ ibset const needrecv_orig = needrecv;
+ ibset & sync = box.sync;
+
+ for (int cc = 0; cc < h.components(rl); ++ cc) {
+ full_dboxes const & obox = full_level.AT(cc);
+
#if 0
- ibset ovlp = needrecv & obox.owned;
+ ibset ovlp = needrecv & obox.owned;
#else
- ibset ovlp = needrecv & obox.interior;
+ ibset ovlp = needrecv & obox.interior;
#endif
- ovlp.normalize();
-
- if (cc == c) {
- ASSERT_cc (ovlp.empty(),
- "A region may not synchronise from itself");
- }
-
- for (ibset::const_iterator
- ri = ovlp.begin(); ri != ovlp.end(); ++ ri)
- {
- ibbox const & recv = * ri;
- ibbox const & send = recv;
- if (on_this_proc (rl, c) or on_this_proc (rl, cc)) {
- int const p = dist::rank();
- fast_level.AT(p).fast_sync_sendrecv.push_back
+ ovlp.normalize();
+
+ if (cc == c) {
+ ASSERT_cc (ovlp.empty(),
+ "A region may not synchronise from itself");
+ }
+
+ for (ibset::const_iterator
+ ri = ovlp.begin(); ri != ovlp.end(); ++ ri)
+ {
+ ibbox const & recv = * ri;
+ ibbox const & send = recv;
+ fast_level.fast_sync_sendrecv.push_back
(sendrecv_pseudoregion_t (send, cc, recv, c));
+ if (not on_this_proc (rl, cc)) {
+ fast_dboxes & fast_level_otherproc =
+ fast_level_otherprocs.AT(this_proc(rl, cc));
+ fast_level_otherproc.fast_sync_sendrecv.push_back
+ (sendrecv_pseudoregion_t (send, cc, recv, c));
+ }
}
- }
+
+ needrecv -= ovlp;
+ sync += ovlp;
+
+ } // for cc
- needrecv -= ovlp;
- sync += ovlp;
+ sync.normalize();
- } // for cc
+ }
- needrecv.normalize();
- sync.normalize();
+ timer_comm_sync.stop();
// Boundary prolongation:
+ // cerr << "QQQ: regrid[i]" << endl;
+
+ static Carpet::Timer timer_comm_refbndprol
+ ("CarpetLib::dh::regrid::comm::refbndprol");
+ timer_comm_refbndprol.start();
if (rl > 0) {
int const orl = rl - 1;
+#if 0
+ // Outer boundaries are not synchronised, since they cannot
+ // be filled by boundary prolongation either, and therefore
+ // the user code must set them anyway.
+ ibset needrecv = box.boundaries;
+#else
+ // Outer boundaries are synchronised for backward
+ // compatibility.
+ ibset needrecv = box.ghosts;
+#endif
+
+ // Points which are synchronised need not be boundary
+ // prolongated
+ needrecv -= box.sync;
+
// Outer boundary points cannot be boundary prolongated
needrecv &= box.communicated;
// Prolongation must fill what cannot be synchronised, and
// also all buffer zones
needrecv += box.buffers;
+
needrecv.normalize();
+ ibset const needrecv_orig = needrecv;
ibset & bndref = box.bndref;
@@ -721,9 +842,10 @@ regrid ()
"Refinement factors must be integer multiples of each other");
i2vect const reffact =
i2vect (h.reffacts.at(rl) / h.reffacts.at(orl));
+ ivect const reffact1 = h.reffacts.at(rl) / h.reffacts.at(orl);
for (int cc = 0; cc < h.components(orl); ++ cc) {
- dboxes const & obox = boxes.AT(ml).AT(orl).AT(cc);
+ full_dboxes const & obox = full_boxes.AT(ml).AT(orl).AT(cc);
ibset contracted_oactive;
for (ibset::const_iterator
@@ -747,251 +869,522 @@ regrid ()
recv.expanded_for (obox.interior).expand (stencil_size);
ASSERT_c (send <= obox.exterior,
"Boundary prolongation: Send region must be contained in exterior");
- if (on_this_proc (rl, c) or on_this_proc (orl, cc)) {
- int const p = dist::rank();
- fast_level.AT(p).fast_ref_bnd_prol_sendrecv.push_back
+ fast_level.fast_ref_bnd_prol_sendrecv.push_back
+ (sendrecv_pseudoregion_t (send, cc, recv, c));
+ if (not on_this_proc (orl, cc)) {
+ fast_dboxes & fast_level_otherproc =
+ fast_level_otherprocs.AT(this_proc(orl, cc));
+ fast_level_otherproc.fast_ref_bnd_prol_sendrecv.push_back
(sendrecv_pseudoregion_t (send, cc, recv, c));
}
}
needrecv -= ovlp;
bndref += ovlp;
-
+
} // for cc
- needrecv.normalize();
bndref.normalize();
+ // All points must now have been received, either through
+ // synchronisation or through boundary prolongation
+ needrecv.normalize();
+ ASSERT_c (needrecv.empty(),
+ "Synchronisation and boundary prolongation: All points must have been received");
+
} // if rl > 0
- // All points must now have been received, either through
- // synchronisation or through boundary prolongation
- ASSERT_c (needrecv.empty(),
- "Synchronisation and boundary prolongation: All points must have been received");
+ timer_comm_refbndprol.stop();
- } // for c
+ } // for lc
// Refinement restriction:
+ // cerr << "QQQ: regrid[j]" << endl;
+
+ static Carpet::Timer timer_comm_refrest
+ ("CarpetLib::dh::regrid::comm::refrest");
+ timer_comm_refrest.start();
if (rl > 0) {
int const orl = rl - 1;
- fast_cboxes & fast_olevel = fast_boxes.AT(ml).AT(orl);
-
- ibset needrecv;
- for (int c = 0; c < h.components(rl); ++ c) {
- dboxes const & box = boxes.AT(ml).AT(rl).AT(c);
- dboxes const & obox0 = boxes.AT(ml).AT(orl).AT(0);
-
- // Refinement restriction may fill all active points, and
- // must use all active points
-
- for (ibset::const_iterator
- ai = box.active.begin(); ai != box.active.end(); ++ ai)
- {
- ibbox const & active = * ai;
- needrecv += active.contracted_for (obox0.interior);
- }
- needrecv.normalize();
- } // for c
+ fast_dboxes & fast_olevel = fast_boxes.AT(ml).AT(orl);
- for (int cc = 0; cc < h.components(orl); ++ cc) {
- dboxes & obox = boxes.AT(ml).AT(orl).AT(cc);
-
- for (int c = 0; c < h.components(rl); ++ c) {
- dboxes const & box = boxes.AT(ml).AT(rl).AT(c);
+ if (h.components(orl) > 0) {
+ for (int lc = 0; lc < h.local_components(rl); ++ lc) {
+ int const c = h.get_component (rl, lc);
+
+ full_dboxes const & box = full_level.AT(c);
+ full_dboxes const & obox0 = full_boxes.AT(ml).AT(orl).AT(0);
+
+ // Refinement restriction may fill all active points, and
+ // must use all active points
- ibset contracted_active;
+ ibset needrecv;
for (ibset::const_iterator
ai = box.active.begin(); ai != box.active.end(); ++ ai)
{
ibbox const & active = * ai;
- contracted_active += active.contracted_for (obox.interior);
+ needrecv += active.contracted_for (obox0.interior);
}
- contracted_active.normalize();
+ needrecv.normalize();
- ibset ovlp = obox.active & contracted_active;
- ovlp.normalize();
-
- for (ibset::const_iterator
- ri =ovlp.begin(); ri != ovlp.end(); ++ ri)
- {
- ibbox const & recv = * ri;
- ibbox const send = recv.expanded_for (box.interior);
- ASSERT_c (send <= box.active,
- "Refinement restriction: Send region must be contained in active part");
- if (on_this_proc (rl, c) or on_this_proc (orl, cc)) {
- int const p = dist::rank();
- fast_olevel.AT(p).fast_ref_rest_sendrecv.push_back
+ for (int cc = 0; cc < h.components(orl); ++ cc) {
+ full_dboxes & obox = full_boxes.AT(ml).AT(orl).AT(cc);
+
+ ibset contracted_active;
+ for (ibset::const_iterator
+ ai = box.active.begin(); ai != box.active.end(); ++ ai)
+ {
+ ibbox const & active = * ai;
+ contracted_active += active.contracted_for (obox0.interior);
+ }
+ contracted_active.normalize();
+
+ ibset ovlp = obox.active & contracted_active;
+ ovlp.normalize();
+
+ for (ibset::const_iterator
+ ri = ovlp.begin(); ri != ovlp.end(); ++ ri)
+ {
+ ibbox const & recv = * ri;
+ ibbox const send = recv.expanded_for (box.interior);
+ ASSERT_c (send <= box.active,
+ "Refinement restriction: Send region must be contained in active part");
+ fast_olevel.fast_ref_rest_sendrecv.push_back
(sendrecv_pseudoregion_t (send, c, recv, cc));
+ if (not on_this_proc (orl, cc)) {
+ fast_dboxes & fast_level_otherproc =
+ fast_level_otherprocs.AT(this_proc(orl, cc));
+ fast_level_otherproc.fast_ref_rest_sendrecv.push_back
+ (sendrecv_pseudoregion_t (send, c, recv, cc));
+ }
}
- }
-
- needrecv -= ovlp;
- } // for c
-
- } // for cc
-
- needrecv.normalize();
-
- // All points must have been received
- ASSERT_rl (needrecv.empty(),
- "Refinement restriction: All points must have been received");
+ needrecv -= ovlp;
+
+ } // for cc
+
+ // All points must have been received
+ needrecv.normalize();
+ ASSERT_rl (needrecv.empty(),
+ "Refinement restriction: All points must have been received");
+
+ } // for lc
+ } // if orl not empty
} // if rl > 0
+ timer_comm_refrest.stop();
+
+ timer_comm.stop();
+
// Regridding schedule:
+ // cerr << "QQQ: regrid[5]" << endl;
- for (int c = 0; c < h.components(rl); ++ c) {
-
- dboxes & box = boxes.AT(ml).AT(rl).AT(c);
-
- ibset needrecv = box.active;
+ fast_level.do_init = do_init;
+ if (do_init) {
+ static Carpet::Timer timer_regrid ("CarpetLib::dh::regrid::regrid");
+ timer_regrid.start();
+ for (int lc = 0; lc < h.local_components(rl); ++ lc) {
+ int const c = h.get_component (rl, lc);
+ // cerr << "QQQ: regrid[5a] lc=" << lc << " c=" << c << endl;
- // Synchronisation:
-
- if (int (oldboxes.size()) > ml and int (oldboxes.AT(ml).size()) > rl) {
+ full_dboxes & box = full_level.AT(c);
- int const oldcomponents = oldboxes.AT(ml).AT(rl).size();
+ ibset needrecv = box.active;
- // Synchronisation copies from the same level of the old
- // grid structure. It should fill as many active points as
- // possible
- for (int cc = 0; cc < oldcomponents; ++ cc) {
- dboxes const & obox = oldboxes.AT(ml).AT(rl).AT(cc);
+
+ // Synchronisation:
+ // cerr << "QQQ: regrid[k]" << endl;
+
+ static Carpet::Timer timer_regrid_sync
+ ("CarpetLib::dh::regrid::regrid::sync");
+ timer_regrid_sync.start();
+
+ if (int (oldboxes.size()) > ml and int (oldboxes.AT(ml).size()) > rl)
+ {
- ibset ovlp = needrecv & obox.owned;
- ovlp.normalize();
+ int const oldcomponents = oldboxes.AT(ml).AT(rl).size();
- for (ibset::const_iterator
- ri =ovlp.begin(); ri != ovlp.end(); ++ ri)
- {
- ibbox const & recv = * ri;
- ibbox const & send = recv;
- if (on_this_proc (rl, c) or on_this_oldproc (rl, cc)) {
- int const p = dist::rank();
- fast_level.AT(p).fast_old2new_sync_sendrecv.push_back
+ // Synchronisation copies from the same level of the old
+ // grid structure. It should fill as many active points
+ // as possible.
+
+ for (int cc = 0; cc < oldcomponents; ++ cc) {
+ dboxes const & obox = oldboxes.AT(ml).AT(rl).AT(cc);
+
+ ibset ovlp = needrecv & obox.owned;
+ ovlp.normalize();
+
+ for (ibset::const_iterator
+ ri = ovlp.begin(); ri != ovlp.end(); ++ ri)
+ {
+ ibbox const & recv = * ri;
+ ibbox const & send = recv;
+ fast_level.fast_old2new_sync_sendrecv.push_back
(sendrecv_pseudoregion_t (send, cc, recv, c));
+ if (not on_this_oldproc (rl, cc)) {
+ fast_dboxes & fast_level_otherproc =
+ fast_level_otherprocs.AT(this_proc(rl, cc));
+ fast_level_otherproc.fast_old2new_sync_sendrecv.push_back
+ (sendrecv_pseudoregion_t (send, cc, recv, c));
+ }
}
- }
+
+ needrecv -= ovlp;
+
+ } // for cc
- needrecv -= ovlp;
+ needrecv.normalize();
- } // for cc
+ } // if not oldboxes.empty
- needrecv.normalize();
-
- } // if not oldboxes.empty
-
-
-
- // Prolongation:
-
- if (rl > 0) {
- int const orl = rl - 1;
+ timer_regrid_sync.stop();
- // Prolongation interpolates from the next coarser level of
- // the new grid structure. It must fill what cannot be
- // synchronised
-
- i2vect const stencil_size = i2vect (prolongation_stencil_size());
- ASSERT_c (all (h.reffacts.at(rl) % h.reffacts.at(orl) == 0),
- "Refinement factors must be integer multiples of each other");
- i2vect const reffact =
- i2vect (h.reffacts.at(rl) / h.reffacts.at(orl));
- for (int cc = 0; cc < h.components(orl); ++ cc) {
- dboxes const & obox = boxes.AT(ml).AT(orl).AT(cc);
+ // Prolongation:
+ // cerr << "QQQ: regrid[l]" << endl;
+
+ static Carpet::Timer timer_regrid_prolongate
+ ("CarpetLib::dh::regrid::regrid::prolongate");
+ timer_regrid_prolongate.start();
+
+ if (rl > 0) {
+ int const orl = rl - 1;
- ibset contracted_oactive;
- for (ibset::const_iterator
- ai = obox.active.begin(); ai != obox.active.end(); ++ ai)
- {
- ibbox const & oactive = * ai;
- // untested for cell centering
- contracted_oactive +=
- oactive.contracted_for (box.interior).expand (reffact);
- }
- contracted_oactive.normalize();
+ // Prolongation interpolates from the next coarser level
+ // of the new grid structure. It must fill what cannot be
+ // synchronised.
- ibset ovlp = needrecv & contracted_oactive;
- ovlp.normalize();
+ i2vect const stencil_size = i2vect (prolongation_stencil_size());
- for (ibset::const_iterator
- ri = ovlp.begin(); ri != ovlp.end(); ++ ri)
- {
- ibbox const & recv = * ri;
- ibbox const send =
- recv.expanded_for (obox.interior).expand (stencil_size);
- ASSERT_c (send <= obox.exterior,
- "Regridding prolongation: Send region must be contained in exterior");
- if (on_this_proc (rl, c) or on_this_proc (orl, cc)) {
- int const p = dist::rank();
- fast_level.AT(p).fast_old2new_ref_prol_sendrecv.push_back
+ ASSERT_c (all (h.reffacts.at(rl) % h.reffacts.at(orl) == 0),
+ "Refinement factors must be integer multiples of each other");
+ i2vect const reffact =
+ i2vect (h.reffacts.at(rl) / h.reffacts.at(orl));
+
+ for (int cc = 0; cc < h.components(orl); ++ cc) {
+ full_dboxes const & obox = full_boxes.AT(ml).AT(orl).AT(cc);
+
+ ibset contracted_oactive;
+ for (ibset::const_iterator
+ ai = obox.active.begin(); ai != obox.active.end(); ++ ai)
+ {
+ ibbox const & oactive = * ai;
+ // untested for cell centering
+ contracted_oactive +=
+ oactive.contracted_for (box.interior).expand (reffact);
+ }
+ contracted_oactive.normalize();
+
+ ibset ovlp = needrecv & contracted_oactive;
+ ovlp.normalize();
+
+ for (ibset::const_iterator
+ ri = ovlp.begin(); ri != ovlp.end(); ++ ri)
+ {
+ ibbox const & recv = * ri;
+ ibbox const send =
+ recv.expanded_for (obox.interior).expand (stencil_size);
+ ASSERT_c (send <= obox.exterior,
+ "Regridding prolongation: Send region must be contained in exterior");
+ fast_level.fast_old2new_ref_prol_sendrecv.push_back
(sendrecv_pseudoregion_t (send, cc, recv, c));
+ if (not on_this_proc (orl, cc)) {
+ fast_dboxes & fast_level_otherproc =
+ fast_level_otherprocs.AT(this_proc(orl, cc));
+ fast_level_otherproc.fast_old2new_ref_prol_sendrecv.
+ push_back (sendrecv_pseudoregion_t (send, cc, recv, c));
+ }
}
- }
+
+ needrecv -= ovlp;
+
+ } // for cc
- needrecv -= ovlp;
+ needrecv.normalize();
- } // for cc
+ } // if rl > 0
- needrecv.normalize();
+ if (int (oldboxes.size()) > ml and int (oldboxes.AT(ml).size()) > 0) {
+ // All points must now have been received, either through
+ // synchronisation or through prolongation
+ ASSERT_c (needrecv.empty(),
+ "Regridding prolongation: All points must have been received");
+ }
- } // if rl > 0
+ timer_regrid_prolongate.stop();
+
+ } // for lc
- if (int (oldboxes.size()) > ml and int (oldboxes.AT(ml).size()) > 0) {
- // All points must now have been received, either through
- // synchronisation or through prolongation
- ASSERT_c (needrecv.empty(),
- "Regridding prolongation: All points must have been received");
+ timer_regrid.stop();
+
+ } // if do_init
+
+
+
+ // cerr << "QQQ: regrid[6]" << endl;
+ for (int lc = 0; lc < h.local_components(rl); ++ lc) {
+ int const c = h.get_component (rl, lc);
+
+ level.AT(c).exterior = full_level.AT(c).exterior;
+ level.AT(c).owned = full_level.AT(c).owned;
+ level.AT(c).interior = full_level.AT(c).interior;
+
+ level.AT(c).exterior_size = full_level.AT(c).exterior.size();
+ level.AT(c).owned_size = full_level.AT(c).owned.size();
+ level.AT(c).active_size = full_level.AT(c).active.size();
+
+ } // for lc
+
+
+
+ // Broadcast grid structure and communication schedule
+ // cerr << "QQQ: regrid[7]" << endl;
+
+ {
+
+ static Carpet::Timer timer_bcast_boxes
+ ("CarpetLib::dh::regrid::bcast_boxes");
+ timer_bcast_boxes.start();
+
+ int const count_send = h.local_components(rl);
+ vector<dboxes> level_send (count_send);
+ for (int lc = 0; lc < h.local_components(rl); ++ lc) {
+ int const c = h.get_component (rl, lc);
+ level_send.AT(lc) = level.AT(c);
+ }
+ // cerr << "QQQ: regrid[7a]" << endl;
+ vector<vector<dboxes> > const level_recv =
+ allgatherv (dist::comm(), level_send);
+ // cerr << "QQQ: regrid[7b]" << endl;
+ vector<int> count_recv (dist::size(), 0);
+ for (int c = 0; c < h.components(rl); ++ c) {
+ int const p = this_proc (rl, c);
+ if (p != dist::rank()) {
+ level.AT(c) = level_recv.AT(p).AT(count_recv.AT(p));
+ ++ count_recv.AT(p);
+ }
+ }
+ for (int p = 0; p < dist::size(); ++ p) {
+ if (p != dist::rank()) {
+ assert (count_recv.AT(p) == int(level_recv.AT(p).size()));
+ }
}
+ // cerr << "QQQ: regrid[7c]" << endl;
- } // for c
+ timer_bcast_boxes.stop();
+
+ }
- } // for rl
- } // for m
-
-
-
- // Output:
- if (output_bboxes or there_was_an_error) {
-
- for (int ml = 0; ml < h.mglevels(); ++ ml) {
- for (int rl = 0; rl < h.reflevels(); ++ rl) {
+ {
+
+ static Carpet::Timer timer_bcast_comm
+ ("CarpetLib::dh::regrid::bcast_comm");
+ timer_bcast_comm.start();
+
+ static Carpet::Timer timer_bcast_comm_ref_prol
+ ("CarpetLib::dh::regrid::bcast_comm::ref_prol");
+ timer_bcast_comm_ref_prol.start();
+ broadcast_schedule (fast_level_otherprocs, fast_level,
+ & fast_dboxes::fast_ref_prol_sendrecv);
+ timer_bcast_comm_ref_prol.stop();
+
+ static Carpet::Timer timer_bcast_comm_sync
+ ("CarpetLib::dh::regrid::bcast_comm::sync");
+ timer_bcast_comm_sync.start();
+ broadcast_schedule (fast_level_otherprocs, fast_level,
+ & fast_dboxes::fast_sync_sendrecv);
+ timer_bcast_comm_sync.stop();
+
+ static Carpet::Timer timer_bcast_comm_ref_bnd_prol
+ ("CarpetLib::dh::regrid::bcast_comm::ref_bnd_prol");
+ timer_bcast_comm_ref_bnd_prol.start();
+ broadcast_schedule (fast_level_otherprocs, fast_level,
+ & fast_dboxes::fast_ref_bnd_prol_sendrecv);
+ timer_bcast_comm_ref_bnd_prol.stop();
+
+ if (rl > 0) {
+ int const orl = rl - 1;
+ fast_dboxes & fast_olevel = fast_boxes.AT(ml).AT(orl);
+ static Carpet::Timer timer_bcast_comm_ref_rest
+ ("CarpetLib::dh::regrid::bcast_comm::ref_rest");
+ timer_bcast_comm_ref_rest.start();
+ broadcast_schedule (fast_level_otherprocs, fast_olevel,
+ & fast_dboxes::fast_ref_rest_sendrecv);
+ timer_bcast_comm_ref_rest.stop();
+ }
+
+ // TODO: Maybe broadcast old2new schedule only if do_init is
+ // set
+ static Carpet::Timer timer_bcast_comm_old2new_sync
+ ("CarpetLib::dh::regrid::bcast_comm::old2new_sync");
+ timer_bcast_comm_old2new_sync.start();
+ broadcast_schedule (fast_level_otherprocs, fast_level,
+ & fast_dboxes::fast_old2new_sync_sendrecv);
+ timer_bcast_comm_old2new_sync.stop();
+
+ static Carpet::Timer timer_bcast_comm_old2new_ref_prol
+ ("CarpetLib::dh::regrid::bcast_comm::old2new_ref_prol");
+ timer_bcast_comm_old2new_ref_prol.start();
+ broadcast_schedule (fast_level_otherprocs, fast_level,
+ & fast_dboxes::fast_old2new_ref_prol_sendrecv);
+ timer_bcast_comm_old2new_ref_prol.stop();
+
+ timer_bcast_comm.stop();
+
+ }
+
+
+
+ // Output:
+ if (output_bboxes or there_was_an_error) {
+
for (int c = 0; c < h.components(rl); ++ c) {
- dboxes const & box = boxes.AT(ml).AT(rl).AT(c);
- fast_dboxes const & fast_box = fast_boxes.AT(ml).AT(rl).AT(c);
+ full_dboxes const & box = full_boxes.AT(ml).AT(rl).AT(c);
cout << eol;
cout << "ml=" << ml << " rl=" << rl << " c=" << c << eol;
cout << box;
- cout << fast_box;
- cout << endl;
} // for c
- } // for rl
- } // for m
+
+ fast_dboxes const & fast_box = fast_boxes.AT(ml).AT(rl);
+
+ cout << eol;
+ cout << "ml=" << ml << " rl=" << rl << eol;
+ cout << fast_box;
+
+ } // if output_bboxes
+
+
+
+ // Free memory early to save space
+ if (int (oldboxes.size()) > ml and int (oldboxes.AT(ml).size()) > rl) {
+ oldboxes.AT(ml).AT(rl).clear();
+ }
+
+ if (ml > 0) {
+ if (rl > 0) {
+ full_boxes.AT(ml-1).AT(rl-1).clear();
+ }
+ if (rl == h.reflevels()-1) {
+ full_boxes.AT(ml-1).AT(rl).clear();
+ }
+ }
+ if (ml == h.mglevels()-1) {
+ if (rl > 0) {
+ full_boxes.AT(ml).AT(rl-1).clear();
+ }
+ if (rl == h.reflevels()-1) {
+ full_boxes.AT(ml).AT(rl).clear();
+ }
+ }
+
+ } // for rl
+
+ if (ml > 0) {
+ full_boxes.AT(ml-1).clear();
+ }
+ if (ml == h.mglevels()-1) {
+ full_boxes.AT(ml).clear();
+ }
+
+ } // for ml
+
+
+
+ // Output:
+ if (output_bboxes or there_was_an_error) {
+
+ cout << eol;
+ cout << "memoryof(gh)=" << memoryof(h) << eol;
+ cout << "memoryof(dh)=" << memoryof(*this) << eol;
+ cout << "memoryof(dh.boxes)=" << memoryof(boxes) << eol;
+ cout << "memoryof(dh.fast_boxes)=" << memoryof(fast_boxes) << eol;
+ int gfcount = 0;
+ size_t gfmemory = 0;
+ for (list<ggf*>::const_iterator
+ gfi = gfs.begin(); gfi != gfs.end(); ++ gfi)
+ {
+ ++ gfcount;
+ gfmemory += memoryof(**gfi);
+ }
+ cout << "#gfs=" << gfcount << eol;
+ cout << "memoryof(gfs)=" << gfmemory << eol;
} // if output_bboxes
if (there_was_an_error) {
CCTK_WARN (CCTK_WARN_ABORT,
- "The grid structure is inconsistent. "
- "It is impossible to continue.");
+ "The grid structure is inconsistent. It is impossible to continue.");
}
total.stop (0);
+ timer.stop();
+}
+
+
+
+void
+dh::
+broadcast_schedule (vector<fast_dboxes> & fast_level_otherprocs,
+                    fast_dboxes & fast_level,
+                    srpvect fast_dboxes::* const schedule_item)
+{
+  // Exchange one schedule item (selected through the
+  // pointer-to-member schedule_item) between all processes: the
+  // entries collected for each process in fast_level_otherprocs are
+  // passed to alltoallv1, and the entries that call returns are
+  // appended to the same item of fast_level.
+
+  // Step 1: Move the per-process entries into the send buffer. The
+  // swap leaves fast_level_otherprocs holding what the newly created
+  // buffer elements contained.
+  static Carpet::Timer timer_bs1 ("CarpetLib::dh::bs1");
+  timer_bs1.start();
+  int const nprocs = dist::size();
+  vector <srpvect> outgoing (nprocs);
+  for (int proc = 0; proc < nprocs; ++ proc) {
+    srpvect & pending = fast_level_otherprocs.AT(proc).*schedule_item;
+    swap (outgoing.AT(proc), pending);
+  }
+  timer_bs1.stop();
+
+  // Step 2: Exchange the entries between the processes
+  static Carpet::Timer timer_bs2 ("CarpetLib::dh::bs2");
+  timer_bs2.start();
+  srpvect const incoming = alltoallv1 (dist::comm(), outgoing);
+  timer_bs2.stop();
+
+  // Step 3: Append the received entries to the local schedule
+  static Carpet::Timer timer_bs3 ("CarpetLib::dh::bs3");
+  timer_bs3.start();
+  srpvect & own_schedule = fast_level.*schedule_item;
+  own_schedule.insert
+    (own_schedule.end(), incoming.begin(), incoming.end());
+  timer_bs3.stop();
+}
+
+
+
+void
+dh::
+regrid_free (bool const do_init)
+{
+  // Handle the regridding (old2new) schedules of every multigrid and
+  // refinement level. If do_init is set, they are cleared; otherwise
+  // they are asserted to be empty already.
+  for (int ml = 0; ml < h.mglevels(); ++ ml) {
+    for (int rl = 0; rl < h.reflevels(); ++ rl) {
+      if (do_init) {
+        fast_boxes.AT(ml).AT(rl).fast_old2new_sync_sendrecv.clear();
+        fast_boxes.AT(ml).AT(rl).fast_old2new_ref_prol_sendrecv.clear();
+      } else {
+        assert (fast_boxes.AT(ml).AT(rl).fast_old2new_sync_sendrecv.empty());
+        assert (fast_boxes.AT(ml).AT(rl).fast_old2new_ref_prol_sendrecv.empty());
+      }
+    }
+  }
}
@@ -1004,7 +1397,7 @@ recompose (int const rl, bool const do_prolongate)
assert (rl>=0 and rl<h.reflevels());
- static Timer timer ("dh::recompose");
+ static Carpet::Timer timer ("CarpetLib::dh::recompose");
timer.start ();
for (list<ggf*>::iterator f=gfs.begin(); f!=gfs.end(); ++f) {
@@ -1017,11 +1410,21 @@ recompose (int const rl, bool const do_prolongate)
for (list<ggf*>::iterator f=gfs.begin(); f!=gfs.end(); ++f) {
(*f)->recompose_allocate (rl);
}
+#warning "TODO: If this works, rename do_prolongate to do_init here, and remove the do_prolongate parameter from ggf::recompose_fill"
+#if 0
for (comm_state state; not state.done(); state.step()) {
for (list<ggf*>::iterator f=gfs.begin(); f!=gfs.end(); ++f) {
(*f)->recompose_fill (state, rl, do_prolongate);
}
}
+#endif
+ if (do_prolongate) {
+ for (comm_state state; not state.done(); state.step()) {
+ for (list<ggf*>::iterator f=gfs.begin(); f!=gfs.end(); ++f) {
+ (*f)->recompose_fill (state, rl, true);
+ }
+ }
+ }
for (list<ggf*>::iterator f=gfs.begin(); f!=gfs.end(); ++f) {
(*f)->recompose_free_old (rl);
}
@@ -1030,33 +1433,144 @@ recompose (int const rl, bool const do_prolongate)
// but requires less memory. This is the default.
for (list<ggf*>::iterator f=gfs.begin(); f!=gfs.end(); ++f) {
(*f)->recompose_allocate (rl);
+#if 0
for (comm_state state; not state.done(); state.step()) {
(*f)->recompose_fill (state, rl, do_prolongate);
}
+#endif
+ if (do_prolongate) {
+ for (comm_state state; not state.done(); state.step()) {
+ (*f)->recompose_fill (state, rl, true);
+ }
+ }
(*f)->recompose_free_old (rl);
}
}
- timer.stop (0);
+ timer.stop ();
}
// Grid function management
-void
+dh::ggf_handle
dh::
add (ggf * const f)
{
CHECKPOINT;
- gfs.push_back (f);
+ return gfs.insert (gfs.end(), f);
}
void
dh::
-remove (ggf * const f)
+erase (ggf_handle const fi)
{
CHECKPOINT;
- gfs.remove (f);
+ gfs.erase (fi);
+}
+
+
+
+// Equality
+
+bool
+dh::full_dboxes::
+operator== (full_dboxes const & b) const
+{
+  // Member-wise comparison. The members are tested in the order
+  // listed below, and the first mismatch decides the result.
+  if (not (exterior == b.exterior)) return false;
+  // is_outer_boundary compares element-wise; all elements must agree
+  if (not all(all(is_outer_boundary == b.is_outer_boundary))) return false;
+  if (not (outer_boundaries == b.outer_boundaries)) return false;
+  if (not (communicated == b.communicated)) return false;
+  if (not (boundaries == b.boundaries)) return false;
+  if (not (owned == b.owned)) return false;
+  if (not (buffers == b.buffers)) return false;
+  if (not (active == b.active)) return false;
+  if (not (sync == b.sync)) return false;
+  if (not (bndref == b.bndref)) return false;
+  if (not (ghosts == b.ghosts)) return false;
+  return interior == b.interior;
+}
+
+
+
+// MPI datatypes
+
+MPI_Datatype
+mpi_datatype (dh::dboxes const &)
+{
+  // Return the MPI datatype describing a dh::dboxes. The datatype is
+  // created on the first call and cached in a function-local static;
+  // later calls return the cached handle. The argument only selects
+  // the overload.
+  static bool initialised = false;
+  static MPI_Datatype newtype;
+  if (initialised) {
+    return newtype;
+  }
+
+  // Prototype object, used only to measure field sizes and offsets
+  static dh::dboxes proto;
+#define ENTRY(type, name)                                               \
+  {                                                                     \
+    sizeof proto.name / sizeof(type), /* count elements */              \
+    (char*)&proto.name - (char*)&proto, /* offsetof doesn't work (why?) */ \
+    dist::mpi_datatype<type>(), /* find MPI datatype */                 \
+    STRINGIFY(name), /* field name */                                   \
+    STRINGIFY(type), /* type name */                                    \
+  }
+  // The bounding boxes are described as arrays of int. The final
+  // entry places an MPI_UB marker at sizeof(dh::dboxes).
+  // NOTE(review): MPI_UB was removed in MPI-3.0; confirm that the
+  // targeted MPI implementations still provide it.
+  dist::mpi_struct_descr_t const descr[] = {
+    ENTRY(int, exterior),
+    ENTRY(int, owned),
+    ENTRY(int, interior),
+    ENTRY(dh::dboxes::size_type, exterior_size),
+    ENTRY(dh::dboxes::size_type, owned_size),
+    ENTRY(dh::dboxes::size_type, active_size),
+    {1, sizeof proto, MPI_UB, "MPI_UB", "MPI_UB"}
+  };
+#undef ENTRY
+  newtype =
+    dist::create_mpi_datatype (sizeof descr / sizeof descr[0], descr,
+                               "dh::dboxes", sizeof proto);
+#if 0
+  int type_size;
+  MPI_Type_size (newtype, & type_size);
+  assert (type_size <= sizeof proto);
+  MPI_Aint type_lb, type_ub;
+  MPI_Type_lb (newtype, & type_lb);
+  MPI_Type_ub (newtype, & type_ub);
+  assert (type_ub - type_lb == sizeof proto);
+#endif
+  initialised = true;
+  return newtype;
+}
+
+MPI_Datatype
+mpi_datatype (dh::fast_dboxes const &)
+{
+  // Return the MPI datatype describing a dh::fast_dboxes. The
+  // datatype is created on the first call and cached in a
+  // function-local static; later calls return the cached handle. The
+  // argument only selects the overload.
+  static bool initialised = false;
+  static MPI_Datatype newtype;
+  if (initialised) {
+    return newtype;
+  }
+
+  // Prototype object, used only to measure field sizes and offsets
+  static dh::fast_dboxes proto;
+#define ENTRY(type, name)                                               \
+  {                                                                     \
+    sizeof proto.name / sizeof(type), /* count elements */              \
+    (char*)&proto.name - (char*)&proto, /* offsetof doesn't work (why?) */ \
+    dist::mpi_datatype<type>(), /* find MPI datatype */                 \
+    STRINGIFY(name), /* field name */                                   \
+    STRINGIFY(type), /* type name */                                    \
+  }
+  // Each schedule is described as one element of type dh::srpvect.
+  // NOTE(review): what dist::mpi_datatype<dh::srpvect>() yields is
+  // not visible here; confirm that describing these members in a
+  // flat MPI struct is meaningful.
+  // NOTE(review): MPI_UB was removed in MPI-3.0; confirm that the
+  // targeted MPI implementations still provide it.
+  dist::mpi_struct_descr_t const descr[] = {
+    ENTRY (dh::srpvect, fast_mg_rest_sendrecv),
+    ENTRY (dh::srpvect, fast_mg_prol_sendrecv),
+    ENTRY (dh::srpvect, fast_ref_prol_sendrecv),
+    ENTRY (dh::srpvect, fast_ref_rest_sendrecv),
+    ENTRY (dh::srpvect, fast_sync_sendrecv),
+    ENTRY (dh::srpvect, fast_ref_bnd_prol_sendrecv),
+    ENTRY (dh::srpvect, fast_old2new_sync_sendrecv),
+    ENTRY (dh::srpvect, fast_old2new_ref_prol_sendrecv),
+    {1, sizeof proto, MPI_UB, "MPI_UB", "MPI_UB"}
+  };
+#undef ENTRY
+  newtype =
+    dist::create_mpi_datatype (sizeof descr / sizeof descr[0], descr,
+                               "dh::fast_dboxes", sizeof proto);
+  initialised = true;
+  return newtype;
}
@@ -1069,22 +1583,48 @@ memory ()
const
{
return
+ sizeof alldhi + // memoryof (alldhi) +
+ sizeof & h + // memoryof (& h) +
+ sizeof gh_handle + // memoryof (gh_handle) +
memoryof (ghost_width) +
memoryof (buffer_width) +
memoryof (prolongation_order_space) +
memoryof (boxes) +
memoryof (fast_boxes) +
- memoryof (fast_oldboxes) +
memoryof (gfs);
}
size_t
+dh::
+allmemory ()
+{
+  // Memory used by the list of all data hierarchies itself, plus the
+  // memory reported for every data hierarchy in that list
+  size_t total = memoryof(alldh);
+  typedef list<dh*>::const_iterator dh_iterator;
+  dh_iterator const last = alldh.end();
+  for (dh_iterator it = alldh.begin(); it != last; ++ it) {
+    total += memoryof(**it);
+  }
+  return total;
+}
+
+size_t
dh::dboxes::
memory ()
const
{
return
memoryof (exterior) +
+ memoryof (owned) +
+ memoryof (interior);
+}
+
+size_t
+dh::full_dboxes::
+memory ()
+ const
+{
+ return
+ memoryof (exterior) +
memoryof (is_outer_boundary) +
memoryof (outer_boundaries) +
memoryof (communicated) +
@@ -1116,6 +1656,135 @@ memory ()
+// Input
+
+istream &
+dh::dboxes::
+input (istream & is)
+{
+  // Read a dh::dboxes from the stream, in the order exterior, owned,
+  // interior, active_size, enclosed in "dh::dboxes:{" ... "}".
+  // exterior_size and owned_size are not read; they are recomputed
+  // from the regions just read. Returns the stream. An input_error
+  // is reported on cout and then passed on to the caller.
+  try {
+    skipws (is);
+    consume (is, "dh::dboxes:{");
+    skipws (is);
+    consume (is, "exterior:");
+    is >> exterior;
+    exterior_size = exterior.size();
+    skipws (is);
+    consume (is, "owned:");
+    is >> owned;
+    owned_size = owned.size();
+    skipws (is);
+    consume (is, "interior:");
+    is >> interior;
+    skipws (is);
+    consume (is, "active_size:");
+    is >> active_size;
+    skipws (is);
+    consume (is, "}");
+  } catch (input_error &) {
+    // Name the type that was actually being read
+    cout << "Input error while reading a dh::dboxes" << endl;
+    // Rethrow the original exception object instead of a copy, so
+    // that its dynamic type is preserved
+    throw;
+  }
+  return is;
+}
+
+istream &
+dh::full_dboxes::
+input (istream & is)
+{
+  // Read a dh::full_dboxes from the stream: all regions, each
+  // preceded by its label, enclosed in "dh::full_dboxes:{" ... "}".
+  // Returns the stream. An input_error is reported on cout and then
+  // passed on to the caller.
+  try {
+    skipws (is);
+    consume (is, "dh::full_dboxes:{");
+    skipws (is);
+    consume (is, "exterior:");
+    is >> exterior;
+    skipws (is);
+    consume (is, "is_outer_boundary:");
+    is >> is_outer_boundary;
+    skipws (is);
+    consume (is, "outer_boundaries:");
+    is >> outer_boundaries;
+    skipws (is);
+    consume (is, "communicated:");
+    is >> communicated;
+    skipws (is);
+    consume (is, "boundaries:");
+    is >> boundaries;
+    skipws (is);
+    consume (is, "owned:");
+    is >> owned;
+    skipws (is);
+    consume (is, "buffers:");
+    is >> buffers;
+    skipws (is);
+    consume (is, "active:");
+    is >> active;
+    skipws (is);
+    consume (is, "sync:");
+    is >> sync;
+    skipws (is);
+    consume (is, "bndref:");
+    is >> bndref;
+    skipws (is);
+    consume (is, "ghosts:");
+    is >> ghosts;
+    skipws (is);
+    consume (is, "interior:");
+    is >> interior;
+    skipws (is);
+    consume (is, "}");
+  } catch (input_error &) {
+    cout << "Input error while reading a dh::full_dboxes" << endl;
+    // Rethrow the original exception object instead of a copy, so
+    // that its dynamic type is preserved
+    throw;
+  }
+  return is;
+}
+
+istream &
+dh::fast_dboxes::
+input (istream & is)
+{
+  // Read a dh::fast_dboxes (communication schedule) from the stream:
+  // all schedules, each preceded by its label, enclosed in
+  // "dh::fast_dboxes:{" ... "}". Returns the stream. An input_error
+  // is reported on cout and then passed on to the caller.
+  try {
+    skipws (is);
+    consume (is, "dh::fast_dboxes:{");
+    skipws (is);
+    consume (is, "fast_mg_rest_sendrecv:");
+    is >> fast_mg_rest_sendrecv;
+    skipws (is);
+    consume (is, "fast_mg_prol_sendrecv:");
+    is >> fast_mg_prol_sendrecv;
+    skipws (is);
+    consume (is, "fast_ref_prol_sendrecv:");
+    is >> fast_ref_prol_sendrecv;
+    skipws (is);
+    consume (is, "fast_ref_rest_sendrecv:");
+    is >> fast_ref_rest_sendrecv;
+    skipws (is);
+    consume (is, "fast_sync_sendrecv:");
+    is >> fast_sync_sendrecv;
+    skipws (is);
+    consume (is, "fast_ref_bnd_prol_sendrecv:");
+    is >> fast_ref_bnd_prol_sendrecv;
+    skipws (is);
+    consume (is, "fast_old2new_sync_sendrecv:");
+    is >> fast_old2new_sync_sendrecv;
+    skipws (is);
+    consume (is, "fast_old2new_ref_prol_sendrecv:");
+    is >> fast_old2new_ref_prol_sendrecv;
+    skipws (is);
+    consume (is, "}");
+  } catch (input_error &) {
+    cout << "Input error while reading a dh::fast_dboxes" << endl;
+    // Rethrow the original exception object instead of a copy, so
+    // that its dynamic type is preserved
+    throw;
+  }
+  return is;
+}
+
+
+
// Output
ostream &
@@ -1149,19 +1818,35 @@ output (ostream & os)
const
{
// Regions:
- os << "dh::dboxes:" << eol;
- os << "exterior:" << exterior << eol;
- os << "is_outer_boundary:" << is_outer_boundary << eol;
- os << "outer_boundaries:" << outer_boundaries << eol;
- os << "communicated:" << communicated << eol;
- os << "boundaries:" << boundaries << eol;
- os << "owned:" << owned << eol;
- os << "buffers:" << buffers << eol;
- os << "active:" << active << eol;
- os << "sync:" << sync << eol;
- os << "bndref:" << bndref << eol;
- os << "ghosts:" << ghosts << eol;
- os << "interior:" << interior << eol;
+ os << "dh::dboxes:{" << eol
+ << " exterior: " << exterior << eol
+ << " owned: " << owned << eol
+ << " interior: " << interior << eol
+ << " active_size: " << active_size << eol
+ << "}" << eol;
+ return os;
+}
+
+ostream &
+dh::full_dboxes::
+output (ostream & os)
+ const
+{
+ // Regions:
+ os << "dh::full_dboxes:{" << eol
+ << " exterior: " << exterior << eol
+ << " is_outer_boundary: " << is_outer_boundary << eol
+ << " outer_boundaries: " << outer_boundaries << eol
+ << " communicated: " << communicated << eol
+ << " boundaries: " << boundaries << eol
+ << " owned: " << owned << eol
+ << " buffers: " << buffers << eol
+ << " active: " << active << eol
+ << " sync: " << sync << eol
+ << " bndref: " << bndref << eol
+ << " ghosts: " << ghosts << eol
+ << " interior: " << interior << eol
+ << "}" << eol;
return os;
}
@@ -1171,14 +1856,15 @@ output (ostream & os)
const
{
// Communication schedule:
- os << "dh::fast_dboxes:" << eol;
- os << "fast_mg_rest_sendrecv: " << fast_mg_rest_sendrecv << eol;
- os << "fast_mg_prol_sendrecv: " << fast_mg_prol_sendrecv << eol;
- os << "fast_ref_prol_sendrecv: " << fast_ref_prol_sendrecv << eol;
- os << "fast_ref_rest_sendrecv: " << fast_ref_rest_sendrecv << eol;
- os << "fast_sync_sendrecv: " << fast_sync_sendrecv << eol;
- os << "fast_ref_bnd_prol_sendrecv: " << fast_ref_bnd_prol_sendrecv << eol;
- os << "fast_old2new_sync_sendrecv:" << fast_old2new_sync_sendrecv << eol;
- os << "fast_old2new_ref_prol_sendrecv:" << fast_old2new_ref_prol_sendrecv << eol;
+ os << "dh::fast_dboxes:{" << eol
+ << " fast_mg_rest_sendrecv: " << fast_mg_rest_sendrecv << eol
+ << " fast_mg_prol_sendrecv: " << fast_mg_prol_sendrecv << eol
+ << " fast_ref_prol_sendrecv: " << fast_ref_prol_sendrecv << eol
+ << " fast_ref_rest_sendrecv: " << fast_ref_rest_sendrecv << eol
+ << " fast_sync_sendrecv: " << fast_sync_sendrecv << eol
+ << " fast_ref_bnd_prol_sendrecv: " << fast_ref_bnd_prol_sendrecv << eol
+ << " fast_old2new_sync_sendrecv: " << fast_old2new_sync_sendrecv << eol
+ << " fast_old2new_ref_prol_sendrecv: " << fast_old2new_ref_prol_sendrecv << eol
+ << "}" << eol;
return os;
}
diff --git a/Carpet/CarpetLib/src/dh.hh b/Carpet/CarpetLib/src/dh.hh
index 078e0b725..93a29f83b 100644
--- a/Carpet/CarpetLib/src/dh.hh
+++ b/Carpet/CarpetLib/src/dh.hh
@@ -24,9 +24,13 @@ class ggf;
class dh;
+
// A data hierarchy (grid hierarchy plus ghost zones)
class dh {
+ static list<dh*> alldh;
+ list<dh*>::iterator alldhi;
+
// Types
public:
typedef list<ibbox> iblist;
@@ -42,6 +46,23 @@ public:
// Region description:
ibbox exterior; // whole region (including boundaries)
+ ibbox owned; // evolved in time
+ ibbox interior; // interior (without ghost zones)
+
+ // Region statistics:
+ typedef ibbox::size_type size_type;
+ size_type exterior_size, owned_size, active_size;
+
+ size_t memory () const CCTK_ATTRIBUTE_PURE;
+ istream & input (istream & is);
+ ostream & output (ostream & os) const;
+ };
+
+ struct full_dboxes {
+
+ // Complete region description:
+
+ ibbox exterior; // whole region (including boundaries)
b2vect is_outer_boundary;
ibset outer_boundaries; // outer boundary
@@ -61,7 +82,14 @@ public:
ibset ghosts; // ghost zones, as seen from Cactus
ibbox interior; // interior (without ghost zones)
- size_t memory () const;
+ bool operator== (full_dboxes const & b) const;
+ bool operator!= (full_dboxes const & b) const
+ {
+ return not operator==(b);
+ }
+
+ size_t memory () const CCTK_ATTRIBUTE_PURE;
+ istream & input (istream& is);
ostream & output (ostream & os) const;
};
@@ -78,10 +106,19 @@ public:
// Regridding schedule:
+ bool do_init; // the srpvects below are only defined
+ // if this is true
srpvect fast_old2new_sync_sendrecv;
srpvect fast_old2new_ref_prol_sendrecv;
- size_t memory () const;
+ bool operator== (fast_dboxes const & b) const CCTK_ATTRIBUTE_PURE;
+ bool operator!= (fast_dboxes const & b) const
+ {
+ return not operator==(b);
+ }
+
+ size_t memory () const CCTK_ATTRIBUTE_PURE;
+ istream & input (istream & is);
ostream & output (ostream & os) const;
};
@@ -91,8 +128,11 @@ private:
typedef vector<cboxes> rboxes; // ... for each refinement level
typedef vector<rboxes> mboxes; // ... for each multigrid level
- typedef vector<fast_dboxes> fast_cboxes; // ... for each component
- typedef vector<fast_cboxes> fast_rboxes; // ... for each refinement level
+ typedef vector<full_dboxes> full_cboxes; // ... for each component
+ typedef vector<full_cboxes> full_rboxes; // ... for each refinement level
+ typedef vector<full_rboxes> full_mboxes; // ... for each multigrid level
+
+ typedef vector<fast_dboxes> fast_rboxes; // ... for each refinement level
typedef vector<fast_rboxes> fast_mboxes; // ... for each multigrid level
@@ -104,16 +144,17 @@ public: // should be readonly
// Fields
gh & h; // hierarchy
+ gh::dh_handle gh_handle;
+
i2vect ghost_width; // number of ghost zones
i2vect buffer_width; // number of buffer zones
int prolongation_order_space; // order of spatial prolongation operator
mboxes boxes; // grid hierarchy
- mboxes oldboxes; // old grid hierarchy, used during regridding
fast_mboxes fast_boxes; // grid hierarchy
- fast_mboxes fast_oldboxes;
+ typedef list<ggf*>::iterator ggf_handle;
list<ggf*> gfs; // list of all grid functions
public:
@@ -127,51 +168,101 @@ public:
~dh ();
// Helpers
- int prolongation_stencil_size () const;
+ int prolongation_stencil_size () const CCTK_ATTRIBUTE_CONST;
// Modifiers
- void regrid ();
+ void regrid (bool do_init);
+ void regrid_free (bool do_init);
void recompose (int rl, bool do_prolongate);
private:
- int this_proc (int rl, int c) const;
- bool on_this_proc (int rl, int c) const;
- bool on_this_proc (int rl, int c, int cc) const;
- int this_oldproc (int rl, int c) const;
- bool on_this_oldproc (int rl, int c) const;
+ int this_proc (int rl, int c) const CCTK_ATTRIBUTE_PURE;
+ bool on_this_proc (int rl, int c) const CCTK_ATTRIBUTE_PURE;
+ bool on_this_proc (int rl, int c, int cc) const CCTK_ATTRIBUTE_PURE;
+ int this_oldproc (int rl, int c) const CCTK_ATTRIBUTE_PURE;
+ bool on_this_oldproc (int rl, int c) const CCTK_ATTRIBUTE_PURE;
+
+ static
+ void
+ broadcast_schedule (vector<fast_dboxes> & fast_level_otherprocs,
+ fast_dboxes & fast_level,
+ srpvect fast_dboxes::* const schedule_item);
public:
// Grid function management
- void add (ggf * f);
- void remove (ggf * f);
+ ggf_handle add (ggf * f);
+ void erase (ggf_handle fi);
// Output
- size_t memory () const;
+ size_t memory () const CCTK_ATTRIBUTE_PURE;
+ static size_t allmemory () CCTK_ATTRIBUTE_PURE;
ostream & output (ostream & os) const;
};
+MPI_Datatype mpi_datatype (dh::dboxes const &) CCTK_ATTRIBUTE_CONST;
+MPI_Datatype mpi_datatype (dh::fast_dboxes const &);
+namespace dist {
+  // Template-style access to the MPI datatypes of the dh structures;
+  // each specialisation forwards to the overload taking an object
+
+  template<> inline MPI_Datatype mpi_datatype<dh::dboxes> ()
+    CCTK_ATTRIBUTE_CONST;
+  template<> inline MPI_Datatype mpi_datatype<dh::dboxes> ()
+  {
+    dh::dboxes prototype;
+    return mpi_datatype(prototype);
+  }
+
+  template<> inline MPI_Datatype mpi_datatype<dh::fast_dboxes> ()
+    CCTK_ATTRIBUTE_CONST;
+  template<> inline MPI_Datatype mpi_datatype<dh::fast_dboxes> ()
+  {
+    dh::fast_dboxes prototype;
+    return mpi_datatype(prototype);
+  }
+}
+
+inline size_t memoryof (dh::dboxes const & b) CCTK_ATTRIBUTE_PURE;
inline size_t memoryof (dh::dboxes const & b)
{
return b.memory ();
}
+// Memory usage of a full_dboxes, as reported by its memory() member
+inline size_t memoryof (dh::full_dboxes const & b) CCTK_ATTRIBUTE_PURE;
+inline size_t memoryof (dh::full_dboxes const & b)
+{
+  size_t const bytes = b.memory ();
+  return bytes;
+}
+
+inline size_t memoryof (dh::fast_dboxes const & b) CCTK_ATTRIBUTE_PURE;
inline size_t memoryof (dh::fast_dboxes const & b)
{
return b.memory ();
}
+inline size_t memoryof (dh const & d) CCTK_ATTRIBUTE_PURE;
inline size_t memoryof (dh const & d)
{
return d.memory ();
}
+// Stream extraction for dboxes; delegates to dboxes::input
+inline istream & operator>> (istream & is, dh::dboxes & b)
+{
+  istream & result = b.input (is);
+  return result;
+}
+
+// Stream extraction for full_dboxes; delegates to full_dboxes::input
+inline istream & operator>> (istream & is, dh::full_dboxes & b)
+{
+  istream & result = b.input (is);
+  return result;
+}
+
+// Stream extraction for fast_dboxes; delegates to fast_dboxes::input
+inline istream & operator>> (istream & is, dh::fast_dboxes & b)
+{
+  istream & result = b.input (is);
+  return result;
+}
+
inline ostream & operator<< (ostream & os, dh::dboxes const & b)
{
return b.output (os);
}
+// Stream insertion for full_dboxes; delegates to full_dboxes::output
+inline ostream & operator<< (ostream & os, dh::full_dboxes const & b)
+{
+  ostream & result = b.output (os);
+  return result;
+}
+
inline ostream & operator<< (ostream & os, dh::fast_dboxes const & b)
{
return b.output (os);
diff --git a/Carpet/CarpetLib/src/dist.cc b/Carpet/CarpetLib/src/dist.cc
index c870990fb..89acacfad 100644
--- a/Carpet/CarpetLib/src/dist.cc
+++ b/Carpet/CarpetLib/src/dist.cc
@@ -1,4 +1,5 @@
#include <cassert>
+#include <typeinfo>
#include <mpi.h>
#ifdef _OPENMP
@@ -9,6 +10,8 @@
#include "cctk_Parameters.h"
#include "defs.hh"
+#include "limits.hh"
+#include "startup_time.hh"
#include "dist.hh"
@@ -20,9 +23,11 @@ namespace dist {
MPI_Comm comm_ = MPI_COMM_NULL;
- MPI_Datatype mpi_complex8;
- MPI_Datatype mpi_complex16;
- MPI_Datatype mpi_complex32;
+ MPI_Datatype mpi_complex8 = MPI_DATATYPE_NULL;
+ MPI_Datatype mpi_complex16 = MPI_DATATYPE_NULL;
+ MPI_Datatype mpi_complex32 = MPI_DATATYPE_NULL;
+
+ int total_num_threads_ = -1;
void init (int& argc, char**& argv) {
MPI_Init (&argc, &argv);
@@ -34,19 +39,32 @@ namespace dist {
#ifdef HAVE_CCTK_REAL4
CCTK_REAL4 dummy4;
- MPI_Type_contiguous (2, datatype(dummy4), &mpi_complex8);
+ MPI_Type_contiguous (2, mpi_datatype(dummy4), &mpi_complex8);
MPI_Type_commit (&mpi_complex8);
#endif
#ifdef HAVE_CCTK_REAL8
CCTK_REAL8 dummy8;
- MPI_Type_contiguous (2, datatype(dummy8), &mpi_complex16);
+ MPI_Type_contiguous (2, mpi_datatype(dummy8), &mpi_complex16);
MPI_Type_commit (&mpi_complex16);
#endif
#ifdef HAVE_CCTK_REAL16
CCTK_REAL16 dummy16;
- MPI_Type_contiguous (2, datatype(dummy16), &mpi_complex32);
+ MPI_Type_contiguous (2, mpi_datatype(dummy16), &mpi_complex32);
MPI_Type_commit (&mpi_complex32);
#endif
+
+ // Output startup time
+ // cerr << "QQQ: pseudoinit[1]" << endl;
+ CarpetLib::output_startup_time ();
+ // cerr << "QQQ: pseudoinit[2]" << endl;
+
+ // Check and/or modify system limits
+ CarpetLib::set_system_limits ();
+ // cerr << "QQQ: pseudoinit[3]" << endl;
+
+ // cerr << "QQQ: pseudoinit[4]" << endl;
+ collect_total_num_threads ();
+ // cerr << "QQQ: pseudoinit[5]" << endl;
}
void finalize () {
@@ -56,10 +74,27 @@ namespace dist {
// Create an MPI datatype from a C datatype description
- void create_mpi_datatype (size_t const count,
- mpi_struct_descr_t const descr[],
- MPI_Datatype & newtype)
+
+ // Print a single struct-field descriptor in the form
+ // {blocklength:..,displacement:..,type:..,type_size:..,field_name:..,type_name:..}
+ ostream& operator<< (ostream& os, mpi_struct_descr_t const& descr)
{
+   // Ask MPI for the size of the field's datatype
+   int nbytes;
+   MPI_Type_size (descr.type, &nbytes);
+
+   // Emit the fields one statement at a time
+   os << "{";
+   os << "blocklength:" << descr.blocklength << ",";
+   os << "displacement:" << descr.displacement << ",";
+   os << "type:" << descr.type << ",";
+   os << "type_size:" << nbytes << ",";
+   os << "field_name:" << descr.field_name << ",";
+   os << "type_name:" << descr.type_name;
+   os << "}";
+   return os;
+ }
+
+ MPI_Datatype create_mpi_datatype (size_t const count,
+ mpi_struct_descr_t const descr[],
+ char const * const name, size_t const size)
+ {
+ DECLARE_CCTK_PARAMETERS;
int blocklengths[count];
MPI_Aint displacements[count];
MPI_Datatype types[count];
@@ -68,10 +103,128 @@ namespace dist {
displacements[n] = descr[n].displacement;
types [n] = descr[n].type;
}
+ MPI_Datatype newtype;
MPI_Type_struct (count, blocklengths, displacements, types, &newtype);
MPI_Type_commit (&newtype);
+ if (verbose) {
+ CCTK_VInfo (CCTK_THORNSTRING,
+ "Creating new MPI type for C type %s:", name);
+ cout << " Type has " << count << " components" << endl;
+ for (size_t n=0; n<count; ++n) {
+ cout << " [" << n << "]: " << descr[n] << endl;
+ }
+ cout << " New MPI type ID is " << newtype << endl;
+ int datatypesize;
+ MPI_Type_size (newtype, &datatypesize);
+ cout << " C type size is " << size << endl;
+ cout << " MPI type size is " << datatypesize << endl;
+ }
+ return newtype;
+ }
+
+#if 0
+
+ // Print one field entry in the form
+ // {offset:..,count:..,mpi_datatype:..,type_size:..,field_name:..,type_name:..}
+ ostream&
+ generic_mpi_datatype_t::field_t::output (ostream& os) const
+ {
+   // Ask MPI for the size of this field's datatype
+   int nbytes;
+   MPI_Type_size (mpi_datatype, &nbytes);
+
+   // Emit the members one statement at a time
+   os << "{";
+   os << "offset:" << offset << ",";
+   os << "count:" << count << ",";
+   os << "mpi_datatype:" << mpi_datatype << ",";
+   os << "type_size:" << nbytes << ",";
+   os << "field_name:" << field_name << ",";
+   os << "type_name:" << type_name;
+   os << "}";
+   return os;
+ }
+
+ // Start a datatype description labelled type_name_; it is marked as
+ // not yet committed.
+ generic_mpi_datatype_t::generic_mpi_datatype_t (string const type_name_)
+ : type_name (type_name_), type_is_committed (false)
+ {
+ }
+
+ // Append one field of C type U to the description.  offset and count
+ // are stored as given (commit() later uses them as displacement and
+ // block length); field_name and the typeid name of U are kept for
+ // diagnostic output.  Must be called before commit().
+ template <typename U>
+ void
+ generic_mpi_datatype_t::add_field (size_t const offset, size_t const count,
+                                    string const field_name)
+ {
+   assert (not type_is_committed);
+   // Only the static type of u matters: it selects the overload below
+   U u;
+   // Qualify the call.  Inside this class the unqualified name
+   // "mpi_datatype" denotes the data member of that name, which hides
+   // the overloaded helper functions in namespace dist.
+   entries.push_back (field_t (offset, count, dist::mpi_datatype(u),
+                               field_name, typeid(U).name()));
+ }
+
+ // Build and commit the MPI struct datatype from the collected field
+ // entries.  May be called only once per object and only after at
+ // least one field has been added (both are asserted).  The result is
+ // stored in the member mpi_datatype.
+ void
+ generic_mpi_datatype_t::commit ()
+ {
+ DECLARE_CCTK_PARAMETERS;
+
+ // Debug output
+ // (this happens before the assertions below, so the description is
+ // printed even if one of them fails)
+ if (verbose) {
+ CCTK_VInfo (CCTK_THORNSTRING,
+ "Creating new MPI type for C type %s:", type_name.c_str());
+ cout << *this;
+ }
+
+ assert (not type_is_committed);
+ type_is_committed = true;
+
+ // Out of caution -- this could be allowed
+ assert (not entries.empty());
+
+ // Create MPI type
+ // The arrays hold one slot per field plus one trailing slot for the
+ // MPI_UB marker added below
+ size_t const count = entries.size();
+ int blocklengths [count+1];
+ MPI_Aint displacements[count+1];
+ MPI_Datatype types [count+1];
+ {
+ size_t n = 0;
+ for (list<field_t>::const_iterator ifield =
+ entries.begin(); ifield!=entries.end(); ++ifield, ++n)
+ {
+ blocklengths [n] = ifield->count;
+ displacements[n] = ifield->offset;
+ types [n] = ifield->mpi_datatype;
+ }
+ assert (n == count);
+ // Add MPI_UB
+ // placed at displacement type_size()
+ blocklengths [n] = 1;
+ displacements[n] = type_size();
+ types [n] = MPI_UB;
+ }
+
+ MPI_Type_struct
+ (count+1, blocklengths, displacements, types, &mpi_datatype);
+ MPI_Type_commit (&mpi_datatype);
+ }
+
+ // Write a human-readable description of this datatype to os: its
+ // name, every field entry, the MPI type handle, and the C and MPI
+ // type sizes.  Returns os.
+ ostream&
+ generic_mpi_datatype_t::output (ostream& os) const
+ {
+   // Everything is written to the caller's stream, so that operator<<
+   // works for any ostream and not just for cout
+   os << "Datatype: " << type_name << endl;
+   size_t const count = entries.size();
+   os << " Type has " << count << " components" << endl;
+   {
+     size_t n = 0;
+     for (list<field_t>::const_iterator ifield =
+            entries.begin(); ifield!=entries.end(); ++ifield, ++n)
+     {
+       os << " [" << n << "]: " << *ifield << endl;
+     }
+     assert (n == count);
+   }
+   os << " MPI type ID: " << mpi_datatype << endl;
+   int datatypesize;
+   MPI_Type_size (mpi_datatype, &datatypesize);
+   // type_size() is the size of the described C type; the bare name
+   // "size" would refer to the function dist::size instead
+   os << " C type size: " << type_size() << endl;
+   os << " MPI type size: " << datatypesize << endl;
+   return os;
}
+#endif
+
void checkpoint (const char* file, int line) {
@@ -105,15 +258,48 @@ namespace dist {
}
// Global number of threads
- int total_num_threads_worker ()
+ // Sum the local thread counts (num_threads()) of all processes in
+ // comm() with MPI_Allreduce and store the result in the
+ // namespace-level variable total_num_threads_.  The assertion checks
+ // that the total is at least the communicator size.
+ void collect_total_num_threads ()
{
- int total_num_threads_;
int const mynthreads = num_threads();
+ // cerr << "QQQ: collect_total_num_threads[1]" << endl;
MPI_Allreduce
(const_cast <int *> (& mynthreads), & total_num_threads_, 1, MPI_INT,
MPI_SUM, comm());
+ // cerr << "QQQ: collect_total_num_threads[2]" << endl;
assert (total_num_threads_ >= size());
- return total_num_threads_;
}
-
+
+
+
+ // Translate a C datatype index into the name of the corresponding C
+ // type.  An index without a name -- out of range, or belonging to a
+ // complex type that is not configured -- trips the assertion and
+ // aborts.
+ char const * c_datatype_name (unsigned const type)
+ {
+   // Names ordered by index; unavailable complex types leave a NULL gap
+   static char const * const names[] = {
+     "char",
+     "signed char",
+     "unsigned char",
+     "short",
+     "unsigned short",
+     "int",
+     "unsigned int",
+     "long",
+     "unsigned long",
+     "long long",
+     "unsigned long long",
+     "float",
+     "double",
+     "long double",
+#ifdef HAVE_CCTK_COMPLEX8
+     "CCTK_COMPLEX8",
+#else
+     NULL,
+#endif
+#ifdef HAVE_CCTK_COMPLEX16
+     "CCTK_COMPLEX16",
+#else
+     NULL,
+#endif
+#ifdef HAVE_CCTK_COMPLEX32
+     "CCTK_COMPLEX32",
+#else
+     NULL,
+#endif
+   };
+   unsigned const nnames = sizeof names / sizeof names[0];
+   if (type < nnames and names[type] != NULL) return names[type];
+   assert (0); abort();
+   return NULL;
+ }
+
} // namespace dist
diff --git a/Carpet/CarpetLib/src/dist.hh b/Carpet/CarpetLib/src/dist.hh
index 6868d85ce..091da31e4 100644
--- a/Carpet/CarpetLib/src/dist.hh
+++ b/Carpet/CarpetLib/src/dist.hh
@@ -4,6 +4,7 @@
#include <cassert>
#include <cstdio>
#include <cstdlib>
+#include <iostream>
#include <mpi.h>
#ifdef _OPENMP
@@ -26,20 +27,102 @@ namespace dist {
extern MPI_Datatype mpi_complex16;
extern MPI_Datatype mpi_complex32;
+ extern int total_num_threads_;
+
void init (int& argc, char**& argv);
void pseudoinit (MPI_Comm const c);
void finalize ();
+
+
// Create MPI datatypes from C structures
+
+ // Describes one member of a C structure for create_mpi_datatype.
+ // displacement and type are copied into the arrays handed to
+ // MPI_Type_struct; field_name and type_name are only used when a
+ // descriptor is printed.
struct mpi_struct_descr_t {
int blocklength;
MPI_Aint displacement;
MPI_Datatype type;
+ // Name of the structure member, for diagnostic output
+ char const * field_name;
+ // Name of the member's C type, for diagnostic output
+ char const * type_name;
+ };
+
+ ostream& operator<< (ostream& os, mpi_struct_descr_t const& descr);
+
+ MPI_Datatype create_mpi_datatype (size_t const count,
+ mpi_struct_descr_t const descr[],
+ char const * name, size_t size);
+#if 0
+
+ class generic_mpi_datatype_t {
+
+ string const type_name;
+ virtual size_t type_size() const = 0;
+
+ struct field_t {
+ size_t offset;
+ size_t count;
+ MPI_Datatype mpi_datatype;
+ string field_name;
+ string type_name;
+ field_t (size_t const offset_,
+ size_t const count_,
+ MPI_Datatype const mpi_datatype_,
+ string const field_name_,
+ string const type_name_)
+ : offset(offset_),
+ count(count_),
+ mpi_datatype(mpi_datatype_),
+ field_name(field_name_),
+ type_name(type_name_)
+ {
+ }
+ ostream& output (ostream& os) const;
+ };
+ friend ostream& operator<< (ostream& os,
+ generic_mpi_datatype_t::field_t const& field);
+
+ list<field_t> entries;
+
+ bool type_is_committed;
+ MPI_Datatype mpi_datatype;
+
+ public:
+
+ generic_mpi_datatype_t (string const type_name_);
+
+ template <typename U>
+ void add_field (size_t offset, size_t count, string field_name);
+
+ void commit ();
+
+ MPI_Datatype get () const
+ {
+ assert (type_is_committed);
+ return mpi_datatype;
+ }
+
+ ostream& output (ostream& os) const;
+ };
+
+ template <typename T>
+ class mpi_datatype_t: public generic_mpi_datatype_t {
+ virtual size_t type_size() const
+ {
+ return sizeof(T);
+ }
};
+
+ inline ostream& operator<< (ostream& os,
+ generic_mpi_datatype_t::field_t const& field)
+ {
+ return field.output(os);
+ }
- void create_mpi_datatype (size_t const count,
- mpi_struct_descr_t const descr[],
- MPI_Datatype & newtype);
+ inline ostream& operator<< (ostream& os, generic_mpi_datatype_t const& type)
+ {
+ return type.output(os);
+ }
+
+#endif
@@ -52,18 +135,21 @@ namespace dist {
// Information about the communicator
// Return the communicator
+ // Returns the current value of the global comm_.  Because comm_ is a
+ // mutable global (it starts out as MPI_COMM_NULL), the function may
+ // only be declared "pure", not "const": a const function must not
+ // depend on memory that can change between calls.
+ inline MPI_Comm comm () CCTK_ATTRIBUTE_PURE;
inline MPI_Comm comm ()
{
return comm_;
}
// Always return a good communicator
+ // Returns comm_ unless it is still MPI_COMM_NULL, in which case
+ // MPI_COMM_WORLD is returned instead.  The result depends on the
+ // mutable global comm_, so the function is declared "pure" rather
+ // than "const".
+ inline MPI_Comm goodcomm () CCTK_ATTRIBUTE_PURE;
inline MPI_Comm goodcomm ()
{
return comm_ != MPI_COMM_NULL ? comm_ : MPI_COMM_WORLD;
}
// Rank in the communicator (this processor's number, 0 .. size-1)
+ inline int rank () CCTK_ATTRIBUTE_CONST;
inline int rank ()
{
static int rank_ = -1;
@@ -72,6 +158,7 @@ namespace dist {
}
// Size of the communicator
+ inline int size () CCTK_ATTRIBUTE_CONST;
inline int size ()
{
static int size_ = -1;
@@ -83,6 +170,7 @@ namespace dist {
void set_num_threads (int num_threads);
// Local number of threads
+ inline int num_threads () CCTK_ATTRIBUTE_CONST;
inline int num_threads ()
{
static int num_threads_ = -1;
@@ -98,13 +186,10 @@ namespace dist {
}
// Global number of threads
- int total_num_threads_worker ();
+ void collect_total_num_threads ();
+ // Returns the global total_num_threads_, which is filled in by
+ // collect_total_num_threads().  Since the value read here is changed
+ // by that call, the function is declared "pure" rather than "const";
+ // a const function could legally be evaluated once and reused across
+ // the update.
+ inline int total_num_threads () CCTK_ATTRIBUTE_PURE;
inline int total_num_threads ()
{
- static int total_num_threads_ = -1;
- if (total_num_threads_ == -1) {
- total_num_threads_ = total_num_threads_worker();
- }
return total_num_threads_;
}
@@ -114,168 +199,248 @@ namespace dist {
// C Datatype helpers
// Map a C datatype to a 0-based index running up to c_ndatatypes().
/////////////////////////////////////////////////////////////////////////
+ inline unsigned int c_datatype (const char&) CCTK_ATTRIBUTE_CONST;
inline unsigned int c_datatype (const char&)
{ return 0; }
- inline unsigned int c_datatype (const signed char&)
+ inline unsigned int c_datatype (const signed char&) CCTK_ATTRIBUTE_CONST;
+ inline unsigned int c_datatype (const signed char&)
{ return 1; }
+ inline unsigned int c_datatype (const unsigned char&) CCTK_ATTRIBUTE_CONST;
inline unsigned int c_datatype (const unsigned char&)
{ return 2; }
+ inline unsigned int c_datatype (const short&) CCTK_ATTRIBUTE_CONST;
inline unsigned int c_datatype (const short&)
{ return 3; }
+ inline unsigned int c_datatype (const unsigned short&) CCTK_ATTRIBUTE_CONST;
inline unsigned int c_datatype (const unsigned short&)
{ return 4; }
+ inline unsigned int c_datatype (const int&) CCTK_ATTRIBUTE_CONST;
inline unsigned int c_datatype (const int&)
{ return 5; }
+ inline unsigned int c_datatype (const unsigned int&) CCTK_ATTRIBUTE_CONST;
inline unsigned int c_datatype (const unsigned int&)
{ return 6; }
+ inline unsigned int c_datatype (const long&) CCTK_ATTRIBUTE_CONST;
inline unsigned int c_datatype (const long&)
{ return 7; }
+ inline unsigned int c_datatype (const unsigned long&) CCTK_ATTRIBUTE_CONST;
inline unsigned int c_datatype (const unsigned long&)
{ return 8; }
+ inline unsigned int c_datatype (const long long&) CCTK_ATTRIBUTE_CONST;
inline unsigned int c_datatype (const long long&)
{ return 9; }
- inline unsigned int c_datatype (const float&)
+ inline unsigned int c_datatype (const unsigned long long&) CCTK_ATTRIBUTE_CONST;
+ inline unsigned int c_datatype (const unsigned long long&)
{ return 10; }
- inline unsigned int c_datatype (const double&)
+ inline unsigned int c_datatype (const float&) CCTK_ATTRIBUTE_CONST;
+ inline unsigned int c_datatype (const float&)
{ return 11; }
- inline unsigned int c_datatype (const long double&)
+ inline unsigned int c_datatype (const double&) CCTK_ATTRIBUTE_CONST;
+ inline unsigned int c_datatype (const double&)
{ return 12; }
+ inline unsigned int c_datatype (const long double&) CCTK_ATTRIBUTE_CONST;
+ inline unsigned int c_datatype (const long double&)
+ { return 13; }
+
#ifdef HAVE_CCTK_COMPLEX8
+ inline unsigned int c_datatype (const CCTK_COMPLEX8&) CCTK_ATTRIBUTE_CONST;
inline unsigned int c_datatype (const CCTK_COMPLEX8&)
- { return 13; }
+ { return 14; }
#endif
#ifdef HAVE_CCTK_COMPLEX16
+ inline unsigned int c_datatype (const CCTK_COMPLEX16&) CCTK_ATTRIBUTE_CONST;
inline unsigned int c_datatype (const CCTK_COMPLEX16&)
- { return 14; }
+ { return 15; }
#endif
#ifdef HAVE_CCTK_COMPLEX32
+ inline unsigned int c_datatype (const CCTK_COMPLEX32&) CCTK_ATTRIBUTE_CONST;
inline unsigned int c_datatype (const CCTK_COMPLEX32&)
- { return 15; }
+ { return 16; }
#endif
// keep this function's return code consistent with functions above
+ inline unsigned int c_ndatatypes () CCTK_ATTRIBUTE_CONST;
inline unsigned int c_ndatatypes ()
- { return 16; }
+ { return 17; }
template <typename T> unsigned int c_datatype () { abort(); }
- template<> inline unsigned int c_datatype <char> () { return 0; }
- template<> inline unsigned int c_datatype <signed char> () { return 1; }
- template<> inline unsigned int c_datatype <unsigned char> () { return 2; }
- template<> inline unsigned int c_datatype <short> () { return 3; }
- template<> inline unsigned int c_datatype <unsigned short> () { return 4; }
- template<> inline unsigned int c_datatype <int> () { return 5; }
- template<> inline unsigned int c_datatype <unsigned int> () { return 6; }
- template<> inline unsigned int c_datatype <long> () { return 7; }
- template<> inline unsigned int c_datatype <unsigned long> () { return 8; }
- template<> inline unsigned int c_datatype <long long> () { return 9; }
- template<> inline unsigned int c_datatype <float> () { return 10; }
- template<> inline unsigned int c_datatype <double> () { return 11; }
- template<> inline unsigned int c_datatype <long double> () { return 12; }
+ template<> inline unsigned int c_datatype <char> () CCTK_ATTRIBUTE_CONST;
+ template<> inline unsigned int c_datatype <char> () { return 0; }
+ template<> inline unsigned int c_datatype <signed char> () CCTK_ATTRIBUTE_CONST;
+ template<> inline unsigned int c_datatype <signed char> () { return 1; }
+ template<> inline unsigned int c_datatype <unsigned char> () CCTK_ATTRIBUTE_CONST;
+ template<> inline unsigned int c_datatype <unsigned char> () { return 2; }
+ template<> inline unsigned int c_datatype <short> () CCTK_ATTRIBUTE_CONST;
+ template<> inline unsigned int c_datatype <short> () { return 3; }
+ template<> inline unsigned int c_datatype <unsigned short> () CCTK_ATTRIBUTE_CONST;
+ template<> inline unsigned int c_datatype <unsigned short> () { return 4; }
+ template<> inline unsigned int c_datatype <int> () CCTK_ATTRIBUTE_CONST;
+ template<> inline unsigned int c_datatype <int> () { return 5; }
+ template<> inline unsigned int c_datatype <unsigned int> () CCTK_ATTRIBUTE_CONST;
+ template<> inline unsigned int c_datatype <unsigned int> () { return 6; }
+ template<> inline unsigned int c_datatype <long> () CCTK_ATTRIBUTE_CONST;
+ template<> inline unsigned int c_datatype <long> () { return 7; }
+ template<> inline unsigned int c_datatype <unsigned long> () CCTK_ATTRIBUTE_CONST;
+ template<> inline unsigned int c_datatype <unsigned long> () { return 8; }
+ template<> inline unsigned int c_datatype <long long> () CCTK_ATTRIBUTE_CONST;
+ template<> inline unsigned int c_datatype <long long> () { return 9; }
+ template<> inline unsigned int c_datatype <unsigned long long> () CCTK_ATTRIBUTE_CONST;
+ template<> inline unsigned int c_datatype <unsigned long long> () { return 10; }
+ template<> inline unsigned int c_datatype <float> () CCTK_ATTRIBUTE_CONST;
+ template<> inline unsigned int c_datatype <float> () { return 11; }
+ template<> inline unsigned int c_datatype <double> () CCTK_ATTRIBUTE_CONST;
+ template<> inline unsigned int c_datatype <double> () { return 12; }
+ template<> inline unsigned int c_datatype <long double> () CCTK_ATTRIBUTE_CONST;
+ template<> inline unsigned int c_datatype <long double> () { return 13; }
#ifdef HAVE_CCTK_COMPLEX8
- template<> inline unsigned int c_datatype <CCTK_COMPLEX8> () { return 13; }
+ template<> inline unsigned int c_datatype <CCTK_COMPLEX8> () CCTK_ATTRIBUTE_CONST;
+ template<> inline unsigned int c_datatype <CCTK_COMPLEX8> () { return 14; }
#endif
#ifdef HAVE_CCTK_COMPLEX16
- template<> inline unsigned int c_datatype <CCTK_COMPLEX16> () { return 14; }
+ template<> inline unsigned int c_datatype <CCTK_COMPLEX16> () CCTK_ATTRIBUTE_CONST;
+ template<> inline unsigned int c_datatype <CCTK_COMPLEX16> () { return 15; }
#endif
#ifdef HAVE_CCTK_COMPLEX32
- template<> inline unsigned int c_datatype <CCTK_COMPLEX32> () { return 15; }
+ template<> inline unsigned int c_datatype <CCTK_COMPLEX32> () CCTK_ATTRIBUTE_CONST;
+ template<> inline unsigned int c_datatype <CCTK_COMPLEX32> () { return 16; }
#endif
+ // Map a C datatype index to a string
+ char const * c_datatype_name (unsigned type) CCTK_ATTRIBUTE_CONST;
+
/////////////////////////////////////////////////////////////////
// MPI Datatype helpers
// Map a C datatype to its corresponding MPI datatype.
/////////////////////////////////////////////////////////////////
- inline MPI_Datatype datatype (const char&)
+ inline MPI_Datatype mpi_datatype (const char&) CCTK_ATTRIBUTE_CONST;
+ inline MPI_Datatype mpi_datatype (const char&)
{ return MPI_CHAR; }
- inline MPI_Datatype datatype (const signed char&)
+ inline MPI_Datatype mpi_datatype (const signed char&) CCTK_ATTRIBUTE_CONST;
+ inline MPI_Datatype mpi_datatype (const signed char&)
{ return MPI_CHAR; }
- inline MPI_Datatype datatype (const unsigned char&)
+ inline MPI_Datatype mpi_datatype (const unsigned char&) CCTK_ATTRIBUTE_CONST;
+ inline MPI_Datatype mpi_datatype (const unsigned char&)
{ return MPI_UNSIGNED_CHAR; }
- inline MPI_Datatype datatype (const short&)
+ inline MPI_Datatype mpi_datatype (const short&) CCTK_ATTRIBUTE_CONST;
+ inline MPI_Datatype mpi_datatype (const short&)
{ return MPI_SHORT; }
- inline MPI_Datatype datatype (const unsigned short&)
+ inline MPI_Datatype mpi_datatype (const unsigned short&) CCTK_ATTRIBUTE_CONST;
+ inline MPI_Datatype mpi_datatype (const unsigned short&)
{ return MPI_UNSIGNED_SHORT; }
- inline MPI_Datatype datatype (const int&)
+ inline MPI_Datatype mpi_datatype (const int&) CCTK_ATTRIBUTE_CONST;
+ inline MPI_Datatype mpi_datatype (const int&)
{ return MPI_INT; }
- inline MPI_Datatype datatype (const unsigned int&)
+ inline MPI_Datatype mpi_datatype (const unsigned int&) CCTK_ATTRIBUTE_CONST;
+ inline MPI_Datatype mpi_datatype (const unsigned int&)
{ return MPI_UNSIGNED; }
- inline MPI_Datatype datatype (const long&)
+ inline MPI_Datatype mpi_datatype (const long&) CCTK_ATTRIBUTE_CONST;
+ inline MPI_Datatype mpi_datatype (const long&)
{ return MPI_LONG; }
- inline MPI_Datatype datatype (const unsigned long&)
+ inline MPI_Datatype mpi_datatype (const unsigned long&) CCTK_ATTRIBUTE_CONST;
+ inline MPI_Datatype mpi_datatype (const unsigned long&)
{ return MPI_UNSIGNED_LONG; }
- inline MPI_Datatype datatype (const long long&)
+ inline MPI_Datatype mpi_datatype (const long long&) CCTK_ATTRIBUTE_CONST;
+ inline MPI_Datatype mpi_datatype (const long long&)
{ return MPI_LONG_LONG_INT; }
- inline MPI_Datatype datatype (const float&)
+ inline MPI_Datatype mpi_datatype (const unsigned long long&) CCTK_ATTRIBUTE_CONST;
+ inline MPI_Datatype mpi_datatype (const unsigned long long&)
+ { return MPI_LONG_LONG_INT; } // should be unsigned, but this doesn't exist
+
+ inline MPI_Datatype mpi_datatype (const float&) CCTK_ATTRIBUTE_CONST;
+ inline MPI_Datatype mpi_datatype (const float&)
{ return MPI_FLOAT; }
- inline MPI_Datatype datatype (const double&)
+ inline MPI_Datatype mpi_datatype (const double&) CCTK_ATTRIBUTE_CONST;
+ inline MPI_Datatype mpi_datatype (const double&)
{ return MPI_DOUBLE; }
- inline MPI_Datatype datatype (const long double&)
+ inline MPI_Datatype mpi_datatype (const long double&) CCTK_ATTRIBUTE_CONST;
+ inline MPI_Datatype mpi_datatype (const long double&)
{ return MPI_LONG_DOUBLE; }
#ifdef HAVE_CCTK_COMPLEX8
- inline MPI_Datatype datatype (const CCTK_COMPLEX8&)
+ inline MPI_Datatype mpi_datatype (const CCTK_COMPLEX8&) CCTK_ATTRIBUTE_CONST;
+ inline MPI_Datatype mpi_datatype (const CCTK_COMPLEX8&)
{ return mpi_complex8; }
#endif
#ifdef HAVE_CCTK_COMPLEX16
- inline MPI_Datatype datatype (const CCTK_COMPLEX16&)
+ inline MPI_Datatype mpi_datatype (const CCTK_COMPLEX16&) CCTK_ATTRIBUTE_CONST;
+ inline MPI_Datatype mpi_datatype (const CCTK_COMPLEX16&)
{ return mpi_complex16; }
#endif
#ifdef HAVE_CCTK_COMPLEX32
- inline MPI_Datatype datatype (const CCTK_COMPLEX32&)
+ inline MPI_Datatype mpi_datatype (const CCTK_COMPLEX32&) CCTK_ATTRIBUTE_CONST;
+ inline MPI_Datatype mpi_datatype (const CCTK_COMPLEX32&)
{ return mpi_complex32; }
#endif
- template <typename T> MPI_Datatype datatype () { abort(); }
- template<> inline MPI_Datatype datatype <char> () { return MPI_CHAR; }
- template<> inline MPI_Datatype datatype <signed char> () { return MPI_CHAR; }
- template<> inline MPI_Datatype datatype <unsigned char> () { return MPI_UNSIGNED_CHAR; }
- template<> inline MPI_Datatype datatype <short> () { return MPI_SHORT; }
- template<> inline MPI_Datatype datatype <unsigned short> () { return MPI_UNSIGNED_SHORT; }
- template<> inline MPI_Datatype datatype <int> () { return MPI_INT; }
- template<> inline MPI_Datatype datatype <unsigned int> () { return MPI_UNSIGNED; }
- template<> inline MPI_Datatype datatype <long> () { return MPI_LONG; }
- template<> inline MPI_Datatype datatype <unsigned long> () { return MPI_UNSIGNED_LONG; }
- template<> inline MPI_Datatype datatype <long long> () { return MPI_LONG_LONG_INT; }
- template<> inline MPI_Datatype datatype <float> () { return MPI_FLOAT; }
- template<> inline MPI_Datatype datatype <double> () { return MPI_DOUBLE; }
- template<> inline MPI_Datatype datatype <long double> () { return MPI_LONG_DOUBLE; }
+ template <typename T> MPI_Datatype mpi_datatype () { abort(); }
+ template<> inline MPI_Datatype mpi_datatype <char> () CCTK_ATTRIBUTE_CONST;
+ template<> inline MPI_Datatype mpi_datatype <char> () { return MPI_CHAR; }
+ template<> inline MPI_Datatype mpi_datatype <signed char> () CCTK_ATTRIBUTE_CONST;
+ template<> inline MPI_Datatype mpi_datatype <signed char> () { return MPI_CHAR; }
+ template<> inline MPI_Datatype mpi_datatype <unsigned char> () CCTK_ATTRIBUTE_CONST;
+ template<> inline MPI_Datatype mpi_datatype <unsigned char> () { return MPI_UNSIGNED_CHAR; }
+ template<> inline MPI_Datatype mpi_datatype <short> () CCTK_ATTRIBUTE_CONST;
+ template<> inline MPI_Datatype mpi_datatype <short> () { return MPI_SHORT; }
+ template<> inline MPI_Datatype mpi_datatype <unsigned short> () CCTK_ATTRIBUTE_CONST;
+ template<> inline MPI_Datatype mpi_datatype <unsigned short> () { return MPI_UNSIGNED_SHORT; }
+ template<> inline MPI_Datatype mpi_datatype <int> () CCTK_ATTRIBUTE_CONST;
+ template<> inline MPI_Datatype mpi_datatype <int> () { return MPI_INT; }
+ template<> inline MPI_Datatype mpi_datatype <unsigned int> () CCTK_ATTRIBUTE_CONST;
+ template<> inline MPI_Datatype mpi_datatype <unsigned int> () { return MPI_UNSIGNED; }
+ template<> inline MPI_Datatype mpi_datatype <long> () CCTK_ATTRIBUTE_CONST;
+ template<> inline MPI_Datatype mpi_datatype <long> () { return MPI_LONG; }
+ template<> inline MPI_Datatype mpi_datatype <unsigned long> () CCTK_ATTRIBUTE_CONST;
+ template<> inline MPI_Datatype mpi_datatype <unsigned long> () { return MPI_UNSIGNED_LONG; }
+ template<> inline MPI_Datatype mpi_datatype <long long> () CCTK_ATTRIBUTE_CONST;
+ template<> inline MPI_Datatype mpi_datatype <long long> () { return MPI_LONG_LONG_INT; }
+ template<> inline MPI_Datatype mpi_datatype <unsigned long long> () CCTK_ATTRIBUTE_CONST;
+ template<> inline MPI_Datatype mpi_datatype <unsigned long long> () { return MPI_LONG_LONG_INT; } // should be unsigned, but this doesn't exist
+ template<> inline MPI_Datatype mpi_datatype <float> () CCTK_ATTRIBUTE_CONST;
+ template<> inline MPI_Datatype mpi_datatype <float> () { return MPI_FLOAT; }
+ template<> inline MPI_Datatype mpi_datatype <double> () CCTK_ATTRIBUTE_CONST;
+ template<> inline MPI_Datatype mpi_datatype <double> () { return MPI_DOUBLE; }
+ template<> inline MPI_Datatype mpi_datatype <long double> () CCTK_ATTRIBUTE_CONST;
+ template<> inline MPI_Datatype mpi_datatype <long double> () { return MPI_LONG_DOUBLE; }
#ifdef HAVE_CCTK_COMPLEX8
- template<> inline MPI_Datatype datatype <CCTK_COMPLEX8> () { return mpi_complex8; }
+ template<> inline MPI_Datatype mpi_datatype <CCTK_COMPLEX8> () CCTK_ATTRIBUTE_CONST;
+ template<> inline MPI_Datatype mpi_datatype <CCTK_COMPLEX8> () { return mpi_complex8; }
#endif
#ifdef HAVE_CCTK_COMPLEX16
- template<> inline MPI_Datatype datatype <CCTK_COMPLEX16> () { return mpi_complex16; }
+ template<> inline MPI_Datatype mpi_datatype <CCTK_COMPLEX16> () CCTK_ATTRIBUTE_CONST;
+ template<> inline MPI_Datatype mpi_datatype <CCTK_COMPLEX16> () { return mpi_complex16; }
#endif
#ifdef HAVE_CCTK_COMPLEX32
- template<> inline MPI_Datatype datatype <CCTK_COMPLEX32> () { return mpi_complex32; }
+ template<> inline MPI_Datatype mpi_datatype <CCTK_COMPLEX32> () CCTK_ATTRIBUTE_CONST;
+ template<> inline MPI_Datatype mpi_datatype <CCTK_COMPLEX32> () { return mpi_complex32; }
#endif
} // namespace dist
diff --git a/Carpet/CarpetLib/src/fulltree.cc b/Carpet/CarpetLib/src/fulltree.cc
index cb6de5116..3234a73dc 100644
--- a/Carpet/CarpetLib/src/fulltree.cc
+++ b/Carpet/CarpetLib/src/fulltree.cc
@@ -15,6 +15,8 @@ fulltree<T,D,P>::fulltree ()
: type (type_empty)
{
assert (invariant());
+ // This is unused
+ assert (0);
}
@@ -193,6 +195,15 @@ fulltree<T,D,P>::const_iterator::const_iterator (fulltree const & f_)
if (f.is_branch()) {
assert (f.subtrees.size() > 0);
it = new const_iterator (* f.subtrees.at(i));
+ while ((*it).done()) {
+ delete it;
+ it = 0;
+ ++ i;
+ if (done()) break;
+ // to do: use a new function "reset iterator" instead
+ it = new const_iterator (* f.subtrees.at(i));
+ }
+ assert (done() or not (*it).done());
}
}
@@ -253,6 +264,7 @@ fulltree<T,D,P>::const_iterator::operator++ ()
++ i;
} else {
++ *it;
+#if 0
if ((*it).done()) {
delete it;
it = 0;
@@ -260,8 +272,19 @@ fulltree<T,D,P>::const_iterator::operator++ ()
if (not done()) {
// to do: use a new function "reset iterator" instead
it = new const_iterator (* f.subtrees.at(i));
+ assert (not (*it).done());
}
}
+#endif
+ while ((*it).done()) {
+ delete it;
+ it = 0;
+ ++ i;
+ if (done()) break;
+ // to do: use a new function "reset iterator" instead
+ it = new const_iterator (* f.subtrees.at(i));
+ }
+ assert (done() or not (*it).done());
}
return *this;
}
@@ -289,6 +312,15 @@ fulltree<T,D,P>::iterator::iterator (fulltree & f_)
if (f.is_branch()) {
assert (f.subtrees.size() > 0);
it = new iterator (* f.subtrees.at(i));
+ while ((*it).done()) {
+ delete it;
+ it = 0;
+ ++ i;
+ if (done()) break;
+ // to do: use a new function "reset iterator" instead
+ it = new iterator (* f.subtrees.at(i));
+ }
+ assert (done() or not (*it).done());
}
}
@@ -349,6 +381,7 @@ fulltree<T,D,P>::iterator::operator++ ()
++ i;
} else {
++ *it;
+#if 0
if ((*it).done()) {
delete it;
it = 0;
@@ -356,8 +389,19 @@ fulltree<T,D,P>::iterator::operator++ ()
if (not done()) {
// to do: use a new function "reset iterator" instead
it = new iterator (* f.subtrees.at(i));
+ assert (not (*it).done());
}
}
+#endif
+ while ((*it).done()) {
+ delete it;
+ it = 0;
+ ++ i;
+ if (done()) break;
+ // to do: use a new function "reset iterator" instead
+ it = new iterator (* f.subtrees.at(i));
+ }
+ assert (done() or not (*it).done());
}
return *this;
}
@@ -409,7 +453,7 @@ fulltree<T,D,P>::output (ostream & os) const
<< "dir=" << dir << ","
<< "subtrees=[";
for (size_t i=0; i<subtrees.size(); ++i) {
- os << bounds.at(i) << ":[" << i << "]=" << subtrees.at(i) << ":";
+ os << bounds.at(i) << ":[" << i << "]=" << *subtrees.at(i) << ":";
}
os << bounds.at(subtrees.size()) << "]";
} else {
diff --git a/Carpet/CarpetLib/src/fulltree.hh b/Carpet/CarpetLib/src/fulltree.hh
index 4678759b6..82d09be02 100644
--- a/Carpet/CarpetLib/src/fulltree.hh
+++ b/Carpet/CarpetLib/src/fulltree.hh
@@ -163,7 +163,7 @@ public:
#endif
// Memory usage
- size_t memory () const;
+ size_t memory () const CCTK_ATTRIBUTE_PURE;
// Output helper
void output (ostream & os) const;
@@ -173,6 +173,8 @@ public:
// Memory usage
template <typename T, int D, typename P>
+inline size_t memoryof (fulltree<T,D,P> const & f) CCTK_ATTRIBUTE_PURE;
+template <typename T, int D, typename P>
inline size_t memoryof (fulltree<T,D,P> const & f) { return f.memory(); }
diff --git a/Carpet/CarpetLib/src/gdata.cc b/Carpet/CarpetLib/src/gdata.cc
index 39c9ebf8c..e1888835c 100644
--- a/Carpet/CarpetLib/src/gdata.cc
+++ b/Carpet/CarpetLib/src/gdata.cc
@@ -26,34 +26,25 @@ using namespace CarpetLib;
-// Hand out the next MPI tag
-static int nexttag ()
-{
- DECLARE_CCTK_PARAMETERS;
-
- int const min_tag = 100;
- static int last = 0;
- ++last;
- if (last >= 30000) last = 0;
- return min_tag + last;
-}
+list<gdata*> gdata::allgdata;
// Constructors
gdata::gdata (const int varindex_,
const centering cent_,
- const operator_type transport_operator_,
- const int tag_)
+ const operator_type transport_operator_)
: _storage(NULL),
varindex(varindex_),
cent(cent_),
transport_operator(transport_operator_),
_has_storage(false),
- comm_active(false),
- tag(tag_ >= 0 ? tag_ : nexttag())
+ comm_active(false)
{
DECLARE_CCTK_PARAMETERS;
+
+ allgdatai = allgdata.insert(allgdata.end(), this);
+
if (barriers) {
MPI_Barrier (dist::comm());
}
@@ -63,6 +54,9 @@ gdata::gdata (const int varindex_,
gdata::~gdata ()
{
DECLARE_CCTK_PARAMETERS;
+
+ allgdata.erase(allgdatai);
+
if (barriers) {
MPI_Barrier (dist::comm());
}
@@ -76,14 +70,17 @@ void
gdata::
copy_from (comm_state & state,
gdata const * const src,
- ibbox const & box)
+ ibbox const & box,
+ int const dstproc,
+ int const srcproc)
{
- vector <gdata const *> srcs (1, src);
+ vector <gdata const *> const srcs (1, src);
CCTK_REAL const time = 0.0;
- vector <CCTK_REAL> times (1, time);
+ vector <CCTK_REAL> const times (1, time);
transfer_from (state,
srcs, times,
box, box,
+ dstproc, srcproc,
time, 0, 0);
}
@@ -96,37 +93,52 @@ transfer_from (comm_state & state,
vector<CCTK_REAL> const & times,
ibbox const & dstbox,
ibbox const & srcbox,
+ int const dstproc,
+ int const srcproc,
CCTK_REAL const time,
int const order_space,
int const order_time)
{
- assert (has_storage());
- assert (not dstbox.empty());
- assert (all(dstbox.lower() >= extent().lower()));
- assert (all(dstbox.upper() <= extent().upper()));
- assert (all(dstbox.stride() == extent().stride()));
- assert (all((dstbox.lower() - extent().lower()) % dstbox.stride() == 0));
-
- assert (not srcbox.empty());
- assert (srcs.size() == times.size() and srcs.size() > 0);
- for (int t=0; t<(int)srcs.size(); ++t) {
- assert (srcs.AT(t)->has_storage());
- assert (all(srcbox.lower() >= srcs.AT(t)->extent().lower()));
- assert (all(srcbox.upper() <= srcs.AT(t)->extent().upper()));
+ bool const is_dst = dist::rank() == dstproc;
+ bool const is_src = dist::rank() == srcproc;
+ // Return early if this communication does not concern us
+ assert (is_dst or is_src); // why should we be here?
+ if (not is_dst and not is_src) return;
+
+ if (is_dst) {
+ assert (proc() == dstproc);
+ assert (has_storage());
+ assert (not dstbox.empty());
+ assert (all(dstbox.lower() >= extent().lower()));
+ assert (all(dstbox.upper() <= extent().upper()));
+ assert (all(dstbox.stride() == extent().stride()));
+ assert (all((dstbox.lower() - extent().lower()) % dstbox.stride() == 0));
}
- gdata const * const src = srcs.AT(0);
- assert (transport_operator != op_error);
- if (transport_operator == op_none) return;
+ if (is_src) {
+ assert (not srcbox.empty());
+ assert (srcs.size() == times.size() and srcs.size() > 0);
+ for (int t=0; t<(int)srcs.size(); ++t) {
+ assert (srcs.AT(t)->proc() == srcproc);
+ assert (srcs.AT(t)->has_storage());
+ assert (all(srcbox.lower() >= srcs.AT(t)->extent().lower()));
+ assert (all(srcbox.upper() <= srcs.AT(t)->extent().upper()));
+ }
+ }
+ gdata const * const src = is_src ? srcs.AT(0) : NULL;
- // Return early if this communication does not concern us
- if (dist::rank() != proc() and dist::rank() != src->proc()) return;
+ operator_type const my_transport_operator =
+ is_dst ? transport_operator : src->transport_operator;
+ assert (my_transport_operator != op_error);
+ assert (my_transport_operator != op_none); // why should we be here?
+ if (my_transport_operator == op_none) return;
// Interpolate either on the source or on the destination processor,
// depending on whether this increases or reduces the amount of data
int timelevel0, ntimelevels;
- find_source_timelevel (times, time, order_time, timelevel0, ntimelevels);
- assert (int (srcs.size()) >= ntimelevels);
+ find_source_timelevel
+ (times, time, order_time, my_transport_operator, timelevel0, ntimelevels);
+ if (is_src) assert (int (srcs.size()) >= ntimelevels);
int const dstpoints = dstbox.size();
int const srcpoints = srcbox.size() * ntimelevels;
bool const interp_on_src = dstpoints <= srcpoints;
@@ -136,46 +148,45 @@ transfer_from (comm_state & state,
case state_get_buffer_sizes:
// don't count processor-local copies
- if (proc() != src->proc()) {
- // if this is a destination processor: advance its recv buffer
- // size
- if (proc() == dist::rank()) {
- state.reserve_recv_space (c_datatype(), src->proc(), npoints);
+ if (not (is_dst and is_src)) {
+ if (is_dst) {
+ // increment the recv buffer size
+ state.reserve_recv_space (c_datatype(), srcproc, npoints);
}
- // if this is a source processor: increment its send buffer size
- if (src->proc() == dist::rank()) {
- state.reserve_send_space (c_datatype(), proc(), npoints);
+ if (is_src) {
+ // increment the send buffer size
+ state.reserve_send_space (src->c_datatype(), dstproc, npoints);
}
}
break;
case state_fill_send_buffers:
- // if this is a source processor: copy its data into the send
- // buffer
- if (proc() != src->proc()) {
- if (src->proc() == dist::rank()) {
+ if (not (is_dst and is_src)) {
+ if (is_src) {
+ // copy the data into the send buffer
if (interp_on_src) {
- size_t const sendbufsize = c_datatype_size() * dstbox.size();
+ size_t const sendbufsize = src->c_datatype_size() * dstbox.size();
void * const sendbuf =
- state.send_buffer (c_datatype(), proc(), dstbox.size());
+ state.send_buffer (src->c_datatype(), dstproc, dstbox.size());
gdata * const buf =
- make_typed (varindex, cent, transport_operator, tag);
- buf->allocate (dstbox, src->proc(), sendbuf, sendbufsize);
+ src->make_typed (src->varindex, src->cent, src->transport_operator);
+ buf->allocate (dstbox, srcproc, sendbuf, sendbufsize);
buf->transfer_from_innerloop
(srcs, times, dstbox, time, order_space, order_time);
delete buf;
- state.commit_send_space (c_datatype(), proc(), dstbox.size());
+ state.commit_send_space (src->c_datatype(), dstproc, dstbox.size());
} else {
for (int tl = timelevel0; tl < timelevel0 + ntimelevels; ++ tl) {
- size_t const sendbufsize = c_datatype_size() * srcbox.size();
+ size_t const sendbufsize = src->c_datatype_size() * srcbox.size();
void * const sendbuf =
- state.send_buffer (c_datatype(), proc(), srcbox.size());
+ state.send_buffer (src->c_datatype(), dstproc, srcbox.size());
gdata * const buf =
- make_typed (varindex, cent, transport_operator, tag);
- buf->allocate (srcbox, src->proc(), sendbuf, sendbufsize);
+ src->make_typed (src->varindex, src->cent,
+ src->transport_operator);
+ buf->allocate (srcbox, srcproc, sendbuf, sendbufsize);
buf->copy_from_innerloop (srcs.AT(tl), srcbox);
delete buf;
- state.commit_send_space (c_datatype(), proc(), srcbox.size());
+ state.commit_send_space (src->c_datatype(), dstproc, srcbox.size());
}
}
}
@@ -184,45 +195,42 @@ transfer_from (comm_state & state,
case state_do_some_work:
// handle the processor-local case
- if (proc() == src->proc()) {
- if (proc() == dist::rank()) {
- transfer_from_innerloop
- (srcs, times, dstbox, time, order_space, order_time);
- }
+ if (is_dst and is_src) {
+ transfer_from_innerloop
+ (srcs, times, dstbox, time, order_space, order_time);
}
break;
case state_empty_recv_buffers:
- // if this is a destination processor: copy it from the recv
- // buffer
- if (proc() != src->proc()) {
- if (proc() == dist::rank()) {
+ if (not (is_dst and is_src)) {
+ if (is_dst) {
+ // copy from the recv buffer
if (interp_on_src) {
size_t const recvbufsize = c_datatype_size() * dstbox.size();
void * const recvbuf =
- state.recv_buffer (c_datatype(), src->proc(), dstbox.size());
- gdata * const buf =
- make_typed (varindex, cent, transport_operator, tag);
- buf->allocate (dstbox, proc(), recvbuf, recvbufsize);
- state.commit_recv_space (c_datatype(), src->proc(), dstbox.size());
+ state.recv_buffer (c_datatype(), srcproc, dstbox.size());
+ gdata * const buf = make_typed (varindex, cent, transport_operator);
+ buf->allocate (dstbox, dstproc, recvbuf, recvbufsize);
+ state.commit_recv_space (c_datatype(), srcproc, dstbox.size());
copy_from_innerloop (buf, dstbox);
delete buf;
} else {
- gdata const * const null = 0;
- vector <gdata const *> bufs (timelevel0 + ntimelevels, null);
- for (int tl = timelevel0; tl < timelevel0 + ntimelevels; ++ tl) {
+ gdata const * const null = NULL;
+ vector <gdata const *> bufs (ntimelevels, null);
+ vector <CCTK_REAL> timebuf (ntimelevels);
+ for (int tl = 0; tl < ntimelevels; ++ tl) {
size_t const recvbufsize = c_datatype_size() * srcbox.size();
void * const recvbuf =
- state.recv_buffer (c_datatype(), src->proc(), srcbox.size());
- gdata * const buf =
- make_typed (varindex, cent, transport_operator, tag);
- buf->allocate (srcbox, proc(), recvbuf, recvbufsize);
- state.commit_recv_space (c_datatype(), src->proc(), srcbox.size());
+ state.recv_buffer (c_datatype(), srcproc, srcbox.size());
+ gdata * const buf = make_typed (varindex, cent, transport_operator);
+ buf->allocate (srcbox, dstproc, recvbuf, recvbufsize);
+ state.commit_recv_space (c_datatype(), srcproc, srcbox.size());
bufs.AT(tl) = buf;
+ timebuf.AT(tl) = times.AT(timelevel0 + tl);
}
transfer_from_innerloop
- (bufs, times, dstbox, time, order_space, order_time);
- for (int tl = timelevel0; tl < timelevel0 + ntimelevels; ++ tl) {
+ (bufs, timebuf, dstbox, time, order_space, order_time);
+ for (int tl = 0; tl < ntimelevels; ++ tl) {
delete bufs.AT(tl);
}
}
@@ -231,7 +239,7 @@ transfer_from (comm_state & state,
break;
default:
- assert (0);
+ assert (0); abort();
}
}
@@ -242,9 +250,9 @@ gdata::
find_source_timelevel (vector <CCTK_REAL> const & times,
CCTK_REAL const time,
int const order_time,
+ operator_type const transport_operator,
int & timelevel0,
int & ntimelevels)
- const
{
// Ensure that the times are consistent
assert (times.size() > 0);
@@ -253,7 +261,8 @@ find_source_timelevel (vector <CCTK_REAL> const & times,
CCTK_REAL const eps = 1.0e-12;
CCTK_REAL const min_time = * min_element (times.begin(), times.end());
CCTK_REAL const max_time = * max_element (times.begin(), times.end());
- CCTK_REAL const some_time = abs (min_time) + abs (max_time);
+ // TODO: Use a real delta-time from somewhere instead of 1.0
+ CCTK_REAL const some_time = abs (min_time) + abs (max_time) + 1.0;
if (transport_operator != op_copy) {
if (time < min_time - eps * some_time or
time > max_time + eps * some_time)
@@ -303,3 +312,18 @@ find_source_timelevel (vector <CCTK_REAL> const & times,
assert (timelevel0 >= 0 and timelevel0 < (int)times.size());
assert (ntimelevels > 0);
}
+
+
+
+size_t
+gdata::
+allmemory ()
+{
+ size_t mem = memoryof(allgdata);
+ for (list<gdata*>::const_iterator
+ gdatai = allgdata.begin(); gdatai != allgdata.end(); ++ gdatai)
+ {
+ mem += memoryof(**gdatai);
+ }
+ return mem;
+}
diff --git a/Carpet/CarpetLib/src/gdata.hh b/Carpet/CarpetLib/src/gdata.hh
index 09622fb34..4b62cf564 100644
--- a/Carpet/CarpetLib/src/gdata.hh
+++ b/Carpet/CarpetLib/src/gdata.hh
@@ -24,14 +24,19 @@ using namespace std;
// A generic data storage without type information
class gdata {
+
+ static list<gdata*> allgdata;
+ list<gdata*>::iterator allgdatai;
protected: // should be readonly
// Fields
void * _storage; // A copy of the storage pointer
-
+
+public:
const int varindex; // Cactus variable index, or -1
+protected:
centering cent;
operator_type transport_operator;
@@ -47,8 +52,6 @@ protected: // should be readonly
bool comm_active; // a communication is going on
MPI_Request request; // outstanding MPI request
- int tag; // MPI tag for this object
-
private:
// Forbid copying and passing by value
gdata (gdata const &);
@@ -59,8 +62,7 @@ public:
// Constructors
gdata (const int varindex,
const centering cent = error_centered,
- const operator_type transport_operator = op_error,
- const int tag = -1);
+ const operator_type transport_operator = op_error);
// Destructors
virtual ~gdata ();
@@ -69,8 +71,7 @@ public:
virtual gdata*
make_typed (const int varindex,
const centering cent = error_centered,
- const operator_type transport_operator = op_error,
- const int tag = -1) const = 0;
+ const operator_type transport_operator = op_error) const = 0;
// Storage management
virtual void allocate (const ibbox& extent, const int proc,
@@ -148,7 +149,9 @@ public:
void
copy_from (comm_state & state,
gdata const * src,
- ibbox const & box);
+ ibbox const & box,
+ int dstproc,
+ int srcproc);
void
transfer_from (comm_state & state,
@@ -156,18 +159,21 @@ public:
vector<CCTK_REAL> const & times,
ibbox const & dstbox,
ibbox const & srcbox,
+ int dstproc,
+ int srcproc,
CCTK_REAL time,
int order_space,
int order_time);
protected:
+ static
void
find_source_timelevel (vector <CCTK_REAL> const & times,
CCTK_REAL time,
int order_time,
+ operator_type transport_operator,
int & timelevel0,
- int & ntimelevels)
- const;
+ int & ntimelevels);
private:
virtual
@@ -186,8 +192,25 @@ private:
int order_time)
= 0;
+public:
+ virtual size_t memory () const CCTK_ATTRIBUTE_PURE = 0;
+ static size_t allmemory () CCTK_ATTRIBUTE_PURE;
+ virtual ostream& output (ostream& os) const = 0;
};
+inline size_t memoryof (gdata const & d) CCTK_ATTRIBUTE_PURE;
+inline size_t memoryof (gdata const & d)
+{
+ return d.memory ();
+}
+
+inline ostream& operator<< (ostream& os, const gdata& d)
+{
+ return d.output(os);
+}
+
+
+
#endif // GDATA_HH
diff --git a/Carpet/CarpetLib/src/gf.cc b/Carpet/CarpetLib/src/gf.cc
index 37b06db75..696628c59 100644
--- a/Carpet/CarpetLib/src/gf.cc
+++ b/Carpet/CarpetLib/src/gf.cc
@@ -41,23 +41,23 @@ gf<T>::~gf ()
// Access to the data
template<typename T>
-const data<T>* gf<T>::operator() (int tl, int rl, int c, int ml) const
+const data<T>* gf<T>::operator() (int tl, int rl, int lc, int ml) const
{
assert (rl>=0 and rl<h.reflevels());
- assert (c>=0 and c<h.components(rl));
+ assert (lc>=0 and lc<h.local_components(rl));
assert (ml>=0 and ml<h.mglevels());
assert (tl>=0 and tl<timelevels(ml, rl));
- return (const data<T>*)storage.AT(ml).AT(rl).AT(c).AT(tl);
+ return (const data<T>*)storage.AT(ml).AT(rl).AT(lc).AT(tl);
}
template<typename T>
-data<T>* gf<T>::operator() (int tl, int rl, int c, int ml)
+data<T>* gf<T>::operator() (int tl, int rl, int lc, int ml)
{
assert (rl>=0 and rl<h.reflevels());
- assert (c>=0 and c<h.components(rl));
+ assert (lc>=0 and lc<h.local_components(rl));
assert (ml>=0 and ml<h.mglevels());
assert (tl>=0 and tl<timelevels(ml, rl));
- return (data<T>*)storage.AT(ml).AT(rl).AT(c).AT(tl);
+ return (data<T>*)storage.AT(ml).AT(rl).AT(lc).AT(tl);
}
diff --git a/Carpet/CarpetLib/src/gf.hh b/Carpet/CarpetLib/src/gf.hh
index d5feb0a63..be0a1bb94 100644
--- a/Carpet/CarpetLib/src/gf.hh
+++ b/Carpet/CarpetLib/src/gf.hh
@@ -46,13 +46,11 @@ public:
// Helpers
-protected:
-
- virtual gdata* typed_data (int tl, int rl, int c, int ml)
+ virtual gdata* typed_data (int tl, int rl, int lc, int ml) const
{
data<T>* const vl =
this->vectorleader
- ? (data<T>*)(*this->vectorleader)(tl,rl,c,ml)
+ ? (data<T>*)(*this->vectorleader)(tl,rl,lc,ml)
: NULL;
return new data<T>(this->varindex,
h.refcent, this->transport_operator,
@@ -64,11 +62,9 @@ protected:
// Access to the data
-public:
-
- virtual const data<T>* operator() (int tl, int rl, int c, int ml) const;
+ virtual const data<T>* operator() (int tl, int rl, int lc, int ml) const;
- virtual data<T>* operator() (int tl, int rl, int c, int ml);
+ virtual data<T>* operator() (int tl, int rl, int lc, int ml);
diff --git a/Carpet/CarpetLib/src/ggf.cc b/Carpet/CarpetLib/src/ggf.cc
index 41e6787dc..d975a55a1 100644
--- a/Carpet/CarpetLib/src/ggf.cc
+++ b/Carpet/CarpetLib/src/ggf.cc
@@ -7,6 +7,8 @@
#include "cctk.h"
+#include "CarpetTimers.hh"
+
#include "defs.hh"
#include "dh.hh"
#include "th.hh"
@@ -19,6 +21,10 @@ using namespace CarpetLib;
+list<ggf*> ggf::allggf;
+
+
+
// Constructors
ggf::ggf (const int varindex_, const operator_type transport_operator_,
th& t_, dh& d_,
@@ -44,12 +50,15 @@ ggf::ggf (const int varindex_, const operator_type transport_operator_,
timelevels_.AT(ml).resize(d.h.reflevels(), 0);
}
- d.add(this);
+ allggfi = allggf.insert(allggf.end(), this);
+
+ dh_handle = d.add(this);
}
// Destructors
ggf::~ggf () {
- d.remove(this);
+ d.erase(dh_handle);
+ allggf.erase(allggfi);
}
// Comparison
@@ -69,23 +78,24 @@ void ggf::set_timelevels (const int ml, const int rl, const int new_timelevels)
if (new_timelevels < timelevels(ml,rl)) {
- for (int c=0; c<(int)storage.AT(ml).AT(rl).size(); ++c) {
+ for (int lc=0; lc<h.local_components(rl); ++ lc) {
for (int tl=new_timelevels; tl<timelevels(ml,rl); ++tl) {
- delete storage.AT(ml).AT(rl).AT(c).AT(tl);
+ delete storage.AT(ml).AT(rl).AT(lc).AT(tl);
}
- storage.AT(ml).AT(rl).AT(c).resize (new_timelevels);
- } // for c
+ storage.AT(ml).AT(rl).AT(lc).resize (new_timelevels);
+ } // for lc
} else if (new_timelevels > timelevels(ml,rl)) {
- for (int c=0; c<(int)storage.AT(ml).AT(rl).size(); ++c) {
- storage.AT(ml).AT(rl).AT(c).resize (new_timelevels);
+ for (int lc=0; lc<h.local_components(rl); ++ lc) {
+ int const c = h.get_component(rl,lc);
+ storage.AT(ml).AT(rl).AT(lc).resize (new_timelevels);
for (int tl=timelevels(ml,rl); tl<new_timelevels; ++tl) {
- storage.AT(ml).AT(rl).AT(c).AT(tl) = typed_data(tl,rl,c,ml);
- storage.AT(ml).AT(rl).AT(c).AT(tl)->allocate
- (d.boxes.AT(ml).AT(rl).AT(c).exterior, h.processor(rl,c));
+ storage.AT(ml).AT(rl).AT(lc).AT(tl) = typed_data(tl,rl,lc,ml);
+ storage.AT(ml).AT(rl).AT(lc).AT(tl)->allocate
+ (d.boxes.AT(ml).AT(rl).AT(c).exterior, dist::rank());
} // for tl
- } // for c
+ } // for lc
}
@@ -97,34 +107,39 @@ void ggf::set_timelevels (const int ml, const int rl, const int new_timelevels)
void ggf::recompose_crop ()
{
// Free storage that will not be needed
- static Timer timer ("ggf::recompose_crop");
+ static Carpet::Timer timer ("CarpetLib::ggf::recompose_crop");
timer.start ();
for (int ml=0; ml<h.mglevels(); ++ml) {
for (int rl=h.reflevels(); rl<(int)storage.AT(ml).size(); ++rl) {
- for (int c=0; c<(int)storage.AT(ml).AT(rl).size(); ++c) {
- for (int tl=0; tl<(int)storage.AT(ml).AT(rl).AT(c).size(); ++tl) {
- delete storage.AT(ml).AT(rl).AT(c).AT(tl);
+ for (int lc=0; lc<(int)storage.AT(ml).AT(rl).size(); ++lc) {
+ for (int tl=0; tl<(int)storage.AT(ml).AT(rl).AT(lc).size(); ++tl) {
+ delete storage.AT(ml).AT(rl).AT(lc).AT(tl);
} // for tl
- } // for c
+ } // for lc
} // for rl
storage.AT(ml).resize(h.reflevels());
} // for ml
- timer.stop (0);
+ timer.stop ();
}
void ggf::recompose_allocate (const int rl)
{
// Retain storage that might be needed
- static Timer timer ("ggf::recompose_allocate");
+ static Carpet::Timer timer ("CarpetLib::ggf::recompose_allocate");
timer.start ();
oldstorage.resize(storage.size());
for (int ml=0; ml<(int)storage.size(); ++ml) {
oldstorage.AT(ml).resize(storage.AT(ml).size());
+#if 0
oldstorage.AT(ml).AT(rl) = storage.AT(ml).AT(rl);
storage.AT(ml).AT(rl).clear();
+#else
+ oldstorage.AT(ml).AT(rl).clear();
+ swap (storage.AT(ml).AT(rl), oldstorage.AT(ml).AT(rl));
+#endif
}
for (int ml=0; ml<d.h.mglevels(); ++ml) {
@@ -135,32 +150,36 @@ void ggf::recompose_allocate (const int rl)
storage.resize(h.mglevels());
for (int ml=0; ml<h.mglevels(); ++ml) {
storage.AT(ml).resize(h.reflevels());
- storage.AT(ml).AT(rl).resize(h.components(rl));
- for (int c=0; c<h.components(rl); ++c) {
- storage.AT(ml).AT(rl).AT(c).resize(timelevels(ml,rl));
+ storage.AT(ml).AT(rl).resize(h.local_components(rl));
+ for (int lc=0; lc<h.local_components(rl); ++ lc) {
+ int const c = h.get_component(rl,lc);
+ storage.AT(ml).AT(rl).AT(lc).resize(timelevels(ml,rl));
for (int tl=0; tl<timelevels(ml,rl); ++tl) {
- storage.AT(ml).AT(rl).AT(c).AT(tl) = typed_data(tl,rl,c,ml);
- storage.AT(ml).AT(rl).AT(c).AT(tl)->allocate
- (d.boxes.AT(ml).AT(rl).AT(c).exterior, h.processor(rl,c));
+ storage.AT(ml).AT(rl).AT(lc).AT(tl) = typed_data(tl,rl,lc,ml);
+ storage.AT(ml).AT(rl).AT(lc).AT(tl)->allocate
+ (d.boxes.AT(ml).AT(rl).AT(c).exterior, dist::rank());
} // for tl
- } // for c
+ } // for lc
} // for ml
- timer.stop (0);
+ timer.stop ();
}
void ggf::recompose_fill (comm_state & state, int const rl,
bool const do_prolongate)
{
// Initialise the new storage
- static Timer timer ("ggf::recompose_fill");
+ static Carpet::Timer timer ("CarpetLib::ggf::recompose_fill");
timer.start ();
-
+
for (int ml = 0; ml < h.mglevels(); ++ ml) {
+ assert (d.fast_boxes.AT(ml).AT(rl).do_init);
+
vector <int> tls;
if (do_prolongate and rl > 0 and
- transport_operator != op_none and transport_operator != op_sync)
+ transport_operator != op_none and transport_operator != op_sync and
+ transport_operator != op_restrict)
{
int const numtl = timelevels (ml, rl);
tls.resize (numtl);
@@ -185,7 +204,9 @@ void ggf::recompose_fill (comm_state & state, int const rl,
// Initialise from a coarser level of the new hierarchy, where
// possible
if (rl > 0) {
- if (transport_operator != op_none and transport_operator != op_sync) {
+ if (transport_operator != op_none and transport_operator != op_sync and
+ transport_operator != op_restrict)
+ {
for (int tl = 0; tl < timelevels (ml, rl); ++tl) {
transfer_from_all (state,
tl, rl, ml,
@@ -199,43 +220,43 @@ void ggf::recompose_fill (comm_state & state, int const rl,
} // for ml
- timer.stop (0);
+ timer.stop ();
}
void ggf::recompose_free_old (const int rl)
{
// Delete old storage
- static Timer timer ("dh::recompose_free_old");
+ static Carpet::Timer timer ("dh::recompose_free_old");
timer.start ();
for (int ml=0; ml<(int)oldstorage.size(); ++ml) {
- for (int c=0; c<(int)oldstorage.AT(ml).AT(rl).size(); ++c) {
- for (int tl=0; tl<timelevels(ml,rl); ++tl) {
- delete oldstorage.AT(ml).AT(rl).AT(c).AT(tl);
+ for (int lc=0; lc<(int)oldstorage.AT(ml).AT(rl).size(); ++lc) {
+ for (int tl=0; tl<(int)oldstorage.AT(ml).AT(rl).AT(lc).size(); ++tl) {
+ delete oldstorage.AT(ml).AT(rl).AT(lc).AT(tl);
} // for tl
- } // for c
+ } // for lc
oldstorage.AT(ml).AT(rl).clear();
} // for ml
- timer.stop (0);
+ timer.stop ();
}
void ggf::recompose_free (const int rl)
{
// Delete old storage
- static Timer timer ("dh::recompose_free");
+ static Carpet::Timer timer ("dh::recompose_free");
timer.start ();
for (int ml=0; ml<(int)storage.size(); ++ml) {
- for (int c=0; c<(int)storage.AT(ml).AT(rl).size(); ++c) {
+ for (int lc=0; lc<h.local_components(rl); ++ lc) {
for (int tl=0; tl<timelevels(ml,rl); ++tl) {
- delete storage.AT(ml).AT(rl).AT(c).AT(tl);
+ delete storage.AT(ml).AT(rl).AT(lc).AT(tl);
} // for tl
- } // for c
+ } // for lc
storage.AT(ml).AT(rl).clear();
} // for ml
- timer.stop (0);
+ timer.stop ();
}
@@ -246,8 +267,8 @@ void ggf::cycle_all (int const rl, int const ml) {
assert (ml>=0 and ml<h.mglevels());
int const ntl = timelevels(ml,rl);
assert (ntl > 0);
- for (int c=0; c<(int)storage.AT(ml).AT(rl).size(); ++c) {
- fdata & fdatas = storage.AT(ml).AT(rl).AT(c);
+ for (int lc=0; lc<(int)storage.AT(ml).AT(rl).size(); ++lc) {
+ fdata & fdatas = storage.AT(ml).AT(rl).AT(lc);
gdata * const tmpdata = fdatas.AT(ntl-1);
for (int tl=ntl-1; tl>0; --tl) {
fdatas.AT(tl) = fdatas.AT(tl-1);
@@ -260,8 +281,8 @@ void ggf::cycle_all (int const rl, int const ml) {
void ggf::flip_all (int const rl, int const ml) {
assert (rl>=0 and rl<h.reflevels());
assert (ml>=0 and ml<h.mglevels());
- for (int c=0; c<(int)storage.AT(ml).AT(rl).size(); ++c) {
- fdata & fdatas = storage.AT(ml).AT(rl).AT(c);
+ for (int lc=0; lc<(int)storage.AT(ml).AT(rl).size(); ++lc) {
+ fdata & fdatas = storage.AT(ml).AT(rl).AT(lc);
for (int tl=0; tl<(timelevels(ml,rl)-1)/2; ++tl) {
const int tl1 = tl;
const int tl2 = timelevels(ml,rl)-1 - tl;
@@ -279,15 +300,13 @@ void ggf::flip_all (int const rl, int const ml) {
void ggf::fill_all (int const rl, int const ml) {
assert (rl>=0 and rl<h.reflevels());
assert (ml>=0 and ml<h.mglevels());
- for (int c=0; c<(int)storage.AT(ml).AT(rl).size(); ++c) {
- if (h.is_local(rl,c)) {
- fdata const & fdatas = storage.AT(ml).AT(rl).AT(c);
- void const * const srcptr = fdatas.AT(0)->storage();
- size_t const size = fdatas.AT(0)->size() * fdatas.AT(0)->elementsize();
- for (int tl=1; tl<timelevels(ml,rl); ++tl) {
- void * const dstptr = fdatas.AT(tl)->storage();
- memcpy (dstptr, srcptr, size);
- }
+ for (int lc=0; lc<(int)storage.AT(ml).AT(rl).size(); ++lc) {
+ fdata const & fdatas = storage.AT(ml).AT(rl).AT(lc);
+ void const * const srcptr = fdatas.AT(0)->storage();
+ size_t const size = fdatas.AT(0)->size() * fdatas.AT(0)->elementsize();
+ for (int tl=1; tl<timelevels(ml,rl); ++tl) {
+ void * const dstptr = fdatas.AT(tl)->storage();
+ memcpy (dstptr, srcptr, size);
}
}
}
@@ -322,7 +341,9 @@ ref_bnd_prolongate_all (comm_state & state,
{
// Interpolate
assert (rl>=1);
- if (transport_operator == op_none or transport_operator == op_sync) return;
+ if (transport_operator == op_none or transport_operator == op_sync or
+ transport_operator == op_restrict)
+ return;
vector<int> tl2s;
static Timer timer ("ref_bnd_prolongate_all");
timer.start ();
@@ -435,7 +456,9 @@ ref_prolongate_all (comm_state & state,
CCTK_REAL const time)
{
assert (rl>=1);
- if (transport_operator == op_none or transport_operator == op_sync) return;
+ if (transport_operator == op_none or transport_operator == op_sync or
+ transport_operator == op_restrict)
+ return;
static Timer timer ("ref_prolongate_all");
timer.start ();
vector<int> tl2s;
@@ -467,8 +490,7 @@ transfer_from_all (comm_state & state,
assert (ml1>=0 and ml1<h.mglevels());
assert (tl1>=0 and tl1<timelevels(ml1,rl1));
- int const p = dist::rank();
- srpvect const & psendrecvs = d.fast_boxes.AT(ml1).AT(rl1).AT(p).*sendrecvs;
+ srpvect const & psendrecvs = d.fast_boxes.AT(ml1).AT(rl1).*sendrecvs;
// Return early if this communication does not concern us
if (psendrecvs.empty()) return;
@@ -482,7 +504,11 @@ transfer_from_all (comm_state & state,
assert (rl2>=0 and rl2<(int)srcstorage.AT(ml2).size());
for (size_t i = 0; i < tl2s.size(); ++ i) {
int const tl2 = tl2s.AT(i);
- assert (tl2>=0 and tl2<(int)srcstorage.AT(ml2).AT(rl2).AT(0).size());
+ assert (tl2>=0);
+ int const lc = 0;
+ if (lc < int(srcstorage.AT(ml2).AT(rl2).size())) {
+ assert (tl2<(int)srcstorage.AT(ml2).AT(rl2).AT(lc).size());
+ }
}
// Set up source times
@@ -514,14 +540,21 @@ transfer_from_all (comm_state & state,
ibbox const & recv = precv.extent;
int const c2 = psend.component;
int const c1 = precv.component;
+ int const lc2 = h.get_local_component(rl2,c2);
+ int const lc1 = h.get_local_component(rl1,c1);
+ int const p2 = h.processor(rl2,c2);
+ int const p1 = h.processor(rl1,c1);
// Source and destination data
- gdata * const dst = storage.AT(ml1).AT(rl1).AT(c1).AT(tl1);
+ gdata * const dst =
+ lc1>=0 ? storage.AT(ml1).AT(rl1).AT(lc1).AT(tl1) : NULL;
cdata const & srcs = srcstorage.AT(ml2).AT(rl2);
for (int i=0; i<(int)gsrcs.size(); ++i) {
- gsrcs.AT(i) = srcs.AT(c2).AT(tl2s.AT(i));
+ gsrcs.AT(i) = lc2>=0 ? srcs.AT(lc2).AT(tl2s.AT(i)) : NULL;
}
- dst->transfer_from (state, gsrcs, times, recv, send, time, pos, pot);
+
+ dst->transfer_from
+ (state, gsrcs, times, recv, send, p1, p2 , time, pos, pot);
}
total.stop (0);
@@ -546,3 +579,16 @@ memory ()
memoryof (vectorleader) +
memoryof (oldstorage);
}
+
+size_t
+ggf::
+allmemory ()
+{
+ size_t mem = memoryof(allggf);
+ for (list<ggf*>::const_iterator
+ ggfi = allggf.begin(); ggfi != allggf.end(); ++ ggfi)
+ {
+ mem += memoryof(**ggfi);
+ }
+ return mem;
+}
diff --git a/Carpet/CarpetLib/src/ggf.hh b/Carpet/CarpetLib/src/ggf.hh
index b2c86b8db..f89a73ee0 100644
--- a/Carpet/CarpetLib/src/ggf.hh
+++ b/Carpet/CarpetLib/src/ggf.hh
@@ -28,6 +28,9 @@ ostream& operator<< (ostream& os, const ggf& f);
// A generic grid function without type information
class ggf {
+
+ static list<ggf*> allggf;
+ list<ggf*>::iterator allggfi;
// Types
typedef list<ibbox> iblist;
@@ -38,7 +41,7 @@ class ggf {
typedef gdata* tdata; // data ...
typedef vector<tdata> fdata; // ... for each time level
- typedef vector<fdata> cdata; // ... for each component
+ typedef vector<fdata> cdata; // ... for each local component
typedef vector<cdata> rdata; // ... for each refinement level
typedef vector<rdata> mdata; // ... for each multigrid level
@@ -53,6 +56,7 @@ public: // should be readonly
const gh &h; // grid hierarchy
dh &d; // data hierarchy
+ dh::ggf_handle dh_handle;
protected:
vector<vector<int> > timelevels_; // time levels [ml][rl]
@@ -80,10 +84,10 @@ public:
virtual ~ggf ();
// Comparison
- bool operator== (const ggf& f) const;
+ bool operator== (const ggf& f) const CCTK_ATTRIBUTE_PURE;
// Querying
- int timelevels (int const ml, int const rl) const
+ int timelevels (int const ml, int const rl) const CCTK_ATTRIBUTE_PURE
{
return timelevels_.AT(ml).AT(rl);
}
@@ -141,9 +145,7 @@ public:
// Helpers
-protected:
-
- virtual gdata* typed_data (int tl, int rl, int c, int ml) = 0;
+ virtual gdata* typed_data (int tl, int rl, int lc, int ml) const = 0;
@@ -181,13 +183,14 @@ protected:
public:
// Access to the data
- virtual const gdata* operator() (int tl, int rl, int c, int ml) const = 0;
- virtual gdata* operator() (int tl, int rl, int c, int ml) = 0;
+ virtual const gdata* operator() (int tl, int rl, int lc, int ml) const CCTK_ATTRIBUTE_PURE = 0;
+ virtual gdata* operator() (int tl, int rl, int lc, int ml) CCTK_ATTRIBUTE_PURE = 0;
// Output
- virtual size_t memory () const;
+ virtual size_t memory () const CCTK_ATTRIBUTE_PURE = 0;
+ static size_t allmemory () CCTK_ATTRIBUTE_PURE;
virtual ostream& output (ostream& os) const = 0;
private:
@@ -199,6 +202,7 @@ private:
+inline size_t memoryof (ggf const & f) CCTK_ATTRIBUTE_PURE;
inline size_t memoryof (ggf const & f)
{
return f.memory ();
diff --git a/Carpet/CarpetLib/src/gh.cc b/Carpet/CarpetLib/src/gh.cc
index 9ecdb925c..21cb399a6 100644
--- a/Carpet/CarpetLib/src/gh.cc
+++ b/Carpet/CarpetLib/src/gh.cc
@@ -6,6 +6,8 @@
#include "cctk.h"
#include "cctk_Parameters.h"
+#include "CarpetTimers.hh"
+
#include "defs.hh"
#include "dh.hh"
#include "th.hh"
@@ -19,7 +21,11 @@ using namespace CarpetLib;
- // Constructors
+list<gh*> gh::allgh;
+
+
+
+// Constructors
gh::
gh (vector<ivect> const & reffacts_, centering const refcent_,
int const mgfact_, centering const mgcent_,
@@ -65,12 +71,15 @@ gh (vector<ivect> const & reffacts_, centering const refcent_,
boundary_width[0] + boundary_width[1]));
}
}
+
+ allghi = allgh.insert(allgh.end(), this);
}
// Destructors
gh::
~gh ()
{
+ allgh.erase(allghi);
}
@@ -78,14 +87,16 @@ gh::
// Modifiers
void
gh::
-regrid (rregs const & superregs, mregs const & regs)
+regrid (rregs const & superregs, mregs const & regs, bool const do_init)
{
DECLARE_CCTK_PARAMETERS;
+
+ static Carpet::Timer timer ("CarpetLib::gh::regrid");
+ timer.start();
superregions = superregs;
- // Save the grid hierarchy
- oldregions.clear ();
+ assert (oldregions.empty());
swap (oldregions, regions);
regions = regs;
@@ -182,19 +193,70 @@ regrid (rregs const & superregs, mregs const & regs)
}
}
+
+
+ // Calculate global and local components
+ global_components_.resize(reflevels());
+ local_components_.resize(reflevels());
+ for (int rl=0; rl<reflevels(); ++rl) {
+ {
+ int lc = 0;
+ for (int c=0; c<components(rl); ++c) {
+ lc += is_local(rl,c);
+ }
+ global_components_.AT(rl).resize(lc);
+ }
+ local_components_.AT(rl).resize(components(rl));
+ {
+ int lc = 0;
+ for (int c=0; c<components(rl); ++c) {
+ if (is_local(rl,c)) {
+ global_components_.AT(rl).AT(lc) = c;
+ local_components_.AT(rl).AT(c) = lc;
+ ++lc;
+ } else {
+ local_components_.AT(rl).AT(c) = -1;
+ }
+ }
+ }
+ }
+
+
+
// Output
if (output_bboxes) {
do_output_bboxes (cout);
do_output_bases (cout);
}
+
+
// Regrid the other hierarchies
for (list<th*>::iterator t=ths.begin(); t!=ths.end(); ++t) {
(*t)->regrid();
}
for (list<dh*>::iterator d=dhs.begin(); d!=dhs.end(); ++d) {
- (*d)->regrid();
+ (*d)->regrid(do_init);
+ }
+
+ timer.stop();
+}
+
+
+
+void
+gh::
+regrid_free (bool const do_init)
+{
+ oldregions.clear();
+
+ for (list<th*>::iterator t=ths.begin(); t!=ths.end(); ++t) {
+ (*t)->regrid_free();
+ }
+
+ for (list<dh*>::iterator d=dhs.begin(); d!=dhs.end(); ++d) {
+ (*d)->regrid_free(do_init);
}
}
@@ -205,11 +267,6 @@ gh::
recompose (int const rl,
bool const do_prolongate)
{
- // Handle changes in number of mglevels
- if (oldregions.size() != regions.size()) {
- oldregions.resize (regions.size());
- }
-
bool const do_recompose = level_did_change(rl);
if (do_recompose) {
@@ -219,12 +276,6 @@ recompose (int const rl,
(*d)->recompose (rl, do_prolongate);
}
- // Overwrite old with new grid hierarchy
- for (int ml=0; ml<mglevels(); ++ml) {
- oldregions.AT(ml).resize (regions.AT(ml).size());
- oldregions.AT(ml).AT(rl) = regions.AT(ml).AT(rl);
- }
-
}
return do_recompose;
@@ -235,7 +286,6 @@ recompose (int const rl,
bool
gh::
level_did_change (int const rl)
- const
{
// Find out whether this level changed
if (regions.size() != oldregions.size()) return true;
@@ -263,11 +313,23 @@ gh::
local_components (int const rl)
const
{
- int lc = 0;
- for (int c=0; c<components(rl); ++c) {
- lc += is_local(rl,c);
- }
- return lc;
+ return global_components_.AT(rl).size();
+}
+
+int
+gh::
+get_component (int const rl, int const lc)
+ const
+{
+ return global_components_.AT(rl).AT(lc);
+}
+
+int
+gh::
+get_local_component (int const rl, int const c)
+ const
+{
+ return local_components_.AT(rl).AT(c);
}
@@ -374,36 +436,36 @@ locate_position (ivect const & ipos,
// Time hierarchy management
-void
+gh::th_handle
gh::
add (th * const t)
{
- ths.push_back (t);
+ return ths.insert(ths.end(), t);
}
void
gh::
-remove (th * const t)
+erase (th_handle const ti)
{
- ths.remove (t);
+ ths.erase(ti);
}
// Data hierarchy management
-void
+gh::dh_handle
gh::
add (dh * const d)
{
- dhs.push_back (d);
+ return dhs.insert(dhs.end(), d);
}
void
gh::
-remove (dh * const d)
+erase (dh_handle di)
{
- dhs.remove (d);
+ dhs.erase (di);
}
@@ -416,6 +478,7 @@ memory ()
const
{
return
+ sizeof allghi + // memoryof (allghi) +
memoryof (reffacts) +
memoryof (refcent) +
memoryof (mgfact) +
@@ -428,6 +491,19 @@ memory ()
memoryof (dhs);
}
+size_t
+gh::
+allmemory ()
+{
+ size_t mem = memoryof(allgh);
+ for (list<gh*>::const_iterator
+ ghi = allgh.begin(); ghi != allgh.end(); ++ ghi)
+ {
+ mem += memoryof(**ghi);
+ }
+ return mem;
+}
+
// Output
diff --git a/Carpet/CarpetLib/src/gh.hh b/Carpet/CarpetLib/src/gh.hh
index b80d71ca3..1942ae38b 100644
--- a/Carpet/CarpetLib/src/gh.hh
+++ b/Carpet/CarpetLib/src/gh.hh
@@ -28,6 +28,9 @@ class gh;
// level. The extents do not include ghost zones.
class gh {
+ static list<gh*> allgh;
+ list<gh*>::iterator allghi;
+
public:
// Types
@@ -47,14 +50,21 @@ public: // should be readonly
vector<vector<ibbox> > baseextents; // [ml][rl]
const i2vect boundary_width;
+private:
+ vector<vector<int> > global_components_; // [rl][lc]
+ vector<vector<int> > local_components_; // [rl][c]
+public:
+
// Extents of the regions before distributing them over the
// processors
rregs superregions;
mregs regions; // extents and properties of all grids
- mregs oldregions; // a copy, used during regridding
+ mregs oldregions; // extents and properties of all grids
+ typedef list<th*>::iterator th_handle;
list<th*> ths; // list of all time hierarchies
+ typedef list<dh*>::iterator dh_handle;
list<dh*> dhs; // list of all data hierarchies
public:
@@ -69,64 +79,67 @@ public:
~gh ();
// Modifiers
- void regrid (rregs const & superregs, mregs const & regs);
+ void regrid (rregs const & superregs, mregs const & regs, bool do_init);
+ void regrid_free (bool do_init);
bool recompose (int rl, bool do_prolongate);
private:
- bool level_did_change (int rl) const;
+ bool level_did_change (int rl) CCTK_ATTRIBUTE_PURE;
// Accessors
public:
- ibbox const & extent (const int ml, const int rl, const int c) const
+ ibbox const & extent (const int ml, const int rl, const int c) const CCTK_ATTRIBUTE_PURE
{
return regions.AT(ml).AT(rl).AT(c).extent;
}
- ibbox const & baseextent (const int ml, const int rl) const
+ ibbox const & baseextent (const int ml, const int rl) const CCTK_ATTRIBUTE_PURE
{
return baseextents.AT(ml).AT(rl);
}
- b2vect const & outer_boundaries (const int rl, const int c) const
+ b2vect const & outer_boundaries (const int rl, const int c) const CCTK_ATTRIBUTE_PURE
{
return regions.AT(0).AT(rl).AT(c).outer_boundaries;
}
- int processor (const int rl, const int c) const
+ int processor (const int rl, const int c) const CCTK_ATTRIBUTE_PURE
{
return regions.AT(0).AT(rl).AT(c).processor;
}
- int old_processor (const int rl, const int c) const
+ int old_processor (const int rl, const int c) const CCTK_ATTRIBUTE_PURE
{
return oldregions.AT(0).AT(rl).AT(c).processor;
}
- int mglevels () const
+ int mglevels () const CCTK_ATTRIBUTE_PURE
{
return (int)regions.size();
}
- int reflevels () const
+ int reflevels () const CCTK_ATTRIBUTE_PURE
{
if (mglevels() == 0) return 0;
return (int)regions.AT(0).size();
}
- int components (const int rl) const
+ int components (const int rl) const CCTK_ATTRIBUTE_PURE
{
return (int)regions.AT(0).AT(rl).size();
}
- bool is_local (const int rl, const int c) const
+ bool is_local (const int rl, const int c) const CCTK_ATTRIBUTE_PURE
{
return processor(rl,c) == dist::rank();
}
- int local_components (const int rl) const;
+ int local_components (int rl) const CCTK_ATTRIBUTE_PURE;
+ int get_component (int rl, int lc) const CCTK_ATTRIBUTE_PURE;
+ int get_local_component (int rl, int c) const CCTK_ATTRIBUTE_PURE;
void locate_position (rvect const & rpos,
int const ml,
@@ -139,15 +152,16 @@ public:
int & rl, int & c, ivect & aligned_ipos) const;
// Time hierarchy management
- void add (th * t);
- void remove (th * t);
+ th_handle add (th * t);
+ void erase (th_handle ti);
// Data hierarchy management
- void add (dh * d);
- void remove (dh * d);
+ dh_handle add (dh * d);
+ void erase (dh_handle di);
// Output
- size_t memory () const;
+ size_t memory () const CCTK_ATTRIBUTE_PURE;
+ static size_t allmemory () CCTK_ATTRIBUTE_PURE;
ostream & output (ostream & os) const;
private:
@@ -158,6 +172,7 @@ private:
+inline size_t memoryof (gh const & g) CCTK_ATTRIBUTE_PURE;
inline size_t memoryof (gh const & g)
{
return g.memory ();
diff --git a/Carpet/CarpetLib/src/interpolate_3d_2tl.cc b/Carpet/CarpetLib/src/interpolate_3d_2tl.cc
index 0b984b142..9dad6a55d 100644
--- a/Carpet/CarpetLib/src/interpolate_3d_2tl.cc
+++ b/Carpet/CarpetLib/src/interpolate_3d_2tl.cc
@@ -6,7 +6,7 @@
#include <cctk.h>
#include <cctk_Parameters.h>
-#include "operator_prototypes.hh"
+#include "operator_prototypes_3d.hh"
#include "typeprops.hh"
using namespace std;
diff --git a/Carpet/CarpetLib/src/interpolate_3d_3tl.cc b/Carpet/CarpetLib/src/interpolate_3d_3tl.cc
index c0e8b44fd..6fdaa854d 100644
--- a/Carpet/CarpetLib/src/interpolate_3d_3tl.cc
+++ b/Carpet/CarpetLib/src/interpolate_3d_3tl.cc
@@ -6,7 +6,7 @@
#include <cctk.h>
#include <cctk_Parameters.h>
-#include "operator_prototypes.hh"
+#include "operator_prototypes_3d.hh"
#include "typeprops.hh"
using namespace std;
diff --git a/Carpet/CarpetLib/src/interpolate_3d_4tl.cc b/Carpet/CarpetLib/src/interpolate_3d_4tl.cc
index 7d4c7fe26..0a2f5c66c 100644
--- a/Carpet/CarpetLib/src/interpolate_3d_4tl.cc
+++ b/Carpet/CarpetLib/src/interpolate_3d_4tl.cc
@@ -6,7 +6,7 @@
#include <cctk.h>
#include <cctk_Parameters.h>
-#include "operator_prototypes.hh"
+#include "operator_prototypes_3d.hh"
#include "typeprops.hh"
using namespace std;
diff --git a/Carpet/CarpetLib/src/interpolate_3d_5tl.cc b/Carpet/CarpetLib/src/interpolate_3d_5tl.cc
index f4204ea68..848d04a7e 100644
--- a/Carpet/CarpetLib/src/interpolate_3d_5tl.cc
+++ b/Carpet/CarpetLib/src/interpolate_3d_5tl.cc
@@ -6,7 +6,7 @@
#include <cctk.h>
#include <cctk_Parameters.h>
-#include "operator_prototypes.hh"
+#include "operator_prototypes_3d.hh"
#include "typeprops.hh"
using namespace std;
diff --git a/Carpet/CarpetLib/src/interpolate_eno_3d_3tl.cc b/Carpet/CarpetLib/src/interpolate_eno_3d_3tl.cc
index f3693c220..729bb20b5 100644
--- a/Carpet/CarpetLib/src/interpolate_eno_3d_3tl.cc
+++ b/Carpet/CarpetLib/src/interpolate_eno_3d_3tl.cc
@@ -6,7 +6,7 @@
#include <cctk.h>
#include <cctk_Parameters.h>
-#include "operator_prototypes.hh"
+#include "operator_prototypes_3d.hh"
#include "typeprops.hh"
using namespace std;
diff --git a/Carpet/CarpetLib/src/limits.cc b/Carpet/CarpetLib/src/limits.cc
new file mode 100644
index 000000000..f3c5ed650
--- /dev/null
+++ b/Carpet/CarpetLib/src/limits.cc
@@ -0,0 +1,95 @@
+#include <cctk.h>
+#include <cctk_Parameters.h>
+
+#include <algorithm>
+#include <cassert>
+#include <iostream>
+#include <sys/resource.h>
+
+#include "defs.hh"
+
+#include "limits.hh"
+
+namespace CarpetLib {
+
+ using namespace std;
+
+
+
+ static
+ void
+ set_limit (int resource, char const * name, CCTK_INT value);
+
+ static
+ ostream &
+ operator<< (ostream & s, struct rlimit const & limit);
+
+ static
+ void
+ output (ostream & s, rlim_t const & value);
+
+ // Apply the configured system resource limits: the core file size
+ // (RLIMIT_CORE, max_core_size_MB) and the address space size (RLIMIT_AS, max_memory_size_MB).
+ void
+ set_system_limits ()
+ {
+ DECLARE_CCTK_PARAMETERS; // presumably brings max_core_size_MB / max_memory_size_MB into scope -- confirm against param.ccl
+ set_limit (RLIMIT_CORE, "core file size", max_core_size_MB);
+ set_limit (RLIMIT_AS, "memory size", max_memory_size_MB);
+ }
+
+ // Print and optionally change the soft limit of `resource` (`name` labels the output).
+ // value == -2: print only; value == -1: raise soft limit to the hard limit; otherwise value is in MB.
+ void
+ set_limit (int const resource, char const * const name, CCTK_INT const value)
+ {
+ struct rlimit limit;
+ check (not getrlimit (resource, & limit)); // getrlimit returns 0 on success; check() is defined elsewhere
+
+ if (value == -2 ) {
+ // Only show limit
+ cout << "Current " << name << " limit: " << limit << endl;
+ return;
+ }
+
+ cout << "Old " << name << " limit: " << limit << endl;
+
+ if (value == -1) {
+ limit.rlim_cur = limit.rlim_max;
+ } else {
+ limit.rlim_cur = min ((rlim_t) value * 1024 * 1024, limit.rlim_max); // MB -> bytes, never above the hard limit
+ }
+
+ check (not setrlimit (resource, & limit));
+ check (not getrlimit (resource, & limit)); // read back so the values actually in effect are printed
+
+ cout << "New " << name << " limit: " << limit << endl;
+ }
+
+ // Write both members of an rlimit as "hard=<rlim_max>, soft=<rlim_cur>",
+ // each value formatted by output().
+ static
+ ostream &
+ operator<< (ostream & s, struct rlimit const & limit)
+ {
+ s << "hard=";
+ output (s, limit.rlim_max);
+ s << ", soft=";
+ output (s, limit.rlim_cur);
+ return s;
+ }
+
+ // Write a single limit value: "[unlimited]" for RLIM_INFINITY,
+ // otherwise the value divided by 1024*1024 followed by " MB".
+ static
+ void
+ output (ostream & s, rlim_t const & value)
+ {
+ if (value == RLIM_INFINITY) {
+ s << "[unlimited]";
+ } else {
+ s << (value / CCTK_REAL (1024*1024)) << " MB"; // CCTK_REAL is presumably a floating-point type, so fractions of a MB are kept -- confirm
+ }
+ }
+
+} // namespace CarpetLib
diff --git a/Carpet/CarpetLib/src/limits.hh b/Carpet/CarpetLib/src/limits.hh
new file mode 100644
index 000000000..910f02e26
--- /dev/null
+++ b/Carpet/CarpetLib/src/limits.hh
@@ -0,0 +1,8 @@
+#include <cctk.h>
+
+namespace CarpetLib {
+ // Declared here, defined in limits.cc. NOTE(review): this header has no include guard.
+ void
+ set_system_limits ();
+
+} // namespace CarpetLib
diff --git a/Carpet/CarpetLib/src/make.code.defn b/Carpet/CarpetLib/src/make.code.defn
index 88f6261ce..81a0352ad 100644
--- a/Carpet/CarpetLib/src/make.code.defn
+++ b/Carpet/CarpetLib/src/make.code.defn
@@ -13,12 +13,16 @@ SRCS = bbox.cc \
gf.cc \
ggf.cc \
gh.cc \
+ limits.cc \
mem.cc \
+ mpi_string.cc \
region.cc \
+ startup_time.cc \
th.cc \
timestat.cc \
vect.cc \
copy_3d.cc \
+ copy_4d.cc \
interpolate_3d_2tl.cc \
interpolate_3d_3tl.cc \
interpolate_3d_4tl.cc \
@@ -26,6 +30,7 @@ SRCS = bbox.cc \
interpolate_eno_3d_3tl.cc \
restrict_3d_cc_rf2.cc \
restrict_3d_rf2.cc \
+ restrict_4d_rf2.cc \
prolongate_3d_cc_rf2.cc \
prolongate_3d_o1_rf2.cc \
prolongate_3d_o3_rf2.cc \
@@ -33,8 +38,13 @@ SRCS = bbox.cc \
prolongate_3d_o7_rf2.cc \
prolongate_3d_o9_rf2.cc \
prolongate_3d_o11_rf2.cc \
+ prolongate_3d_cc_o0_rf2.cc \
+ prolongate_3d_cc_o1_rf2.cc \
+ prolongate_3d_cc_o2_rf2.cc \
+ prolongate_3d_o5_monotone_rf2.cc \
prolongate_3d_real8_eno.F90 \
- prolongate_3d_real8_weno.F90
+ prolongate_3d_real8_weno.F90 \
+ prolongate_4d_o1_rf2.cc
# Subdirectories containing source files
SUBDIRS =
diff --git a/Carpet/CarpetLib/src/mem.cc b/Carpet/CarpetLib/src/mem.cc
index 848988637..b3840a115 100644
--- a/Carpet/CarpetLib/src/mem.cc
+++ b/Carpet/CarpetLib/src/mem.cc
@@ -21,6 +21,11 @@
#include "defs.hh"
#include "dist.hh"
+#include "dh.hh"
+#include "gdata.hh"
+#include "ggf.hh"
+#include "gh.hh"
+#include "th.hh"
#include "mem.hh"
@@ -30,12 +35,18 @@ using namespace std;
+double const MEGA = 1024*1024;
+
+
+
struct mstat {
- // Carpet statistics
+ // Carpet object statistics
double total_bytes;
double total_objects;
double max_bytes;
double max_objects;
+ // Carpet administrative data structure statistics
+ double total_admin_bytes;
// malloc statistics
double malloc_used_bytes;
double malloc_free_bytes;
@@ -71,15 +82,15 @@ mem (size_t const vectorlength, size_t const nelems,
if (memptr == NULL) {
const double nbytes = vectorlength * nelems * sizeof (T);
if (max_allowed_memory_MB > 0
- and (total_allocated_bytes + nbytes > 1.0e6 * max_allowed_memory_MB))
+ and (total_allocated_bytes + nbytes > MEGA * max_allowed_memory_MB))
{
T Tdummy;
CCTK_VWarn (0, __LINE__, __FILE__, CCTK_THORNSTRING,
"Refusing to allocate %.0f bytes (%.3f MB) of memory for type %s. %.0f bytes (%.3f MB) are currently allocated in %d objects. The parameter file specifies a maximum of %d MB",
- double(nbytes), double(nbytes/1.0e6),
+ double(nbytes), double(nbytes/MEGA),
typestring(Tdummy),
double(total_allocated_bytes),
- double(total_allocated_bytes/1.0e6),
+ double(total_allocated_bytes/MEGA),
int(total_allocated_objects),
int(max_allowed_memory_MB));
}
@@ -90,10 +101,10 @@ mem (size_t const vectorlength, size_t const nelems,
T Tdummy;
CCTK_VWarn (0, __LINE__, __FILE__, CCTK_THORNSTRING,
"Failed to allocate %.0f bytes (%.3f MB) of memory for type %s. %.0f bytes (%.3f MB) are currently allocated in %d objects",
- double(nbytes), double(nbytes/1.0e6),
+ double(nbytes), double(nbytes/MEGA),
typestring(Tdummy),
double(total_allocated_bytes),
- double(total_allocated_bytes/1.0e6),
+ double(total_allocated_bytes/MEGA),
int(total_allocated_objects));
}
total_allocated_bytes += nbytes;
@@ -214,7 +225,7 @@ alloc (size_t nbytes)
if (not freeptr) {
CCTK_VWarn (CCTK_WARN_ABORT, __LINE__, __FILE__, CCTK_THORNSTRING,
"Failed to allocate %.3f MB of memory",
- double(freesize/1.0e6));
+ double(freesize/MEGA));
}
// Remember the pointer so that it can be freed
chunks.push (freeptr);
@@ -251,40 +262,6 @@ memory ()
-extern "C" void CarpetLib_setmemlimit (CCTK_ARGUMENTS);
-
-void CarpetLib_setmemlimit (CCTK_ARGUMENTS)
-{
- DECLARE_CCTK_ARGUMENTS;
- DECLARE_CCTK_PARAMETERS;
-
- // Set address space limit
- struct rlimit aslimit;
- {
- check (not getrlimit (RLIMIT_AS, & aslimit));
- }
- CCTK_VInfo (CCTK_THORNSTRING,
- "Old address space size limit: hard=%lld, soft=%lld",
- (long long) aslimit.rlim_max, (long long) aslimit.rlim_cur);
- if (max_allowed_memory_MB > 0) {
- aslimit.rlim_cur = max_allowed_memory_MB * 1000000LL;
- }
- {
- check (not setrlimit (RLIMIT_AS, & aslimit));
- }
- {
- check (not getrlimit (RLIMIT_AS, & aslimit));
- }
- CCTK_VInfo (CCTK_THORNSTRING,
- "Old address space size limit: hard=%lld, soft=%lld",
- (long long) aslimit.rlim_max, (long long) aslimit.rlim_cur);
- CCTK_VInfo (CCTK_THORNSTRING,
- "(Unlimited address space size indicated by %lld)",
- (long long) RLIM_INFINITY);
-}
-
-
-
extern "C" void CarpetLib_printmemstats (CCTK_ARGUMENTS);
void CarpetLib_printmemstats (CCTK_ARGUMENTS)
@@ -294,14 +271,17 @@ void CarpetLib_printmemstats (CCTK_ARGUMENTS)
int const ioproc = 0;
- if (print_memstats_every > 0
- and cctk_iteration % print_memstats_every == 0)
+ if ((print_memstats_every == 0 and cctk_iteration == 0) or
+ (print_memstats_every > 0 and cctk_iteration % print_memstats_every == 0))
{
mstat mybuf;
mybuf.total_bytes = total_allocated_bytes;
mybuf.total_objects = total_allocated_objects;
mybuf.max_bytes = max_allocated_bytes;
mybuf.max_objects = max_allocated_objects;
+ mybuf.total_admin_bytes =
+ gh::allmemory() + dh::allmemory() + th::allmemory() +
+ ggf::allmemory() + gdata::allmemory();
#ifdef HAVE_MALLINFO
// NOTE: struct mallinfo returns byte-counts as int, which can
// overflow. In this case, the information is incorrect.
@@ -316,14 +296,23 @@ void CarpetLib_printmemstats (CCTK_ARGUMENTS)
cout << "Memory statistics from CarpetLib:" << eol
<< " Current number of objects: " << total_allocated_objects << eol
<< " Current allocated memory: "
- << setprecision(3) << total_allocated_bytes / 1.0e6 << " MB" << eol
+ << setprecision(3) << total_allocated_bytes / MEGA << " MB" << eol
<< " Maximum number of objects: " << max_allocated_objects << eol
<< " Maximum allocated memory: "
- << setprecision(3) << max_allocated_bytes / 1.0e6 << " MB" << eol
+ << setprecision(3) << max_allocated_bytes / MEGA << " MB" << eol
+ << " Current administrative memory: "
+ << setprecision(3) << mybuf.total_admin_bytes / MEGA << " MB" << eol
<< " Total allocated used system memory: "
- << setprecision(3) << mybuf.malloc_used_bytes / 1.0e6 << " MB" << eol
+ << setprecision(3) << mybuf.malloc_used_bytes / MEGA << " MB" << eol
<< " Total allocated free system memory: "
- << setprecision(3) << mybuf.malloc_free_bytes / 1.0e6 << " MB" << endl;
+ << setprecision(3) << mybuf.malloc_free_bytes / MEGA << " MB" << endl;
+
+#warning "TODO"
+ cout << " gh::allmemory: " << gh ::allmemory() << eol
+ << " dh::allmemory: " << dh ::allmemory() << eol
+ << " th::allmemory: " << th ::allmemory() << eol
+ << " ggf::allmemory: " << ggf ::allmemory() << eol
+ << " gdata::allmemory: " << gdata::allmemory() << endl;
if (strcmp (memstat_file, "") != 0) {
vector<mstat> allbuf (dist::size());
@@ -333,9 +322,15 @@ void CarpetLib_printmemstats (CCTK_ARGUMENTS)
if (dist::rank() == ioproc) {
+ double max_total_bytes = 0;
+ double avg_total_bytes = 0;
+ double cnt_total_bytes = 0;
double max_max_bytes = 0;
double avg_max_bytes = 0;
double cnt_max_bytes = 0;
+ double max_admin_bytes = 0;
+ double avg_admin_bytes = 0;
+ double cnt_admin_bytes = 0;
double max_used_bytes = 0;
double avg_used_bytes = 0;
double cnt_used_bytes = 0;
@@ -343,9 +338,15 @@ void CarpetLib_printmemstats (CCTK_ARGUMENTS)
double avg_free_bytes = 0;
double cnt_free_bytes = 0;
for (size_t n=0; n<allbuf.size(); ++n) {
+ max_total_bytes = max (max_total_bytes, allbuf[n].total_bytes);
+ avg_total_bytes += allbuf[n].total_bytes;
+ ++ cnt_total_bytes;
max_max_bytes = max (max_max_bytes, allbuf[n].max_bytes);
avg_max_bytes += allbuf[n].max_bytes;
++ cnt_max_bytes;
+ max_admin_bytes = max (max_admin_bytes, allbuf[n].total_admin_bytes);
+ avg_admin_bytes += allbuf[n].total_admin_bytes;
+ ++ cnt_admin_bytes;
max_used_bytes = max (max_used_bytes, allbuf[n].malloc_used_bytes);
avg_used_bytes += allbuf[n].malloc_used_bytes;
++ cnt_used_bytes;
@@ -353,7 +354,9 @@ void CarpetLib_printmemstats (CCTK_ARGUMENTS)
avg_free_bytes += allbuf[n].malloc_free_bytes;
++ cnt_free_bytes;
}
+ avg_total_bytes /= cnt_total_bytes;
avg_max_bytes /= cnt_max_bytes;
+ avg_admin_bytes /= cnt_admin_bytes;
avg_used_bytes /= cnt_used_bytes;
avg_free_bytes /= cnt_free_bytes;
@@ -377,13 +380,15 @@ void CarpetLib_printmemstats (CCTK_ARGUMENTS)
}
file << "# Running on " << dist::size() << " processors" << eol;
file << "#" << eol;
- file << "# iteration maxmaxbytes avgmaxbytes maxusedbytes avgusedbytes maxfreebytes avgfreebytes" << eol;
+ file << "# iteration maxtotalbytes avgtotalbytes maxmaxbytes avgmaxbytes maxadminbytes avgadminbytes maxusedbytes avgusedbytes maxfreebytes avgfreebytes" << eol; // one name per value column written for each iteration
} else {
file.open (filename.c_str(), ios::out | ios::app);
}
file << cctk_iteration
+ << "\t "<< max_total_bytes << " " << avg_total_bytes
<< "\t "<< max_max_bytes << " " << avg_max_bytes
+ << "\t "<< max_admin_bytes << " " << avg_admin_bytes
<< "\t "<< max_used_bytes << " " << avg_used_bytes
<< "\t "<< max_free_bytes << " " << avg_free_bytes
<< eol;
diff --git a/Carpet/CarpetLib/src/mem.hh b/Carpet/CarpetLib/src/mem.hh
index dbd976c21..fe8694d32 100644
--- a/Carpet/CarpetLib/src/mem.hh
+++ b/Carpet/CarpetLib/src/mem.hh
@@ -24,7 +24,7 @@ public:
T * memptr = NULL, size_t memsize = 0);
~mem ();
- T * storage (size_t vectorindex) const
+ T * storage (size_t vectorindex) const CCTK_ATTRIBUTE_PURE
{
assert (vectorindex < vectorlength_);
assert (clients_.AT(vectorindex));
@@ -33,13 +33,15 @@ public:
void register_client (size_t vectorindex);
void unregister_client (size_t vectorindex);
- bool has_clients () const;
+ bool has_clients () const CCTK_ATTRIBUTE_PURE;
// Memory usage
- size_t memory () const;
+ size_t memory () const CCTK_ATTRIBUTE_PURE;
};
template<typename T>
+inline size_t memoryof (mem<T> const & m) CCTK_ATTRIBUTE_PURE;
+template<typename T>
inline size_t memoryof (mem<T> const & m) { return m.memory(); }
@@ -84,9 +86,10 @@ public:
void * alloc (size_t nbytes);
// Memory usage
- size_t memory () const;
+ size_t memory () const CCTK_ATTRIBUTE_PURE;
};
+inline size_t memoryof (mempool const & m) CCTK_ATTRIBUTE_PURE;
inline size_t memoryof (mempool const & m) { return m.memory(); }
#endif // ifndef MEM_HH
diff --git a/Carpet/CarpetLib/src/mpi_string.cc b/Carpet/CarpetLib/src/mpi_string.cc
new file mode 100644
index 000000000..063ed0bdf
--- /dev/null
+++ b/Carpet/CarpetLib/src/mpi_string.cc
@@ -0,0 +1,480 @@
+#include <algorithm>
+#include <cassert>
+#include <cstring>
+#include <string>
+#include <vector>
+
+#include <mpi.h>
+
+#include "cctk.h"
+
+#include "dh.hh"
+#include "mpi_string.hh"
+#include "region.hh"
+
+
+
+namespace CarpetLib
+{
+
+ using namespace std;
+
+  // Gather one string from every process of comm (collective call).
+  // Element n of the result is the string contributed by rank n; empty strings are allowed.
+  vector <string>
+  allgather_string (MPI_Comm const comm,
+                    string const & data)
+  {
+    // Get the total number of processors
+    int num_procs;
+    MPI_Comm_size (comm, & num_procs);
+
+    // Gather the lengths of the data strings
+    int const length = data.length();
+    vector <int> lengths (num_procs);
+
+    MPI_Allgather (const_cast <int *> (& length), 1, MPI_INT,
+                   & lengths.front(), 1, MPI_INT,
+                   comm);
+
+    // Allocate space for all data strings (offsets is the prefix sum of lengths)
+    vector <int> offsets (num_procs + 1);
+    offsets.AT(0) = 0;
+    for (int n = 0; n < num_procs; ++ n)
+    {
+      offsets.AT(n + 1) = offsets.AT(n) + lengths.AT(n);
+    }
+    int const total_length = offsets.AT(num_procs);
+    vector <char> alldata_buffer (total_length);
+
+    // Gather all data strings; front() must not be called on an empty buffer
+    MPI_Allgatherv (const_cast <char *> (data.c_str()), length, MPI_CHAR,
+                    total_length > 0 ? & alldata_buffer.front() : NULL,
+                    const_cast <int *> (& lengths.front()),
+                    const_cast <int *> (& offsets.front()),
+                    MPI_CHAR,
+                    comm);
+
+    // Convert data buffer to C++ strings (iterator ranges stay valid for empty strings)
+    vector <string> alldata (num_procs);
+    for (int n = 0; n < num_procs; ++ n)
+    {
+      alldata.AT(n) =
+        string (alldata_buffer.begin() + offsets.AT(n), alldata_buffer.begin() + offsets.AT(n + 1));
+    }
+
+    return alldata;
+  }
+
+  // Exchange strings between all processes of comm (collective call): data[n] is sent
+  // to rank n, and element n of the result is the string received from rank n.
+  vector <string>
+  alltoallv_string (MPI_Comm const comm,
+                    vector<string> const & data)
+  {
+    // Get the total number of processors
+    int num_procs;
+    MPI_Comm_size (comm, & num_procs);
+
+    // Exchange the lengths of the data strings
+    vector <int> lengths_in (num_procs);
+    for (int n = 0; n < num_procs; ++ n)
+    {
+      lengths_in.AT(n) = data.AT(n).length();
+    }
+    vector <int> lengths (num_procs);
+    MPI_Alltoall (& lengths_in.front(), 1, MPI_INT,
+                  & lengths.front(), 1, MPI_INT,
+                  comm);
+
+    // Allocate space for all data strings (offsets are prefix sums of the lengths)
+    vector <int> offsets_in (num_procs + 1);
+    offsets_in.AT(0) = 0;
+    for (int n = 0; n < num_procs; ++ n)
+    {
+      offsets_in.AT(n + 1) = offsets_in.AT(n) + lengths_in.AT(n);
+    }
+    int const total_length_in = offsets_in.AT(num_procs);
+    vector <char> alldata_buffer_in (total_length_in);
+
+    vector <int> offsets (num_procs + 1);
+    offsets.AT(0) = 0;
+    for (int n = 0; n < num_procs; ++ n)
+    {
+      offsets.AT(n + 1) = offsets.AT(n) + lengths.AT(n);
+    }
+    int const total_length = offsets.AT(num_procs);
+    vector <char> alldata_buffer (total_length);
+
+    // Pack the C++ strings into the send buffer; copying via iterators is safe for empty strings
+    for (int n = 0; n < num_procs; ++ n)
+    {
+      copy (data.AT(n).begin(),
+            data.AT(n).end(),
+            alldata_buffer_in.begin() + offsets_in.AT(n));
+    }
+
+    // Exchange all data strings; front() must not be called on an empty buffer
+    MPI_Alltoallv (total_length_in > 0 ? & alldata_buffer_in.front() : NULL,
+                   & lengths_in.front(), & offsets_in.front(), MPI_CHAR,
+                   total_length > 0 ? & alldata_buffer.front() : NULL,
+                   & lengths.front(), & offsets.front(), MPI_CHAR,
+                   comm);
+
+    // Convert data buffer to C++ strings (iterator ranges stay valid for empty strings)
+    vector <string> alldata (num_procs);
+    for (int n = 0; n < num_procs; ++ n)
+    {
+      alldata.AT(n) =
+        string (alldata_buffer.begin() + offsets.AT(n), alldata_buffer.begin() + offsets.AT(n + 1));
+    }
+
+    return alldata;
+  }
+
+  // Broadcast the string data from rank root to all processes of comm (collective call).
+  // Every rank returns the root's string; the data argument is ignored on non-root ranks.
+  string
+  broadcast_string (MPI_Comm const comm,
+                    int const root,
+                    string const & data)
+  {
+    // Get my rank
+    int rank;
+    MPI_Comm_rank (comm, & rank);
+
+    if (rank == root) {
+
+      // Broadcast the length of the data string
+      int const length = data.length();
+      MPI_Bcast (const_cast <int *> (& length), 1, MPI_INT, root, comm);
+
+      // Broadcast data string
+      char const * const buf = data.c_str();
+      MPI_Bcast (const_cast <char *> (buf), length, MPI_CHAR, root, comm);
+
+      // Return original string
+      return data;
+
+    } else {
+
+      // Broadcast the length of the data string
+      int length;
+      MPI_Bcast (& length, 1, MPI_INT, root, comm);
+
+      // Allocate space for data string
+      vector <char> data_buffer (length);
+
+      // Broadcast data string; front() must not be called when the string is empty
+      char * const buf = length > 0 ? & data_buffer.front() : NULL;
+      MPI_Bcast (buf, length, MPI_CHAR, root, comm);
+
+      // Convert data buffer to a C++ string (valid for an empty buffer as well)
+      string const result (data_buffer.begin(), data_buffer.end());
+
+      return result;
+
+    }
+  }
+
+
+
+ //////////////////////////////////////////////////////////////////////////////
+
+  // Gather a vector of T from every process of comm (collective call); element n of the
+  // result is the vector contributed by rank n. Elements travel as raw bytes (sizeof(T) each).
+  template <typename T>
+  vector <vector <T> >
+  allgatherv (MPI_Comm comm,
+              vector <T> const & data)
+  {
+    // cerr << "QQQ: allgatherv[0]" << endl;
+    // Get the total number of processors
+    int num_procs;
+    MPI_Comm_size (comm, & num_procs);
+
+    // Exchange the sizes of the data vectors
+    int const size_in = data.size();
+    vector <int> sizes_out (num_procs);
+    // cerr << "QQQ: allgatherv[1] size_in=" << size_in << endl;
+    MPI_Allgather (const_cast <int *> (& size_in), 1, MPI_INT,
+                   & sizes_out.front(), 1, MPI_INT,
+                   comm);
+    // cerr << "QQQ: allgatherv[2]" << endl;
+
+    // Allocate space for all data vectors (offsets_out is the prefix sum of sizes_out)
+    vector <int> offsets_out (num_procs + 1);
+    offsets_out.AT(0) = 0;
+    for (int n = 0; n < num_procs; ++ n)
+    {
+      offsets_out.AT(n + 1) = offsets_out.AT(n) + sizes_out.AT(n);
+    }
+    int const total_length_out = offsets_out.AT(num_procs);
+    vector <T> alldata_buffer_out (total_length_out);
+
+    // Exchange all data vectors
+    T const dummy;
+    MPI_Datatype const type = mpi_datatype (dummy);
+    int datatypesize;
+    MPI_Type_size (type, &datatypesize);
+    // cerr << "QQQ: allgatherv[3] total_length_out=" << total_length_out << " datatypesize=" << datatypesize << endl;
+#if 0
+    MPI_Allgatherv (const_cast <T *> (& data.front()),
+                    size_in, type,
+                    & alldata_buffer_out.front(),
+                    & sizes_out.front(), & offsets_out.front(), type,
+                    comm);
+#else
+    int const typesize = sizeof(T);
+    for (int n = 0; n < num_procs; ++ n)
+    {
+      sizes_out.AT(n) *= typesize;   // element counts -> byte counts
+      offsets_out.AT(n) *= typesize;
+    }
+    MPI_Allgatherv (size_in > 0 ? const_cast <T *> (& data.front()) : NULL, // front() is undefined on an empty vector
+                    size_in * typesize, MPI_CHAR,
+                    total_length_out > 0 ? & alldata_buffer_out.front() : NULL,
+                    & sizes_out.front(), & offsets_out.front(), MPI_CHAR,
+                    comm);
+    for (int n = 0; n < num_procs; ++ n)
+    {
+      sizes_out.AT(n) /= typesize;   // back to element counts for the unpacking below
+      offsets_out.AT(n) /= typesize;
+    }
+#endif
+    // cerr << "QQQ: allgatherv[4]" << endl;
+
+    // Convert data buffer to vectors
+    vector <vector <T> > alldata_out (num_procs);
+    {
+      typename vector <T>::const_iterator p = alldata_buffer_out.begin();
+      for (int n = 0; n < num_procs; ++ n)
+      {
+        typename vector <T>::const_iterator const pold = p;
+        advance (p, sizes_out.AT(n));
+        alldata_out.AT(n).assign (pold, p);
+      }
+      assert (p == alldata_buffer_out.end());
+    }
+
+    // cerr << "QQQ: allgatherv[5]" << endl;
+    return alldata_out;
+  }
+
+  // Exchange one T with every process of comm (collective call): data[n] is sent to
+  // rank n and element n of the result comes from rank n. data must hold one element per rank.
+  template <typename T>
+  vector <T>
+  alltoall (MPI_Comm const comm,
+            vector <T> const & data)
+  {
+    // Get the total number of processors
+    int num_procs;
+    MPI_Comm_size (comm, & num_procs);
+    assert (int (data.size()) == num_procs); // MPI reads exactly one element per rank from data
+    // Allocate space for all data
+    vector <T> alldata (num_procs);
+
+    // Exchange all data vectors
+    T const dummy;
+    MPI_Datatype const type = mpi_datatype (dummy);
+    MPI_Alltoall (const_cast <T *> (& data.front()), 1, type, // non-const send buffer, as in the sibling functions
+                  & alldata.front(), 1, type,
+                  comm);
+
+    return alldata;
+  }
+
+  // Exchange vectors of T between all processes of comm (collective call): data[n] is
+  // sent to rank n and element n of the result is the vector received from rank n.
+  template <typename T>
+  vector <vector <T> >
+  alltoallv (MPI_Comm const comm,
+             vector <vector <T> > const & data)
+  {
+    // Get the total number of processors
+    int num_procs;
+    MPI_Comm_size (comm, & num_procs);
+
+    // Exchange the sizes of the data vectors
+    vector <int> sizes_in (num_procs);
+    for (int n = 0; n < num_procs; ++ n)
+    {
+      sizes_in.AT(n) = data.AT(n).size();
+    }
+    vector <int> sizes_out (num_procs);
+    MPI_Alltoall (& sizes_in.front(), 1, MPI_INT,
+                  & sizes_out.front(), 1, MPI_INT,
+                  comm);
+
+    // Copy vectors to data buffer (offsets_in is the prefix sum of sizes_in)
+    vector <int> offsets_in (num_procs + 1);
+    offsets_in.AT(0) = 0;
+    for (int n = 0; n < num_procs; ++ n)
+    {
+      offsets_in.AT(n + 1) = offsets_in.AT(n) + sizes_in.AT(n);
+    }
+    int const total_length_in = offsets_in.AT(num_procs);
+    vector <T> alldata_buffer_in;
+    alldata_buffer_in.reserve (total_length_in);
+    for (int n = 0; n < num_procs; ++ n)
+    {
+      alldata_buffer_in.insert (alldata_buffer_in.end(),
+                                data.AT(n).begin(), data.AT(n).end());
+    }
+
+    // Allocate space for all data vectors (offsets_out is the prefix sum of sizes_out)
+    vector <int> offsets_out (num_procs + 1);
+    offsets_out.AT(0) = 0;
+    for (int n = 0; n < num_procs; ++ n)
+    {
+      offsets_out.AT(n + 1) = offsets_out.AT(n) + sizes_out.AT(n);
+    }
+    int const total_length_out = offsets_out.AT(num_procs);
+    vector <T> alldata_buffer_out (total_length_out);
+
+    // Exchange all data vectors; front() must not be called on an empty buffer
+    T const dummy;
+    MPI_Datatype const type = mpi_datatype (dummy);
+    MPI_Alltoallv (total_length_in > 0 ? & alldata_buffer_in.front() : NULL,
+                   & sizes_in.front(), & offsets_in.front(), type,
+                   total_length_out > 0 ? & alldata_buffer_out.front() : NULL,
+                   & sizes_out.front(), & offsets_out.front(), type,
+                   comm);
+
+    // Convert data buffer to vectors
+    vector <vector <T> > alldata_out (num_procs);
+    {
+      typename vector <T>::const_iterator p = alldata_buffer_out.begin();
+      for (int n = 0; n < num_procs; ++ n)
+      {
+        typename vector <T>::const_iterator const pold = p;
+        advance (p, sizes_out.AT(n));
+        alldata_out.AT(n).assign (pold, p);
+      }
+    }
+
+    return alldata_out;
+  }
+
+ // Like alltoallv, but returns everything received as one flat vector, ordered by sending rank.
+ // The active code uses MPI_Irecv/MPI_Isend per rank; the MPI_Alltoallv variant is disabled (#if 0).
+ template <typename T>
+ vector <T>
+ alltoallv1 (MPI_Comm const comm,
+ vector <vector <T> > const & data)
+ {
+ // Get the total number of processors
+ int num_procs;
+ MPI_Comm_size (comm, & num_procs);
+
+ // Exchange the sizes of the data vectors
+ vector <int> sizes_in (num_procs);
+ for (int n = 0; n < num_procs; ++ n)
+ {
+ sizes_in.AT(n) = data.AT(n).size();
+ }
+ vector <int> sizes_out (num_procs);
+ // cerr << "QQQ: alltoallv1[1]" << endl;
+ MPI_Alltoall (& sizes_in.front(), 1, MPI_INT,
+ & sizes_out.front(), 1, MPI_INT,
+ comm);
+ // cerr << "QQQ: alltoallv1[2]" << endl;
+
+#if 0
+ // Copy vectors to data buffer
+ vector <int> offsets_in (num_procs + 1);
+ offsets_in.AT(0) = 0;
+ for (int n = 0; n < num_procs; ++ n)
+ {
+ offsets_in.AT(n + 1) = offsets_in.AT(n) + sizes_in.AT(n);
+ }
+ int const total_length_in = offsets_in.AT(num_procs);
+ vector <T> alldata_buffer_in;
+ alldata_buffer_in.reserve (total_length_in);
+ for (int n = 0; n < num_procs; ++ n)
+ {
+ alldata_buffer_in.insert (alldata_buffer_in.end(),
+ data.AT(n).begin(), data.AT(n).end());
+ }
+
+ // Allocate space for all data vectors
+ vector <int> offsets_out (num_procs + 1);
+ offsets_out.AT(0) = 0;
+ for (int n = 0; n < num_procs; ++ n)
+ {
+ offsets_out.AT(n + 1) = offsets_out.AT(n) + sizes_out.AT(n);
+ }
+ int const total_length_out = offsets_out.AT(num_procs);
+ vector <T> alldata_buffer_out (total_length_out);
+
+ // Exchange all data vectors
+ T const dummy;
+ MPI_Datatype const type = mpi_datatype (dummy);
+ // cerr << "QQQ: alltoallv1[3]" << endl;
+ MPI_Alltoallv (& alldata_buffer_in.front(),
+ & sizes_in.front(), & offsets_in.front(), type,
+ & alldata_buffer_out.front(),
+ & sizes_out.front(), & offsets_out.front(), type,
+ comm);
+ // cerr << "QQQ: alltoallv1[4]" << endl;
+#endif
+
+ // Allocate space for all data vectors
+ vector <int> offsets_out (num_procs + 1);
+ offsets_out.AT(0) = 0;
+ for (int n = 0; n < num_procs; ++ n)
+ {
+ offsets_out.AT(n + 1) = offsets_out.AT(n) + sizes_out.AT(n);
+ }
+ int const total_length_out = offsets_out.AT(num_procs);
+ vector <T> alldata_buffer_out (total_length_out);
+
+ // Exchange all data vectors
+ T const dummy;
+ MPI_Datatype const type = mpi_datatype (dummy);
+ int const tag = 4711;
+ vector <MPI_Request> reqs (2 * num_procs); // room for at most one receive and one send per rank
+ int nreqs = 0;
+ // cerr << "QQQ: alltoallv1[5]" << endl;
+ for (int n = 0; n < num_procs; ++ n)
+ {
+ if (sizes_out.AT(n) > 0) { // zero-length messages are neither posted nor sent
+ MPI_Irecv (& alldata_buffer_out.AT(offsets_out.AT(n)),
+ sizes_out.AT(n),
+ type,
+ n, tag, comm, & reqs.AT(nreqs));
+ ++ nreqs;
+ }
+ }
+ // cerr << "QQQ: alltoallv1[6]" << endl;
+ for (int n = 0; n < num_procs; ++ n)
+ {
+ if (sizes_in.AT(n) > 0) {
+ MPI_Isend (const_cast <T *> (& data.AT(n).front()), // sent directly from the caller's vector, no packing
+ sizes_in.AT(n),
+ type,
+ n, tag, comm, & reqs.AT(nreqs));
+ ++ nreqs;
+ }
+ }
+ // cerr << "QQQ: alltoallv1[7]" << endl;
+ MPI_Waitall (nreqs, & reqs.front(), MPI_STATUSES_IGNORE);
+ // cerr << "QQQ: alltoallv1[8]" << endl;
+
+ return alldata_buffer_out;
+ }
+
+
+ // Explicit instantiations of the templates defined above
+ template
+ vector <vector <dh::dboxes> >
+ allgatherv (MPI_Comm comm,
+ vector <dh::dboxes> const & data);
+
+ template
+ vector <sendrecv_pseudoregion_t>
+ alltoallv1 (MPI_Comm comm,
+ vector <vector <sendrecv_pseudoregion_t> > const & data);
+
+} // namespace CarpetLib
diff --git a/Carpet/CarpetLib/src/mpi_string.hh b/Carpet/CarpetLib/src/mpi_string.hh
new file mode 100644
index 000000000..6ed8d447d
--- /dev/null
+++ b/Carpet/CarpetLib/src/mpi_string.hh
@@ -0,0 +1,55 @@
+#include <string>
+#include <vector>
+
+#include <cctk.h>
+
+#include <mpi.h>
+
+
+
+namespace CarpetLib
+{
+ // NOTE(review): this header has no include guard.
+ using namespace std;
+
+
+
+ // String communication
+ // (each function takes the communicator to operate on and returns the received string(s) by value)
+ vector <string>
+ allgather_string (MPI_Comm comm,
+ string const & data);
+
+ vector <string>
+ alltoallv_string (MPI_Comm comm,
+ vector <string> const & data);
+
+ string
+ broadcast_string (MPI_Comm comm, int root,
+ string const & data);
+
+
+
+ // Arbitrary datatypes
+ // (declarations only; the template definitions and their explicit instantiations are in mpi_string.cc)
+ template <typename T>
+ vector <vector <T> >
+ allgatherv (MPI_Comm comm,
+ vector <T> const & data);
+
+ template <typename T>
+ vector <T>
+ alltoall (MPI_Comm comm,
+ vector <T> const & data);
+
+ template <typename T>
+ vector <vector <T> >
+ alltoallv (MPI_Comm comm,
+ vector <vector <T> > const & data);
+
+ template <typename T>
+ vector <T>
+ alltoallv1 (MPI_Comm comm,
+ vector <vector <T> > const & data);
+
+} // namespace CarpetLib
diff --git a/Carpet/CarpetLib/src/operator_prototypes.hh b/Carpet/CarpetLib/src/operator_prototypes.hh
index 0eb990923..1feae8a90 100644
--- a/Carpet/CarpetLib/src/operator_prototypes.hh
+++ b/Carpet/CarpetLib/src/operator_prototypes.hh
@@ -1,253 +1,12 @@
#ifndef OPERATOR_PROTOTYPES
#define OPERATOR_PROTOTYPES
-#include <cstdlib>
-
-#include <cctk.h>
-
-#include <vect.hh>
-#include <bbox.hh>
-
namespace CarpetLib {
- using namespace std;
-
-
-
- static inline
- size_t
- index3 (size_t const i, size_t const j, size_t const k,
- size_t const exti, size_t const extj, size_t const extk)
- {
-#ifdef CARPET_DEBUG
- assert (static_cast <ptrdiff_t> (i) >= 0 and i < exti);
- assert (static_cast <ptrdiff_t> (j) >= 0 and j < extj);
- assert (static_cast <ptrdiff_t> (k) >= 0 and k < extk);
-#endif
-
- return i + exti * (j + extj * k);
- }
-
-
-
- static int const dim3 = 3;
-
- typedef vect <bool, dim3> bvect3;
- typedef vect <int, dim3> ivect3;
- typedef bbox <int, dim3> ibbox3;
-
static int const reffact2 = 2;
-
-
- template <typename T>
- void
- copy_3d (T const * restrict const src,
- ivect3 const & restrict srcext,
- T * restrict const dst,
- ivect3 const & restrict dstext,
- ibbox3 const & restrict srcbbox,
- ibbox3 const & restrict dstbbox,
- ibbox3 const & restrict regbbox);
-
-
-
- template <typename T>
- void
- prolongate_3d_o1_rf2 (T const * restrict const src,
- ivect3 const & restrict srcext,
- T * restrict const dst,
- ivect3 const & restrict dstext,
- ibbox3 const & restrict srcbbox,
- ibbox3 const & restrict dstbbox,
- ibbox3 const & restrict regbbox);
-
- template <typename T>
- void
- prolongate_3d_o3_rf2 (T const * restrict const src,
- ivect3 const & restrict srcext,
- T * restrict const dst,
- ivect3 const & restrict dstext,
- ibbox3 const & restrict srcbbox,
- ibbox3 const & restrict dstbbox,
- ibbox3 const & restrict regbbox);
-
- template <typename T>
- void
- prolongate_3d_o5_rf2 (T const * restrict const src,
- ivect3 const & restrict srcext,
- T * restrict const dst,
- ivect3 const & restrict dstext,
- ibbox3 const & restrict srcbbox,
- ibbox3 const & restrict dstbbox,
- ibbox3 const & restrict regbbox);
-
- template <typename T>
- void
- prolongate_3d_o7_rf2 (T const * restrict const src,
- ivect3 const & restrict srcext,
- T * restrict const dst,
- ivect3 const & restrict dstext,
- ibbox3 const & restrict srcbbox,
- ibbox3 const & restrict dstbbox,
- ibbox3 const & restrict regbbox);
-
- template <typename T>
- void
- prolongate_3d_o9_rf2 (T const * restrict const src,
- ivect3 const & restrict srcext,
- T * restrict const dst,
- ivect3 const & restrict dstext,
- ibbox3 const & restrict srcbbox,
- ibbox3 const & restrict dstbbox,
- ibbox3 const & restrict regbbox);
-
- template <typename T>
- void
- prolongate_3d_o11_rf2 (T const * restrict const src,
- ivect3 const & restrict srcext,
- T * restrict const dst,
- ivect3 const & restrict dstext,
- ibbox3 const & restrict srcbbox,
- ibbox3 const & restrict dstbbox,
- ibbox3 const & restrict regbbox);
-
-
-
- template <typename T>
- void
- restrict_3d_rf2 (T const * restrict const src,
- ivect3 const & restrict srcext,
- T * restrict const dst,
- ivect3 const & restrict dstext,
- ibbox3 const & restrict srcbbox,
- ibbox3 const & restrict dstbbox,
- ibbox3 const & restrict regbbox);
-
-
-
- template <typename T>
- void
- interpolate_3d_2tl (T const * restrict const src1,
- CCTK_REAL const t1,
- T const * restrict const src2,
- CCTK_REAL const t2,
- ivect3 const & restrict srcext,
- T * restrict const dst,
- CCTK_REAL const t,
- ivect3 const & restrict dstext,
- ibbox3 const & restrict srcbbox,
- ibbox3 const & restrict dstbbox,
- ibbox3 const & restrict regbbox);
-
- template <typename T>
- void
- interpolate_3d_3tl (T const * restrict const src1,
- CCTK_REAL const t1,
- T const * restrict const src2,
- CCTK_REAL const t2,
- T const * restrict const src3,
- CCTK_REAL const t3,
- ivect3 const & restrict srcext,
- T * restrict const dst,
- CCTK_REAL const t,
- ivect3 const & restrict dstext,
- ibbox3 const & restrict srcbbox,
- ibbox3 const & restrict dstbbox,
- ibbox3 const & restrict regbbox);
-
- template <typename T>
- void
- interpolate_3d_4tl (T const * restrict const src1,
- CCTK_REAL const t1,
- T const * restrict const src2,
- CCTK_REAL const t2,
- T const * restrict const src3,
- CCTK_REAL const t3,
- T const * restrict const src4,
- CCTK_REAL const t4,
- ivect3 const & restrict srcext,
- T * restrict const dst,
- CCTK_REAL const t,
- ivect3 const & restrict dstext,
- ibbox3 const & restrict srcbbox,
- ibbox3 const & restrict dstbbox,
- ibbox3 const & restrict regbbox);
-
- template <typename T>
- void
- interpolate_3d_5tl (T const * restrict const src1,
- CCTK_REAL const t1,
- T const * restrict const src2,
- CCTK_REAL const t2,
- T const * restrict const src3,
- CCTK_REAL const t3,
- T const * restrict const src4,
- CCTK_REAL const t4,
- T const * restrict const src5,
- CCTK_REAL const t5,
- ivect3 const & restrict srcext,
- T * restrict const dst,
- CCTK_REAL const t,
- ivect3 const & restrict dstext,
- ibbox3 const & restrict srcbbox,
- ibbox3 const & restrict dstbbox,
- ibbox3 const & restrict regbbox);
-
- template <typename T>
- void
- interpolate_eno_3d_3tl (T const * restrict const src1,
- CCTK_REAL const t1,
- T const * restrict const src2,
- CCTK_REAL const t2,
- T const * restrict const src3,
- CCTK_REAL const t3,
- ivect3 const & restrict srcext,
- T * restrict const dst,
- CCTK_REAL const t,
- ivect3 const & restrict dstext,
- ibbox3 const & restrict srcbbox,
- ibbox3 const & restrict dstbbox,
- ibbox3 const & restrict regbbox);
-
-
-
- template <typename T>
- void
- prolongate_3d_cc_rf2_std2prim (T const * restrict const src,
- ivect3 const & restrict srcext,
- T * restrict const dst,
- ivect3 const & restrict dstext,
- ibbox3 const & restrict srcbbox,
- ibbox3 const & restrict dstbbox,
- ibbox3 const & restrict regbbox);
-
- template <typename T>
- void
- prolongate_3d_cc_rf2_prim2std (T const * restrict const src,
- ivect3 const & restrict srcext,
- T * restrict const dst,
- ivect3 const & restrict dstext,
- ibbox3 const & restrict srcbbox,
- ibbox3 const & restrict dstbbox,
- ibbox3 const & restrict regbbox);
-
-
-
- template <typename T>
- void
- restrict_3d_cc_rf2 (T const * restrict const src,
- ivect3 const & restrict srcext,
- T * restrict const dst,
- ivect3 const & restrict dstext,
- ibbox3 const & restrict srcbbox,
- ibbox3 const & restrict dstbbox,
- ibbox3 const & restrict regbbox);
-
-
-
} // namespace CarpetLib
diff --git a/Carpet/CarpetLib/src/operator_prototypes_3d.hh b/Carpet/CarpetLib/src/operator_prototypes_3d.hh
new file mode 100644
index 000000000..19deccea8
--- /dev/null
+++ b/Carpet/CarpetLib/src/operator_prototypes_3d.hh
@@ -0,0 +1,305 @@
+#ifndef OPERATOR_PROTOTYPES_3D
+#define OPERATOR_PROTOTYPES_3D
+
+#include <cstdlib>
+
+#include <cctk.h>
+
+#include "defs.hh"
+#include "bbox.hh"
+#include "vect.hh"
+
+#include "operator_prototypes.hh"
+
+
+
+namespace CarpetLib {
+
+ using namespace std;
+
+
+
+ // Linear index of element (i,j,k) in a 3D array whose extents are
+ // (exti,extj,extk); i varies fastest, k slowest.  extk is only used
+ // for the range check that is compiled in when CARPET_DEBUG is
+ // defined.
+ static inline
+ size_t
+ index3 (size_t const i, size_t const j, size_t const k,
+         size_t const exti, size_t const extj, size_t const extk)
+   CCTK_ATTRIBUTE_CONST;
+
+ static inline
+ size_t
+ index3 (size_t const i, size_t const j, size_t const k,
+         size_t const exti, size_t const extj, size_t const extk)
+ {
+#ifdef CARPET_DEBUG
+   // The signed cast rejects indices that were negative before they
+   // were converted to size_t
+   assert (i < exti and static_cast <ptrdiff_t> (i) >= 0);
+   assert (j < extj and static_cast <ptrdiff_t> (j) >= 0);
+   assert (k < extk and static_cast <ptrdiff_t> (k) >= 0);
+#endif
+
+   // Offset within the (j,k) plane first, then the full offset
+   size_t const jk = j + extj * k;
+   return i + exti * jk;
+ }
+
+
+
+ static int const dim3 = 3;
+
+ typedef vect <bool, dim3> bvect3;
+ typedef vect <int, dim3> ivect3;
+ typedef bbox <int, dim3> ibbox3;
+
+
+
+ template <typename T>
+ void
+ copy_3d (T const * restrict const src,
+ ivect3 const & restrict srcext,
+ T * restrict const dst,
+ ivect3 const & restrict dstext,
+ ibbox3 const & restrict srcbbox,
+ ibbox3 const & restrict dstbbox,
+ ibbox3 const & restrict regbbox);
+
+
+
+ template <typename T>
+ void
+ prolongate_3d_o1_rf2 (T const * restrict const src,
+ ivect3 const & restrict srcext,
+ T * restrict const dst,
+ ivect3 const & restrict dstext,
+ ibbox3 const & restrict srcbbox,
+ ibbox3 const & restrict dstbbox,
+ ibbox3 const & restrict regbbox);
+
+ template <typename T>
+ void
+ prolongate_3d_o3_rf2 (T const * restrict const src,
+ ivect3 const & restrict srcext,
+ T * restrict const dst,
+ ivect3 const & restrict dstext,
+ ibbox3 const & restrict srcbbox,
+ ibbox3 const & restrict dstbbox,
+ ibbox3 const & restrict regbbox);
+
+ template <typename T>
+ void
+ prolongate_3d_o5_rf2 (T const * restrict const src,
+ ivect3 const & restrict srcext,
+ T * restrict const dst,
+ ivect3 const & restrict dstext,
+ ibbox3 const & restrict srcbbox,
+ ibbox3 const & restrict dstbbox,
+ ibbox3 const & restrict regbbox);
+
+ template <typename T>
+ void
+ prolongate_3d_o7_rf2 (T const * restrict const src,
+ ivect3 const & restrict srcext,
+ T * restrict const dst,
+ ivect3 const & restrict dstext,
+ ibbox3 const & restrict srcbbox,
+ ibbox3 const & restrict dstbbox,
+ ibbox3 const & restrict regbbox);
+
+ template <typename T>
+ void
+ prolongate_3d_o9_rf2 (T const * restrict const src,
+ ivect3 const & restrict srcext,
+ T * restrict const dst,
+ ivect3 const & restrict dstext,
+ ibbox3 const & restrict srcbbox,
+ ibbox3 const & restrict dstbbox,
+ ibbox3 const & restrict regbbox);
+
+ template <typename T>
+ void
+ prolongate_3d_o11_rf2 (T const * restrict const src,
+ ivect3 const & restrict srcext,
+ T * restrict const dst,
+ ivect3 const & restrict dstext,
+ ibbox3 const & restrict srcbbox,
+ ibbox3 const & restrict dstbbox,
+ ibbox3 const & restrict regbbox);
+
+
+
+ template <typename T>
+ void
+ prolongate_3d_o5_monotone_rf2 (T const * restrict const src,
+ ivect3 const & restrict srcext,
+ T * restrict const dst,
+ ivect3 const & restrict dstext,
+ ibbox3 const & restrict srcbbox,
+ ibbox3 const & restrict dstbbox,
+ ibbox3 const & restrict regbbox);
+
+
+
+ template <typename T>
+ void
+ prolongate_3d_cc_o0_rf2 (T const * restrict const src,
+ ivect3 const & restrict srcext,
+ T * restrict const dst,
+ ivect3 const & restrict dstext,
+ ibbox3 const & restrict srcbbox,
+ ibbox3 const & restrict dstbbox,
+ ibbox3 const & restrict regbbox);
+
+ template <typename T>
+ void
+ prolongate_3d_cc_o1_rf2 (T const * restrict const src,
+ ivect3 const & restrict srcext,
+ T * restrict const dst,
+ ivect3 const & restrict dstext,
+ ibbox3 const & restrict srcbbox,
+ ibbox3 const & restrict dstbbox,
+ ibbox3 const & restrict regbbox);
+
+ template <typename T>
+ void
+ prolongate_3d_cc_o2_rf2 (T const * restrict const src,
+ ivect3 const & restrict srcext,
+ T * restrict const dst,
+ ivect3 const & restrict dstext,
+ ibbox3 const & restrict srcbbox,
+ ibbox3 const & restrict dstbbox,
+ ibbox3 const & restrict regbbox);
+
+
+
+ template <typename T>
+ void
+ restrict_3d_rf2 (T const * restrict const src,
+ ivect3 const & restrict srcext,
+ T * restrict const dst,
+ ivect3 const & restrict dstext,
+ ibbox3 const & restrict srcbbox,
+ ibbox3 const & restrict dstbbox,
+ ibbox3 const & restrict regbbox);
+
+
+
+ template <typename T>
+ void
+ interpolate_3d_2tl (T const * restrict const src1,
+ CCTK_REAL const t1,
+ T const * restrict const src2,
+ CCTK_REAL const t2,
+ ivect3 const & restrict srcext,
+ T * restrict const dst,
+ CCTK_REAL const t,
+ ivect3 const & restrict dstext,
+ ibbox3 const & restrict srcbbox,
+ ibbox3 const & restrict dstbbox,
+ ibbox3 const & restrict regbbox);
+
+ template <typename T>
+ void
+ interpolate_3d_3tl (T const * restrict const src1,
+ CCTK_REAL const t1,
+ T const * restrict const src2,
+ CCTK_REAL const t2,
+ T const * restrict const src3,
+ CCTK_REAL const t3,
+ ivect3 const & restrict srcext,
+ T * restrict const dst,
+ CCTK_REAL const t,
+ ivect3 const & restrict dstext,
+ ibbox3 const & restrict srcbbox,
+ ibbox3 const & restrict dstbbox,
+ ibbox3 const & restrict regbbox);
+
+ template <typename T>
+ void
+ interpolate_3d_4tl (T const * restrict const src1,
+ CCTK_REAL const t1,
+ T const * restrict const src2,
+ CCTK_REAL const t2,
+ T const * restrict const src3,
+ CCTK_REAL const t3,
+ T const * restrict const src4,
+ CCTK_REAL const t4,
+ ivect3 const & restrict srcext,
+ T * restrict const dst,
+ CCTK_REAL const t,
+ ivect3 const & restrict dstext,
+ ibbox3 const & restrict srcbbox,
+ ibbox3 const & restrict dstbbox,
+ ibbox3 const & restrict regbbox);
+
+ template <typename T>
+ void
+ interpolate_3d_5tl (T const * restrict const src1,
+ CCTK_REAL const t1,
+ T const * restrict const src2,
+ CCTK_REAL const t2,
+ T const * restrict const src3,
+ CCTK_REAL const t3,
+ T const * restrict const src4,
+ CCTK_REAL const t4,
+ T const * restrict const src5,
+ CCTK_REAL const t5,
+ ivect3 const & restrict srcext,
+ T * restrict const dst,
+ CCTK_REAL const t,
+ ivect3 const & restrict dstext,
+ ibbox3 const & restrict srcbbox,
+ ibbox3 const & restrict dstbbox,
+ ibbox3 const & restrict regbbox);
+
+ template <typename T>
+ void
+ interpolate_eno_3d_3tl (T const * restrict const src1,
+ CCTK_REAL const t1,
+ T const * restrict const src2,
+ CCTK_REAL const t2,
+ T const * restrict const src3,
+ CCTK_REAL const t3,
+ ivect3 const & restrict srcext,
+ T * restrict const dst,
+ CCTK_REAL const t,
+ ivect3 const & restrict dstext,
+ ibbox3 const & restrict srcbbox,
+ ibbox3 const & restrict dstbbox,
+ ibbox3 const & restrict regbbox);
+
+
+
+ template <typename T>
+ void
+ prolongate_3d_cc_rf2_std2prim (T const * restrict const src,
+ ivect3 const & restrict srcext,
+ T * restrict const dst,
+ ivect3 const & restrict dstext,
+ ibbox3 const & restrict srcbbox,
+ ibbox3 const & restrict dstbbox,
+ ibbox3 const & restrict regbbox);
+
+ template <typename T>
+ void
+ prolongate_3d_cc_rf2_prim2std (T const * restrict const src,
+ ivect3 const & restrict srcext,
+ T * restrict const dst,
+ ivect3 const & restrict dstext,
+ ibbox3 const & restrict srcbbox,
+ ibbox3 const & restrict dstbbox,
+ ibbox3 const & restrict regbbox);
+
+
+
+ template <typename T>
+ void
+ restrict_3d_cc_rf2 (T const * restrict const src,
+ ivect3 const & restrict srcext,
+ T * restrict const dst,
+ ivect3 const & restrict dstext,
+ ibbox3 const & restrict srcbbox,
+ ibbox3 const & restrict dstbbox,
+ ibbox3 const & restrict regbbox);
+
+
+
+} // namespace CarpetLib
+
+
+
+#endif // #ifndef OPERATOR_PROTOTYPES_3D
diff --git a/Carpet/CarpetLib/src/operator_prototypes_4d.hh b/Carpet/CarpetLib/src/operator_prototypes_4d.hh
new file mode 100644
index 000000000..afd75c655
--- /dev/null
+++ b/Carpet/CarpetLib/src/operator_prototypes_4d.hh
@@ -0,0 +1,92 @@
+#ifndef OPERATOR_PROTOTYPES_4D
+#define OPERATOR_PROTOTYPES_4D
+
+#include <cstdlib>
+
+#include <cctk.h>
+
+#include "defs.hh"
+#include "bbox.hh"
+#include "vect.hh"
+
+#include "operator_prototypes.hh"
+
+
+
+namespace CarpetLib {
+
+ using namespace std;
+
+
+
+ // Linear index of element (i,j,k,l) in a 4D array whose extents are
+ // (exti,extj,extk,extl); i varies fastest, l slowest.  extl is only
+ // used for the range check that is compiled in when CARPET_DEBUG is
+ // defined.
+ //
+ // The forward declaration exists only to attach CCTK_ATTRIBUTE_CONST;
+ // its parameter names match the definition below.
+ static inline
+ size_t
+ index4 (size_t const i, size_t const j, size_t const k, size_t const l,
+         size_t const exti, size_t const extj, size_t const extk, size_t const extl)
+   CCTK_ATTRIBUTE_CONST;
+ static inline
+ size_t
+ index4 (size_t const i, size_t const j, size_t const k, size_t const l,
+         size_t const exti, size_t const extj, size_t const extk, size_t const extl)
+ {
+#ifdef CARPET_DEBUG
+   // The signed cast rejects indices that were negative before they
+   // were converted to size_t
+   assert (static_cast <ptrdiff_t> (i) >= 0 and i < exti);
+   assert (static_cast <ptrdiff_t> (j) >= 0 and j < extj);
+   assert (static_cast <ptrdiff_t> (k) >= 0 and k < extk);
+   assert (static_cast <ptrdiff_t> (l) >= 0 and l < extl);
+#endif
+
+   return i + exti * (j + extj * (k + extk * l));
+ }
+
+
+
+ static int const dim4 = 4;
+
+ typedef vect <bool, dim4> bvect4;
+ typedef vect <int, dim4> ivect4;
+ typedef bbox <int, dim4> ibbox4;
+
+
+
+ template <typename T>
+ void
+ copy_4d (T const * restrict const src,
+ ivect4 const & restrict srcext,
+ T * restrict const dst,
+ ivect4 const & restrict dstext,
+ ibbox4 const & restrict srcbbox,
+ ibbox4 const & restrict dstbbox,
+ ibbox4 const & restrict regbbox);
+
+
+
+ template <typename T>
+ void
+ prolongate_4d_o1_rf2 (T const * restrict const src,
+ ivect4 const & restrict srcext,
+ T * restrict const dst,
+ ivect4 const & restrict dstext,
+ ibbox4 const & restrict srcbbox,
+ ibbox4 const & restrict dstbbox,
+ ibbox4 const & restrict regbbox);
+
+
+
+ template <typename T>
+ void
+ restrict_4d_rf2 (T const * restrict const src,
+ ivect4 const & restrict srcext,
+ T * restrict const dst,
+ ivect4 const & restrict dstext,
+ ibbox4 const & restrict srcbbox,
+ ibbox4 const & restrict dstbbox,
+ ibbox4 const & restrict regbbox);
+
+
+
+} // namespace CarpetLib
+
+
+
+#endif // #ifndef OPERATOR_PROTOTYPES_4D
diff --git a/Carpet/CarpetLib/src/operators.hh b/Carpet/CarpetLib/src/operators.hh
index 8ecd308a4..07e4e5616 100644
--- a/Carpet/CarpetLib/src/operators.hh
+++ b/Carpet/CarpetLib/src/operators.hh
@@ -9,11 +9,13 @@ enum operator_type
op_none, // do not transport
op_sync, // transport only on the same level
// (error if called between levels)
+ op_restrict, // restrict only, do not prolongate
op_copy, // use simple copying for prolongation
// (needs only one time level)
op_Lagrange, // Lagrange interpolation (standard)
op_ENO, // use ENO stencils (for hydro)
- op_WENO // use WENO stencils (for hydro)
+ op_WENO, // use WENO stencils (for hydro)
+ op_Lagrange_monotone // monotone Lagrange interpolation (for hydro)
};
#endif // OPERATORS_HH
diff --git a/Carpet/CarpetLib/src/prolongate_3d_cc_o0_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_cc_o0_rf2.cc
new file mode 100644
index 000000000..352f4c380
--- /dev/null
+++ b/Carpet/CarpetLib/src/prolongate_3d_cc_o0_rf2.cc
@@ -0,0 +1,320 @@
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <cstdlib>
+
+#include <cctk.h>
+#include <cctk_Parameters.h>
+
+#include "operator_prototypes_3d.hh"
+#include "typeprops.hh"
+
+using namespace std;
+
+
+
+namespace CarpetLib {
+
+
+
+#define SRCIND3(i,j,k) \
+ index3 (i, j, k, \
+ srciext, srcjext, srckext)
+#define DSTIND3(i,j,k) \
+ index3 (i, j, k, \
+ dstiext, dstjext, dstkext)
+
+
+
+ // Prolongation by a refinement factor of two with a zeroth-order
+ // (piecewise constant) operator for the "cc" (presumably
+ // cell-centred) grid layout: every destination element in the
+ // region is a plain copy of one source element.
+ //
+ //   src, srcext       source array and its number of elements per
+ //                     direction
+ //   dst, dstext       destination array and its number of elements
+ //                     per direction
+ //   srcbbox, dstbbox  bounding boxes of src and dst; the source
+ //                     stride must be twice the destination stride
+ //   regbbox           region of dst to fill; it must have the
+ //                     destination stride, be non-empty, lie inside
+ //                     dstbbox and (after growing it by the offsets
+ //                     computed below) inside srcbbox
+ //
+ // Every violated precondition is reported through CCTK_WARN at
+ // level 0.
+ template <typename T>
+ void
+ prolongate_3d_cc_o0_rf2 (T const * restrict const src,
+                          ivect3 const & restrict srcext,
+                          T * restrict const dst,
+                          ivect3 const & restrict dstext,
+                          ibbox3 const & restrict srcbbox,
+                          ibbox3 const & restrict dstbbox,
+                          ibbox3 const & restrict regbbox)
+ {
+   if (any (srcbbox.stride() <= regbbox.stride() or
+            dstbbox.stride() != regbbox.stride()))
+   {
+     CCTK_WARN (0, "Internal error: strides disagree");
+   }
+
+   if (any (srcbbox.stride() != reffact2 * dstbbox.stride())) {
+     CCTK_WARN (0, "Internal error: source strides are not twice the destination strides");
+   }
+
+   if (any (dstbbox.stride() % 2 != 0)) {
+     CCTK_WARN (0, "Internal error: destination strides are not even");
+   }
+
+   // This could be handled, but is likely to point to an error
+   // elsewhere
+   if (regbbox.empty()) {
+     CCTK_WARN (0, "Internal error: region extent is empty");
+   }
+
+
+
+   // Region size and the offsets of its lower corner inside the source
+   // and destination arrays, all counted in units of the region stride.
+   // The source offset carries an extra half-stride shift.
+   ivect3 const regext = regbbox.shape() / regbbox.stride();
+   assert (all ((regbbox.lower() - srcbbox.lower() + regbbox.stride() / 2) % regbbox.stride() == 0));
+   ivect3 const srcoff = (regbbox.lower() - srcbbox.lower() + regbbox.stride() / 2) / regbbox.stride();
+   assert (all ((regbbox.lower() - dstbbox.lower()) % regbbox.stride() == 0));
+   ivect3 const dstoff = (regbbox.lower() - dstbbox.lower()) / regbbox.stride();
+
+
+
+   // Grow the region by one point on a side only where its first or
+   // last point has an odd source offset
+   bvect3 const needoffsetlo = srcoff % reffact2 != 0;
+   bvect3 const needoffsethi = (srcoff + regext - 1) % reffact2 != 0;
+   ivect3 const offsetlo = either (needoffsetlo, 1, 0);
+   ivect3 const offsethi = either (needoffsethi, 1, 0);
+
+
+
+   // The body of this check was missing, which turned the array-size
+   // check below into its body: a region outside the arrays was never
+   // reported, and a size mismatch was reported only for such a region.
+   if (not regbbox.expand(offsetlo, offsethi).is_contained_in(srcbbox) or
+       not regbbox                           .is_contained_in(dstbbox))
+   {
+     CCTK_WARN (0, "Internal error: region extent is not contained in array extent");
+   }
+
+   if (any (srcext != srcbbox.shape() / srcbbox.stride() or
+            dstext != dstbbox.shape() / dstbbox.stride()))
+   {
+     CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes");
+   }
+
+
+
+   // The *ext names below are used by the SRCIND3 and DSTIND3 macros
+   size_t const srciext = srcext[0];
+   size_t const srcjext = srcext[1];
+   size_t const srckext = srcext[2];
+
+   size_t const dstiext = dstext[0];
+   size_t const dstjext = dstext[1];
+   size_t const dstkext = dstext[2];
+
+   size_t const regiext = regext[0];
+   size_t const regjext = regext[1];
+   size_t const regkext = regext[2];
+
+   size_t const srcioff = srcoff[0];
+   size_t const srcjoff = srcoff[1];
+   size_t const srckoff = srcoff[2];
+
+   size_t const dstioff = dstoff[0];
+   size_t const dstjoff = dstoff[1];
+   size_t const dstkoff = dstoff[2];
+
+
+
+   // Loop over fine region.  Region point (i,j,k) reads the source
+   // element at (srcoff + (i,j,k)) / 2 in each direction, so two
+   // consecutive region points share one source element.  This visits
+   // the same elements in the same order as the earlier goto-based
+   // kernels, all eight of which performed this same copy.
+   for (size_t k = 0; k < regkext; ++k) {
+     size_t const ks = (srckoff + k) / 2;
+     size_t const kd = dstkoff + k;
+
+     for (size_t j = 0; j < regjext; ++j) {
+       size_t const js = (srcjoff + j) / 2;
+       size_t const jd = dstjoff + j;
+
+       for (size_t i = 0; i < regiext; ++i) {
+         size_t const is = (srcioff + i) / 2;
+         size_t const id = dstioff + i;
+
+         dst[DSTIND3(id,jd,kd)] = src[SRCIND3(is,js,ks)];
+       }
+     }
+   }
+
+ }
+
+
+
+#define INSTANTIATE(T) \
+ template \
+ void \
+ prolongate_3d_cc_o0_rf2 (T const * restrict const src, \
+ ivect3 const & restrict srcext, \
+ T * restrict const dst, \
+ ivect3 const & restrict dstext, \
+ ibbox3 const & restrict srcbbox, \
+ ibbox3 const & restrict dstbbox, \
+ ibbox3 const & restrict regbbox);
+#include "instantiate"
+#undef INSTANTIATE
+
+
+
+} // CarpetLib
diff --git a/Carpet/CarpetLib/src/prolongate_3d_cc_o1_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_cc_o1_rf2.cc
new file mode 100644
index 000000000..42fc078ee
--- /dev/null
+++ b/Carpet/CarpetLib/src/prolongate_3d_cc_o1_rf2.cc
@@ -0,0 +1,390 @@
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <cstdlib>
+
+#include <cctk.h>
+#include <cctk_Parameters.h>
+
+#include "operator_prototypes_3d.hh"
+#include "typeprops.hh"
+
+using namespace std;
+
+
+
+namespace CarpetLib {
+
+
+
+#define SRCIND3(i,j,k) \
+ index3 (i, j, k, \
+ srciext, srcjext, srckext)
+#define DSTIND3(i,j,k) \
+ index3 (i, j, k, \
+ dstiext, dstjext, dstkext)
+
+
+
+ // Prolongation by a refinement factor of two with a first-order
+ // operator for the "cc" (presumably cell-centred) grid layout.  Every
+ // destination element in the region is a weighted sum of 2x2x2
+ // neighbouring source elements; per direction the two weights are
+ // 3/4 and 1/4.
+ //
+ //   src, srcext       source array and its number of elements per
+ //                     direction
+ //   dst, dstext       destination array and its number of elements
+ //                     per direction
+ //   srcbbox, dstbbox  bounding boxes of src and dst; the source
+ //                     stride must be twice the destination stride
+ //   regbbox           region of dst to fill; it must have the
+ //                     destination stride, be non-empty, lie inside
+ //                     dstbbox and, grown by one point on every side,
+ //                     inside srcbbox
+ //
+ // Every violated precondition is reported through CCTK_WARN at
+ // level 0.
+ template <typename T>
+ void
+ prolongate_3d_cc_o1_rf2 (T const * restrict const src,
+                          ivect3 const & restrict srcext,
+                          T * restrict const dst,
+                          ivect3 const & restrict dstext,
+                          ibbox3 const & restrict srcbbox,
+                          ibbox3 const & restrict dstbbox,
+                          ibbox3 const & restrict regbbox)
+ {
+   typedef typename typeprops<T>::real RT;
+
+   ivect3 const srcstr = srcbbox.stride();
+   ivect3 const dststr = dstbbox.stride();
+   ivect3 const regstr = regbbox.stride();
+
+   // Stride checks
+   if (any (srcstr <= regstr or
+            dststr != regstr))
+   {
+     CCTK_WARN (0, "Internal error: strides disagree");
+   }
+
+   if (any (srcstr != reffact2 * dststr)) {
+     CCTK_WARN (0, "Internal error: source strides are not twice the destination strides");
+   }
+
+   if (any (dststr % 2 != 0)) {
+     CCTK_WARN (0, "Internal error: destination strides are not even");
+   }
+
+   // An empty region could be handled, but it most likely indicates a
+   // mistake made elsewhere
+   if (regbbox.empty()) {
+     CCTK_WARN (0, "Internal error: region extent is empty");
+   }
+
+   // Region size and the offsets of its lower corner inside the source
+   // and destination arrays, in units of the region stride.  The
+   // source offset carries an extra half-stride shift.
+   ivect3 const regext = regbbox.shape() / regstr;
+   ivect3 const srcdist = regbbox.lower() - srcbbox.lower() + regstr / 2;
+   assert (all (srcdist % regstr == 0));
+   ivect3 const srcoff = srcdist / regstr;
+   ivect3 const dstdist = regbbox.lower() - dstbbox.lower();
+   assert (all (dstdist % regstr == 0));
+   ivect3 const dstoff = dstdist / regstr;
+
+   // The region is grown by one point on every side before testing
+   // that it fits into the source
+   ivect3 const growlo = 1;
+   ivect3 const growhi = 1;
+
+   if (not regbbox.expand(growlo, growhi).is_contained_in(srcbbox) or
+       not regbbox                       .is_contained_in(dstbbox))
+   {
+     CCTK_WARN (0, "Internal error: region extent is not contained in array extent");
+   }
+
+   if (any (srcext != srcbbox.shape() / srcstr or
+            dstext != dstbbox.shape() / dststr))
+   {
+     CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes");
+   }
+
+   // The *ext names below are used by the SRCIND3 and DSTIND3 macros
+   size_t const srciext = srcext[0];
+   size_t const srcjext = srcext[1];
+   size_t const srckext = srcext[2];
+
+   size_t const dstiext = dstext[0];
+   size_t const dstjext = dstext[1];
+   size_t const dstkext = dstext[2];
+
+   size_t const ni = regext[0];
+   size_t const nj = regext[1];
+   size_t const nk = regext[2];
+
+   size_t const soffi = srcoff[0];
+   size_t const soffj = srcoff[1];
+   size_t const soffk = srcoff[2];
+
+   size_t const doffi = dstoff[0];
+   size_t const doffj = dstoff[1];
+   size_t const doffk = dstoff[2];
+
+   RT const one = 1;
+   RT const wfar  = one/4;
+   RT const wnear = 3*one/4;
+
+   // Loop over the fine region.  In each direction the position
+   // p = srcoff + index selects the source pair:
+   //   p even: elements p/2-1 and p/2 with weights 1/4 and 3/4
+   //   p odd:  elements p/2 and p/2+1 with weights 3/4 and 1/4
+   // The lower element is always summed first, i fastest and k slowest.
+   // Each loop body runs at least once; the region was checked to be
+   // non-empty above.
+   size_t k = 0;
+   do {
+     size_t const kpos = soffk + k;
+     bool   const kodd = kpos % 2 != 0;
+     size_t const k0   = kodd ? kpos / 2 : kpos / 2 - 1;
+     size_t const k1   = k0 + 1;
+     RT     const wk0  = kodd ? wnear : wfar;
+     RT     const wk1  = kodd ? wfar  : wnear;
+     size_t const kd   = doffk + k;
+
+     size_t j = 0;
+     do {
+       size_t const jpos = soffj + j;
+       bool   const jodd = jpos % 2 != 0;
+       size_t const j0   = jodd ? jpos / 2 : jpos / 2 - 1;
+       size_t const j1   = j0 + 1;
+       RT     const wj0  = jodd ? wnear : wfar;
+       RT     const wj1  = jodd ? wfar  : wnear;
+       size_t const jd   = doffj + j;
+
+       size_t i = 0;
+       do {
+         size_t const ipos = soffi + i;
+         bool   const iodd = ipos % 2 != 0;
+         size_t const i0   = iodd ? ipos / 2 : ipos / 2 - 1;
+         size_t const i1   = i0 + 1;
+         RT     const wi0  = iodd ? wnear : wfar;
+         RT     const wi1  = iodd ? wfar  : wnear;
+         size_t const id   = doffi + i;
+
+         dst[DSTIND3(id,jd,kd)] =
+           + wi0*wj0*wk0 * src[SRCIND3(i0,j0,k0)]
+           + wi1*wj0*wk0 * src[SRCIND3(i1,j0,k0)]
+           + wi0*wj1*wk0 * src[SRCIND3(i0,j1,k0)]
+           + wi1*wj1*wk0 * src[SRCIND3(i1,j1,k0)]
+           + wi0*wj0*wk1 * src[SRCIND3(i0,j0,k1)]
+           + wi1*wj0*wk1 * src[SRCIND3(i1,j0,k1)]
+           + wi0*wj1*wk1 * src[SRCIND3(i0,j1,k1)]
+           + wi1*wj1*wk1 * src[SRCIND3(i1,j1,k1)];
+
+       } while (++i < ni);
+
+     } while (++j < nj);
+
+   } while (++k < nk);
+
+ }
+
+
+
+#define INSTANTIATE(T) \
+ template \
+ void \
+ prolongate_3d_cc_o1_rf2 (T const * restrict const src, \
+ ivect3 const & restrict srcext, \
+ T * restrict const dst, \
+ ivect3 const & restrict dstext, \
+ ibbox3 const & restrict srcbbox, \
+ ibbox3 const & restrict dstbbox, \
+ ibbox3 const & restrict regbbox);
+#include "instantiate"
+#undef INSTANTIATE
+
+
+
+} // CarpetLib
diff --git a/Carpet/CarpetLib/src/prolongate_3d_cc_o2_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_cc_o2_rf2.cc
new file mode 100644
index 000000000..f55dfe9fa
--- /dev/null
+++ b/Carpet/CarpetLib/src/prolongate_3d_cc_o2_rf2.cc
@@ -0,0 +1,545 @@
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <cstdlib>
+
+#include <cctk.h>
+#include <cctk_Parameters.h>
+
+#include "operator_prototypes_3d.hh"
+#include "typeprops.hh"
+
+using namespace std;
+
+
+
+namespace CarpetLib {
+
+
+ // Index of point (i,j,k) in the source / destination array as computed by index3 from the array extents; srciext.. / dstiext.. must be in scope where the macros are used
+#define SRCIND3(i,j,k) \
+ index3 (i, j, k, \
+ srciext, srcjext, srckext)
+#define DSTIND3(i,j,k) \
+ index3 (i, j, k, \
+ dstiext, dstjext, dstkext)
+ // Fill the points of dst covered by regbbox from src: every destination value is a weighted sum of a 3x3x3 block of source values, the weight being a product of one of f1, f2, f3 per axis (the name suggests cell-centred, second order, refinement factor 2).
+ // src and dst are indexed through SRCIND3 / DSTIND3 with extents srcext / dstext; srcbbox and dstbbox describe the two arrays and regbbox the region to fill.
+ // Inconsistent strides, an empty region, a region not contained in the arrays, or array sizes that disagree with the bounding boxes are reported through CCTK_WARN at level 0.
+ template <typename T>
+ void
+ prolongate_3d_cc_o2_rf2 (T const * restrict const src,
+ ivect3 const & restrict srcext,
+ T * restrict const dst,
+ ivect3 const & restrict dstext,
+ ibbox3 const & restrict srcbbox,
+ ibbox3 const & restrict dstbbox,
+ ibbox3 const & restrict regbbox)
+ {
+ typedef typename typeprops<T>::real RT;
+
+
+ // Consistency checks on the strides of the three bounding boxes
+ if (any (srcbbox.stride() <= regbbox.stride() or
+ dstbbox.stride() != regbbox.stride()))
+ {
+ CCTK_WARN (0, "Internal error: strides disagree");
+ }
+
+ if (any (srcbbox.stride() != reffact2 * dstbbox.stride())) {
+ CCTK_WARN (0, "Internal error: source strides are not twice the destination strides");
+ }
+
+ if (any (dstbbox.stride() % 2 != 0)) {
+ CCTK_WARN (0, "Internal error: destination strides are not even");
+ }
+
+ // This could be handled, but is likely to point to an error
+ // elsewhere
+ if (regbbox.empty()) {
+ CCTK_WARN (0, "Internal error: region extent is empty");
+ }
+
+
+ // Number of points in the region and its offsets into the source and destination arrays, all measured in units of the region stride (the source offset is shifted by half a stride)
+ ivect3 const regext = regbbox.shape() / regbbox.stride();
+ assert (all ((regbbox.lower() - srcbbox.lower() + regbbox.stride() / 2) % regbbox.stride() == 0));
+ ivect3 const srcoff = (regbbox.lower() - srcbbox.lower() + regbbox.stride() / 2) / regbbox.stride();
+ assert (all ((regbbox.lower() - dstbbox.lower()) % regbbox.stride() == 0));
+ ivect3 const dstoff = (regbbox.lower() - dstbbox.lower()) / regbbox.stride();
+
+
+ // Amounts by which regbbox is expanded before the containment check below: presumably 2 where the parity test holds and 1 otherwise (the semantics of either() and expand() are not visible here -- confirm)
+ bvect3 const needoffsetlo = srcoff % reffact2 != 0;
+ bvect3 const needoffsethi = (srcoff + regext - 1) % reffact2 != 0;
+ ivect3 const offsetlo = either (needoffsetlo, 2, 1);
+ ivect3 const offsethi = either (needoffsethi, 2, 1);
+
+
+
+ if (not regbbox.expand(offsetlo, offsethi).is_contained_in(srcbbox) or
+ not regbbox .is_contained_in(dstbbox))
+ {
+ CCTK_WARN (0, "Internal error: region extent is not contained in array extent");
+ }
+
+ if (any (srcext != srcbbox.shape() / srcbbox.stride() or
+ dstext != dstbbox.shape() / dstbbox.stride()))
+ {
+ CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes");
+ }
+
+
+ // Unpack extents and offsets into scalars (the ..ext names are the ones used by SRCIND3 / DSTIND3)
+ size_t const srciext = srcext[0];
+ size_t const srcjext = srcext[1];
+ size_t const srckext = srcext[2];
+
+ size_t const dstiext = dstext[0];
+ size_t const dstjext = dstext[1];
+ size_t const dstkext = dstext[2];
+
+ size_t const regiext = regext[0];
+ size_t const regjext = regext[1];
+ size_t const regkext = regext[2];
+
+ size_t const srcioff = srcoff[0];
+ size_t const srcjoff = srcoff[1];
+ size_t const srckoff = srcoff[2];
+
+ size_t const dstioff = dstoff[0];
+ size_t const dstjoff = dstoff[1];
+ size_t const dstkoff = dstoff[2];
+
+
+ // Parity (fi,fj,fk) and half (i0,j0,k0) of the source offset: the parity selects the kernel each loop starts with, the half is the starting source index
+ size_t const fi = srcioff % 2;
+ size_t const fj = srcjoff % 2;
+ size_t const fk = srckoff % 2;
+
+ size_t const i0 = srcioff / 2;
+ size_t const j0 = srcjoff / 2;
+ size_t const k0 = srckoff / 2;
+
+ RT const one = 1;
+ // Per-axis weights; they sum to one and equal the quadratic Lagrange weights of nodes -1, 0, +1 at position -1/4 (applied in that order for parity 0, mirrored for parity 1)
+ RT const f1 = 5*one/32;
+ RT const f2 = 30*one/32;
+ RT const f3 = -3*one/32;
+
+
+
+ // Loop over fine region
+ // Label scheme: l 8 fk fj fi
+ // (l8xyz is the kernel for parity fk=x, fj=y, fi=z; l8xy / l8x start the i / j loop for that parity; the l9.. labels end the corresponding loops)
+ size_t is, js, ks;
+ size_t id, jd, kd;
+ size_t i, j, k;
+
+ // begin k loop
+ k = 0;
+ ks = k0;
+ kd = dstkoff;
+ if (fk == 0) goto l80;
+ goto l81;
+
+ // begin j loop
+ l80:
+ j = 0;
+ js = j0;
+ jd = dstjoff;
+ if (fj == 0) goto l800;
+ goto l801;
+
+ // begin i loop
+ l800:
+ i = 0;
+ is = i0;
+ id = dstioff;
+ if (fi == 0) goto l8000;
+ goto l8001;
+
+ // kernel: fk=0, fj=0, fi=0 (is is not advanced afterwards; the next point has fi=1)
+ l8000:
+ dst[DSTIND3(id,jd,kd)] =
+ + f1*f1*f1 * src[SRCIND3(is-1,js-1,ks-1)]
+ + f2*f1*f1 * src[SRCIND3(is ,js-1,ks-1)]
+ + f3*f1*f1 * src[SRCIND3(is+1,js-1,ks-1)]
+ + f1*f2*f1 * src[SRCIND3(is-1,js ,ks-1)]
+ + f2*f2*f1 * src[SRCIND3(is ,js ,ks-1)]
+ + f3*f2*f1 * src[SRCIND3(is+1,js ,ks-1)]
+ + f1*f3*f1 * src[SRCIND3(is-1,js+1,ks-1)]
+ + f2*f3*f1 * src[SRCIND3(is ,js+1,ks-1)]
+ + f3*f3*f1 * src[SRCIND3(is+1,js+1,ks-1)]
+ + f1*f1*f2 * src[SRCIND3(is-1,js-1,ks )]
+ + f2*f1*f2 * src[SRCIND3(is ,js-1,ks )]
+ + f3*f1*f2 * src[SRCIND3(is+1,js-1,ks )]
+ + f1*f2*f2 * src[SRCIND3(is-1,js ,ks )]
+ + f2*f2*f2 * src[SRCIND3(is ,js ,ks )]
+ + f3*f2*f2 * src[SRCIND3(is+1,js ,ks )]
+ + f1*f3*f2 * src[SRCIND3(is-1,js+1,ks )]
+ + f2*f3*f2 * src[SRCIND3(is ,js+1,ks )]
+ + f3*f3*f2 * src[SRCIND3(is+1,js+1,ks )]
+ + f1*f1*f3 * src[SRCIND3(is-1,js-1,ks+1)]
+ + f2*f1*f3 * src[SRCIND3(is ,js-1,ks+1)]
+ + f3*f1*f3 * src[SRCIND3(is+1,js-1,ks+1)]
+ + f1*f2*f3 * src[SRCIND3(is-1,js ,ks+1)]
+ + f2*f2*f3 * src[SRCIND3(is ,js ,ks+1)]
+ + f3*f2*f3 * src[SRCIND3(is+1,js ,ks+1)]
+ + f1*f3*f3 * src[SRCIND3(is-1,js+1,ks+1)]
+ + f2*f3*f3 * src[SRCIND3(is ,js+1,ks+1)]
+ + f3*f3*f3 * src[SRCIND3(is+1,js+1,ks+1)];
+ i = i+1;
+ id = id+1;
+ if (i < regiext) goto l8001;
+ goto l900;
+
+ // kernel: fk=0, fj=0, fi=1 (is is advanced afterwards; the next point has fi=0)
+ l8001:
+ dst[DSTIND3(id,jd,kd)] =
+ + f3*f1*f1 * src[SRCIND3(is-1,js-1,ks-1)]
+ + f2*f1*f1 * src[SRCIND3(is ,js-1,ks-1)]
+ + f1*f1*f1 * src[SRCIND3(is+1,js-1,ks-1)]
+ + f3*f2*f1 * src[SRCIND3(is-1,js ,ks-1)]
+ + f2*f2*f1 * src[SRCIND3(is ,js ,ks-1)]
+ + f1*f2*f1 * src[SRCIND3(is+1,js ,ks-1)]
+ + f3*f3*f1 * src[SRCIND3(is-1,js+1,ks-1)]
+ + f2*f3*f1 * src[SRCIND3(is ,js+1,ks-1)]
+ + f1*f3*f1 * src[SRCIND3(is+1,js+1,ks-1)]
+ + f3*f1*f2 * src[SRCIND3(is-1,js-1,ks )]
+ + f2*f1*f2 * src[SRCIND3(is ,js-1,ks )]
+ + f1*f1*f2 * src[SRCIND3(is+1,js-1,ks )]
+ + f3*f2*f2 * src[SRCIND3(is-1,js ,ks )]
+ + f2*f2*f2 * src[SRCIND3(is ,js ,ks )]
+ + f1*f2*f2 * src[SRCIND3(is+1,js ,ks )]
+ + f3*f3*f2 * src[SRCIND3(is-1,js+1,ks )]
+ + f2*f3*f2 * src[SRCIND3(is ,js+1,ks )]
+ + f1*f3*f2 * src[SRCIND3(is+1,js+1,ks )]
+ + f3*f1*f3 * src[SRCIND3(is-1,js-1,ks+1)]
+ + f2*f1*f3 * src[SRCIND3(is ,js-1,ks+1)]
+ + f1*f1*f3 * src[SRCIND3(is+1,js-1,ks+1)]
+ + f3*f2*f3 * src[SRCIND3(is-1,js ,ks+1)]
+ + f2*f2*f3 * src[SRCIND3(is ,js ,ks+1)]
+ + f1*f2*f3 * src[SRCIND3(is+1,js ,ks+1)]
+ + f3*f3*f3 * src[SRCIND3(is-1,js+1,ks+1)]
+ + f2*f3*f3 * src[SRCIND3(is ,js+1,ks+1)]
+ + f1*f3*f3 * src[SRCIND3(is+1,js+1,ks+1)];
+ i = i+1;
+ id = id+1;
+ is = is+1;
+ if (i < regiext) goto l8000;
+ goto l900;
+
+ // end i loop (row with fk=0, fj=0 done; js is not advanced, the next row has fj=1)
+ l900:
+ j = j+1;
+ jd = jd+1;
+ if (j < regjext) goto l801;
+ goto l90;
+
+ // begin i loop
+ l801:
+ i = 0;
+ is = i0;
+ id = dstioff;
+ if (fi == 0) goto l8010;
+ goto l8011;
+
+ // kernel: fk=0, fj=1, fi=0
+ l8010:
+ dst[DSTIND3(id,jd,kd)] =
+ + f1*f3*f1 * src[SRCIND3(is-1,js-1,ks-1)]
+ + f2*f3*f1 * src[SRCIND3(is ,js-1,ks-1)]
+ + f3*f3*f1 * src[SRCIND3(is+1,js-1,ks-1)]
+ + f1*f2*f1 * src[SRCIND3(is-1,js ,ks-1)]
+ + f2*f2*f1 * src[SRCIND3(is ,js ,ks-1)]
+ + f3*f2*f1 * src[SRCIND3(is+1,js ,ks-1)]
+ + f1*f1*f1 * src[SRCIND3(is-1,js+1,ks-1)]
+ + f2*f1*f1 * src[SRCIND3(is ,js+1,ks-1)]
+ + f3*f1*f1 * src[SRCIND3(is+1,js+1,ks-1)]
+ + f1*f3*f2 * src[SRCIND3(is-1,js-1,ks )]
+ + f2*f3*f2 * src[SRCIND3(is ,js-1,ks )]
+ + f3*f3*f2 * src[SRCIND3(is+1,js-1,ks )]
+ + f1*f2*f2 * src[SRCIND3(is-1,js ,ks )]
+ + f2*f2*f2 * src[SRCIND3(is ,js ,ks )]
+ + f3*f2*f2 * src[SRCIND3(is+1,js ,ks )]
+ + f1*f1*f2 * src[SRCIND3(is-1,js+1,ks )]
+ + f2*f1*f2 * src[SRCIND3(is ,js+1,ks )]
+ + f3*f1*f2 * src[SRCIND3(is+1,js+1,ks )]
+ + f1*f3*f3 * src[SRCIND3(is-1,js-1,ks+1)]
+ + f2*f3*f3 * src[SRCIND3(is ,js-1,ks+1)]
+ + f3*f3*f3 * src[SRCIND3(is+1,js-1,ks+1)]
+ + f1*f2*f3 * src[SRCIND3(is-1,js ,ks+1)]
+ + f2*f2*f3 * src[SRCIND3(is ,js ,ks+1)]
+ + f3*f2*f3 * src[SRCIND3(is+1,js ,ks+1)]
+ + f1*f1*f3 * src[SRCIND3(is-1,js+1,ks+1)]
+ + f2*f1*f3 * src[SRCIND3(is ,js+1,ks+1)]
+ + f3*f1*f3 * src[SRCIND3(is+1,js+1,ks+1)];
+ i = i+1;
+ id = id+1;
+ if (i < regiext) goto l8011;
+ goto l901;
+
+ // kernel: fk=0, fj=1, fi=1
+ l8011:
+ dst[DSTIND3(id,jd,kd)] =
+ + f3*f3*f1 * src[SRCIND3(is-1,js-1,ks-1)]
+ + f2*f3*f1 * src[SRCIND3(is ,js-1,ks-1)]
+ + f1*f3*f1 * src[SRCIND3(is+1,js-1,ks-1)]
+ + f3*f2*f1 * src[SRCIND3(is-1,js ,ks-1)]
+ + f2*f2*f1 * src[SRCIND3(is ,js ,ks-1)]
+ + f1*f2*f1 * src[SRCIND3(is+1,js ,ks-1)]
+ + f3*f1*f1 * src[SRCIND3(is-1,js+1,ks-1)]
+ + f2*f1*f1 * src[SRCIND3(is ,js+1,ks-1)]
+ + f1*f1*f1 * src[SRCIND3(is+1,js+1,ks-1)]
+ + f3*f3*f2 * src[SRCIND3(is-1,js-1,ks )]
+ + f2*f3*f2 * src[SRCIND3(is ,js-1,ks )]
+ + f1*f3*f2 * src[SRCIND3(is+1,js-1,ks )]
+ + f3*f2*f2 * src[SRCIND3(is-1,js ,ks )]
+ + f2*f2*f2 * src[SRCIND3(is ,js ,ks )]
+ + f1*f2*f2 * src[SRCIND3(is+1,js ,ks )]
+ + f3*f1*f2 * src[SRCIND3(is-1,js+1,ks )]
+ + f2*f1*f2 * src[SRCIND3(is ,js+1,ks )]
+ + f1*f1*f2 * src[SRCIND3(is+1,js+1,ks )]
+ + f3*f3*f3 * src[SRCIND3(is-1,js-1,ks+1)]
+ + f2*f3*f3 * src[SRCIND3(is ,js-1,ks+1)]
+ + f1*f3*f3 * src[SRCIND3(is+1,js-1,ks+1)]
+ + f3*f2*f3 * src[SRCIND3(is-1,js ,ks+1)]
+ + f2*f2*f3 * src[SRCIND3(is ,js ,ks+1)]
+ + f1*f2*f3 * src[SRCIND3(is+1,js ,ks+1)]
+ + f3*f1*f3 * src[SRCIND3(is-1,js+1,ks+1)]
+ + f2*f1*f3 * src[SRCIND3(is ,js+1,ks+1)]
+ + f1*f1*f3 * src[SRCIND3(is+1,js+1,ks+1)];
+ i = i+1;
+ id = id+1;
+ is = is+1;
+ if (i < regiext) goto l8010;
+ goto l901;
+
+ // end i loop (row with fk=0, fj=1 done; js is advanced, the next row has fj=0)
+ l901:
+ j = j+1;
+ jd = jd+1;
+ js = js+1;
+ if (j < regjext) goto l800;
+ goto l90;
+
+ // end j loop (plane with fk=0 done; ks is not advanced, the next plane has fk=1)
+ l90:
+ k = k+1;
+ kd = kd+1;
+ if (k < regkext) goto l81;
+ goto l9;
+
+ // begin j loop
+ l81:
+ j = 0;
+ js = j0;
+ jd = dstjoff;
+ if (fj == 0) goto l810;
+ goto l811;
+
+ // begin i loop
+ l810:
+ i = 0;
+ is = i0;
+ id = dstioff;
+ if (fi == 0) goto l8100;
+ goto l8101;
+
+ // kernel: fk=1, fj=0, fi=0
+ l8100:
+ dst[DSTIND3(id,jd,kd)] =
+ + f1*f1*f3 * src[SRCIND3(is-1,js-1,ks-1)]
+ + f2*f1*f3 * src[SRCIND3(is ,js-1,ks-1)]
+ + f3*f1*f3 * src[SRCIND3(is+1,js-1,ks-1)]
+ + f1*f2*f3 * src[SRCIND3(is-1,js ,ks-1)]
+ + f2*f2*f3 * src[SRCIND3(is ,js ,ks-1)]
+ + f3*f2*f3 * src[SRCIND3(is+1,js ,ks-1)]
+ + f1*f3*f3 * src[SRCIND3(is-1,js+1,ks-1)]
+ + f2*f3*f3 * src[SRCIND3(is ,js+1,ks-1)]
+ + f3*f3*f3 * src[SRCIND3(is+1,js+1,ks-1)]
+ + f1*f1*f2 * src[SRCIND3(is-1,js-1,ks )]
+ + f2*f1*f2 * src[SRCIND3(is ,js-1,ks )]
+ + f3*f1*f2 * src[SRCIND3(is+1,js-1,ks )]
+ + f1*f2*f2 * src[SRCIND3(is-1,js ,ks )]
+ + f2*f2*f2 * src[SRCIND3(is ,js ,ks )]
+ + f3*f2*f2 * src[SRCIND3(is+1,js ,ks )]
+ + f1*f3*f2 * src[SRCIND3(is-1,js+1,ks )]
+ + f2*f3*f2 * src[SRCIND3(is ,js+1,ks )]
+ + f3*f3*f2 * src[SRCIND3(is+1,js+1,ks )]
+ + f1*f1*f1 * src[SRCIND3(is-1,js-1,ks+1)]
+ + f2*f1*f1 * src[SRCIND3(is ,js-1,ks+1)]
+ + f3*f1*f1 * src[SRCIND3(is+1,js-1,ks+1)]
+ + f1*f2*f1 * src[SRCIND3(is-1,js ,ks+1)]
+ + f2*f2*f1 * src[SRCIND3(is ,js ,ks+1)]
+ + f3*f2*f1 * src[SRCIND3(is+1,js ,ks+1)]
+ + f1*f3*f1 * src[SRCIND3(is-1,js+1,ks+1)]
+ + f2*f3*f1 * src[SRCIND3(is ,js+1,ks+1)]
+ + f3*f3*f1 * src[SRCIND3(is+1,js+1,ks+1)];
+ i = i+1;
+ id = id+1;
+ if (i < regiext) goto l8101;
+ goto l910;
+
+ // kernel: fk=1, fj=0, fi=1
+ l8101:
+ dst[DSTIND3(id,jd,kd)] =
+ + f3*f1*f3 * src[SRCIND3(is-1,js-1,ks-1)]
+ + f2*f1*f3 * src[SRCIND3(is ,js-1,ks-1)]
+ + f1*f1*f3 * src[SRCIND3(is+1,js-1,ks-1)]
+ + f3*f2*f3 * src[SRCIND3(is-1,js ,ks-1)]
+ + f2*f2*f3 * src[SRCIND3(is ,js ,ks-1)]
+ + f1*f2*f3 * src[SRCIND3(is+1,js ,ks-1)]
+ + f3*f3*f3 * src[SRCIND3(is-1,js+1,ks-1)]
+ + f2*f3*f3 * src[SRCIND3(is ,js+1,ks-1)]
+ + f1*f3*f3 * src[SRCIND3(is+1,js+1,ks-1)]
+ + f3*f1*f2 * src[SRCIND3(is-1,js-1,ks )]
+ + f2*f1*f2 * src[SRCIND3(is ,js-1,ks )]
+ + f1*f1*f2 * src[SRCIND3(is+1,js-1,ks )]
+ + f3*f2*f2 * src[SRCIND3(is-1,js ,ks )]
+ + f2*f2*f2 * src[SRCIND3(is ,js ,ks )]
+ + f1*f2*f2 * src[SRCIND3(is+1,js ,ks )]
+ + f3*f3*f2 * src[SRCIND3(is-1,js+1,ks )]
+ + f2*f3*f2 * src[SRCIND3(is ,js+1,ks )]
+ + f1*f3*f2 * src[SRCIND3(is+1,js+1,ks )]
+ + f3*f1*f1 * src[SRCIND3(is-1,js-1,ks+1)]
+ + f2*f1*f1 * src[SRCIND3(is ,js-1,ks+1)]
+ + f1*f1*f1 * src[SRCIND3(is+1,js-1,ks+1)]
+ + f3*f2*f1 * src[SRCIND3(is-1,js ,ks+1)]
+ + f2*f2*f1 * src[SRCIND3(is ,js ,ks+1)]
+ + f1*f2*f1 * src[SRCIND3(is+1,js ,ks+1)]
+ + f3*f3*f1 * src[SRCIND3(is-1,js+1,ks+1)]
+ + f2*f3*f1 * src[SRCIND3(is ,js+1,ks+1)]
+ + f1*f3*f1 * src[SRCIND3(is+1,js+1,ks+1)];
+ i = i+1;
+ id = id+1;
+ is = is+1;
+ if (i < regiext) goto l8100;
+ goto l910;
+
+ // end i loop (row with fk=1, fj=0 done; js is not advanced, the next row has fj=1)
+ l910:
+ j = j+1;
+ jd = jd+1;
+ if (j < regjext) goto l811;
+ goto l91;
+
+ // begin i loop
+ l811:
+ i = 0;
+ is = i0;
+ id = dstioff;
+ if (fi == 0) goto l8110;
+ goto l8111;
+
+ // kernel: fk=1, fj=1, fi=0
+ l8110:
+ dst[DSTIND3(id,jd,kd)] =
+ + f1*f3*f3 * src[SRCIND3(is-1,js-1,ks-1)]
+ + f2*f3*f3 * src[SRCIND3(is ,js-1,ks-1)]
+ + f3*f3*f3 * src[SRCIND3(is+1,js-1,ks-1)]
+ + f1*f2*f3 * src[SRCIND3(is-1,js ,ks-1)]
+ + f2*f2*f3 * src[SRCIND3(is ,js ,ks-1)]
+ + f3*f2*f3 * src[SRCIND3(is+1,js ,ks-1)]
+ + f1*f1*f3 * src[SRCIND3(is-1,js+1,ks-1)]
+ + f2*f1*f3 * src[SRCIND3(is ,js+1,ks-1)]
+ + f3*f1*f3 * src[SRCIND3(is+1,js+1,ks-1)]
+ + f1*f3*f2 * src[SRCIND3(is-1,js-1,ks )]
+ + f2*f3*f2 * src[SRCIND3(is ,js-1,ks )]
+ + f3*f3*f2 * src[SRCIND3(is+1,js-1,ks )]
+ + f1*f2*f2 * src[SRCIND3(is-1,js ,ks )]
+ + f2*f2*f2 * src[SRCIND3(is ,js ,ks )]
+ + f3*f2*f2 * src[SRCIND3(is+1,js ,ks )]
+ + f1*f1*f2 * src[SRCIND3(is-1,js+1,ks )]
+ + f2*f1*f2 * src[SRCIND3(is ,js+1,ks )]
+ + f3*f1*f2 * src[SRCIND3(is+1,js+1,ks )]
+ + f1*f3*f1 * src[SRCIND3(is-1,js-1,ks+1)]
+ + f2*f3*f1 * src[SRCIND3(is ,js-1,ks+1)]
+ + f3*f3*f1 * src[SRCIND3(is+1,js-1,ks+1)]
+ + f1*f2*f1 * src[SRCIND3(is-1,js ,ks+1)]
+ + f2*f2*f1 * src[SRCIND3(is ,js ,ks+1)]
+ + f3*f2*f1 * src[SRCIND3(is+1,js ,ks+1)]
+ + f1*f1*f1 * src[SRCIND3(is-1,js+1,ks+1)]
+ + f2*f1*f1 * src[SRCIND3(is ,js+1,ks+1)]
+ + f3*f1*f1 * src[SRCIND3(is+1,js+1,ks+1)];
+ i = i+1;
+ id = id+1;
+ if (i < regiext) goto l8111;
+ goto l911;
+
+ // kernel: fk=1, fj=1, fi=1
+ l8111:
+ dst[DSTIND3(id,jd,kd)] =
+ + f3*f3*f3 * src[SRCIND3(is-1,js-1,ks-1)]
+ + f2*f3*f3 * src[SRCIND3(is ,js-1,ks-1)]
+ + f1*f3*f3 * src[SRCIND3(is+1,js-1,ks-1)]
+ + f3*f2*f3 * src[SRCIND3(is-1,js ,ks-1)]
+ + f2*f2*f3 * src[SRCIND3(is ,js ,ks-1)]
+ + f1*f2*f3 * src[SRCIND3(is+1,js ,ks-1)]
+ + f3*f1*f3 * src[SRCIND3(is-1,js+1,ks-1)]
+ + f2*f1*f3 * src[SRCIND3(is ,js+1,ks-1)]
+ + f1*f1*f3 * src[SRCIND3(is+1,js+1,ks-1)]
+ + f3*f3*f2 * src[SRCIND3(is-1,js-1,ks )]
+ + f2*f3*f2 * src[SRCIND3(is ,js-1,ks )]
+ + f1*f3*f2 * src[SRCIND3(is+1,js-1,ks )]
+ + f3*f2*f2 * src[SRCIND3(is-1,js ,ks )]
+ + f2*f2*f2 * src[SRCIND3(is ,js ,ks )]
+ + f1*f2*f2 * src[SRCIND3(is+1,js ,ks )]
+ + f3*f1*f2 * src[SRCIND3(is-1,js+1,ks )]
+ + f2*f1*f2 * src[SRCIND3(is ,js+1,ks )]
+ + f1*f1*f2 * src[SRCIND3(is+1,js+1,ks )]
+ + f3*f3*f1 * src[SRCIND3(is-1,js-1,ks+1)]
+ + f2*f3*f1 * src[SRCIND3(is ,js-1,ks+1)]
+ + f1*f3*f1 * src[SRCIND3(is+1,js-1,ks+1)]
+ + f3*f2*f1 * src[SRCIND3(is-1,js ,ks+1)]
+ + f2*f2*f1 * src[SRCIND3(is ,js ,ks+1)]
+ + f1*f2*f1 * src[SRCIND3(is+1,js ,ks+1)]
+ + f3*f1*f1 * src[SRCIND3(is-1,js+1,ks+1)]
+ + f2*f1*f1 * src[SRCIND3(is ,js+1,ks+1)]
+ + f1*f1*f1 * src[SRCIND3(is+1,js+1,ks+1)];
+ i = i+1;
+ id = id+1;
+ is = is+1;
+ if (i < regiext) goto l8110;
+ goto l911;
+
+ // end i loop (row with fk=1, fj=1 done; js is advanced, the next row has fj=0)
+ l911:
+ j = j+1;
+ jd = jd+1;
+ js = js+1;
+ if (j < regjext) goto l810;
+ goto l91;
+
+ // end j loop (plane with fk=1 done; ks is advanced, the next plane has fk=0)
+ l91:
+ k = k+1;
+ kd = kd+1;
+ ks = ks+1;
+ if (k < regkext) goto l80;
+ goto l9;
+
+ // end k loop
+ l9:;
+
+ }
+
+
+ // Explicit instantiation of prolongate_3d_cc_o2_rf2: the file "instantiate" is included while INSTANTIATE is defined (presumably it expands the macro once per supported type T -- confirm against that file)
+#define INSTANTIATE(T) \
+ template \
+ void \
+ prolongate_3d_cc_o2_rf2 (T const * restrict const src, \
+ ivect3 const & restrict srcext, \
+ T * restrict const dst, \
+ ivect3 const & restrict dstext, \
+ ibbox3 const & restrict srcbbox, \
+ ibbox3 const & restrict dstbbox, \
+ ibbox3 const & restrict regbbox);
+#include "instantiate"
+#undef INSTANTIATE
+
+
+
+} // CarpetLib
diff --git a/Carpet/CarpetLib/src/prolongate_3d_cc_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_cc_rf2.cc
index f58295560..04e00b255 100644
--- a/Carpet/CarpetLib/src/prolongate_3d_cc_rf2.cc
+++ b/Carpet/CarpetLib/src/prolongate_3d_cc_rf2.cc
@@ -11,7 +11,7 @@
#include <cctk.h>
#include <cctk_Parameters.h>
-#include "operator_prototypes.hh"
+#include "operator_prototypes_3d.hh"
#include "typeprops.hh"
using namespace std;
@@ -120,7 +120,7 @@ namespace CarpetLib {
-#pragma omp parallel for
+ // NOTE: This loop is not parallel
for (int k=0; k<regkext; ++k) {
for (int j=0; j<regjext; ++j) {
for (int i=0; i<regiext; ++i) {
@@ -139,13 +139,13 @@ namespace CarpetLib {
// + src [SRCIND2(i-1, j-1)];
// 3D
dst [DSTIND3(i, j, k)] =
- + dst [DSTIND3(i-1, j, k)]
- + dst [DSTIND3(i, j-1, k)]
- + dst [DSTIND3(i, j, k-1)]
- - dst [DSTIND3(i, j-1, k-1)]
- - dst [DSTIND3(i-1, j, k-1)]
- - dst [DSTIND3(i-1, j-1, k)]
+ dst [DSTIND3(i-1, j-1, k-1)]
+ - dst [DSTIND3(i , j-1, k-1)]
+ - dst [DSTIND3(i-1, j , k-1)]
+ + dst [DSTIND3(i , j , k-1)]
+ - dst [DSTIND3(i-1, j-1, k )]
+ + dst [DSTIND3(i , j-1, k )]
+ + dst [DSTIND3(i-1, j , k )]
+ src [SRCIND3(i-1, j-1, k-1)];
}
}
@@ -172,14 +172,14 @@ namespace CarpetLib {
// Convert from the "primitive" form of the grid function to the
- // "standard" version
+ // "standard" version, i.e., the derivative
template <typename T>
void
prolongate_3d_cc_rf2_prim2std (T const * restrict const src,
- ivect const & restrict srcext,
+ ivect3 const & restrict srcext,
T * restrict const dst,
- ivect const & restrict dstext,
+ ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
ibbox3 const & restrict regbbox)
@@ -253,18 +253,19 @@ namespace CarpetLib {
-#pragma omp parallel for
+ // NOTE: This loop is not parallel
for (int k=0; k<regkext; ++k) {
for (int j=0; j<regjext; ++j) {
for (int i=0; i<regiext; ++i) {
- dst [DSTIND3(i, j, k)] = reffact2 *
- (- src [SRCIND3(i, j+1, k+1)]
- - src [SRCIND3(i+1, j, k+1)]
- - src [SRCIND3(i+1, j+1, k)]
- + src [SRCIND3(i+1, j, k)]
- + src [SRCIND3(i, j+1, k)]
- + src [SRCIND3(i, j, k+1)]
- - src [SRCIND3(i, j, k)]
+ dst [DSTIND3(i, j, k)] =
+ (reffact2 * reffact2 * reffact2) *
+ (- src [SRCIND3(i , j , k )]
+ + src [SRCIND3(i+1, j , k )]
+ + src [SRCIND3(i , j+1, k )]
+ - src [SRCIND3(i+1, j+1, k )]
+ + src [SRCIND3(i , j , k+1)]
+ - src [SRCIND3(i+1, j , k+1)]
+ - src [SRCIND3(i , j+1, k+1)]
+ src [SRCIND3(i+1, j+1, k+1)]);
}
}
@@ -278,9 +279,9 @@ namespace CarpetLib {
template \
void \
prolongate_3d_cc_rf2_prim2std (T const * restrict const src, \
- ivect const & restrict srcext, \
+ ivect3 const & restrict srcext, \
T * restrict const dst, \
- ivect const & restrict dstext, \
+ ivect3 const & restrict dstext, \
ibbox3 const & restrict srcbbox, \
ibbox3 const & restrict dstbbox, \
ibbox3 const & restrict regbbox);
diff --git a/Carpet/CarpetLib/src/prolongate_3d_o11_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_o11_rf2.cc
index 6fe17e03d..a1d26660e 100644
--- a/Carpet/CarpetLib/src/prolongate_3d_o11_rf2.cc
+++ b/Carpet/CarpetLib/src/prolongate_3d_o11_rf2.cc
@@ -6,7 +6,7 @@
#include <cctk.h>
#include <cctk_Parameters.h>
-#include "operator_prototypes.hh"
+#include "operator_prototypes_3d.hh"
#include "typeprops.hh"
using namespace std;
@@ -231,7 +231,7 @@ namespace CarpetLib {
goto l81;
// begin j loop
- l80:
+ l80:
j = 0;
js = j0;
jd = dstjoff;
@@ -239,7 +239,7 @@ namespace CarpetLib {
goto l801;
// begin i loop
- l800:
+ l800:
i = 0;
is = i0;
id = dstioff;
@@ -247,7 +247,7 @@ namespace CarpetLib {
goto l8001;
// kernel
- l8000:
+ l8000:
dst[DSTIND3(id,jd,kd)] = interp0<T> (& src[SRCIND3(is,js,ks)]);
i = i+1;
id = id+1;
@@ -255,7 +255,7 @@ namespace CarpetLib {
goto l900;
// kernel
- l8001:
+ l8001:
dst[DSTIND3(id,jd,kd)] = interp1<T> (& src[SRCIND3(is-5,js,ks)], srcdi);
i = i+1;
id = id+1;
@@ -264,14 +264,14 @@ namespace CarpetLib {
goto l900;
// end i loop
- l900:
+ l900:
j = j+1;
jd = jd+1;
if (j < regjext) goto l801;
goto l90;
// begin i loop
- l801:
+ l801:
i = 0;
is = i0;
id = dstioff;
@@ -279,7 +279,7 @@ namespace CarpetLib {
goto l8011;
// kernel
- l8010:
+ l8010:
dst[DSTIND3(id,jd,kd)] = interp1<T> (& src[SRCIND3(is,js-5,ks)], srcdj);
i = i+1;
id = id+1;
@@ -287,7 +287,7 @@ namespace CarpetLib {
goto l901;
// kernel
- l8011:
+ l8011:
dst[DSTIND3(id,jd,kd)] =
interp2<T> (& src[SRCIND3(is-5,js-5,ks)], srcdi, srcdj);
i = i+1;
@@ -297,7 +297,7 @@ namespace CarpetLib {
goto l901;
// end i loop
- l901:
+ l901:
j = j+1;
jd = jd+1;
js = js+1;
@@ -305,14 +305,14 @@ namespace CarpetLib {
goto l90;
// end j loop
- l90:
+ l90:
k = k+1;
kd = kd+1;
if (k < regkext) goto l81;
goto l9;
// begin j loop
- l81:
+ l81:
j = 0;
js = j0;
jd = dstjoff;
@@ -320,7 +320,7 @@ namespace CarpetLib {
goto l811;
// begin i loop
- l810:
+ l810:
i = 0;
is = i0;
id = dstioff;
@@ -328,7 +328,7 @@ namespace CarpetLib {
goto l8101;
// kernel
- l8100:
+ l8100:
dst[DSTIND3(id,jd,kd)] = interp1<T> (& src[SRCIND3(is,js,ks-5)], srcdk);
i = i+1;
id = id+1;
@@ -336,7 +336,7 @@ namespace CarpetLib {
goto l910;
// kernel
- l8101:
+ l8101:
dst[DSTIND3(id,jd,kd)] =
interp2<T> (& src[SRCIND3(is-5,js,ks-5)], srcdi, srcdj);
i = i+1;
@@ -346,14 +346,14 @@ namespace CarpetLib {
goto l910;
// end i loop
- l910:
+ l910:
j = j+1;
jd = jd+1;
if (j < regjext) goto l811;
goto l91;
// begin i loop
- l811:
+ l811:
i = 0;
is = i0;
id = dstioff;
@@ -361,7 +361,7 @@ namespace CarpetLib {
goto l8111;
// kernel
- l8110:
+ l8110:
dst[DSTIND3(id,jd,kd)] =
interp2<T> (& src[SRCIND3(is,js-5,ks-5)], srcdj, srcdk);
i = i+1;
@@ -370,7 +370,7 @@ namespace CarpetLib {
goto l911;
// kernel
- l8111:
+ l8111:
{
dst[DSTIND3(id,jd,kd)] =
interp3<T> (& src[SRCIND3(is-5,js-5,ks-5)], srcdi, srcdj, srcdk);
@@ -382,7 +382,7 @@ namespace CarpetLib {
goto l911;
// end i loop
- l911:
+ l911:
j = j+1;
jd = jd+1;
js = js+1;
@@ -390,7 +390,7 @@ namespace CarpetLib {
goto l91;
// end j loop
- l91:
+ l91:
k = k+1;
kd = kd+1;
ks = ks+1;
@@ -398,7 +398,7 @@ namespace CarpetLib {
goto l9;
// end k loop
- l9:;
+ l9:;
}
diff --git a/Carpet/CarpetLib/src/prolongate_3d_o1_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_o1_rf2.cc
index 72c52f05e..0b1ff0751 100644
--- a/Carpet/CarpetLib/src/prolongate_3d_o1_rf2.cc
+++ b/Carpet/CarpetLib/src/prolongate_3d_o1_rf2.cc
@@ -6,7 +6,7 @@
#include <cctk.h>
#include <cctk_Parameters.h>
-#include "operator_prototypes.hh"
+#include "operator_prototypes_3d.hh"
#include "typeprops.hh"
using namespace std;
@@ -139,7 +139,7 @@ namespace CarpetLib {
goto l81;
// begin j loop
- l80:
+ l80:
j = 0;
js = j0;
jd = dstjoff;
@@ -147,7 +147,7 @@ namespace CarpetLib {
goto l801;
// begin i loop
- l800:
+ l800:
i = 0;
is = i0;
id = dstioff;
@@ -155,7 +155,7 @@ namespace CarpetLib {
goto l8001;
// kernel
- l8000:
+ l8000:
dst[DSTIND3(id,jd,kd)] = src[SRCIND3(is,js,ks)];
i = i+1;
id = id+1;
@@ -163,7 +163,7 @@ namespace CarpetLib {
goto l900;
// kernel
- l8001:
+ l8001:
dst[DSTIND3(id,jd,kd)] =
+ f1 * src[SRCIND3(is ,js,ks)]
+ f2 * src[SRCIND3(is+1,js,ks)];
@@ -174,14 +174,14 @@ namespace CarpetLib {
goto l900;
// end i loop
- l900:
+ l900:
j = j+1;
jd = jd+1;
if (j < regjext) goto l801;
goto l90;
// begin i loop
- l801:
+ l801:
i = 0;
is = i0;
id = dstioff;
@@ -189,7 +189,7 @@ namespace CarpetLib {
goto l8011;
// kernel
- l8010:
+ l8010:
dst[DSTIND3(id,jd,kd)] =
+ f1 * src[SRCIND3(is,js ,ks)]
+ f2 * src[SRCIND3(is,js+1,ks)];
@@ -199,7 +199,7 @@ namespace CarpetLib {
goto l901;
// kernel
- l8011:
+ l8011:
dst[DSTIND3(id,jd,kd)] =
+ f1*f1 * src[SRCIND3(is ,js ,ks)]
+ f2*f1 * src[SRCIND3(is+1,js ,ks)]
@@ -212,7 +212,7 @@ namespace CarpetLib {
goto l901;
// end i loop
- l901:
+ l901:
j = j+1;
jd = jd+1;
js = js+1;
@@ -220,14 +220,14 @@ namespace CarpetLib {
goto l90;
// end j loop
- l90:
+ l90:
k = k+1;
kd = kd+1;
if (k < regkext) goto l81;
goto l9;
// begin j loop
- l81:
+ l81:
j = 0;
js = j0;
jd = dstjoff;
@@ -235,7 +235,7 @@ namespace CarpetLib {
goto l811;
// begin i loop
- l810:
+ l810:
i = 0;
is = i0;
id = dstioff;
@@ -243,7 +243,7 @@ namespace CarpetLib {
goto l8101;
// kernel
- l8100:
+ l8100:
dst[DSTIND3(id,jd,kd)] =
+ f1 * src[SRCIND3(is,js,ks )]
+ f2 * src[SRCIND3(is,js,ks+1)];
@@ -253,7 +253,7 @@ namespace CarpetLib {
goto l910;
// kernel
- l8101:
+ l8101:
dst[DSTIND3(id,jd,kd)] =
+ f1*f1 * src[SRCIND3(is ,js,ks )]
+ f2*f1 * src[SRCIND3(is+1,js,ks )]
@@ -266,14 +266,14 @@ namespace CarpetLib {
goto l910;
// end i loop
- l910:
+ l910:
j = j+1;
jd = jd+1;
if (j < regjext) goto l811;
goto l91;
// begin i loop
- l811:
+ l811:
i = 0;
is = i0;
id = dstioff;
@@ -281,7 +281,7 @@ namespace CarpetLib {
goto l8111;
// kernel
- l8110:
+ l8110:
dst[DSTIND3(id,jd,kd)] =
+ f1*f1 * src[SRCIND3(is,js ,ks )]
+ f2*f1 * src[SRCIND3(is,js+1,ks )]
@@ -293,7 +293,7 @@ namespace CarpetLib {
goto l911;
// kernel
- l8111:
+ l8111:
{
T const res1 =
+ f1*f1*f1 * src[SRCIND3(is ,js ,ks )]
@@ -314,7 +314,7 @@ namespace CarpetLib {
goto l911;
// end i loop
- l911:
+ l911:
j = j+1;
jd = jd+1;
js = js+1;
@@ -322,7 +322,7 @@ namespace CarpetLib {
goto l91;
// end j loop
- l91:
+ l91:
k = k+1;
kd = kd+1;
ks = ks+1;
@@ -330,7 +330,7 @@ namespace CarpetLib {
goto l9;
// end k loop
- l9:;
+ l9:;
}
diff --git a/Carpet/CarpetLib/src/prolongate_3d_o3_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_o3_rf2.cc
index a55bd69da..91b3e5bc1 100644
--- a/Carpet/CarpetLib/src/prolongate_3d_o3_rf2.cc
+++ b/Carpet/CarpetLib/src/prolongate_3d_o3_rf2.cc
@@ -6,7 +6,7 @@
#include <cctk.h>
#include <cctk_Parameters.h>
-#include "operator_prototypes.hh"
+#include "operator_prototypes_3d.hh"
#include "typeprops.hh"
using namespace std;
@@ -31,7 +31,7 @@ namespace CarpetLib {
prolongate_3d_o3_rf2 (T const * restrict const src,
ivect3 const & restrict srcext,
T * restrict const dst,
- ivect const & restrict dstext,
+ ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
ibbox3 const & restrict regbbox)
@@ -143,7 +143,7 @@ namespace CarpetLib {
goto l81;
// begin j loop
- l80:
+ l80:
j = 0;
js = j0;
jd = dstjoff;
@@ -151,7 +151,7 @@ namespace CarpetLib {
goto l801;
// begin i loop
- l800:
+ l800:
i = 0;
is = i0;
id = dstioff;
@@ -159,7 +159,7 @@ namespace CarpetLib {
goto l8001;
// kernel
- l8000:
+ l8000:
dst[DSTIND3(id,jd,kd)] = src[SRCIND3(is,js,ks)];
i = i+1;
id = id+1;
@@ -167,7 +167,7 @@ namespace CarpetLib {
goto l900;
// kernel
- l8001:
+ l8001:
dst[DSTIND3(id,jd,kd)] =
+ f1 * src[SRCIND3(is-1,js,ks)]
+ f2 * src[SRCIND3(is ,js,ks)]
@@ -180,14 +180,14 @@ namespace CarpetLib {
goto l900;
// end i loop
- l900:
+ l900:
j = j+1;
jd = jd+1;
if (j < regjext) goto l801;
goto l90;
// begin i loop
- l801:
+ l801:
i = 0;
is = i0;
id = dstioff;
@@ -195,7 +195,7 @@ namespace CarpetLib {
goto l8011;
// kernel
- l8010:
+ l8010:
dst[DSTIND3(id,jd,kd)] =
+ f1 * src[SRCIND3(is,js-1,ks)]
+ f2 * src[SRCIND3(is,js ,ks)]
@@ -207,7 +207,7 @@ namespace CarpetLib {
goto l901;
// kernel
- l8011:
+ l8011:
dst[DSTIND3(id,jd,kd)] =
+ f1*f1 * src[SRCIND3(is-1,js-1,ks)]
+ f2*f1 * src[SRCIND3(is ,js-1,ks)]
@@ -232,7 +232,7 @@ namespace CarpetLib {
goto l901;
// end i loop
- l901:
+ l901:
j = j+1;
jd = jd+1;
js = js+1;
@@ -240,14 +240,14 @@ namespace CarpetLib {
goto l90;
// end j loop
- l90:
+ l90:
k = k+1;
kd = kd+1;
if (k < regkext) goto l81;
goto l9;
// begin j loop
- l81:
+ l81:
j = 0;
js = j0;
jd = dstjoff;
@@ -255,7 +255,7 @@ namespace CarpetLib {
goto l811;
// begin i loop
- l810:
+ l810:
i = 0;
is = i0;
id = dstioff;
@@ -263,7 +263,7 @@ namespace CarpetLib {
goto l8101;
// kernel
- l8100:
+ l8100:
dst[DSTIND3(id,jd,kd)] =
+ f1 * src[SRCIND3(is,js,ks-1)]
+ f2 * src[SRCIND3(is,js,ks )]
@@ -275,7 +275,7 @@ namespace CarpetLib {
goto l910;
// kernel
- l8101:
+ l8101:
dst[DSTIND3(id,jd,kd)] =
+ f1*f1 * src[SRCIND3(is-1,js,ks-1)]
+ f2*f1 * src[SRCIND3(is ,js,ks-1)]
@@ -300,14 +300,14 @@ namespace CarpetLib {
goto l910;
// end i loop
- l910:
+ l910:
j = j+1;
jd = jd+1;
if (j < regjext) goto l811;
goto l91;
// begin i loop
- l811:
+ l811:
i = 0;
is = i0;
id = dstioff;
@@ -315,7 +315,7 @@ namespace CarpetLib {
goto l8111;
// kernel
- l8110:
+ l8110:
dst[DSTIND3(id,jd,kd)] =
+ f1*f1 * src[SRCIND3(is,js-1,ks-1)]
+ f2*f1 * src[SRCIND3(is,js ,ks-1)]
@@ -339,7 +339,7 @@ namespace CarpetLib {
goto l911;
// kernel
- l8111:
+ l8111:
{
T const res1 =
+ f1*f1*f1 * src[SRCIND3(is-1,js-1,ks-1)]
@@ -418,7 +418,7 @@ namespace CarpetLib {
goto l911;
// end i loop
- l911:
+ l911:
j = j+1;
jd = jd+1;
js = js+1;
@@ -426,7 +426,7 @@ namespace CarpetLib {
goto l91;
// end j loop
- l91:
+ l91:
k = k+1;
kd = kd+1;
ks = ks+1;
@@ -434,7 +434,7 @@ namespace CarpetLib {
goto l9;
// end k loop
- l9:;
+ l9:;
}
diff --git a/Carpet/CarpetLib/src/prolongate_3d_o5_monotone_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_o5_monotone_rf2.cc
new file mode 100644
index 000000000..9ac0b2f7e
--- /dev/null
+++ b/Carpet/CarpetLib/src/prolongate_3d_o5_monotone_rf2.cc
@@ -0,0 +1,851 @@
+// This is meant to reproduce the prolongation algorithm used in the
+// SACRA code (based on IH's interpretation of their papers and
+// comments in talks, so it might be an idea for someone to talk to
+// them! Of course, given that this is "general purpose" and SACRA is
+// very specific in the variables converted, it probably won't be
+// possible to get a perfect reproduction).
+//
+// The idea is to use fifth order Lagrange interpolation based on the
+// nearest 6 points (in any one dimension). However, we must also
+// ensure monotonicity. To do this we check that the fifth order
+// result (which is just copied from prolongate_3d_o5_rf2) lies
+// between the minimum and maximum of the nearest neighbours, and if
+// not we impose linear interpolation instead (from prolongate_3d_o1_rf2).
+//
+// Note that this code does not work for complex GFs (due to the use
+// of the max and min intrinsics).
+
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <cstdlib>
+
+#include <cctk.h>
+#include <cctk_Parameters.h>
+
+#include "operator_prototypes_3d.hh"
+#include "typeprops.hh"
+
+using namespace std;
+
+
+
+namespace CarpetLib {
+
+
+
+#define SRCIND3(i,j,k) \
+ index3 (i, j, k, \
+ srciext, srcjext, srckext)
+#define DSTIND3(i,j,k) \
+ index3 (i, j, k, \
+ dstiext, dstjext, dstkext)
+
+ // Smallest of four values, reduced pairwise with min().
+ template <typename T>
+ inline
+ T
+ min4 (T const & x1, T const & x2, T const & x3, T const & x4)
+ {
+ return min (min(x1, x2), min (x3, x4));
+ }
+ // Largest of four values, reduced pairwise with max().
+ template <typename T>
+ inline
+ T
+ max4 (T const & x1, T const & x2, T const & x3, T const & x4)
+ {
+ return max (max(x1, x2), max (x3, x4));
+ }
+ // Smallest of eight values: the min of two four-value pairwise reductions.
+ template <typename T>
+ inline
+ T
+ min8 (T const & x1, T const & x2, T const & x3, T const & x4,
+ T const & x5, T const & x6, T const & x7, T const & x8)
+ {
+ return min( min (min(x1, x2), min (x3, x4)),
+ min (min(x5, x6), min (x7, x8)) );
+ }
+ // Largest of eight values: the max of two four-value pairwise reductions.
+ template <typename T>
+ inline
+ T
+ max8 (T const & x1, T const & x2, T const & x3, T const & x4,
+ T const & x5, T const & x6, T const & x7, T const & x8)
+ {
+ return max( max (max(x1, x2), max (x3, x4)),
+ max (max(x5, x6), max (x7, x8)) );
+ }
+ // Fill the region regbbox of dst from src using six-point interpolation per
+ // direction, falling back to two-point averaging when the result is not bounded.
+ template <typename T>
+ void
+ prolongate_3d_o5_monotone_rf2 (T const * restrict const src,
+ ivect3 const & restrict srcext,
+ T * restrict const dst,
+ ivect3 const & restrict dstext,
+ ibbox3 const & restrict srcbbox,
+ ibbox3 const & restrict dstbbox,
+ ibbox3 const & restrict regbbox)
+ {
+ typedef typename typeprops<T>::real RT;
+
+ // Consistency checks on strides and extents. Each failure is reported via
+ // CCTK_WARN at level 0 (NOTE(review): presumably aborts the run -- confirm).
+ if (any (srcbbox.stride() <= regbbox.stride() or
+ dstbbox.stride() != regbbox.stride()))
+ {
+ CCTK_WARN (0, "Internal error: strides disagree");
+ }
+
+ if (any (srcbbox.stride() != reffact2 * dstbbox.stride())) {
+ CCTK_WARN (0, "Internal error: source strides are not twice the destination strides");
+ }
+
+ // This could be handled, but is likely to point to an error
+ // elsewhere
+ if (regbbox.empty()) {
+ CCTK_WARN (0, "Internal error: region extent is empty");
+ }
+
+
+ // Region size, and region origin relative to src and dst, in units of regbbox.stride()
+ ivect3 const regext = regbbox.shape() / regbbox.stride();
+ assert (all ((regbbox.lower() - srcbbox.lower()) % regbbox.stride() == 0));
+ ivect3 const srcoff = (regbbox.lower() - srcbbox.lower()) / regbbox.stride();
+ assert (all ((regbbox.lower() - dstbbox.lower()) % regbbox.stride() == 0));
+ ivect3 const dstoff = (regbbox.lower() - dstbbox.lower()) / regbbox.stride();
+
+
+ // Padding used by the containment check below (presumably 3 or 0 per direction, as selected by either() -- confirm)
+ bvect3 const needoffsetlo = srcoff % reffact2 != 0 or regext > 1;
+ bvect3 const needoffsethi = (srcoff + regext - 1) % reffact2 != 0 or regext > 1;
+ ivect3 const offsetlo = either (needoffsetlo, 3, 0);
+ ivect3 const offsethi = either (needoffsethi, 3, 0);
+
+
+ // The expanded region must be contained in srcbbox and the region itself in dstbbox
+ if (not regbbox.expand(offsetlo, offsethi).is_contained_in(srcbbox) or
+ not regbbox .is_contained_in(dstbbox))
+ {
+ CCTK_WARN (0, "Internal error: region extent is not contained in array extent");
+ }
+ // The array extents passed in must equal shape/stride of the corresponding boxes
+ if (any (srcext != srcbbox.shape() / srcbbox.stride() or
+ dstext != dstbbox.shape() / dstbbox.stride()))
+ {
+ CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes");
+ }
+
+
+ // Unpack the vector quantities into one scalar per direction
+ size_t const srciext = srcext[0];
+ size_t const srcjext = srcext[1];
+ size_t const srckext = srcext[2];
+
+ size_t const dstiext = dstext[0];
+ size_t const dstjext = dstext[1];
+ size_t const dstkext = dstext[2];
+
+ size_t const regiext = regext[0];
+ size_t const regjext = regext[1];
+ size_t const regkext = regext[2];
+
+ size_t const srcioff = srcoff[0];
+ size_t const srcjoff = srcoff[1];
+ size_t const srckoff = srcoff[2];
+
+ size_t const dstioff = dstoff[0];
+ size_t const dstjoff = dstoff[1];
+ size_t const dstkoff = dstoff[2];
+
+
+ // fi/fj/fk: source offset modulo 2 (selects the starting kernel); i0/j0/k0: source offset halved (starting src index)
+ size_t const fi = srcioff % 2;
+ size_t const fj = srcjoff % 2;
+ size_t const fk = srckoff % 2;
+
+ size_t const i0 = srcioff / 2;
+ size_t const j0 = srcjoff / 2;
+ size_t const k0 = srckoff / 2;
+
+ RT const one = 1;
+ // Weights of the six-point stencil: (3, -25, 150, 150, -25, 3) / 256
+ RT const f1 = 3*one/256;
+ RT const f2 = - 25*one/256;
+ RT const f3 = 150*one/256;
+ RT const f4 = 150*one/256;
+ RT const f5 = - 25*one/256;
+ RT const f6 = 3*one/256;
+ // Equal weights of the two-point fallback used when the monotonicity test fails
+ RT const o1_f1 = one/2;
+ RT const o1_f2 = one/2;
+
+
+ // Loop over fine region; the loops are spelled out with gotos.
+ // Label scheme: l 8 fk fj fi (the flag values handled by that label)
+ // (i,j,k) count region points, (is,js,ks) index src, (id,jd,kd) index dst
+ size_t is, js, ks;
+ size_t id, jd, kd;
+ size_t i, j, k;
+
+ // begin k loop: start at src plane k0 and dst plane dstkoff, branch on fk
+ k = 0;
+ ks = k0;
+ kd = dstkoff;
+ if (fk == 0) goto l80;
+ goto l81;
+
+ // begin j loop for an fk == 0 plane (also re-entered from l91)
+ l80:
+ j = 0;
+ js = j0;
+ jd = dstjoff;
+ if (fj == 0) goto l800;
+ goto l801;
+
+ // begin i loop for an fj == 0 row of an fk == 0 plane (also re-entered from l901)
+ l800:
+ i = 0;
+ is = i0;
+ id = dstioff;
+ if (fi == 0) goto l8000;
+ goto l8001;
+
+ // kernel l8000: plain copy of src(is,js,ks); advances i/id but not is, then continues with l8001
+ l8000:
+ dst[DSTIND3(id,jd,kd)] = src[SRCIND3(is,js,ks)];
+ i = i+1;
+ id = id+1;
+ if (i < regiext) goto l8001;
+ goto l900;
+
+ // kernel l8001: six-point interpolation along i (src points is-2 .. is+3)
+ l8001:
+ dst[DSTIND3(id,jd,kd)] =
+ + f1 * src[SRCIND3(is-2,js,ks)]
+ + f2 * src[SRCIND3(is-1,js,ks)]
+ + f3 * src[SRCIND3(is ,js,ks)]
+ + f4 * src[SRCIND3(is+1,js,ks)]
+ + f5 * src[SRCIND3(is+2,js,ks)]
+ + f6 * src[SRCIND3(is+3,js,ks)];
+ // Monotonicity enforcement: if the result lies outside [min,max] of src(is) and src(is+1), use their average instead
+ if ((dst[DSTIND3(id,jd,kd)] > max(src[SRCIND3(is ,js ,ks )],
+ src[SRCIND3(is+1,js ,ks )]))||
+ (dst[DSTIND3(id,jd,kd)] < min(src[SRCIND3(is ,js ,ks )],
+ src[SRCIND3(is+1,js ,ks )]))) {
+ dst[DSTIND3(id,jd,kd)] =
+ + o1_f1 * src[SRCIND3(is ,js,ks)]
+ + o1_f2 * src[SRCIND3(is+1,js,ks)];
+
+ }
+
+ i = i+1;
+ id = id+1;
+ is = is+1;
+ if (i < regiext) goto l8000;
+ goto l900;
+
+ // end i loop (fj == 0 row done): js is kept and the next row uses the fj == 1 kernels (l801)
+ l900:
+ j = j+1;
+ jd = jd+1;
+ if (j < regjext) goto l801;
+ goto l90;
+
+ // begin i loop for an fj == 1 row of an fk == 0 plane
+ l801:
+ i = 0;
+ is = i0;
+ id = dstioff;
+ if (fi == 0) goto l8010;
+ goto l8011;
+
+ // kernel l8010: six-point interpolation along j (src points js-2 .. js+3)
+ l8010:
+ dst[DSTIND3(id,jd,kd)] =
+ + f1 * src[SRCIND3(is,js-2,ks)]
+ + f2 * src[SRCIND3(is,js-1,ks)]
+ + f3 * src[SRCIND3(is,js ,ks)]
+ + f4 * src[SRCIND3(is,js+1,ks)]
+ + f5 * src[SRCIND3(is,js+2,ks)]
+ + f6 * src[SRCIND3(is,js+3,ks)];
+ // Monotonicity enforcement: if the result lies outside [min,max] of src(js) and src(js+1), use their average instead
+ if ((dst[DSTIND3(id,jd,kd)] > max(src[SRCIND3(is ,js ,ks )],
+ src[SRCIND3(is ,js+1,ks )]))||
+ (dst[DSTIND3(id,jd,kd)] < min(src[SRCIND3(is ,js ,ks )],
+ src[SRCIND3(is ,js+1,ks )]))) {
+ dst[DSTIND3(id,jd,kd)] =
+ + o1_f1 * src[SRCIND3(is,js ,ks)]
+ + o1_f2 * src[SRCIND3(is,js+1,ks)];
+
+ }
+ i = i+1;
+ id = id+1;
+ if (i < regiext) goto l8011;
+ goto l901;
+
+ // kernel l8011: 6x6 tensor-product interpolation in i and j
+ l8011:
+ dst[DSTIND3(id,jd,kd)] =
+ + f1*f1 * src[SRCIND3(is-2,js-2,ks)]
+ + f2*f1 * src[SRCIND3(is-1,js-2,ks)]
+ + f3*f1 * src[SRCIND3(is ,js-2,ks)]
+ + f4*f1 * src[SRCIND3(is+1,js-2,ks)]
+ + f5*f1 * src[SRCIND3(is+2,js-2,ks)]
+ + f6*f1 * src[SRCIND3(is+3,js-2,ks)]
+ + f1*f2 * src[SRCIND3(is-2,js-1,ks)]
+ + f2*f2 * src[SRCIND3(is-1,js-1,ks)]
+ + f3*f2 * src[SRCIND3(is ,js-1,ks)]
+ + f4*f2 * src[SRCIND3(is+1,js-1,ks)]
+ + f5*f2 * src[SRCIND3(is+2,js-1,ks)]
+ + f6*f2 * src[SRCIND3(is+3,js-1,ks)]
+ + f1*f3 * src[SRCIND3(is-2,js ,ks)]
+ + f2*f3 * src[SRCIND3(is-1,js ,ks)]
+ + f3*f3 * src[SRCIND3(is ,js ,ks)]
+ + f4*f3 * src[SRCIND3(is+1,js ,ks)]
+ + f5*f3 * src[SRCIND3(is+2,js ,ks)]
+ + f6*f3 * src[SRCIND3(is+3,js ,ks)]
+ + f1*f4 * src[SRCIND3(is-2,js+1,ks)]
+ + f2*f4 * src[SRCIND3(is-1,js+1,ks)]
+ + f3*f4 * src[SRCIND3(is ,js+1,ks)]
+ + f4*f4 * src[SRCIND3(is+1,js+1,ks)]
+ + f5*f4 * src[SRCIND3(is+2,js+1,ks)]
+ + f6*f4 * src[SRCIND3(is+3,js+1,ks)]
+ + f1*f5 * src[SRCIND3(is-2,js+2,ks)]
+ + f2*f5 * src[SRCIND3(is-1,js+2,ks)]
+ + f3*f5 * src[SRCIND3(is ,js+2,ks)]
+ + f4*f5 * src[SRCIND3(is+1,js+2,ks)]
+ + f5*f5 * src[SRCIND3(is+2,js+2,ks)]
+ + f6*f5 * src[SRCIND3(is+3,js+2,ks)]
+ + f1*f6 * src[SRCIND3(is-2,js+3,ks)]
+ + f2*f6 * src[SRCIND3(is-1,js+3,ks)]
+ + f3*f6 * src[SRCIND3(is ,js+3,ks)]
+ + f4*f6 * src[SRCIND3(is+1,js+3,ks)]
+ + f5*f6 * src[SRCIND3(is+2,js+3,ks)]
+ + f6*f6 * src[SRCIND3(is+3,js+3,ks)];
+ // Monotonicity enforcement: bounds come from the four src points (is..is+1, js..js+1); fallback is their equal-weight average
+ if ((dst[DSTIND3(id,jd,kd)] > max4(src[SRCIND3(is ,js ,ks )],
+ src[SRCIND3(is+1,js ,ks )],
+ src[SRCIND3(is ,js+1,ks )],
+ src[SRCIND3(is+1,js+1,ks )]))||
+ (dst[DSTIND3(id,jd,kd)] < min4(src[SRCIND3(is ,js ,ks )],
+ src[SRCIND3(is+1,js ,ks )],
+ src[SRCIND3(is ,js+1,ks )],
+ src[SRCIND3(is+1,js+1,ks )]))) {
+ dst[DSTIND3(id,jd,kd)] =
+ + o1_f1*o1_f1 * src[SRCIND3(is ,js ,ks)]
+ + o1_f2*o1_f1 * src[SRCIND3(is+1,js ,ks)]
+ + o1_f1*o1_f2 * src[SRCIND3(is ,js+1,ks)]
+ + o1_f2*o1_f2 * src[SRCIND3(is+1,js+1,ks)];
+ }
+ i = i+1;
+ id = id+1;
+ is = is+1;
+ if (i < regiext) goto l8010;
+ goto l901;
+
+ // end i loop (fj == 1 row done): advance js and continue with an fj == 0 row (l800)
+ l901:
+ j = j+1;
+ jd = jd+1;
+ js = js+1;
+ if (j < regjext) goto l800;
+ goto l90;
+
+ // end j loop (fk == 0 plane done): ks is kept and the next plane uses the fk == 1 branch (l81)
+ l90:
+ k = k+1;
+ kd = kd+1;
+ if (k < regkext) goto l81;
+ goto l9;
+
+ // begin j loop for an fk == 1 plane
+ l81:
+ j = 0;
+ js = j0;
+ jd = dstjoff;
+ if (fj == 0) goto l810;
+ goto l811;
+
+ // begin i loop for an fj == 0 row of an fk == 1 plane (also re-entered from l911)
+ l810:
+ i = 0;
+ is = i0;
+ id = dstioff;
+ if (fi == 0) goto l8100;
+ goto l8101;
+
+ // kernel l8100: six-point interpolation along k (src points ks-2 .. ks+3)
+ l8100:
+ dst[DSTIND3(id,jd,kd)] =
+ + f1 * src[SRCIND3(is,js,ks-2)]
+ + f2 * src[SRCIND3(is,js,ks-1)]
+ + f3 * src[SRCIND3(is,js,ks )]
+ + f4 * src[SRCIND3(is,js,ks+1)]
+ + f5 * src[SRCIND3(is,js,ks+2)]
+ + f6 * src[SRCIND3(is,js,ks+3)];
+ // Monotonicity enforcement: if the result lies outside [min,max] of src(ks) and src(ks+1), use their average instead
+ if ((dst[DSTIND3(id,jd,kd)] > max(src[SRCIND3(is ,js ,ks )],
+ src[SRCIND3(is ,js ,ks+1)]))||
+ (dst[DSTIND3(id,jd,kd)] < min(src[SRCIND3(is ,js ,ks )],
+ src[SRCIND3(is ,js ,ks+1)]))) {
+ dst[DSTIND3(id,jd,kd)] =
+ + o1_f1 * src[SRCIND3(is,js,ks )]
+ + o1_f2 * src[SRCIND3(is,js,ks+1)];
+ }
+ i = i+1;
+ id = id+1;
+ if (i < regiext) goto l8101;
+ goto l910;
+
+ // kernel l8101: 6x6 tensor-product interpolation in i and k
+ l8101:
+ dst[DSTIND3(id,jd,kd)] =
+ + f1*f1 * src[SRCIND3(is-2,js,ks-2)]
+ + f2*f1 * src[SRCIND3(is-1,js,ks-2)]
+ + f3*f1 * src[SRCIND3(is ,js,ks-2)]
+ + f4*f1 * src[SRCIND3(is+1,js,ks-2)]
+ + f5*f1 * src[SRCIND3(is+2,js,ks-2)]
+ + f6*f1 * src[SRCIND3(is+3,js,ks-2)]
+ + f1*f2 * src[SRCIND3(is-2,js,ks-1)]
+ + f2*f2 * src[SRCIND3(is-1,js,ks-1)]
+ + f3*f2 * src[SRCIND3(is ,js,ks-1)]
+ + f4*f2 * src[SRCIND3(is+1,js,ks-1)]
+ + f5*f2 * src[SRCIND3(is+2,js,ks-1)]
+ + f6*f2 * src[SRCIND3(is+3,js,ks-1)]
+ + f1*f3 * src[SRCIND3(is-2,js,ks )]
+ + f2*f3 * src[SRCIND3(is-1,js,ks )]
+ + f3*f3 * src[SRCIND3(is ,js,ks )]
+ + f4*f3 * src[SRCIND3(is+1,js,ks )]
+ + f5*f3 * src[SRCIND3(is+2,js,ks )]
+ + f6*f3 * src[SRCIND3(is+3,js,ks )]
+ + f1*f4 * src[SRCIND3(is-2,js,ks+1)]
+ + f2*f4 * src[SRCIND3(is-1,js,ks+1)]
+ + f3*f4 * src[SRCIND3(is ,js,ks+1)]
+ + f4*f4 * src[SRCIND3(is+1,js,ks+1)]
+ + f5*f4 * src[SRCIND3(is+2,js,ks+1)]
+ + f6*f4 * src[SRCIND3(is+3,js,ks+1)]
+ + f1*f5 * src[SRCIND3(is-2,js,ks+2)]
+ + f2*f5 * src[SRCIND3(is-1,js,ks+2)]
+ + f3*f5 * src[SRCIND3(is ,js,ks+2)]
+ + f4*f5 * src[SRCIND3(is+1,js,ks+2)]
+ + f5*f5 * src[SRCIND3(is+2,js,ks+2)]
+ + f6*f5 * src[SRCIND3(is+3,js,ks+2)]
+ + f1*f6 * src[SRCIND3(is-2,js,ks+3)]
+ + f2*f6 * src[SRCIND3(is-1,js,ks+3)]
+ + f3*f6 * src[SRCIND3(is ,js,ks+3)]
+ + f4*f6 * src[SRCIND3(is+1,js,ks+3)]
+ + f5*f6 * src[SRCIND3(is+2,js,ks+3)]
+ + f6*f6 * src[SRCIND3(is+3,js,ks+3)];
+ // Monotonicity enforcement: bounds come from the four src points (is..is+1, ks..ks+1); fallback is their equal-weight average
+ if ((dst[DSTIND3(id,jd,kd)] > max4(src[SRCIND3(is ,js ,ks )],
+ src[SRCIND3(is+1,js ,ks )],
+ src[SRCIND3(is ,js ,ks+1)],
+ src[SRCIND3(is+1,js ,ks+1)]))||
+ (dst[DSTIND3(id,jd,kd)] < min4(src[SRCIND3(is ,js ,ks )],
+ src[SRCIND3(is+1,js ,ks )],
+ src[SRCIND3(is ,js ,ks+1)],
+ src[SRCIND3(is+1,js ,ks+1)]))) {
+ dst[DSTIND3(id,jd,kd)] =
+ + o1_f1*o1_f1 * src[SRCIND3(is ,js,ks )]
+ + o1_f2*o1_f1 * src[SRCIND3(is+1,js,ks )]
+ + o1_f1*o1_f2 * src[SRCIND3(is ,js,ks+1)]
+ + o1_f2*o1_f2 * src[SRCIND3(is+1,js,ks+1)];
+ }
+ i = i+1;
+ id = id+1;
+ is = is+1;
+ if (i < regiext) goto l8100;
+ goto l910;
+
+ // end i loop (fj == 0 row done): js is kept and the next row uses the fj == 1 kernels (l811)
+ l910:
+ j = j+1;
+ jd = jd+1;
+ if (j < regjext) goto l811;
+ goto l91;
+
+ // begin i loop for an fj == 1 row of an fk == 1 plane
+ l811:
+ i = 0;
+ is = i0;
+ id = dstioff;
+ if (fi == 0) goto l8110;
+ goto l8111;
+
+ // kernel l8110: 6x6 tensor-product interpolation in j and k
+ l8110:
+ dst[DSTIND3(id,jd,kd)] =
+ + f1*f1 * src[SRCIND3(is,js-2,ks-2)]
+ + f2*f1 * src[SRCIND3(is,js-1,ks-2)]
+ + f3*f1 * src[SRCIND3(is,js ,ks-2)]
+ + f4*f1 * src[SRCIND3(is,js+1,ks-2)]
+ + f5*f1 * src[SRCIND3(is,js+2,ks-2)]
+ + f6*f1 * src[SRCIND3(is,js+3,ks-2)]
+ + f1*f2 * src[SRCIND3(is,js-2,ks-1)]
+ + f2*f2 * src[SRCIND3(is,js-1,ks-1)]
+ + f3*f2 * src[SRCIND3(is,js ,ks-1)]
+ + f4*f2 * src[SRCIND3(is,js+1,ks-1)]
+ + f5*f2 * src[SRCIND3(is,js+2,ks-1)]
+ + f6*f2 * src[SRCIND3(is,js+3,ks-1)]
+ + f1*f3 * src[SRCIND3(is,js-2,ks )]
+ + f2*f3 * src[SRCIND3(is,js-1,ks )]
+ + f3*f3 * src[SRCIND3(is,js ,ks )]
+ + f4*f3 * src[SRCIND3(is,js+1,ks )]
+ + f5*f3 * src[SRCIND3(is,js+2,ks )]
+ + f6*f3 * src[SRCIND3(is,js+3,ks )]
+ + f1*f4 * src[SRCIND3(is,js-2,ks+1)]
+ + f2*f4 * src[SRCIND3(is,js-1,ks+1)]
+ + f3*f4 * src[SRCIND3(is,js ,ks+1)]
+ + f4*f4 * src[SRCIND3(is,js+1,ks+1)]
+ + f5*f4 * src[SRCIND3(is,js+2,ks+1)]
+ + f6*f4 * src[SRCIND3(is,js+3,ks+1)]
+ + f1*f5 * src[SRCIND3(is,js-2,ks+2)]
+ + f2*f5 * src[SRCIND3(is,js-1,ks+2)]
+ + f3*f5 * src[SRCIND3(is,js ,ks+2)]
+ + f4*f5 * src[SRCIND3(is,js+1,ks+2)]
+ + f5*f5 * src[SRCIND3(is,js+2,ks+2)]
+ + f6*f5 * src[SRCIND3(is,js+3,ks+2)]
+ + f1*f6 * src[SRCIND3(is,js-2,ks+3)]
+ + f2*f6 * src[SRCIND3(is,js-1,ks+3)]
+ + f3*f6 * src[SRCIND3(is,js ,ks+3)]
+ + f4*f6 * src[SRCIND3(is,js+1,ks+3)]
+ + f5*f6 * src[SRCIND3(is,js+2,ks+3)]
+ + f6*f6 * src[SRCIND3(is,js+3,ks+3)];
+ // Monotonicity enforcement: bounds come from the four src points (js..js+1, ks..ks+1); fallback is their equal-weight average
+ if ((dst[DSTIND3(id,jd,kd)] > max4(src[SRCIND3(is ,js ,ks )],
+ src[SRCIND3(is ,js+1,ks )],
+ src[SRCIND3(is ,js ,ks+1)],
+ src[SRCIND3(is ,js+1,ks+1)]))||
+ (dst[DSTIND3(id,jd,kd)] < min4(src[SRCIND3(is ,js ,ks )],
+ src[SRCIND3(is ,js+1,ks )],
+ src[SRCIND3(is ,js ,ks+1)],
+ src[SRCIND3(is ,js+1,ks+1)]))) {
+ dst[DSTIND3(id,jd,kd)] =
+ + o1_f1*o1_f1 * src[SRCIND3(is,js ,ks )]
+ + o1_f2*o1_f1 * src[SRCIND3(is,js+1,ks )]
+ + o1_f1*o1_f2 * src[SRCIND3(is,js ,ks+1)]
+ + o1_f2*o1_f2 * src[SRCIND3(is,js+1,ks+1)];
+ }
+ i = i+1;
+ id = id+1;
+ if (i < regiext) goto l8111;
+ goto l911;
+
+ // kernel l8111: 6x6x6 tensor-product interpolation in i, j and k; res1..res6 are the partial sums for ks-2 .. ks+3
+ l8111:
+ {
+ T const res1 =
+ + f1*f1*f1 * src[SRCIND3(is-2,js-2,ks-2)]
+ + f2*f1*f1 * src[SRCIND3(is-1,js-2,ks-2)]
+ + f3*f1*f1 * src[SRCIND3(is ,js-2,ks-2)]
+ + f4*f1*f1 * src[SRCIND3(is+1,js-2,ks-2)]
+ + f5*f1*f1 * src[SRCIND3(is+2,js-2,ks-2)]
+ + f6*f1*f1 * src[SRCIND3(is+3,js-2,ks-2)]
+ + f1*f2*f1 * src[SRCIND3(is-2,js-1,ks-2)]
+ + f2*f2*f1 * src[SRCIND3(is-1,js-1,ks-2)]
+ + f3*f2*f1 * src[SRCIND3(is ,js-1,ks-2)]
+ + f4*f2*f1 * src[SRCIND3(is+1,js-1,ks-2)]
+ + f5*f2*f1 * src[SRCIND3(is+2,js-1,ks-2)]
+ + f6*f2*f1 * src[SRCIND3(is+3,js-1,ks-2)]
+ + f1*f3*f1 * src[SRCIND3(is-2,js ,ks-2)]
+ + f2*f3*f1 * src[SRCIND3(is-1,js ,ks-2)]
+ + f3*f3*f1 * src[SRCIND3(is ,js ,ks-2)]
+ + f4*f3*f1 * src[SRCIND3(is+1,js ,ks-2)]
+ + f5*f3*f1 * src[SRCIND3(is+2,js ,ks-2)]
+ + f6*f3*f1 * src[SRCIND3(is+3,js ,ks-2)]
+ + f1*f4*f1 * src[SRCIND3(is-2,js+1,ks-2)]
+ + f2*f4*f1 * src[SRCIND3(is-1,js+1,ks-2)]
+ + f3*f4*f1 * src[SRCIND3(is ,js+1,ks-2)]
+ + f4*f4*f1 * src[SRCIND3(is+1,js+1,ks-2)]
+ + f5*f4*f1 * src[SRCIND3(is+2,js+1,ks-2)]
+ + f6*f4*f1 * src[SRCIND3(is+3,js+1,ks-2)]
+ + f1*f5*f1 * src[SRCIND3(is-2,js+2,ks-2)]
+ + f2*f5*f1 * src[SRCIND3(is-1,js+2,ks-2)]
+ + f3*f5*f1 * src[SRCIND3(is ,js+2,ks-2)]
+ + f4*f5*f1 * src[SRCIND3(is+1,js+2,ks-2)]
+ + f5*f5*f1 * src[SRCIND3(is+2,js+2,ks-2)]
+ + f6*f5*f1 * src[SRCIND3(is+3,js+2,ks-2)]
+ + f1*f6*f1 * src[SRCIND3(is-2,js+3,ks-2)]
+ + f2*f6*f1 * src[SRCIND3(is-1,js+3,ks-2)]
+ + f3*f6*f1 * src[SRCIND3(is ,js+3,ks-2)]
+ + f4*f6*f1 * src[SRCIND3(is+1,js+3,ks-2)]
+ + f5*f6*f1 * src[SRCIND3(is+2,js+3,ks-2)]
+ + f6*f6*f1 * src[SRCIND3(is+3,js+3,ks-2)];
+ T const res2 =
+ + f1*f1*f2 * src[SRCIND3(is-2,js-2,ks-1)]
+ + f2*f1*f2 * src[SRCIND3(is-1,js-2,ks-1)]
+ + f3*f1*f2 * src[SRCIND3(is ,js-2,ks-1)]
+ + f4*f1*f2 * src[SRCIND3(is+1,js-2,ks-1)]
+ + f5*f1*f2 * src[SRCIND3(is+2,js-2,ks-1)]
+ + f6*f1*f2 * src[SRCIND3(is+3,js-2,ks-1)]
+ + f1*f2*f2 * src[SRCIND3(is-2,js-1,ks-1)]
+ + f2*f2*f2 * src[SRCIND3(is-1,js-1,ks-1)]
+ + f3*f2*f2 * src[SRCIND3(is ,js-1,ks-1)]
+ + f4*f2*f2 * src[SRCIND3(is+1,js-1,ks-1)]
+ + f5*f2*f2 * src[SRCIND3(is+2,js-1,ks-1)]
+ + f6*f2*f2 * src[SRCIND3(is+3,js-1,ks-1)]
+ + f1*f3*f2 * src[SRCIND3(is-2,js ,ks-1)]
+ + f2*f3*f2 * src[SRCIND3(is-1,js ,ks-1)]
+ + f3*f3*f2 * src[SRCIND3(is ,js ,ks-1)]
+ + f4*f3*f2 * src[SRCIND3(is+1,js ,ks-1)]
+ + f5*f3*f2 * src[SRCIND3(is+2,js ,ks-1)]
+ + f6*f3*f2 * src[SRCIND3(is+3,js ,ks-1)]
+ + f1*f4*f2 * src[SRCIND3(is-2,js+1,ks-1)]
+ + f2*f4*f2 * src[SRCIND3(is-1,js+1,ks-1)]
+ + f3*f4*f2 * src[SRCIND3(is ,js+1,ks-1)]
+ + f4*f4*f2 * src[SRCIND3(is+1,js+1,ks-1)]
+ + f5*f4*f2 * src[SRCIND3(is+2,js+1,ks-1)]
+ + f6*f4*f2 * src[SRCIND3(is+3,js+1,ks-1)]
+ + f1*f5*f2 * src[SRCIND3(is-2,js+2,ks-1)]
+ + f2*f5*f2 * src[SRCIND3(is-1,js+2,ks-1)]
+ + f3*f5*f2 * src[SRCIND3(is ,js+2,ks-1)]
+ + f4*f5*f2 * src[SRCIND3(is+1,js+2,ks-1)]
+ + f5*f5*f2 * src[SRCIND3(is+2,js+2,ks-1)]
+ + f6*f5*f2 * src[SRCIND3(is+3,js+2,ks-1)]
+ + f1*f6*f2 * src[SRCIND3(is-2,js+3,ks-1)]
+ + f2*f6*f2 * src[SRCIND3(is-1,js+3,ks-1)]
+ + f3*f6*f2 * src[SRCIND3(is ,js+3,ks-1)]
+ + f4*f6*f2 * src[SRCIND3(is+1,js+3,ks-1)]
+ + f5*f6*f2 * src[SRCIND3(is+2,js+3,ks-1)]
+ + f6*f6*f2 * src[SRCIND3(is+3,js+3,ks-1)];
+ T const res3 =
+ + f1*f1*f3 * src[SRCIND3(is-2,js-2,ks )]
+ + f2*f1*f3 * src[SRCIND3(is-1,js-2,ks )]
+ + f3*f1*f3 * src[SRCIND3(is ,js-2,ks )]
+ + f4*f1*f3 * src[SRCIND3(is+1,js-2,ks )]
+ + f5*f1*f3 * src[SRCIND3(is+2,js-2,ks )]
+ + f6*f1*f3 * src[SRCIND3(is+3,js-2,ks )]
+ + f1*f2*f3 * src[SRCIND3(is-2,js-1,ks )]
+ + f2*f2*f3 * src[SRCIND3(is-1,js-1,ks )]
+ + f3*f2*f3 * src[SRCIND3(is ,js-1,ks )]
+ + f4*f2*f3 * src[SRCIND3(is+1,js-1,ks )]
+ + f5*f2*f3 * src[SRCIND3(is+2,js-1,ks )]
+ + f6*f2*f3 * src[SRCIND3(is+3,js-1,ks )]
+ + f1*f3*f3 * src[SRCIND3(is-2,js ,ks )]
+ + f2*f3*f3 * src[SRCIND3(is-1,js ,ks )]
+ + f3*f3*f3 * src[SRCIND3(is ,js ,ks )]
+ + f4*f3*f3 * src[SRCIND3(is+1,js ,ks )]
+ + f5*f3*f3 * src[SRCIND3(is+2,js ,ks )]
+ + f6*f3*f3 * src[SRCIND3(is+3,js ,ks )]
+ + f1*f4*f3 * src[SRCIND3(is-2,js+1,ks )]
+ + f2*f4*f3 * src[SRCIND3(is-1,js+1,ks )]
+ + f3*f4*f3 * src[SRCIND3(is ,js+1,ks )]
+ + f4*f4*f3 * src[SRCIND3(is+1,js+1,ks )]
+ + f5*f4*f3 * src[SRCIND3(is+2,js+1,ks )]
+ + f6*f4*f3 * src[SRCIND3(is+3,js+1,ks )]
+ + f1*f5*f3 * src[SRCIND3(is-2,js+2,ks )]
+ + f2*f5*f3 * src[SRCIND3(is-1,js+2,ks )]
+ + f3*f5*f3 * src[SRCIND3(is ,js+2,ks )]
+ + f4*f5*f3 * src[SRCIND3(is+1,js+2,ks )]
+ + f5*f5*f3 * src[SRCIND3(is+2,js+2,ks )]
+ + f6*f5*f3 * src[SRCIND3(is+3,js+2,ks )]
+ + f1*f6*f3 * src[SRCIND3(is-2,js+3,ks )]
+ + f2*f6*f3 * src[SRCIND3(is-1,js+3,ks )]
+ + f3*f6*f3 * src[SRCIND3(is ,js+3,ks )]
+ + f4*f6*f3 * src[SRCIND3(is+1,js+3,ks )]
+ + f5*f6*f3 * src[SRCIND3(is+2,js+3,ks )]
+ + f6*f6*f3 * src[SRCIND3(is+3,js+3,ks )];
+ T const res4 =
+ + f1*f1*f4 * src[SRCIND3(is-2,js-2,ks+1)]
+ + f2*f1*f4 * src[SRCIND3(is-1,js-2,ks+1)]
+ + f3*f1*f4 * src[SRCIND3(is ,js-2,ks+1)]
+ + f4*f1*f4 * src[SRCIND3(is+1,js-2,ks+1)]
+ + f5*f1*f4 * src[SRCIND3(is+2,js-2,ks+1)]
+ + f6*f1*f4 * src[SRCIND3(is+3,js-2,ks+1)]
+ + f1*f2*f4 * src[SRCIND3(is-2,js-1,ks+1)]
+ + f2*f2*f4 * src[SRCIND3(is-1,js-1,ks+1)]
+ + f3*f2*f4 * src[SRCIND3(is ,js-1,ks+1)]
+ + f4*f2*f4 * src[SRCIND3(is+1,js-1,ks+1)]
+ + f5*f2*f4 * src[SRCIND3(is+2,js-1,ks+1)]
+ + f6*f2*f4 * src[SRCIND3(is+3,js-1,ks+1)]
+ + f1*f3*f4 * src[SRCIND3(is-2,js ,ks+1)]
+ + f2*f3*f4 * src[SRCIND3(is-1,js ,ks+1)]
+ + f3*f3*f4 * src[SRCIND3(is ,js ,ks+1)]
+ + f4*f3*f4 * src[SRCIND3(is+1,js ,ks+1)]
+ + f5*f3*f4 * src[SRCIND3(is+2,js ,ks+1)]
+ + f6*f3*f4 * src[SRCIND3(is+3,js ,ks+1)]
+ + f1*f4*f4 * src[SRCIND3(is-2,js+1,ks+1)]
+ + f2*f4*f4 * src[SRCIND3(is-1,js+1,ks+1)]
+ + f3*f4*f4 * src[SRCIND3(is ,js+1,ks+1)]
+ + f4*f4*f4 * src[SRCIND3(is+1,js+1,ks+1)]
+ + f5*f4*f4 * src[SRCIND3(is+2,js+1,ks+1)]
+ + f6*f4*f4 * src[SRCIND3(is+3,js+1,ks+1)]
+ + f1*f5*f4 * src[SRCIND3(is-2,js+2,ks+1)]
+ + f2*f5*f4 * src[SRCIND3(is-1,js+2,ks+1)]
+ + f3*f5*f4 * src[SRCIND3(is ,js+2,ks+1)]
+ + f4*f5*f4 * src[SRCIND3(is+1,js+2,ks+1)]
+ + f5*f5*f4 * src[SRCIND3(is+2,js+2,ks+1)]
+ + f6*f5*f4 * src[SRCIND3(is+3,js+2,ks+1)]
+ + f1*f6*f4 * src[SRCIND3(is-2,js+3,ks+1)]
+ + f2*f6*f4 * src[SRCIND3(is-1,js+3,ks+1)]
+ + f3*f6*f4 * src[SRCIND3(is ,js+3,ks+1)]
+ + f4*f6*f4 * src[SRCIND3(is+1,js+3,ks+1)]
+ + f5*f6*f4 * src[SRCIND3(is+2,js+3,ks+1)]
+ + f6*f6*f4 * src[SRCIND3(is+3,js+3,ks+1)];
+ T const res5 =
+ + f1*f1*f5 * src[SRCIND3(is-2,js-2,ks+2)]
+ + f2*f1*f5 * src[SRCIND3(is-1,js-2,ks+2)]
+ + f3*f1*f5 * src[SRCIND3(is ,js-2,ks+2)]
+ + f4*f1*f5 * src[SRCIND3(is+1,js-2,ks+2)]
+ + f5*f1*f5 * src[SRCIND3(is+2,js-2,ks+2)]
+ + f6*f1*f5 * src[SRCIND3(is+3,js-2,ks+2)]
+ + f1*f2*f5 * src[SRCIND3(is-2,js-1,ks+2)]
+ + f2*f2*f5 * src[SRCIND3(is-1,js-1,ks+2)]
+ + f3*f2*f5 * src[SRCIND3(is ,js-1,ks+2)]
+ + f4*f2*f5 * src[SRCIND3(is+1,js-1,ks+2)]
+ + f5*f2*f5 * src[SRCIND3(is+2,js-1,ks+2)]
+ + f6*f2*f5 * src[SRCIND3(is+3,js-1,ks+2)]
+ + f1*f3*f5 * src[SRCIND3(is-2,js ,ks+2)]
+ + f2*f3*f5 * src[SRCIND3(is-1,js ,ks+2)]
+ + f3*f3*f5 * src[SRCIND3(is ,js ,ks+2)]
+ + f4*f3*f5 * src[SRCIND3(is+1,js ,ks+2)]
+ + f5*f3*f5 * src[SRCIND3(is+2,js ,ks+2)]
+ + f6*f3*f5 * src[SRCIND3(is+3,js ,ks+2)]
+ + f1*f4*f5 * src[SRCIND3(is-2,js+1,ks+2)]
+ + f2*f4*f5 * src[SRCIND3(is-1,js+1,ks+2)]
+ + f3*f4*f5 * src[SRCIND3(is ,js+1,ks+2)]
+ + f4*f4*f5 * src[SRCIND3(is+1,js+1,ks+2)]
+ + f5*f4*f5 * src[SRCIND3(is+2,js+1,ks+2)]
+ + f6*f4*f5 * src[SRCIND3(is+3,js+1,ks+2)]
+ + f1*f5*f5 * src[SRCIND3(is-2,js+2,ks+2)]
+ + f2*f5*f5 * src[SRCIND3(is-1,js+2,ks+2)]
+ + f3*f5*f5 * src[SRCIND3(is ,js+2,ks+2)]
+ + f4*f5*f5 * src[SRCIND3(is+1,js+2,ks+2)]
+ + f5*f5*f5 * src[SRCIND3(is+2,js+2,ks+2)]
+ + f6*f5*f5 * src[SRCIND3(is+3,js+2,ks+2)]
+ + f1*f6*f5 * src[SRCIND3(is-2,js+3,ks+2)]
+ + f2*f6*f5 * src[SRCIND3(is-1,js+3,ks+2)]
+ + f3*f6*f5 * src[SRCIND3(is ,js+3,ks+2)]
+ + f4*f6*f5 * src[SRCIND3(is+1,js+3,ks+2)]
+ + f5*f6*f5 * src[SRCIND3(is+2,js+3,ks+2)]
+ + f6*f6*f5 * src[SRCIND3(is+3,js+3,ks+2)];
+ T const res6 =
+ + f1*f1*f6 * src[SRCIND3(is-2,js-2,ks+3)]
+ + f2*f1*f6 * src[SRCIND3(is-1,js-2,ks+3)]
+ + f3*f1*f6 * src[SRCIND3(is ,js-2,ks+3)]
+ + f4*f1*f6 * src[SRCIND3(is+1,js-2,ks+3)]
+ + f5*f1*f6 * src[SRCIND3(is+2,js-2,ks+3)]
+ + f6*f1*f6 * src[SRCIND3(is+3,js-2,ks+3)]
+ + f1*f2*f6 * src[SRCIND3(is-2,js-1,ks+3)]
+ + f2*f2*f6 * src[SRCIND3(is-1,js-1,ks+3)]
+ + f3*f2*f6 * src[SRCIND3(is ,js-1,ks+3)]
+ + f4*f2*f6 * src[SRCIND3(is+1,js-1,ks+3)]
+ + f5*f2*f6 * src[SRCIND3(is+2,js-1,ks+3)]
+ + f6*f2*f6 * src[SRCIND3(is+3,js-1,ks+3)]
+ + f1*f3*f6 * src[SRCIND3(is-2,js ,ks+3)]
+ + f2*f3*f6 * src[SRCIND3(is-1,js ,ks+3)]
+ + f3*f3*f6 * src[SRCIND3(is ,js ,ks+3)]
+ + f4*f3*f6 * src[SRCIND3(is+1,js ,ks+3)]
+ + f5*f3*f6 * src[SRCIND3(is+2,js ,ks+3)]
+ + f6*f3*f6 * src[SRCIND3(is+3,js ,ks+3)]
+ + f1*f4*f6 * src[SRCIND3(is-2,js+1,ks+3)]
+ + f2*f4*f6 * src[SRCIND3(is-1,js+1,ks+3)]
+ + f3*f4*f6 * src[SRCIND3(is ,js+1,ks+3)]
+ + f4*f4*f6 * src[SRCIND3(is+1,js+1,ks+3)]
+ + f5*f4*f6 * src[SRCIND3(is+2,js+1,ks+3)]
+ + f6*f4*f6 * src[SRCIND3(is+3,js+1,ks+3)]
+ + f1*f5*f6 * src[SRCIND3(is-2,js+2,ks+3)]
+ + f2*f5*f6 * src[SRCIND3(is-1,js+2,ks+3)]
+ + f3*f5*f6 * src[SRCIND3(is ,js+2,ks+3)]
+ + f4*f5*f6 * src[SRCIND3(is+1,js+2,ks+3)]
+ + f5*f5*f6 * src[SRCIND3(is+2,js+2,ks+3)]
+ + f6*f5*f6 * src[SRCIND3(is+3,js+2,ks+3)]
+ + f1*f6*f6 * src[SRCIND3(is-2,js+3,ks+3)]
+ + f2*f6*f6 * src[SRCIND3(is-1,js+3,ks+3)]
+ + f3*f6*f6 * src[SRCIND3(is ,js+3,ks+3)]
+ + f4*f6*f6 * src[SRCIND3(is+1,js+3,ks+3)]
+ + f5*f6*f6 * src[SRCIND3(is+2,js+3,ks+3)]
+ + f6*f6*f6 * src[SRCIND3(is+3,js+3,ks+3)];
+ dst[DSTIND3(id,jd,kd)] = res1 + res2 + res3 + res4 + res5 + res6;
+ // Monotonicity enforcement: bounds come from the eight src points (is..is+1, js..js+1, ks..ks+1); fallback is their equal-weight average
+ if ((dst[DSTIND3(id,jd,kd)] > max8(src[SRCIND3(is ,js ,ks )],
+ src[SRCIND3(is+1,js ,ks )],
+ src[SRCIND3(is ,js+1,ks )],
+ src[SRCIND3(is ,js ,ks+1)],
+ src[SRCIND3(is+1,js+1,ks )],
+ src[SRCIND3(is+1,js ,ks+1)],
+ src[SRCIND3(is ,js+1,ks+1)],
+ src[SRCIND3(is+1,js+1,ks+1)]))||
+ (dst[DSTIND3(id,jd,kd)] < min8(src[SRCIND3(is ,js ,ks )],
+ src[SRCIND3(is+1,js ,ks )],
+ src[SRCIND3(is ,js+1,ks )],
+ src[SRCIND3(is ,js ,ks+1)],
+ src[SRCIND3(is+1,js+1,ks )],
+ src[SRCIND3(is+1,js ,ks+1)],
+ src[SRCIND3(is ,js+1,ks+1)],
+ src[SRCIND3(is+1,js+1,ks+1)]))) {
+ T const res1 =
+ + o1_f1*o1_f1*o1_f1 * src[SRCIND3(is ,js ,ks )]
+ + o1_f2*o1_f1*o1_f1 * src[SRCIND3(is+1,js ,ks )]
+ + o1_f1*o1_f2*o1_f1 * src[SRCIND3(is ,js+1,ks )]
+ + o1_f2*o1_f2*o1_f1 * src[SRCIND3(is+1,js+1,ks )];
+ T const res2 =
+ + o1_f1*o1_f1*o1_f2 * src[SRCIND3(is ,js ,ks+1)]
+ + o1_f2*o1_f1*o1_f2 * src[SRCIND3(is+1,js ,ks+1)]
+ + o1_f1*o1_f2*o1_f2 * src[SRCIND3(is ,js+1,ks+1)]
+ + o1_f2*o1_f2*o1_f2 * src[SRCIND3(is+1,js+1,ks+1)];
+ dst[DSTIND3(id,jd,kd)] = res1 + res2;
+ }
+ }
+ i = i+1;
+ id = id+1;
+ is = is+1;
+ if (i < regiext) goto l8110;
+ goto l911;
+
+ // end i loop (fj == 1 row of an fk == 1 plane done): advance js and continue with an fj == 0 row (l810)
+ l911:
+ j = j+1;
+ jd = jd+1;
+ js = js+1;
+ if (j < regjext) goto l810;
+ goto l91;
+
+ // end j loop (fk == 1 plane done): advance ks and continue with an fk == 0 plane (l80)
+ l91:
+ k = k+1;
+ kd = kd+1;
+ ks = ks+1;
+ if (k < regkext) goto l80;
+ goto l9;
+
+ // end k loop: common exit label for all branches
+ l9:;
+
+ }
+
+
+ // Explicit instantiations (NOTE(review): presumably "instantiate" expands INSTANTIATE(T) per supported type and CARPET_NO_COMPLEX excludes complex types -- confirm)
+#define INSTANTIATE(T) \
+ template \
+ void \
+ prolongate_3d_o5_monotone_rf2 (T const * restrict const src, \
+ ivect3 const & restrict srcext, \
+ T * restrict const dst, \
+ ivect3 const & restrict dstext, \
+ ibbox3 const & restrict srcbbox, \
+ ibbox3 const & restrict dstbbox, \
+ ibbox3 const & restrict regbbox);
+#define CARPET_NO_COMPLEX
+#include "instantiate"
+#undef CARPET_NO_COMPLEX
+#undef INSTANTIATE
+ // Specialisation for CCTK_COMPLEX: performs no prolongation, only issues a level-0 warning.
+ template <>
+ void
+ prolongate_3d_o5_monotone_rf2 (CCTK_COMPLEX const * restrict const src,
+ ivect3 const & restrict srcext,
+ CCTK_COMPLEX * restrict const dst,
+ ivect3 const & restrict dstext,
+ ibbox3 const & restrict srcbbox,
+ ibbox3 const & restrict dstbbox,
+ ibbox3 const & restrict regbbox)
+ {
+ CCTK_WARN(0, "This should never be called!");
+ }
+
+
+} // namespace CarpetLib
diff --git a/Carpet/CarpetLib/src/prolongate_3d_o5_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_o5_rf2.cc
index 71133b004..6cbde8cde 100644
--- a/Carpet/CarpetLib/src/prolongate_3d_o5_rf2.cc
+++ b/Carpet/CarpetLib/src/prolongate_3d_o5_rf2.cc
@@ -6,7 +6,7 @@
#include <cctk.h>
#include <cctk_Parameters.h>
-#include "operator_prototypes.hh"
+#include "operator_prototypes_3d.hh"
#include "typeprops.hh"
using namespace std;
@@ -143,7 +143,7 @@ namespace CarpetLib {
goto l81;
// begin j loop
- l80:
+ l80:
j = 0;
js = j0;
jd = dstjoff;
@@ -151,7 +151,7 @@ namespace CarpetLib {
goto l801;
// begin i loop
- l800:
+ l800:
i = 0;
is = i0;
id = dstioff;
@@ -159,7 +159,7 @@ namespace CarpetLib {
goto l8001;
// kernel
- l8000:
+ l8000:
dst[DSTIND3(id,jd,kd)] = src[SRCIND3(is,js,ks)];
i = i+1;
id = id+1;
@@ -167,7 +167,7 @@ namespace CarpetLib {
goto l900;
// kernel
- l8001:
+ l8001:
dst[DSTIND3(id,jd,kd)] =
+ f1 * src[SRCIND3(is-2,js,ks)]
+ f2 * src[SRCIND3(is-1,js,ks)]
@@ -182,14 +182,14 @@ namespace CarpetLib {
goto l900;
// end i loop
- l900:
+ l900:
j = j+1;
jd = jd+1;
if (j < regjext) goto l801;
goto l90;
// begin i loop
- l801:
+ l801:
i = 0;
is = i0;
id = dstioff;
@@ -197,7 +197,7 @@ namespace CarpetLib {
goto l8011;
// kernel
- l8010:
+ l8010:
dst[DSTIND3(id,jd,kd)] =
+ f1 * src[SRCIND3(is,js-2,ks)]
+ f2 * src[SRCIND3(is,js-1,ks)]
@@ -211,7 +211,7 @@ namespace CarpetLib {
goto l901;
// kernel
- l8011:
+ l8011:
dst[DSTIND3(id,jd,kd)] =
+ f1*f1 * src[SRCIND3(is-2,js-2,ks)]
+ f2*f1 * src[SRCIND3(is-1,js-2,ks)]
@@ -256,7 +256,7 @@ namespace CarpetLib {
goto l901;
// end i loop
- l901:
+ l901:
j = j+1;
jd = jd+1;
js = js+1;
@@ -264,14 +264,14 @@ namespace CarpetLib {
goto l90;
// end j loop
- l90:
+ l90:
k = k+1;
kd = kd+1;
if (k < regkext) goto l81;
goto l9;
// begin j loop
- l81:
+ l81:
j = 0;
js = j0;
jd = dstjoff;
@@ -279,7 +279,7 @@ namespace CarpetLib {
goto l811;
// begin i loop
- l810:
+ l810:
i = 0;
is = i0;
id = dstioff;
@@ -287,7 +287,7 @@ namespace CarpetLib {
goto l8101;
// kernel
- l8100:
+ l8100:
dst[DSTIND3(id,jd,kd)] =
+ f1 * src[SRCIND3(is,js,ks-2)]
+ f2 * src[SRCIND3(is,js,ks-1)]
@@ -301,7 +301,7 @@ namespace CarpetLib {
goto l910;
// kernel
- l8101:
+ l8101:
dst[DSTIND3(id,jd,kd)] =
+ f1*f1 * src[SRCIND3(is-2,js,ks-2)]
+ f2*f1 * src[SRCIND3(is-1,js,ks-2)]
@@ -346,14 +346,14 @@ namespace CarpetLib {
goto l910;
// end i loop
- l910:
+ l910:
j = j+1;
jd = jd+1;
if (j < regjext) goto l811;
goto l91;
// begin i loop
- l811:
+ l811:
i = 0;
is = i0;
id = dstioff;
@@ -361,7 +361,7 @@ namespace CarpetLib {
goto l8111;
// kernel
- l8110:
+ l8110:
dst[DSTIND3(id,jd,kd)] =
+ f1*f1 * src[SRCIND3(is,js-2,ks-2)]
+ f2*f1 * src[SRCIND3(is,js-1,ks-2)]
@@ -405,7 +405,7 @@ namespace CarpetLib {
goto l911;
// kernel
- l8111:
+ l8111:
{
T const res1 =
+ f1*f1*f1 * src[SRCIND3(is-2,js-2,ks-2)]
@@ -638,7 +638,7 @@ namespace CarpetLib {
goto l911;
// end i loop
- l911:
+ l911:
j = j+1;
jd = jd+1;
js = js+1;
@@ -646,7 +646,7 @@ namespace CarpetLib {
goto l91;
// end j loop
- l91:
+ l91:
k = k+1;
kd = kd+1;
ks = ks+1;
@@ -654,7 +654,7 @@ namespace CarpetLib {
goto l9;
// end k loop
- l9:;
+ l9:;
}
diff --git a/Carpet/CarpetLib/src/prolongate_3d_o7_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_o7_rf2.cc
index b65f1632d..44ee78d01 100644
--- a/Carpet/CarpetLib/src/prolongate_3d_o7_rf2.cc
+++ b/Carpet/CarpetLib/src/prolongate_3d_o7_rf2.cc
@@ -6,7 +6,7 @@
#include <cctk.h>
#include <cctk_Parameters.h>
-#include "operator_prototypes.hh"
+#include "operator_prototypes_3d.hh"
#include "typeprops.hh"
using namespace std;
@@ -227,7 +227,7 @@ namespace CarpetLib {
goto l81;
// begin j loop
- l80:
+ l80:
j = 0;
js = j0;
jd = dstjoff;
@@ -235,7 +235,7 @@ namespace CarpetLib {
goto l801;
// begin i loop
- l800:
+ l800:
i = 0;
is = i0;
id = dstioff;
@@ -243,7 +243,7 @@ namespace CarpetLib {
goto l8001;
// kernel
- l8000:
+ l8000:
dst[DSTIND3(id,jd,kd)] = interp0<T> (& src[SRCIND3(is,js,ks)]);
i = i+1;
id = id+1;
@@ -251,7 +251,7 @@ namespace CarpetLib {
goto l900;
// kernel
- l8001:
+ l8001:
dst[DSTIND3(id,jd,kd)] = interp1<T> (& src[SRCIND3(is-3,js,ks)], srcdi);
i = i+1;
id = id+1;
@@ -260,14 +260,14 @@ namespace CarpetLib {
goto l900;
// end i loop
- l900:
+ l900:
j = j+1;
jd = jd+1;
if (j < regjext) goto l801;
goto l90;
// begin i loop
- l801:
+ l801:
i = 0;
is = i0;
id = dstioff;
@@ -275,7 +275,7 @@ namespace CarpetLib {
goto l8011;
// kernel
- l8010:
+ l8010:
dst[DSTIND3(id,jd,kd)] = interp1<T> (& src[SRCIND3(is,js-3,ks)], srcdj);
i = i+1;
id = id+1;
@@ -283,7 +283,7 @@ namespace CarpetLib {
goto l901;
// kernel
- l8011:
+ l8011:
dst[DSTIND3(id,jd,kd)] =
interp2<T> (& src[SRCIND3(is-3,js-3,ks)], srcdi, srcdj);
i = i+1;
@@ -293,7 +293,7 @@ namespace CarpetLib {
goto l901;
// end i loop
- l901:
+ l901:
j = j+1;
jd = jd+1;
js = js+1;
@@ -301,14 +301,14 @@ namespace CarpetLib {
goto l90;
// end j loop
- l90:
+ l90:
k = k+1;
kd = kd+1;
if (k < regkext) goto l81;
goto l9;
// begin j loop
- l81:
+ l81:
j = 0;
js = j0;
jd = dstjoff;
@@ -316,7 +316,7 @@ namespace CarpetLib {
goto l811;
// begin i loop
- l810:
+ l810:
i = 0;
is = i0;
id = dstioff;
@@ -324,7 +324,7 @@ namespace CarpetLib {
goto l8101;
// kernel
- l8100:
+ l8100:
dst[DSTIND3(id,jd,kd)] = interp1<T> (& src[SRCIND3(is,js,ks-3)], srcdk);
i = i+1;
id = id+1;
@@ -332,7 +332,7 @@ namespace CarpetLib {
goto l910;
// kernel
- l8101:
+ l8101:
dst[DSTIND3(id,jd,kd)] =
interp2<T> (& src[SRCIND3(is-3,js,ks-3)], srcdi, srcdj);
i = i+1;
@@ -342,14 +342,14 @@ namespace CarpetLib {
goto l910;
// end i loop
- l910:
+ l910:
j = j+1;
jd = jd+1;
if (j < regjext) goto l811;
goto l91;
// begin i loop
- l811:
+ l811:
i = 0;
is = i0;
id = dstioff;
@@ -357,7 +357,7 @@ namespace CarpetLib {
goto l8111;
// kernel
- l8110:
+ l8110:
dst[DSTIND3(id,jd,kd)] =
interp2<T> (& src[SRCIND3(is,js-3,ks-3)], srcdj, srcdk);
i = i+1;
@@ -366,7 +366,7 @@ namespace CarpetLib {
goto l911;
// kernel
- l8111:
+ l8111:
{
dst[DSTIND3(id,jd,kd)] =
interp3<T> (& src[SRCIND3(is-3,js-3,ks-3)], srcdi, srcdj, srcdk);
@@ -378,7 +378,7 @@ namespace CarpetLib {
goto l911;
// end i loop
- l911:
+ l911:
j = j+1;
jd = jd+1;
js = js+1;
@@ -386,7 +386,7 @@ namespace CarpetLib {
goto l91;
// end j loop
- l91:
+ l91:
k = k+1;
kd = kd+1;
ks = ks+1;
@@ -394,7 +394,7 @@ namespace CarpetLib {
goto l9;
// end k loop
- l9:;
+ l9:;
}
diff --git a/Carpet/CarpetLib/src/prolongate_3d_o9_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_o9_rf2.cc
index a01a3bd35..04e9a97b8 100644
--- a/Carpet/CarpetLib/src/prolongate_3d_o9_rf2.cc
+++ b/Carpet/CarpetLib/src/prolongate_3d_o9_rf2.cc
@@ -6,7 +6,7 @@
#include <cctk.h>
#include <cctk_Parameters.h>
-#include "operator_prototypes.hh"
+#include "operator_prototypes_3d.hh"
#include "typeprops.hh"
using namespace std;
@@ -229,7 +229,7 @@ namespace CarpetLib {
goto l81;
// begin j loop
- l80:
+ l80:
j = 0;
js = j0;
jd = dstjoff;
@@ -237,7 +237,7 @@ namespace CarpetLib {
goto l801;
// begin i loop
- l800:
+ l800:
i = 0;
is = i0;
id = dstioff;
@@ -245,7 +245,7 @@ namespace CarpetLib {
goto l8001;
// kernel
- l8000:
+ l8000:
dst[DSTIND3(id,jd,kd)] = interp0<T> (& src[SRCIND3(is,js,ks)]);
i = i+1;
id = id+1;
@@ -253,7 +253,7 @@ namespace CarpetLib {
goto l900;
// kernel
- l8001:
+ l8001:
dst[DSTIND3(id,jd,kd)] = interp1<T> (& src[SRCIND3(is-4,js,ks)], srcdi);
i = i+1;
id = id+1;
@@ -262,14 +262,14 @@ namespace CarpetLib {
goto l900;
// end i loop
- l900:
+ l900:
j = j+1;
jd = jd+1;
if (j < regjext) goto l801;
goto l90;
// begin i loop
- l801:
+ l801:
i = 0;
is = i0;
id = dstioff;
@@ -277,7 +277,7 @@ namespace CarpetLib {
goto l8011;
// kernel
- l8010:
+ l8010:
dst[DSTIND3(id,jd,kd)] = interp1<T> (& src[SRCIND3(is,js-4,ks)], srcdj);
i = i+1;
id = id+1;
@@ -285,7 +285,7 @@ namespace CarpetLib {
goto l901;
// kernel
- l8011:
+ l8011:
dst[DSTIND3(id,jd,kd)] =
interp2<T> (& src[SRCIND3(is-4,js-4,ks)], srcdi, srcdj);
i = i+1;
@@ -295,7 +295,7 @@ namespace CarpetLib {
goto l901;
// end i loop
- l901:
+ l901:
j = j+1;
jd = jd+1;
js = js+1;
@@ -303,14 +303,14 @@ namespace CarpetLib {
goto l90;
// end j loop
- l90:
+ l90:
k = k+1;
kd = kd+1;
if (k < regkext) goto l81;
goto l9;
// begin j loop
- l81:
+ l81:
j = 0;
js = j0;
jd = dstjoff;
@@ -318,7 +318,7 @@ namespace CarpetLib {
goto l811;
// begin i loop
- l810:
+ l810:
i = 0;
is = i0;
id = dstioff;
@@ -326,7 +326,7 @@ namespace CarpetLib {
goto l8101;
// kernel
- l8100:
+ l8100:
dst[DSTIND3(id,jd,kd)] = interp1<T> (& src[SRCIND3(is,js,ks-4)], srcdk);
i = i+1;
id = id+1;
@@ -334,7 +334,7 @@ namespace CarpetLib {
goto l910;
// kernel
- l8101:
+ l8101:
dst[DSTIND3(id,jd,kd)] =
interp2<T> (& src[SRCIND3(is-4,js,ks-4)], srcdi, srcdj);
i = i+1;
@@ -344,14 +344,14 @@ namespace CarpetLib {
goto l910;
// end i loop
- l910:
+ l910:
j = j+1;
jd = jd+1;
if (j < regjext) goto l811;
goto l91;
// begin i loop
- l811:
+ l811:
i = 0;
is = i0;
id = dstioff;
@@ -359,7 +359,7 @@ namespace CarpetLib {
goto l8111;
// kernel
- l8110:
+ l8110:
dst[DSTIND3(id,jd,kd)] =
interp2<T> (& src[SRCIND3(is,js-4,ks-4)], srcdj, srcdk);
i = i+1;
@@ -368,7 +368,7 @@ namespace CarpetLib {
goto l911;
// kernel
- l8111:
+ l8111:
{
dst[DSTIND3(id,jd,kd)] =
interp3<T> (& src[SRCIND3(is-4,js-4,ks-4)], srcdi, srcdj, srcdk);
@@ -380,7 +380,7 @@ namespace CarpetLib {
goto l911;
// end i loop
- l911:
+ l911:
j = j+1;
jd = jd+1;
js = js+1;
@@ -388,7 +388,7 @@ namespace CarpetLib {
goto l91;
// end j loop
- l91:
+ l91:
k = k+1;
kd = kd+1;
ks = ks+1;
@@ -396,7 +396,7 @@ namespace CarpetLib {
goto l9;
// end k loop
- l9:;
+ l9:;
}
diff --git a/Carpet/CarpetLib/src/prolongate_4d_o1_rf2.cc b/Carpet/CarpetLib/src/prolongate_4d_o1_rf2.cc
new file mode 100644
index 000000000..4c8022916
--- /dev/null
+++ b/Carpet/CarpetLib/src/prolongate_4d_o1_rf2.cc
@@ -0,0 +1,602 @@
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <cstdlib>
+
+#include <cctk.h>
+#include <cctk_Parameters.h>
+
+#include "operator_prototypes_4d.hh"
+#include "typeprops.hh"
+
+using namespace std;
+
+
+
+namespace CarpetLib {
+
+
+
+#define SRCIND4(i,j,k,l) \
+ index4 (i, j, k, l, \
+ srciext, srcjext, srckext, srclext)
+#define DSTIND4(i,j,k,l) \
+ index4 (i, j, k, l, \
+ dstiext, dstjext, dstkext, dstlext)
+
+
+
+  // Prolongate a 4D region from a coarse source array to a fine
+  // destination array, using first-order (multilinear) interpolation
+  // and a refinement factor of two.
+  //
+  //   src, srcext:  source array and its number of points per direction
+  //   dst, dstext:  destination array and its number of points per direction
+  //   srcbbox:      bounding box of the source array; its stride must be
+  //                 reffact2 times the destination stride
+  //   dstbbox:      bounding box of the destination array
+  //   regbbox:      region to fill; it has the destination stride and
+  //                 must be contained in dstbbox
+  //
+  // Inconsistent arguments are reported via CCTK_WARN at level 0.
+  //
+  // Fine points that coincide with a coarse point are copied; fine
+  // points that lie halfway between coarse points are the average of
+  // the neighbouring coarse points in each such direction.
+  template <typename T>
+  void
+  prolongate_4d_o1_rf2 (T const * restrict const src,
+                        ivect4 const & restrict srcext,
+                        T * restrict const dst,
+                        ivect4 const & restrict dstext,
+                        ibbox4 const & restrict srcbbox,
+                        ibbox4 const & restrict dstbbox,
+                        ibbox4 const & restrict regbbox)
+  {
+    typedef typename typeprops<T>::real RT;
+
+
+
+    if (any (srcbbox.stride() <= regbbox.stride() or
+             dstbbox.stride() != regbbox.stride()))
+    {
+      CCTK_WARN (0, "Internal error: strides disagree");
+    }
+
+    if (any (srcbbox.stride() != reffact2 * dstbbox.stride())) {
+      CCTK_WARN (0, "Internal error: source strides are not twice the destination strides");
+    }
+
+    // This could be handled, but is likely to point to an error
+    // elsewhere
+    if (regbbox.empty()) {
+      CCTK_WARN (0, "Internal error: region extent is empty");
+    }
+
+
+
+    // Region size and offsets, all measured in units of the (fine)
+    // region stride
+    ivect4 const regext = regbbox.shape() / regbbox.stride();
+    assert (all ((regbbox.lower() - srcbbox.lower()) % regbbox.stride() == 0));
+    ivect4 const srcoff = (regbbox.lower() - srcbbox.lower()) / regbbox.stride();
+    assert (all ((regbbox.lower() - dstbbox.lower()) % regbbox.stride() == 0));
+    ivect4 const dstoff = (regbbox.lower() - dstbbox.lower()) / regbbox.stride();
+
+
+
+    // Interpolation needs one extra coarse point on a side if the
+    // region does not begin/end on a coarse point there, or if it has
+    // more than one point in that direction
+    bvect4 const needoffsetlo = srcoff % reffact2 != 0 or regext > 1;
+    bvect4 const needoffsethi = (srcoff + regext - 1) % reffact2 != 0 or regext > 1;
+    ivect4 const offsetlo = either (needoffsetlo, 1, 0);
+    ivect4 const offsethi = either (needoffsethi, 1, 0);
+
+
+
+    if (not regbbox.expand(offsetlo, offsethi).is_contained_in(srcbbox) or
+        not regbbox                           .is_contained_in(dstbbox))
+    {
+      CCTK_WARN (0, "Internal error: region extent is not contained in array extent");
+    }
+
+    if (any (srcext != srcbbox.shape() / srcbbox.stride() or
+             dstext != dstbbox.shape() / dstbbox.stride()))
+    {
+      CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes");
+    }
+
+
+
+    // Scalar copies of the extents and offsets; the SRCIND4 and
+    // DSTIND4 macros refer to the src?ext and dst?ext names
+    size_t const srciext = srcext[0];
+    size_t const srcjext = srcext[1];
+    size_t const srckext = srcext[2];
+    size_t const srclext = srcext[3];
+
+    size_t const dstiext = dstext[0];
+    size_t const dstjext = dstext[1];
+    size_t const dstkext = dstext[2];
+    size_t const dstlext = dstext[3];
+
+    size_t const regiext = regext[0];
+    size_t const regjext = regext[1];
+    size_t const regkext = regext[2];
+    size_t const reglext = regext[3];
+
+    size_t const srcioff = srcoff[0];
+    size_t const srcjoff = srcoff[1];
+    size_t const srckoff = srcoff[2];
+    size_t const srcloff = srcoff[3];
+
+    size_t const dstioff = dstoff[0];
+    size_t const dstjoff = dstoff[1];
+    size_t const dstkoff = dstoff[2];
+    size_t const dstloff = dstoff[3];
+
+
+
+    // Parity of the first region point in each direction:
+    // 0: it coincides with a coarse point (copy),
+    // 1: it lies halfway between two coarse points (average)
+    size_t const fi = srcioff % 2;
+    size_t const fj = srcjoff % 2;
+    size_t const fk = srckoff % 2;
+    size_t const fl = srcloff % 2;
+
+    // Index of the first coarse point used in each direction
+    size_t const i0 = srcioff / 2;
+    size_t const j0 = srcjoff / 2;
+    size_t const k0 = srckoff / 2;
+    size_t const l0 = srcloff / 2;
+
+    RT const one = 1;
+
+    // Weights of the lower and upper coarse neighbour
+    RT const f1 = one/2;
+    RT const f2 = one/2;
+
+
+
+    // Loop over fine region
+    // Label scheme: l 8 fl fk fj fi
+    //
+    // In every direction the parity alternates from one fine point to
+    // the next.  After a parity-0 point only the fine index advances;
+    // after a parity-1 point the coarse index advances as well.
+
+    size_t is, js, ks, ls;
+    size_t id, jd, kd, ld;
+    size_t i, j, k, l;
+
+    // begin l loop
+    l = 0;
+    ls = l0;
+    ld = dstloff;
+    if (fl == 0) goto l80;
+    goto l81;
+
+    // begin k loop
+  l80:
+    k = 0;
+    ks = k0;
+    kd = dstkoff;
+    if (fk == 0) goto l800;
+    goto l801;
+
+    // begin j loop
+  l800:
+    j = 0;
+    js = j0;
+    jd = dstjoff;
+    if (fj == 0) goto l8000;
+    goto l8001;
+
+    // begin i loop
+  l8000:
+    i = 0;
+    is = i0;
+    id = dstioff;
+    if (fi == 0) goto l80000;
+    goto l80001;
+
+    // kernel
+  l80000:
+    dst[DSTIND4(id,jd,kd,ld)] =
+      + src[SRCIND4(is,js,ks,ls)];
+    i = i+1;
+    id = id+1;
+    if (i < regiext) goto l80001;
+    goto l9000;
+
+    // kernel
+  l80001:
+    dst[DSTIND4(id,jd,kd,ld)] =
+      + f1 * src[SRCIND4(is  ,js,ks,ls)]
+      + f2 * src[SRCIND4(is+1,js,ks,ls)];
+    i = i+1;
+    id = id+1;
+    is = is+1;
+    if (i < regiext) goto l80000;
+    goto l9000;
+
+    // end i loop
+  l9000:
+    j = j+1;
+    jd = jd+1;
+    if (j < regjext) goto l8001;
+    goto l900;
+
+    // begin i loop
+  l8001:
+    i = 0;
+    is = i0;
+    id = dstioff;
+    if (fi == 0) goto l80010;
+    goto l80011;
+
+    // kernel
+  l80010:
+    dst[DSTIND4(id,jd,kd,ld)] =
+      + f1 * src[SRCIND4(is,js  ,ks,ls)]
+      + f2 * src[SRCIND4(is,js+1,ks,ls)];
+    i = i+1;
+    id = id+1;
+    if (i < regiext) goto l80011;
+    goto l9001;
+
+    // kernel
+  l80011:
+    dst[DSTIND4(id,jd,kd,ld)] =
+      + f1*f1 * src[SRCIND4(is  ,js  ,ks,ls)]
+      + f2*f1 * src[SRCIND4(is+1,js  ,ks,ls)]
+      + f1*f2 * src[SRCIND4(is  ,js+1,ks,ls)]
+      + f2*f2 * src[SRCIND4(is+1,js+1,ks,ls)];
+    i = i+1;
+    id = id+1;
+    is = is+1;
+    if (i < regiext) goto l80010;
+    goto l9001;
+
+    // end i loop
+  l9001:
+    j = j+1;
+    jd = jd+1;
+    js = js+1;
+    if (j < regjext) goto l8000;
+    goto l900;
+
+    // end j loop
+    // (parity-0 plane finished: ks stays, the next plane has parity 1)
+  l900:
+    k = k+1;
+    kd = kd+1;
+    if (k < regkext) goto l801;
+    goto l90;
+
+    // begin j loop
+  l801:
+    j = 0;
+    js = j0;
+    jd = dstjoff;
+    if (fj == 0) goto l8010;
+    goto l8011;
+
+    // begin i loop
+  l8010:
+    i = 0;
+    is = i0;
+    id = dstioff;
+    if (fi == 0) goto l80100;
+    goto l80101;
+
+    // kernel
+  l80100:
+    dst[DSTIND4(id,jd,kd,ld)] =
+      + f1 * src[SRCIND4(is,js,ks  ,ls)]
+      + f2 * src[SRCIND4(is,js,ks+1,ls)];
+    i = i+1;
+    id = id+1;
+    if (i < regiext) goto l80101;
+    goto l9010;
+
+    // kernel
+  l80101:
+    dst[DSTIND4(id,jd,kd,ld)] =
+      + f1*f1 * src[SRCIND4(is  ,js,ks  ,ls)]
+      + f2*f1 * src[SRCIND4(is+1,js,ks  ,ls)]
+      + f1*f2 * src[SRCIND4(is  ,js,ks+1,ls)]
+      + f2*f2 * src[SRCIND4(is+1,js,ks+1,ls)];
+    i = i+1;
+    id = id+1;
+    is = is+1;
+    if (i < regiext) goto l80100;
+    goto l9010;
+
+    // end i loop
+  l9010:
+    j = j+1;
+    jd = jd+1;
+    if (j < regjext) goto l8011;
+    goto l901;
+
+    // begin i loop
+  l8011:
+    i = 0;
+    is = i0;
+    id = dstioff;
+    if (fi == 0) goto l80110;
+    goto l80111;
+
+    // kernel
+  l80110:
+    dst[DSTIND4(id,jd,kd,ld)] =
+      + f1*f1 * src[SRCIND4(is,js  ,ks  ,ls)]
+      + f2*f1 * src[SRCIND4(is,js+1,ks  ,ls)]
+      + f1*f2 * src[SRCIND4(is,js  ,ks+1,ls)]
+      + f2*f2 * src[SRCIND4(is,js+1,ks+1,ls)];
+    i = i+1;
+    id = id+1;
+    if (i < regiext) goto l80111;
+    goto l9011;
+
+    // kernel
+  l80111:
+    dst[DSTIND4(id,jd,kd,ld)] =
+      + f1*f1*f1 * src[SRCIND4(is  ,js  ,ks  ,ls)]
+      + f2*f1*f1 * src[SRCIND4(is+1,js  ,ks  ,ls)]
+      + f1*f2*f1 * src[SRCIND4(is  ,js+1,ks  ,ls)]
+      + f2*f2*f1 * src[SRCIND4(is+1,js+1,ks  ,ls)]
+      + f1*f1*f2 * src[SRCIND4(is  ,js  ,ks+1,ls)]
+      + f2*f1*f2 * src[SRCIND4(is+1,js  ,ks+1,ls)]
+      + f1*f2*f2 * src[SRCIND4(is  ,js+1,ks+1,ls)]
+      + f2*f2*f2 * src[SRCIND4(is+1,js+1,ks+1,ls)];
+    i = i+1;
+    id = id+1;
+    is = is+1;
+    if (i < regiext) goto l80110;
+    goto l9011;
+
+    // end i loop
+  l9011:
+    j = j+1;
+    jd = jd+1;
+    js = js+1;
+    if (j < regjext) goto l8010;
+    goto l901;
+
+    // end j loop
+    // (parity-1 plane finished: advance ks, the next plane has parity 0)
+  l901:
+    k = k+1;
+    kd = kd+1;
+    ks = ks+1;
+    if (k < regkext) goto l800;
+    goto l90;
+
+    // end k loop
+    // (parity-0 slab finished: ls stays, the next slab has parity 1)
+  l90:
+    l = l+1;
+    ld = ld+1;
+    if (l < reglext) goto l81;
+    goto l9;
+
+    // begin k loop
+  l81:
+    k = 0;
+    ks = k0;
+    kd = dstkoff;
+    if (fk == 0) goto l810;
+    goto l811;
+
+    // begin j loop
+  l810:
+    j = 0;
+    js = j0;
+    jd = dstjoff;
+    if (fj == 0) goto l8100;
+    goto l8101;
+
+    // begin i loop
+  l8100:
+    i = 0;
+    is = i0;
+    id = dstioff;
+    if (fi == 0) goto l81000;
+    goto l81001;
+
+    // kernel
+  l81000:
+    dst[DSTIND4(id,jd,kd,ld)] =
+      + f1 * src[SRCIND4(is,js,ks,ls  )]
+      + f2 * src[SRCIND4(is,js,ks,ls+1)];
+    i = i+1;
+    id = id+1;
+    if (i < regiext) goto l81001;
+    goto l9100;
+
+    // kernel
+  l81001:
+    dst[DSTIND4(id,jd,kd,ld)] =
+      + f1*f1 * src[SRCIND4(is  ,js,ks,ls  )]
+      + f2*f1 * src[SRCIND4(is+1,js,ks,ls  )]
+      + f1*f2 * src[SRCIND4(is  ,js,ks,ls+1)]
+      + f2*f2 * src[SRCIND4(is+1,js,ks,ls+1)];
+    i = i+1;
+    id = id+1;
+    is = is+1;
+    if (i < regiext) goto l81000;
+    goto l9100;
+
+    // end i loop
+  l9100:
+    j = j+1;
+    jd = jd+1;
+    if (j < regjext) goto l8101;
+    goto l910;
+
+    // begin i loop
+  l8101:
+    i = 0;
+    is = i0;
+    id = dstioff;
+    if (fi == 0) goto l81010;
+    goto l81011;
+
+    // kernel
+  l81010:
+    dst[DSTIND4(id,jd,kd,ld)] =
+      + f1*f1 * src[SRCIND4(is,js  ,ks,ls  )]
+      + f2*f1 * src[SRCIND4(is,js+1,ks,ls  )]
+      + f1*f2 * src[SRCIND4(is,js  ,ks,ls+1)]
+      + f2*f2 * src[SRCIND4(is,js+1,ks,ls+1)];
+    i = i+1;
+    id = id+1;
+    if (i < regiext) goto l81011;
+    goto l9101;
+
+    // kernel
+  l81011:
+    dst[DSTIND4(id,jd,kd,ld)] =
+      + f1*f1*f1 * src[SRCIND4(is  ,js  ,ks,ls  )]
+      + f2*f1*f1 * src[SRCIND4(is+1,js  ,ks,ls  )]
+      + f1*f2*f1 * src[SRCIND4(is  ,js+1,ks,ls  )]
+      + f2*f2*f1 * src[SRCIND4(is+1,js+1,ks,ls  )]
+      + f1*f1*f2 * src[SRCIND4(is  ,js  ,ks,ls+1)]
+      + f2*f1*f2 * src[SRCIND4(is+1,js  ,ks,ls+1)]
+      + f1*f2*f2 * src[SRCIND4(is  ,js+1,ks,ls+1)]
+      + f2*f2*f2 * src[SRCIND4(is+1,js+1,ks,ls+1)];
+    i = i+1;
+    id = id+1;
+    is = is+1;
+    if (i < regiext) goto l81010;
+    goto l9101;
+
+    // end i loop
+  l9101:
+    j = j+1;
+    jd = jd+1;
+    js = js+1;
+    if (j < regjext) goto l8100;
+    goto l910;
+
+    // end j loop
+    // (parity-0 plane finished: ks stays, the next plane has parity 1)
+  l910:
+    k = k+1;
+    kd = kd+1;
+    if (k < regkext) goto l811;
+    goto l91;
+
+    // begin j loop
+  l811:
+    j = 0;
+    js = j0;
+    jd = dstjoff;
+    if (fj == 0) goto l8110;
+    goto l8111;
+
+    // begin i loop
+  l8110:
+    i = 0;
+    is = i0;
+    id = dstioff;
+    if (fi == 0) goto l81100;
+    goto l81101;
+
+    // kernel
+  l81100:
+    dst[DSTIND4(id,jd,kd,ld)] =
+      + f1*f1 * src[SRCIND4(is,js,ks  ,ls  )]
+      + f2*f1 * src[SRCIND4(is,js,ks+1,ls  )]
+      + f1*f2 * src[SRCIND4(is,js,ks  ,ls+1)]
+      + f2*f2 * src[SRCIND4(is,js,ks+1,ls+1)];
+    i = i+1;
+    id = id+1;
+    if (i < regiext) goto l81101;
+    goto l9110;
+
+    // kernel
+  l81101:
+    dst[DSTIND4(id,jd,kd,ld)] =
+      + f1*f1*f1 * src[SRCIND4(is  ,js,ks  ,ls  )]
+      + f2*f1*f1 * src[SRCIND4(is+1,js,ks  ,ls  )]
+      + f1*f2*f1 * src[SRCIND4(is  ,js,ks+1,ls  )]
+      + f2*f2*f1 * src[SRCIND4(is+1,js,ks+1,ls  )]
+      + f1*f1*f2 * src[SRCIND4(is  ,js,ks  ,ls+1)]
+      + f2*f1*f2 * src[SRCIND4(is+1,js,ks  ,ls+1)]
+      + f1*f2*f2 * src[SRCIND4(is  ,js,ks+1,ls+1)]
+      + f2*f2*f2 * src[SRCIND4(is+1,js,ks+1,ls+1)];
+    i = i+1;
+    id = id+1;
+    is = is+1;
+    if (i < regiext) goto l81100;
+    goto l9110;
+
+    // end i loop
+  l9110:
+    j = j+1;
+    jd = jd+1;
+    if (j < regjext) goto l8111;
+    goto l911;
+
+    // begin i loop
+  l8111:
+    i = 0;
+    is = i0;
+    id = dstioff;
+    if (fi == 0) goto l81110;
+    goto l81111;
+
+    // kernel
+    // (interpolates in j, k, and l only, hence three weight factors)
+  l81110:
+    dst[DSTIND4(id,jd,kd,ld)] =
+      + f1*f1*f1 * src[SRCIND4(is,js  ,ks  ,ls  )]
+      + f2*f1*f1 * src[SRCIND4(is,js+1,ks  ,ls  )]
+      + f1*f2*f1 * src[SRCIND4(is,js  ,ks+1,ls  )]
+      + f2*f2*f1 * src[SRCIND4(is,js+1,ks+1,ls  )]
+      + f1*f1*f2 * src[SRCIND4(is,js  ,ks  ,ls+1)]
+      + f2*f1*f2 * src[SRCIND4(is,js+1,ks  ,ls+1)]
+      + f1*f2*f2 * src[SRCIND4(is,js  ,ks+1,ls+1)]
+      + f2*f2*f2 * src[SRCIND4(is,js+1,ks+1,ls+1)];
+    i = i+1;
+    id = id+1;
+    if (i < regiext) goto l81111;
+    goto l9111;
+
+    // kernel
+  l81111:
+    dst[DSTIND4(id,jd,kd,ld)] =
+      + f1*f1*f1*f1 * src[SRCIND4(is  ,js  ,ks  ,ls  )]
+      + f2*f1*f1*f1 * src[SRCIND4(is+1,js  ,ks  ,ls  )]
+      + f1*f2*f1*f1 * src[SRCIND4(is  ,js+1,ks  ,ls  )]
+      + f2*f2*f1*f1 * src[SRCIND4(is+1,js+1,ks  ,ls  )]
+      + f1*f1*f2*f1 * src[SRCIND4(is  ,js  ,ks+1,ls  )]
+      + f2*f1*f2*f1 * src[SRCIND4(is+1,js  ,ks+1,ls  )]
+      + f1*f2*f2*f1 * src[SRCIND4(is  ,js+1,ks+1,ls  )]
+      + f2*f2*f2*f1 * src[SRCIND4(is+1,js+1,ks+1,ls  )]
+      + f1*f1*f1*f2 * src[SRCIND4(is  ,js  ,ks  ,ls+1)]
+      + f2*f1*f1*f2 * src[SRCIND4(is+1,js  ,ks  ,ls+1)]
+      + f1*f2*f1*f2 * src[SRCIND4(is  ,js+1,ks  ,ls+1)]
+      + f2*f2*f1*f2 * src[SRCIND4(is+1,js+1,ks  ,ls+1)]
+      + f1*f1*f2*f2 * src[SRCIND4(is  ,js  ,ks+1,ls+1)]
+      + f2*f1*f2*f2 * src[SRCIND4(is+1,js  ,ks+1,ls+1)]
+      + f1*f2*f2*f2 * src[SRCIND4(is  ,js+1,ks+1,ls+1)]
+      + f2*f2*f2*f2 * src[SRCIND4(is+1,js+1,ks+1,ls+1)];
+    i = i+1;
+    id = id+1;
+    is = is+1;
+    if (i < regiext) goto l81110;
+    goto l9111;
+
+    // end i loop
+  l9111:
+    j = j+1;
+    jd = jd+1;
+    js = js+1;
+    if (j < regjext) goto l8110;
+    goto l911;
+
+    // end j loop
+    // (parity-1 plane finished: advance ks, the next plane has parity 0)
+  l911:
+    k = k+1;
+    kd = kd+1;
+    ks = ks+1;
+    if (k < regkext) goto l810;
+    goto l91;
+
+    // end k loop
+    // (parity-1 slab finished: advance ls, the next slab has parity 0)
+  l91:
+    l = l+1;
+    ld = ld+1;
+    ls = ls+1;
+    if (l < reglext) goto l80;
+    goto l9;
+
+    // end l loop
+  l9:;
+
+  }
+
+
+
+#define INSTANTIATE(T) \
+ template \
+ void \
+ prolongate_4d_o1_rf2 (T const * restrict const src, \
+ ivect4 const & restrict srcext, \
+ T * restrict const dst, \
+ ivect4 const & restrict dstext, \
+ ibbox4 const & restrict srcbbox, \
+ ibbox4 const & restrict dstbbox, \
+ ibbox4 const & restrict regbbox);
+#include "instantiate"
+#undef INSTANTIATE
+
+
+
+} // CarpetLib
diff --git a/Carpet/CarpetLib/src/region.cc b/Carpet/CarpetLib/src/region.cc
index 0230d373d..860d24c6b 100644
--- a/Carpet/CarpetLib/src/region.cc
+++ b/Carpet/CarpetLib/src/region.cc
@@ -251,6 +251,67 @@ operator<< (ostream & os, region_t const & reg)
+// Create an MPI datatype for a pseudoregion.
+//
+// The datatype is built on the first call and cached in function-local
+// statics; every later call returns the cached handle. The argument
+// is unnamed and unused; it only selects this overload.
+MPI_Datatype
+mpi_datatype (pseudoregion_t const &)
+{
+ static bool initialised = false;
+ static MPI_Datatype newtype;
+ if (not initialised) {
+ // Sample object; only the addresses and sizes of its members are used
+ static pseudoregion_t s;
+#define ENTRY(type, name) \
+ { \
+ sizeof s.name / sizeof(type), /* count elements */ \
+ (char*)&s.name - (char*)&s, /* offsetof doesn't work (why?) */ \
+ dist::mpi_datatype<type>(), /* find MPI datatype */ \
+ STRINGIFY(name), /* field name */ \
+ STRINGIFY(type), /* type name */ \
+ }
+ // Both members are described as runs of int; the element count of
+ // each member is its size divided by sizeof(int)
+ dist::mpi_struct_descr_t const descr[] = {
+ ENTRY(int, extent),
+ ENTRY(int, component),
+ // Final entry at offset sizeof s, using MPI's upper-bound marker
+ {1, sizeof s, MPI_UB, "MPI_UB", "MPI_UB"}
+ };
+#undef ENTRY
+ newtype =
+ dist::create_mpi_datatype (sizeof descr / sizeof descr[0], descr,
+ "pseudoregion_t", sizeof s);
+ initialised = true;
+ }
+ return newtype;
+}
+
+// Create an MPI datatype for a sendrecv_pseudoregion.
+//
+// The datatype is built on the first call and cached in function-local
+// statics; every later call returns the cached handle. The argument
+// is unnamed and unused; it only selects this overload.
+MPI_Datatype
+mpi_datatype (sendrecv_pseudoregion_t const &)
+{
+ static bool initialised = false;
+ static MPI_Datatype newtype;
+ if (not initialised) {
+ // Sample object; only the addresses and sizes of its members are used
+ static sendrecv_pseudoregion_t s;
+#define ENTRY(type, name) \
+ { \
+ sizeof s.name / sizeof(type), /* count elements */ \
+ (char*)&s.name - (char*)&s, /* offsetof doesn't work (why?) */ \
+ dist::mpi_datatype<type>(), /* find MPI datatype */ \
+ STRINGIFY(name), /* field name */ \
+ STRINGIFY(type), /* type name */ \
+ }
+ // Each member is one pseudoregion_t, whose MPI datatype is looked up
+ // through dist::mpi_datatype<pseudoregion_t>()
+ dist::mpi_struct_descr_t const descr[] = {
+ ENTRY(pseudoregion_t, send),
+ ENTRY(pseudoregion_t, recv),
+ // Final entry at offset sizeof s, using MPI's upper-bound marker
+ {1, sizeof s, MPI_UB, "MPI_UB", "MPI_UB"}
+ };
+#undef ENTRY
+ newtype =
+ dist::create_mpi_datatype (sizeof descr / sizeof descr[0], descr,
+ "sendrecv_pseudoregion_t", sizeof s);
+ initialised = true;
+ }
+ return newtype;
+}
+
+
+
// Compare two pseudoregions for equality.
bool
operator== (pseudoregion_t const & a, pseudoregion_t const & b)
@@ -262,9 +323,45 @@ operator== (pseudoregion_t const & a, pseudoregion_t const & b)
+// Read a pseudoregion in the format "(ext:<extent>,c:<component>)",
+// i.e. the format written by the corresponding operator<<.
+// If an input_error is raised while parsing, a diagnostic is printed
+// and the same exception is passed on to the caller.
+istream & operator>> (istream & is, pseudoregion_t & p)
+{
+  try {
+    skipws (is);
+    consume (is, "(ext:");
+    is >> p.extent;
+    skipws (is);
+    consume (is, ",c:");
+    is >> p.component;
+    skipws (is);
+    consume (is, ")");
+  } catch (input_error &) {
+    cout << "Input error while reading a pseudoregion_t" << endl;
+    // Rethrow the original exception object; "throw err" would throw
+    // a copy with static type input_error
+    throw;
+  }
+  return is;
+}
+
+// Read a sendrecv_pseudoregion in the format "(send:<send>,recv:<recv>)".
+// If an input_error is raised while parsing, a diagnostic is printed
+// and the same exception is passed on to the caller.
+istream & operator>> (istream & is, sendrecv_pseudoregion_t & srp)
+{
+  try {
+    skipws (is);
+    consume (is, "(send:");
+    is >> srp.send;
+    consume (is, ",recv:");
+    is >> srp.recv;
+    consume (is, ")");
+  } catch (input_error &) {
+    cout << "Input error while reading a sendrecv_pseudoregion_t" << endl;
+    // Rethrow the original exception object; "throw err" would throw
+    // a copy with static type input_error
+    throw;
+  }
+  return is;
+}
+
+
+
ostream & operator<< (ostream & os, pseudoregion_t const & p)
{
- return os << p.extent << "/c:" << p.component;
+ return os << "(ext:" << p.extent << ",c:" << p.component << ")";
}
ostream & operator<< (ostream & os, sendrecv_pseudoregion_t const & srp)
diff --git a/Carpet/CarpetLib/src/region.hh b/Carpet/CarpetLib/src/region.hh
index 66037bdc7..ed14b024d 100644
--- a/Carpet/CarpetLib/src/region.hh
+++ b/Carpet/CarpetLib/src/region.hh
@@ -5,6 +5,7 @@
#include <vector>
#include "defs.hh"
+#include "dist.hh"
#include "bbox.hh"
#include "fulltree.hh"
#include "vect.hh"
@@ -24,12 +25,16 @@ struct region_t {
region_t & operator= (region_t const & a);
~region_t ();
- bool invariant () const;
+ bool invariant () const CCTK_ATTRIBUTE_PURE;
};
-bool operator== (region_t const & a, region_t const & b);
+bool operator== (region_t const & a, region_t const & b)
+ CCTK_ATTRIBUTE_PURE;
+inline
+bool operator!= (region_t const & a, region_t const & b)
+ CCTK_ATTRIBUTE_PURE;
inline
bool operator!= (region_t const & a, region_t const & b)
{
@@ -44,7 +49,7 @@ combine_regions (vector<region_t> const & oldregs,
-size_t memoryof (region_t const & reg);
+size_t memoryof (region_t const & reg) CCTK_ATTRIBUTE_PURE;
istream & operator>> (istream & is, region_t & reg);
ostream & operator<< (ostream & os, region_t const & reg);
@@ -61,19 +66,37 @@ struct pseudoregion_t {
pseudoregion_t ()
{
}
+ pseudoregion_t (pseudoregion_t const & p)
+ : extent (p.extent), component (p.component)
+ {
+ }
pseudoregion_t (ibbox const & extent_, int const component_)
: extent (extent_), component (component_)
{
}
};
-bool operator== (pseudoregion_t const & a, pseudoregion_t const & b);
+MPI_Datatype mpi_datatype (pseudoregion_t const &)
+ CCTK_ATTRIBUTE_CONST;
+namespace dist {
+ template<> inline MPI_Datatype mpi_datatype<pseudoregion_t> ()
+ CCTK_ATTRIBUTE_CONST;
+ template<> inline MPI_Datatype mpi_datatype<pseudoregion_t> ()
+ { pseudoregion_t dummy; return mpi_datatype(dummy); }
+}
+
+bool operator== (pseudoregion_t const & a, pseudoregion_t const & b)
+ CCTK_ATTRIBUTE_PURE;
+inline
+bool operator!= (pseudoregion_t const & a, pseudoregion_t const & b)
+ CCTK_ATTRIBUTE_PURE;
inline
bool operator!= (pseudoregion_t const & a, pseudoregion_t const & b)
{
return not (a == b);
}
+inline size_t memoryof (pseudoregion_t const & p) CCTK_ATTRIBUTE_PURE;
inline size_t memoryof (pseudoregion_t const & p)
{
return
@@ -81,6 +104,7 @@ inline size_t memoryof (pseudoregion_t const & p)
memoryof (p.component);
}
+istream & operator>> (istream & is, pseudoregion_t & p);
ostream & operator<< (ostream & os, pseudoregion_t const & p);
@@ -90,19 +114,35 @@ struct sendrecv_pseudoregion_t {
sendrecv_pseudoregion_t ()
{
}
+ sendrecv_pseudoregion_t (sendrecv_pseudoregion_t const & srp)
+ : send (srp.send), recv (srp.recv)
+ {
+ }
sendrecv_pseudoregion_t (ibbox const & send_extent, int const send_component,
- ibbox const & recv_extent, int const recv_component)
+ ibbox const & recv_extent, int const recv_component)
: send (pseudoregion_t (send_extent, send_component)),
recv (pseudoregion_t (recv_extent, recv_component))
{
}
};
+MPI_Datatype mpi_datatype (sendrecv_pseudoregion_t const &)
+ CCTK_ATTRIBUTE_CONST;
+namespace dist {
+ template<> inline MPI_Datatype mpi_datatype<sendrecv_pseudoregion_t> ()
+ CCTK_ATTRIBUTE_CONST;
+ template<> inline MPI_Datatype mpi_datatype<sendrecv_pseudoregion_t> ()
+ { sendrecv_pseudoregion_t dummy; return mpi_datatype(dummy); }
+}
+
+inline size_t memoryof (sendrecv_pseudoregion_t const & srp)
+ CCTK_ATTRIBUTE_PURE;
inline size_t memoryof (sendrecv_pseudoregion_t const & srp)
{
return memoryof (srp.send) + memoryof (srp.recv);
}
+istream & operator>> (istream & os, sendrecv_pseudoregion_t & srp);
ostream & operator<< (ostream & os, sendrecv_pseudoregion_t const & srp);
diff --git a/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc
index 7eebd36b0..c2f89ae4d 100644
--- a/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc
+++ b/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc
@@ -5,7 +5,7 @@
#include <cctk.h>
#include <cctk_Parameters.h>
-#include "operator_prototypes.hh"
+#include "operator_prototypes_3d.hh"
#include "typeprops.hh"
using namespace std;
diff --git a/Carpet/CarpetLib/src/restrict_3d_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_rf2.cc
index 26031f304..1e0cc6ec4 100644
--- a/Carpet/CarpetLib/src/restrict_3d_rf2.cc
+++ b/Carpet/CarpetLib/src/restrict_3d_rf2.cc
@@ -7,7 +7,7 @@
#include <cctk.h>
#include <cctk_Parameters.h>
-#include "operator_prototypes.hh"
+#include "operator_prototypes_3d.hh"
#include "typeprops.hh"
using namespace std;
diff --git a/Carpet/CarpetLib/src/restrict_4d_rf2.cc b/Carpet/CarpetLib/src/restrict_4d_rf2.cc
new file mode 100644
index 000000000..77bf2d28b
--- /dev/null
+++ b/Carpet/CarpetLib/src/restrict_4d_rf2.cc
@@ -0,0 +1,141 @@
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <cstdlib>
+#include <iostream>
+
+#include <cctk.h>
+#include <cctk_Parameters.h>
+
+#include "operator_prototypes_4d.hh"
+#include "typeprops.hh"
+
+using namespace std;
+
+
+
+namespace CarpetLib {
+
+
+
+#define SRCIND4(i,j,k,l) \
+ index4 (srcioff + (i), srcjoff + (j), srckoff + (k), srcloff + (l), \
+ srciext, srcjext, srckext, srclext)
+#define DSTIND4(i,j,k,l) \
+ index4 (dstioff + (i), dstjoff + (j), dstkoff + (k), dstloff + (l), \
+ dstiext, dstjext, dstkext, dstlext)
+
+
+
+ // Restrict a 4D region from a fine source array to a coarse
+ // destination array with a refinement factor of two, by copying
+ // every second source point in each direction (no averaging).
+ //
+ // src, srcext: source array and its number of points per direction
+ // dst, dstext: destination array and its number of points per direction
+ // srcbbox: bounding box of the source array
+ // dstbbox: bounding box of the destination array; its stride must be
+ // reffact2 times the source stride
+ // regbbox: region to fill; it has the destination stride and must be
+ // contained in both srcbbox and dstbbox
+ //
+ // Inconsistent arguments are reported via CCTK_WARN at level 0.
+ template <typename T>
+ void
+ restrict_4d_rf2 (T const * restrict const src,
+ ivect4 const & restrict srcext,
+ T * restrict const dst,
+ ivect4 const & restrict dstext,
+ ibbox4 const & restrict srcbbox,
+ ibbox4 const & restrict dstbbox,
+ ibbox4 const & restrict regbbox)
+ {
+ // The source must be finer than the region, and the region must
+ // have the destination's stride
+ if (any (srcbbox.stride() >= regbbox.stride() or
+ dstbbox.stride() != regbbox.stride()))
+ {
+ CCTK_WARN (0, "Internal error: strides disagree");
+ }
+
+ if (any (reffact2 * srcbbox.stride() != dstbbox.stride())) {
+ CCTK_WARN (0, "Internal error: destination strides are not twice the source strides");
+ }
+
+ // This could be handled, but is likely to point to an error
+ // elsewhere
+ if (regbbox.empty()) {
+ CCTK_WARN (0, "Internal error: region extent is empty");
+ }
+
+ if (not regbbox.is_contained_in(srcbbox) or
+ not regbbox.is_contained_in(dstbbox))
+ {
+ // Print the offending boxes before reporting the error
+ cerr << "srcbbox: " << srcbbox << endl
+ << "dstbbox: " << dstbbox << endl
+ << "regbbox: " << regbbox << endl;
+ CCTK_WARN (0, "Internal error: region extent is not contained in array extent");
+ }
+
+ if (any (srcext != srcbbox.shape() / srcbbox.stride() or
+ dstext != dstbbox.shape() / dstbbox.stride()))
+ {
+ CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes");
+ }
+
+
+
+ // Region size in units of the region stride; offsets of the region's
+ // lower corner within each array, in units of that array's own stride
+ ivect4 const regext = regbbox.shape() / regbbox.stride();
+ assert (all ((regbbox.lower() - srcbbox.lower()) % srcbbox.stride() == 0));
+ ivect4 const srcoff = (regbbox.lower() - srcbbox.lower()) / srcbbox.stride();
+ assert (all ((regbbox.lower() - dstbbox.lower()) % dstbbox.stride() == 0));
+ ivect4 const dstoff = (regbbox.lower() - dstbbox.lower()) / dstbbox.stride();
+
+
+
+ // Scalar copies of the extents and offsets; the SRCIND4 and DSTIND4
+ // macros refer to these names directly
+ ptrdiff_t const srciext = srcext[0];
+ ptrdiff_t const srcjext = srcext[1];
+ ptrdiff_t const srckext = srcext[2];
+ ptrdiff_t const srclext = srcext[3];
+
+ ptrdiff_t const dstiext = dstext[0];
+ ptrdiff_t const dstjext = dstext[1];
+ ptrdiff_t const dstkext = dstext[2];
+ ptrdiff_t const dstlext = dstext[3];
+
+ ptrdiff_t const regiext = regext[0];
+ ptrdiff_t const regjext = regext[1];
+ ptrdiff_t const regkext = regext[2];
+ ptrdiff_t const reglext = regext[3];
+
+ ptrdiff_t const srcioff = srcoff[0];
+ ptrdiff_t const srcjoff = srcoff[1];
+ ptrdiff_t const srckoff = srcoff[2];
+ ptrdiff_t const srcloff = srcoff[3];
+
+ ptrdiff_t const dstioff = dstoff[0];
+ ptrdiff_t const dstjoff = dstoff[1];
+ ptrdiff_t const dstkoff = dstoff[2];
+ ptrdiff_t const dstloff = dstoff[3];
+
+
+
+ // Loop over coarse region
+ // (the OpenMP pragma applies to the outermost, l, loop)
+#pragma omp parallel for
+ for (int l=0; l<reglext; ++l) {
+ for (int k=0; k<regkext; ++k) {
+ for (int j=0; j<regjext; ++j) {
+ for (int i=0; i<regiext; ++i) {
+
+ // Coarse point (i,j,k,l) coincides with fine point (2i,2j,2k,2l)
+ dst [DSTIND4(i, j, k, l)] = src [SRCIND4(2*i, 2*j, 2*k, 2*l)];
+
+ }
+ }
+ }
+ }
+
+ }
+
+
+
+#define INSTANTIATE(T) \
+ template \
+ void \
+ restrict_4d_rf2 (T const * restrict const src, \
+ ivect4 const & restrict srcext, \
+ T * restrict const dst, \
+ ivect4 const & restrict dstext, \
+ ibbox4 const & restrict srcbbox, \
+ ibbox4 const & restrict dstbbox, \
+ ibbox4 const & restrict regbbox);
+#include "instantiate"
+#undef INSTANTIATE
+
+
+
+} // namespace CarpetLib
diff --git a/Carpet/CarpetLib/src/startup_time.cc b/Carpet/CarpetLib/src/startup_time.cc
new file mode 100644
index 000000000..f361345fa
--- /dev/null
+++ b/Carpet/CarpetLib/src/startup_time.cc
@@ -0,0 +1,82 @@
+#include <cstdio>
+#include <cstdlib>
+#include <string>
+
+#include <cctk.h>
+
+// IRIX wants this before <time.h>
+#if HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+
+#if TIME_WITH_SYS_TIME
+# include <sys/time.h>
+# include <time.h>
+#else
+# if HAVE_SYS_TIME_H
+# include <sys/time.h>
+# elif HAVE_TIME_H
+# include <time.h>
+# endif
+#endif
+
+#if HAVE_UNISTD_H
+# include <unistd.h>
+#endif
+
+#include "startup_time.hh"
+
+
+
+namespace CarpetLib {
+
+ using namespace std;
+
+
+
+ // Return the current wall time in seconds, as reported by
+ // gettimeofday (seconds plus microseconds / 1e6).
+ // NOTE: if HAVE_TIME_GETTIMEOFDAY is not defined, this returns 0.0
+ // instead of a real time.
+ static
+ double
+ get_walltime ()
+ {
+#ifdef HAVE_TIME_GETTIMEOFDAY
+ // get the current time
+ struct timeval tv;
+ gettimeofday (& tv, 0);
+ return tv.tv_sec + tv.tv_usec / 1.0e6;
+#else
+ return 0.0;
+#endif
+ }
+
+
+
+ // Report how long the process took to start up: the difference
+ // between the current wall time and the start time given by the
+ // environment variable CACTUS_STARTTIME (a number of seconds, as
+ // produced by "date +%s"). If the variable is unset or cannot be
+ // parsed as a number, a warning is issued and nothing is reported.
+ void
+ output_startup_time ()
+ {
+ char * const cactus_starttime = getenv ("CACTUS_STARTTIME");
+ if (not cactus_starttime) {
+ CCTK_VWarn (CCTK_WARN_PICKY,
+ __LINE__, __FILE__, CCTK_THORNSTRING,
+ "Could not determine Cactus startup time (environment variable CACTUS_STARTTIME is not set; it should be set to the output of \"date +%%s\")");
+ return;
+ }
+
+ // Parse the start time; sscanf must convert exactly one value
+ double starttime;
+ int const iret = sscanf (cactus_starttime, "%lf", &starttime);
+ if (iret != 1) {
+ CCTK_VWarn (CCTK_WARN_COMPLAIN,
+ __LINE__, __FILE__, CCTK_THORNSTRING,
+ "Could not determine Cactus startup time (environment variable CACTUS_STARTTIME has illegal value \"%s\"; it should instead be set to the output of \"date +%%s\", which is a single number)",
+ cactus_starttime);
+ return;
+ }
+
+ // NOTE(review): get_walltime returns 0.0 when gettimeofday is not
+ // available, in which case the reported difference is meaningless
+ double const currenttime = get_walltime ();
+ double const startuptime = currenttime - starttime;
+
+ CCTK_VInfo (CCTK_THORNSTRING,
+ "Process startup time was %.3g seconds", startuptime);
+ }
+
+} // namespace CarpetLib
diff --git a/Carpet/CarpetLib/src/startup_time.hh b/Carpet/CarpetLib/src/startup_time.hh
new file mode 100644
index 000000000..f92b3df4b
--- /dev/null
+++ b/Carpet/CarpetLib/src/startup_time.hh
@@ -0,0 +1,6 @@
+namespace CarpetLib {
+
+ void
+ output_startup_time ();
+
+} // namespace CarpetLib
diff --git a/Carpet/CarpetLib/src/th.cc b/Carpet/CarpetLib/src/th.cc
index a10dcb289..dd441e455 100644
--- a/Carpet/CarpetLib/src/th.cc
+++ b/Carpet/CarpetLib/src/th.cc
@@ -14,6 +14,10 @@ using namespace std;
+list<th*> th::allth;
+
+
+
// Constructors
th::th (gh& h_, const vector<int> & reffacts_, const CCTK_REAL basedelta)
: h(h_), reffacts(reffacts_), delta(basedelta)
@@ -24,13 +28,15 @@ th::th (gh& h_, const vector<int> & reffacts_, const CCTK_REAL basedelta)
assert (reffacts.AT(n) >= reffacts.AT(n-1));
assert (reffacts.AT(n) % reffacts.AT(n-1) == 0);
}
- h.add(this);
+ allthi = allth.insert(allth.end(), this);
+ gh_handle = h.add(this);
}
// Destructors
th::~th ()
{
- h.remove(this);
+ h.erase(gh_handle);
+ allth.erase(allthi);
}
// Modifiers
@@ -60,6 +66,10 @@ void th::regrid ()
}
}
+void th::regrid_free ()
+{
+}
+
// Memory usage
@@ -75,6 +85,19 @@ memory ()
memoryof (deltas);
}
+// Return the combined memory usage of all time hierarchies: the
+// memoryof() of the static list allth itself plus the memoryof() of
+// every th object registered in it.
+size_t
+th::
+allmemory ()
+{
+ size_t mem = memoryof(allth);
+ for (list<th*>::const_iterator
+ thi = allth.begin(); thi != allth.end(); ++ thi)
+ {
+ // *thi is a th*; dereference twice to reach the object
+ mem += memoryof(**thi);
+ }
+ return mem;
+}
+
// Output
diff --git a/Carpet/CarpetLib/src/th.hh b/Carpet/CarpetLib/src/th.hh
index 34e9af999..f1a7b72b7 100644
--- a/Carpet/CarpetLib/src/th.hh
+++ b/Carpet/CarpetLib/src/th.hh
@@ -25,10 +25,14 @@ ostream& operator<< (ostream& os, const th& t);
// The time hierarchy (information about the current time)
class th {
+ static list<th*> allth;
+ list<th*>::iterator allthi;
+
public: // should be readonly
// Fields
gh& h; // hierarchy
+ gh::th_handle gh_handle;
private:
@@ -48,9 +52,10 @@ public:
// Modifiers
void regrid ();
+ void regrid_free ();
// Time management
- CCTK_REAL get_time (const int rl, const int ml) const
+ CCTK_REAL get_time (const int rl, const int ml) const CCTK_ATTRIBUTE_PURE
{
assert (rl>=0 and rl<h.reflevels());
assert (ml>=0 and ml<h.mglevels());
@@ -69,7 +74,7 @@ public:
set_time(rl,ml, get_time(rl,ml) + get_delta(rl,ml));
}
- CCTK_REAL get_delta (const int rl, const int ml) const
+ CCTK_REAL get_delta (const int rl, const int ml) const CCTK_ATTRIBUTE_PURE
{
assert (rl>=0 and rl<h.reflevels());
assert (ml>=0 and ml<h.mglevels());
@@ -83,7 +88,7 @@ public:
deltas.AT(ml).AT(rl) = dt;
}
- CCTK_REAL time (const int tl, const int rl, const int ml) const
+ CCTK_REAL time (const int tl, const int rl, const int ml) const CCTK_ATTRIBUTE_PURE
{
assert (rl>=0 and rl<h.reflevels());
assert (ml>=0 and ml<h.mglevels());
@@ -91,12 +96,14 @@ public:
}
// Output
- size_t memory () const;
+ size_t memory () const CCTK_ATTRIBUTE_PURE;
+ static size_t allmemory () CCTK_ATTRIBUTE_PURE;
void output (ostream& os) const;
};
+inline size_t memoryof (th const & t) CCTK_ATTRIBUTE_PURE;
inline size_t memoryof (th const & t)
{
return t.memory ();
diff --git a/Carpet/CarpetLib/src/timestat.cc b/Carpet/CarpetLib/src/timestat.cc
index cbccfe51b..1115a6663 100644
--- a/Carpet/CarpetLib/src/timestat.cc
+++ b/Carpet/CarpetLib/src/timestat.cc
@@ -6,6 +6,7 @@
#include <fstream>
#include <iomanip>
#include <iostream>
+#include <limits>
#include <sstream>
#include <sys/time.h>
@@ -32,6 +33,7 @@ namespace CarpetLib {
static
bool have_cputick = false;
+ // CPU tick time in seconds
static
double cputick = 0.0;
@@ -266,11 +268,11 @@ namespace CarpetLib {
os << timername << ":"
<< " cnt: " << count
- << " time: sum: " << wtime
- << " avg: " << avg
- << " stddev: " << stddev
- << " min: " << wmin
- << " max: " << wmax
+ << " time: sum: " << cputick * wtime
+ << " avg: " << cputick * avg
+ << " stddev: " << cputick * stddev
+ << " min: " << cputick * wmin
+ << " max: " << cputick * wmax
<< " bytes: sum: " << bytes
<< " avg: " << bavg
<< " stddev: " << bstddev
@@ -324,11 +326,11 @@ namespace CarpetLib {
file << "Build ID: " << build_id << eol;
}
if (CCTK_IsFunctionAliased ("UniqueSimulationID")) {
- char const * const job_id =
+ char const * const sim_id =
static_cast <char const *> (UniqueSimulationID (cctkGH));
- file << "Simulation ID: " << job_id << eol;
+ file << "Simulation ID: " << sim_id << eol;
}
- file << "Running on " << dist::size() << " processors" << eol;
+ file << "Running with " << dist::size() << " processes and " << dist::total_num_threads() << " threads" << eol;
} // if do_print_info
file << "********************************************************************************" << eol
@@ -341,4 +343,161 @@ namespace CarpetLib {
}
+
+
+  // Accumulates statistics over the intervals measured between start()
+  // and stop(): sum, sum of squares, extrema and number of samples.
+  // Durations are stored as returned by elapsed(), i.e. in CPU ticks;
+  // cycleclock_get scales them by cputick to obtain seconds.
+  struct t_cycleclock {
+    double total;          // sum of all measured intervals
+    double total_squared;  // sum of the squares of all intervals
+    double min_total;      // shortest interval so far (0.0: none yet)
+    double max_total;      // longest interval so far
+    double count;          // number of stop() calls recorded
+    ticks last;            // tick counter value at the latest start()
+
+    t_cycleclock ()
+    {
+      reset();
+    }
+
+    ~t_cycleclock ()
+    {
+    }
+
+    // Begin a measurement by remembering the current tick counter.
+    void start ()
+    {
+      last = getticks();
+    }
+
+    // Finish a measurement and fold its duration into the statistics.
+    void stop ()
+    {
+      ticks const current = getticks();
+      double const difference = elapsed (current, last);
+      total += difference;
+      total_squared += pow (difference, 2);
+      // 0.0 doubles as the "no sample yet" marker set by reset()
+      min_total = min_total == 0.0 ? difference : min (min_total, difference);
+      // The running maximum has to be compared against max_total;
+      // comparing against min_total would always yield the latest
+      // interval and forget earlier, larger ones.
+      max_total = max (max_total, difference);
+      count += 1;
+    }
+
+    // Clear all statistics.  last is left untouched; start() sets it.
+    void reset ()
+    {
+      total = 0.0;
+      total_squared = 0.0;
+      min_total = 0.0; // numeric_limits<double>::max();
+      max_total = 0.0;
+      count = 0.0;
+      // last = 0.0;
+    }
+
+  };
+
+
+
+  // Clock callback: allocate the state for one timer; timernum is unused.
+  void * cycleclock_create (int const timernum)
+  {
+    t_cycleclock * const clock = new t_cycleclock;
+    return clock;
+  }
+
+  // Clock callback: release the state allocated by cycleclock_create.
+  // A null pointer is accepted and ignored.
+  void cycleclock_destroy (int const timernum, void * const data)
+  {
+    if (not data) return;
+    t_cycleclock * const clock = static_cast<t_cycleclock*> (data);
+    delete clock;
+  }
+
+  // Clock callback: forward to t_cycleclock::start().
+  void cycleclock_start (int const timernum, void * const data)
+  {
+    t_cycleclock * const clock = static_cast<t_cycleclock*> (data);
+    clock->start();
+  }
+
+  // Clock callback: forward to t_cycleclock::stop().
+  void cycleclock_stop (int const timernum, void * const data)
+  {
+    t_cycleclock * const clock = static_cast<t_cycleclock*> (data);
+    clock->stop();
+  }
+
+  // Clock callback: forward to t_cycleclock::reset().
+  void cycleclock_reset (int const timernum, void * const data)
+  {
+    t_cycleclock * const clock = static_cast<t_cycleclock*> (data);
+    clock->reset();
+  }
+
+  // Clock callback: report the statistics gathered by a t_cycleclock.
+  // Fills vals[0..4] with total, average, standard deviation, minimum
+  // and maximum.  For every entry val.d holds the raw accumulated
+  // value, seconds is that value scaled by cputick, and resolution is
+  // cputick.  timernum is unused.
+  // NOTE(review): assumes vals has room for the 5 entries announced
+  // via n_vals at registration -- confirm against the caller.
+  void cycleclock_get (int const timernum, void * const data_,
+                       cTimerVal * const vals)
+  {
+    t_cycleclock const & data = * static_cast<t_cycleclock const *> (data_);
+
+    // Total time
+    vals[0].type = val_double;
+    vals[0].heading = "cycle";
+    vals[0].units = "secs";
+    vals[0].val.d = data.total;
+    vals[0].seconds = cputick * vals[0].val.d;
+    vals[0].resolution = cputick;
+
+    // Average
+    vals[1].type = val_double;
+    vals[1].heading = "cycle[avg]";
+    vals[1].units = "secs";
+    vals[1].val.d = data.count == 0.0 ? 0.0 : data.total / data.count;
+    vals[1].seconds = cputick * vals[1].val.d;
+    vals[1].resolution = cputick;
+
+    // Standard deviation: sqrt(<x^2> - <x>^2), evaluated as
+    // sqrt(N * sum(x^2) - sum(x)^2) / N.  The division by N has to
+    // happen outside the square root; inside it the result would be
+    // too large by a factor of sqrt(N).  fabs absorbs a slightly
+    // negative argument caused by round-off.
+    vals[2].type = val_double;
+    vals[2].heading = "cycle[stddev]";
+    vals[2].units = "secs";
+    vals[2].val.d = (data.count == 0.0
+                     ? 0.0
+                     : sqrt (fabs (data.total_squared * data.count -
+                                   pow (data.total, 2))) / data.count);
+    vals[2].seconds = cputick * vals[2].val.d;
+    vals[2].resolution = cputick;
+
+    // Minimum
+    vals[3].type = val_double;
+    vals[3].heading = "cycle[min]";
+    vals[3].units = "secs";
+    vals[3].val.d = data.min_total;
+    vals[3].seconds = cputick * vals[3].val.d;
+    vals[3].resolution = cputick;
+
+    // Maximum
+    vals[4].type = val_double;
+    vals[4].heading = "cycle[max]";
+    vals[4].units = "secs";
+    vals[4].val.d = data.max_total;
+    vals[4].seconds = cputick * vals[4].val.d;
+    vals[4].resolution = cputick;
+  }
+
+  // Clock callback: overwrite the accumulated total with vals[0].val.d.
+  void cycleclock_set (int const timernum, void * const data_,
+                       cTimerVal * const vals)
+  {
+    t_cycleclock * const clock = static_cast<t_cycleclock * restrict> (data_);
+
+    // Only the total is taken over; every other statistic starts afresh
+    clock->reset(); // punt
+    clock->total = vals[0].val.d;
+  }
+
+  // Entry point with C linkage that registers the "cycle" clock
+  extern "C" int CarpetLib_registercycleclock (void);
+
+  int CarpetLib_registercycleclock (void)
+  {
+    // The clock callbacks scale by cputick, so determine it first
+    if (not have_cputick) {
+      calculate_cputick ();
+    }
+
+    // Callback table; n_vals matches the entries cycleclock_get fills in
+    cClockFuncs functions;
+    functions.n_vals  = 5;
+    functions.create  = cycleclock_create;
+    functions.destroy = cycleclock_destroy;
+    functions.start   = cycleclock_start;
+    functions.stop    = cycleclock_stop;
+    functions.reset   = cycleclock_reset;
+    functions.get     = cycleclock_get;
+    functions.set     = cycleclock_set;
+
+    CCTK_ClockRegister("cycle", &functions);
+
+    return 0;
+  }
+
} // namespace CarpetLib
diff --git a/Carpet/CarpetLib/src/vect.cc b/Carpet/CarpetLib/src/vect.cc
index 28eff14dc..e38ed745a 100644
--- a/Carpet/CarpetLib/src/vect.cc
+++ b/Carpet/CarpetLib/src/vect.cc
@@ -43,21 +43,42 @@ void vect<T,D>::output (ostream& os) const {
+// Specialise some constructors for lower dimensions
+// These functions are declared, but must not be used.
+
+template<> vect<int,0>::vect (const int& x, const int& y) { assert(0); }
+template<> vect<int,1>::vect (const int& x, const int& y) { assert(0); }
+template<> vect<int,3>::vect (const int& x, const int& y) { assert(0); }
+template<> vect<int,4>::vect (const int& x, const int& y) { assert(0); }
+
+template<> vect<int,0>::vect (const int& x, const int& y, const int& z) { assert(0); }
+template<> vect<int,1>::vect (const int& x, const int& y, const int& z) { assert(0); }
+template<> vect<int,2>::vect (const int& x, const int& y, const int& z) { assert(0); }
+template<> vect<int,4>::vect (const int& x, const int& y, const int& z) { assert(0); }
+
+template<> vect<int,0>::vect (const int& x, const int& y, const int& z, const int& t) { assert(0); }
+template<> vect<int,1>::vect (const int& x, const int& y, const int& z, const int& t) { assert(0); }
+template<> vect<int,2>::vect (const int& x, const int& y, const int& z, const int& t) { assert(0); }
+template<> vect<int,3>::vect (const int& x, const int& y, const int& z, const int& t) { assert(0); }
+
+
+
// Note: We need all dimensions all the time.
template class vect<int,0>;
template class vect<int,1>;
template class vect<int,2>;
template class vect<int,3>;
+template class vect<int,4>;
-template void vect<CCTK_REAL,3>::input (istream& is);
-template void vect<vect<bool,2>,3>::input (istream& is);
-template void vect<vect<bool,3>,2>::input (istream& is);
+template void vect<CCTK_REAL,dim>::input (istream& is);
+template void vect<vect<bool,2>,dim>::input (istream& is);
+template void vect<vect<bool,dim>,2>::input (istream& is);
template void vect<bool,2>::output (ostream& os) const;
-template void vect<bool,3>::output (ostream& os) const;
+template void vect<bool,dim>::output (ostream& os) const;
template void vect<CCTK_REAL,2>::output (ostream& os) const;
-template void vect<CCTK_REAL,3>::output (ostream& os) const;
-template void vect<vect<bool,2>,3>::output (ostream& os) const;
-template void vect<vect<int,2>,3>::output (ostream& os) const;
-template void vect<vect<bool,3>,2>::output (ostream& os) const;
-template void vect<vect<int,3>,2>::output (ostream& os) const;
+template void vect<CCTK_REAL,dim>::output (ostream& os) const;
+template void vect<vect<bool,2>,dim>::output (ostream& os) const;
+template void vect<vect<int,2>,dim>::output (ostream& os) const;
+template void vect<vect<bool,dim>,2>::output (ostream& os) const;
+template void vect<vect<int,dim>,2>::output (ostream& os) const;
diff --git a/Carpet/CarpetLib/src/vect.hh b/Carpet/CarpetLib/src/vect.hh
index 56898d970..03e62aba6 100644
--- a/Carpet/CarpetLib/src/vect.hh
+++ b/Carpet/CarpetLib/src/vect.hh
@@ -8,39 +8,17 @@
#include "cctk.h"
+#include "defs.hh"
#include "vect_helpers.hh"
using namespace std;
-#if 0
-
-// A pure function returns a value that depends only on the function
-// arguments and on global variables, and the function has no side
-// effects.
-#ifdef HAVE_CCTK_CXX_ATTRIBUTE_PURE
-# define PURE __attribute__((pure))
-#else
-# define PURE
-#endif
-
-// A const function returns a value that depends only on the function
-// arguments, and the function has no side effects. This is even more
-// strict than pure functions. Const functions cannot dereference
-// pointers or references (or this).
-#ifdef HAVE_CCTK_CXX_ATTRIBUTE_CONST
-# define CONST __attribute__((const))
+#ifdef CARPET_DEBUG
+# define ASSERT_VECT(x) assert(x)
#else
-# define CONST
-#endif
-
-#else
-
-// Don't take any risks
-# define PURE
-# define CONST
-
+# define ASSERT_VECT(x)
#endif
@@ -72,38 +50,44 @@ public:
// Constructors
/** Explicit empty constructor. */
- explicit vect () CONST { }
+ explicit vect () CCTK_MEMBER_ATTRIBUTE_CONST { }
/** Copy constructor. */
- vect (const vect& a) PURE {
+ vect (const vect& a) CCTK_MEMBER_ATTRIBUTE_PURE
+ {
for (int d=0; d<D; ++d) elt[d]=a.elt[d];
}
/** Constructor from a single element. This constructor might be
confusing, but it is very convenient. */
- vect (const T& x) PURE {
+ vect (const T& x) CCTK_MEMBER_ATTRIBUTE_PURE
+ {
for (int d=0; d<D; ++d) elt[d]=x;
}
/** Constructor for 2-element vectors from 2 elements. */
- vect (const T& x, const T& y) PURE {
- assert (D==2);
+ vect (const T& x, const T& y) CCTK_MEMBER_ATTRIBUTE_PURE
+ {
+ ASSERT_VECT (D==2);
// Note: this statement may give "index out of range" warnings.
// You can safely ignore these.
elt[0]=x; elt[1]=y;
}
/** Constructor for 3-element vectors from 3 elements. */
- vect (const T& x, const T& y, const T& z) PURE {
- assert (D==3);
+ vect (const T& x, const T& y, const T& z) CCTK_MEMBER_ATTRIBUTE_PURE
+ {
+ ASSERT_VECT (D==3);
// Note: this statement may give "index out of range" warnings.
// You can safely ignore these.
elt[0]=x; elt[1]=y; elt[2]=z;
}
/** Constructor for 4-element vectors from 4 elements. */
- vect (const T& x, const T& y, const T& z, const T& t) PURE {
- assert (D==4);
+ vect (const T& x, const T& y, const T& z, const T& t)
+ CCTK_MEMBER_ATTRIBUTE_PURE
+ {
+ ASSERT_VECT (D==4);
// Note: this statement may give "index out of range" warnings.
// You can safely ignore these.
elt[0]=x; elt[1]=y; elt[2]=z; elt[3]=t;
@@ -112,7 +96,8 @@ public:
#if 0
// This creates confusion
/** Constructor from a pointer, i.e., a C array. */
- explicit vect (const T* const x) PURE {
+ explicit vect (const T* const x) CCTK_MEMBER_ATTRIBUTE_PURE
+ {
for (int d=0; d<D; ++d) elt[d]=x[d];
}
#endif
@@ -120,7 +105,8 @@ public:
#if 0
// This leads to an ICE on AIX
template<int E>
- operator vect<vect<T,D>,E> () CONST {
+ operator vect<vect<T,D>,E> () CCTK_MEMBER_ATTRIBUTE_CONST
+ {
vect<vect<T,D>,E> r;
for (int e=0; e<E; ++e) r[e]=*this;
return r;
@@ -129,74 +115,88 @@ public:
/** Constructor from a vector with a different type. */
template<typename S>
- /*explicit*/ vect (const vect<S,D>& a) /*PURE*/ {
+ /*explicit*/ vect (const vect<S,D>& a) /*CCTK_MEMBER_ATTRIBUTE_PURE*/
+ {
for (int d=0; d<D; ++d) elt[d]=(T)a[d];
}
/** Create a new 0-element vector with a specific type. */
- static vect make () CONST {
- assert (D==0);
+ static vect make () CCTK_MEMBER_ATTRIBUTE_CONST
+ {
+ ASSERT_VECT (D==0);
return vect();
}
/** Create a new 1-element vector with a specific type. */
- static vect make (const T& x) PURE {
- assert (D==1);
+ static vect make (const T& x) CCTK_MEMBER_ATTRIBUTE_PURE
+ {
+ ASSERT_VECT (D==1);
return vect(x);
}
/** Create a new 2-element vector with a specific type. */
- static vect make (const T& x, const T& y) PURE {
- assert (D==2);
+ static vect make (const T& x, const T& y) CCTK_MEMBER_ATTRIBUTE_PURE
+ {
+ ASSERT_VECT (D==2);
return vect(x, y);
}
/** Create a new 3-element vector with a specific type. */
- static vect make (const T& x, const T& y, const T& z) PURE {
- assert (D==3);
+ static vect make (const T& x, const T& y, const T& z)
+ CCTK_MEMBER_ATTRIBUTE_PURE
+ {
+ ASSERT_VECT (D==3);
return vect(x, y, z);
}
/** Create a new 4-element vector with a specific type. */
- static vect make (const T& x, const T& y, const T& z, const T& t) PURE {
- assert (D==4);
+ static vect make (const T& x, const T& y, const T& z, const T& t)
+ CCTK_MEMBER_ATTRIBUTE_PURE
+ {
+ ASSERT_VECT (D==4);
return vect(x, y, z, t);
}
/** Treat a constant pointer as a reference to a constant vector. */
- static const vect& ref (const T* const x) PURE {
+ static const vect& ref (const T* const x) CCTK_MEMBER_ATTRIBUTE_PURE
+ {
return *(const vect*)x;
}
/** Treat a pointer as a reference to a vector. */
- static vect& ref (T* const x) PURE {
+ static vect& ref (T* const x) CCTK_MEMBER_ATTRIBUTE_PURE
+ {
return *(vect*)x;
}
/** Create a vector with one element set to 1 and all other elements
set to zero. */
- static vect dir (const int d) CONST {
+ static vect dir (const int d) CCTK_MEMBER_ATTRIBUTE_CONST
+ {
vect r=(T)0;
r[d]=1;
return r;
}
/** Create a vector with e[i] = i. */
- static vect seq () CONST {
+ static vect seq () CCTK_MEMBER_ATTRIBUTE_CONST
+ {
vect r;
for (int d=0; d<D; ++d) r[d]=d;
return r;
}
/** Create a vector with e[i] = n + i. */
- static vect seq (const int n) CONST {
+ static vect seq (const int n) CCTK_MEMBER_ATTRIBUTE_CONST
+ {
vect r;
for (int d=0; d<D; ++d) r[d]=n+d;
return r;
}
/** Create a vector with e[i] = n + s * i. */
- static vect seq (const int n, const int s) CONST {
+ static vect seq (const int n, const int s) CCTK_MEMBER_ATTRIBUTE_CONST
+ {
vect r;
for (int d=0; d<D; ++d) r[d]=n+s*d;
return r;
@@ -207,21 +207,24 @@ public:
/** Return a non-writable element of a vector. */
// (Don't return a reference; *this might be a temporary)
// Do return a reference, so that a vector can be accessed as array
- const T& operator[] (const int d) const PURE {
- assert(d>=0 && d<D);
+ const T& operator[] (const int d) const CCTK_MEMBER_ATTRIBUTE_PURE
+ {
+ ASSERT_VECT(d>=0 && d<D);
return elt[d];
}
/** Return a writable element of a vector as reference. */
- T& operator[] (const int d) PURE {
- assert(d>=0 && d<D);
+ T& operator[] (const int d) CCTK_MEMBER_ATTRIBUTE_PURE
+ {
+ ASSERT_VECT(d>=0 && d<D);
return elt[d];
}
#if 0
// This creates confusion
/** Return a pointer to a vector. */
- operator const T* () const PURE {
+ operator const T* () const CCTK_MEMBER_ATTRIBUTE_PURE
+ {
return this;
}
#endif
@@ -229,7 +232,9 @@ public:
/** Return a combination of the vector elements e[a[i]]. The
element combination is selected by another vector. */
template<typename TT, int DD>
- vect<T,DD> operator[] (const vect<TT,DD>& a) const /*PURE*/ {
+ vect<T,DD> operator[] (const vect<TT,DD>& a)
+ const /*CCTK_MEMBER_ATTRIBUTE_PURE*/
+ {
vect<T,DD> r;
// (*this)[] performs index checking
for (int d=0; d<DD; ++d) r[d] = (*this)[a[d]];
@@ -249,14 +254,16 @@ public:
// Non-modifying operators
/** Return a new vector where one element has been replaced. */
- vect replace (const int d, const T& x) const PURE {
- assert (d>=0 && d<D);
+ vect replace (const int d, const T& x) const CCTK_MEMBER_ATTRIBUTE_PURE
+ {
+ ASSERT_VECT (d>=0 && d<D);
vect r;
for (int dd=0; dd<D; ++dd) r[dd]=dd==d?x:elt[dd];
return r;
}
- vect reverse () const PURE {
+ vect reverse () const CCTK_MEMBER_ATTRIBUTE_PURE
+ {
vect r;
for (int d=0; d<D; ++d) r[d]=elt[D-1-d];
return r;
@@ -272,7 +279,9 @@ public:
elements set to either a[i] or b[i], depending on whether
(*this)[i] is true or not. */
template<typename TT>
- vect<TT,D> ifthen (const vect<TT,D>& a, const vect<TT,D>& b) const /*PURE*/ {
+ vect<TT,D> ifthen (const vect<TT,D>& a, const vect<TT,D>& b)
+ const /*CCTK_MEMBER_ATTRIBUTE_PURE*/
+ {
vect<TT,D> r;
for (int d=0; d<D; ++d) r[d]=elt[d]?a[d]:b[d];
return r;
@@ -286,15 +295,16 @@ public:
vect &vec;
int d;
public:
- iter (vect &a) PURE: vec(a), d(0) { }
- iter& operator++ () { assert(d<D); ++d; return *this; }
- bool operator bool () const PURE { return d==D; }
- T& operator* () PURE { return vec[d]; }
+ iter (vect &a) CCTK_MEMBER_ATTRIBUTE_PURE: vec(a), d(0) { }
+ iter& operator++ () { ASSERT_VECT(d<D); ++d; return *this; }
+ bool operator bool () const CCTK_MEMBER_ATTRIBUTE_PURE { return d==D; }
+ T& operator* () CCTK_MEMBER_ATTRIBUTE_PURE { return vec[d]; }
};
#endif
// Memory usage
- size_t memory () const { return D * memoryof (*elt); }
+ size_t memory () const CCTK_MEMBER_ATTRIBUTE_CONST
+ { return D * memoryof (*elt); }
// Input/Output helpers
void input (istream& is);
@@ -310,7 +320,8 @@ public:
true or not. */
template<typename S,typename T,int D>
inline vect<T,D> either (const vect<S,D>& a,
- const vect<T,D>& b, const vect<T,D>& c) PURE;
+ const vect<T,D>& b, const vect<T,D>& c)
+ CCTK_ATTRIBUTE_PURE;
template<typename S,typename T,int D>
inline vect<T,D> either (const vect<S,D>& a,
const vect<T,D>& b, const vect<T,D>& c)
@@ -322,7 +333,8 @@ inline vect<T,D> either (const vect<S,D>& a,
template<typename S,typename T,int D>
inline vect<T,D> either (const vect<S,D>& a,
- const T& b, const T& c) PURE;
+ const T& b, const T& c)
+ CCTK_ATTRIBUTE_PURE;
template<typename S,typename T,int D>
inline vect<T,D> either (const vect<S,D>& a,
const T& b, const T& c)
@@ -332,9 +344,11 @@ inline vect<T,D> either (const vect<S,D>& a,
/** Transpose a vector of a vector */
template<typename T, int D, int DD>
-inline vect<vect<T,D>,DD> xpose (vect<vect<T,DD>,D> const & a) PURE;
+inline vect<vect<T,D>,DD> xpose (vect<vect<T,DD>,D> const & a)
+ CCTK_ATTRIBUTE_PURE;
template<typename T, int D, int DD>
-inline vect<vect<T,D>,DD> xpose (vect<vect<T,DD>,D> const & a) {
+inline vect<vect<T,D>,DD> xpose (vect<vect<T,DD>,D> const & a)
+{
vect<vect<T,D>,DD> r;
for (int dd=0; dd<DD; ++dd) for (int d=0; d<D; ++d) r[dd][d] = a[d][dd];
return r;
@@ -342,9 +356,11 @@ inline vect<vect<T,D>,DD> xpose (vect<vect<T,DD>,D> const & a) {
/** Return the element-wise integer power of two vectors. */
template<typename T,int D>
-inline vect<T,D> ipow (const vect<T,D>& a, const vect<int,D>& b) PURE;
+inline vect<T,D> ipow (const vect<T,D>& a, const vect<int,D>& b)
+ CCTK_ATTRIBUTE_PURE;
template<typename T,int D>
-inline vect<T,D> ipow (const vect<T,D>& a, const vect<int,D>& b) {
+inline vect<T,D> ipow (const vect<T,D>& a, const vect<int,D>& b)
+{
vect<T,D> r;
for (int d=0; d<D; ++d) r[d]=ipow(a[d],b[d]);
return r;
@@ -398,26 +414,29 @@ DECLARE_REDUCTION_OPERATOR_2 (hypot,0,+=,*,sqrt)
/** Count the number of elements in the vector. */
template<typename T,int D>
-inline int count (const vect<T,D>& a) PURE;
+inline int count (const vect<T,D>& a) CCTK_ATTRIBUTE_PURE;
template<typename T,int D>
-inline int count (const vect<T,D>& a) {
+inline int count (const vect<T,D>& a)
+{
return D;
}
/** Return the size (number of elements) of the vector. */
template<typename T,int D>
-inline int size (const vect<T,D>& a) CONST;
+inline int size (const vect<T,D>& a) CCTK_ATTRIBUTE_CONST;
template<typename T,int D>
-inline int size (const vect<T,D>& a) {
+inline int size (const vect<T,D>& a)
+{
return D;
}
/** Return the index of the first maximum element. */
template<typename T,int D>
-inline int maxloc (const vect<T,D>& a) PURE;
+inline int maxloc (const vect<T,D>& a) CCTK_ATTRIBUTE_PURE;
template<typename T,int D>
-inline int maxloc (const vect<T,D>& a) {
- assert (D>0);
+inline int maxloc (const vect<T,D>& a)
+{
+ ASSERT_VECT (D>0);
int r(0);
for (int d=1; d<D; ++d) if (a[d]>a[r]) r=d;
return r;
@@ -425,10 +444,11 @@ inline int maxloc (const vect<T,D>& a) {
/** Return the index of the first minimum element. */
template<typename T,int D>
-inline int minloc (const vect<T,D>& a) PURE;
+inline int minloc (const vect<T,D>& a) CCTK_ATTRIBUTE_PURE;
template<typename T,int D>
-inline int minloc (const vect<T,D>& a) {
- assert (D>0);
+inline int minloc (const vect<T,D>& a)
+{
+ ASSERT_VECT (D>0);
int r(0);
for (int d=1; d<D; ++d) if (a[d]<a[r]) r=d;
return r;
@@ -436,15 +456,16 @@ inline int minloc (const vect<T,D>& a) {
/** Return the n-dimensional linear array index. */
template<typename T,int D>
-inline T index (const vect<T,D>& lsh, const vect<T,D>& ind) PURE;
+inline T index (const vect<T,D>& lsh, const vect<T,D>& ind) CCTK_ATTRIBUTE_PURE;
template<typename T,int D>
-inline T index (const vect<T,D>& lsh, const vect<T,D>& ind) {
+inline T index (const vect<T,D>& lsh, const vect<T,D>& ind)
+{
T r(0);
for (int d=D-1; d>=0; --d) {
- assert (lsh[d]>=0);
+ ASSERT_VECT (lsh[d]>=0);
// Be generous, and allow relative indices which may be negtive
- // assert (ind[d]>=0 and ind[d]<lsh[d]);
- assert (abs(ind[d])<=lsh[d]);
+ // ASSERT_VECT (ind[d]>=0 and ind[d]<lsh[d]);
+ ASSERT_VECT (abs(ind[d])<=lsh[d]);
r = r * lsh[d] + ind[d];
}
return r;
@@ -460,7 +481,11 @@ inline T index (const vect<T,D>& lsh, const vect<T,D>& ind) {
/** Return a new vector where the function func() has been applied to
all elements. */
template<typename T, typename U, int D>
-inline vect<U,D> map (U (* const func)(T x), const vect<T,D>& a) {
+inline vect<U,D> map (U (* const func)(T x), const vect<T,D>& a)
+ CCTK_ATTRIBUTE_PURE;
+template<typename T, typename U, int D>
+inline vect<U,D> map (U (* const func)(T x), const vect<T,D>& a)
+{
vect<U,D> r;
for (int d=0; d<D; ++d) r[d] = func(a[d]);
return r;
@@ -471,6 +496,10 @@ inline vect<U,D> map (U (* const func)(T x), const vect<T,D>& a) {
template<typename S, typename T, typename U, int D>
inline vect<U,D> zip (U (* const func)(S x, T y),
const vect<S,D>& a, const vect<T,D>& b)
+ CCTK_ATTRIBUTE_PURE;
+template<typename S, typename T, typename U, int D>
+inline vect<U,D> zip (U (* const func)(S x, T y),
+ const vect<S,D>& a, const vect<T,D>& b)
{
vect<U,D> r;
for (int d=0; d<D; ++d) r[d] = func(a[d], b[d]);
@@ -481,6 +510,9 @@ inline vect<U,D> zip (U (* const func)(S x, T y),
the vector a, starting with the scalar value val. */
template<typename T, typename U, int D>
inline U fold (U (* const func)(U val, T x), U val, const vect<T,D>& a)
+ CCTK_ATTRIBUTE_PURE;
+template<typename T, typename U, int D>
+inline U fold (U (* const func)(U val, T x), U val, const vect<T,D>& a)
{
for (int d=0; d<D; ++d) val = func(val, a[d]);
return val;
@@ -490,8 +522,11 @@ inline U fold (U (* const func)(U val, T x), U val, const vect<T,D>& a)
the vector a, starting with element 0. */
template<typename T, typename U, int D>
inline U fold1 (U (* const func)(U val, T x), const vect<T,D>& a)
+ CCTK_ATTRIBUTE_PURE;
+template<typename T, typename U, int D>
+inline U fold1 (U (* const func)(U val, T x), const vect<T,D>& a)
{
- assert (D>=1);
+ ASSERT_VECT (D>=1);
U val = a[0];
for (int d=1; d<D; ++d) val = func(val, a[d]);
return val;
@@ -502,6 +537,10 @@ inline U fold1 (U (* const func)(U val, T x), const vect<T,D>& a)
template<typename T, typename U, int D>
inline vect<U,D> scan0 (U (* const func)(U val, T x), U val,
const vect<T,D>& a)
+ CCTK_ATTRIBUTE_PURE;
+template<typename T, typename U, int D>
+inline vect<U,D> scan0 (U (* const func)(U val, T x), U val,
+ const vect<T,D>& a)
{
vect<U,D> r;
for (int d=0; d<D; ++d) {
@@ -516,6 +555,10 @@ inline vect<U,D> scan0 (U (* const func)(U val, T x), U val,
template<typename T, typename U, int D>
inline vect<U,D> scan1 (U (* const func)(U val, T x), U val,
const vect<T,D>& a)
+ CCTK_ATTRIBUTE_PURE;
+template<typename T, typename U, int D>
+inline vect<U,D> scan1 (U (* const func)(U val, T x), U val,
+ const vect<T,D>& a)
{
vect<U,D> r;
for (int d=0; d<D; ++d) {
@@ -531,7 +574,10 @@ inline vect<U,D> scan1 (U (* const func)(U val, T x), U val,
// Memory usage
template<typename T,int D>
-inline size_t memoryof (vect<T,D> const & a) { return a.memory(); }
+inline size_t memoryof (vect<T,D> const & a) CCTK_ATTRIBUTE_CONST;
+template<typename T,int D>
+inline size_t memoryof (vect<T,D> const & a)
+{ return a.memory(); }
@@ -562,23 +608,26 @@ inline ostream& operator<< (ostream& os, const vect<T,D>& a) {
/** Constructor for 2-element vectors from 2 elements. */
template<typename T>
-inline vect<T,2>::vect<T,2> (const T& x, const T& y) PURE;
+inline vect<T,2>::vect<T,2> (const T& x, const T& y) CCTK_ATTRIBUTE_PURE;
template<typename T>
-inline vect<T,2>::vect<T,2> (const T& x, const T& y) {
+inline vect<T,2>::vect<T,2> (const T& x, const T& y)
+{
elt[0]=x; elt[1]=y;
}
/** Constructor for 3-element vectors from 3 elements. */
-vect (const T& x, const T& y, const T& z) PURE;
-vect (const T& x, const T& y, const T& z) {
- assert (D==3);
+vect (const T& x, const T& y, const T& z) CCTK_ATTRIBUTE_PURE;
+vect (const T& x, const T& y, const T& z)
+{
+ ASSERT_VECT (D==3);
elt[0]=x; elt[1]=y; elt[2]=z;
}
/** Constructor for 4-element vectors from 4 elements. */
-vect (const T& x, const T& y, const T& z, const T& t) PURE;
-vect (const T& x, const T& y, const T& z, const T& t) {
- assert (D==4);
+vect (const T& x, const T& y, const T& z, const T& t) CCTK_ATTRIBUTE_PURE;
+vect (const T& x, const T& y, const T& z, const T& t)
+{
+ ASSERT_VECT (D==4);
elt[0]=x; elt[1]=y; elt[2]=z; elt[3]=t;
}
#endif
@@ -590,29 +639,30 @@ vect (const T& x, const T& y, const T& z, const T& t) {
// Specialise some constructors for lower dimensions
-// These functions are declared, but never defined, so that using them
-// will result in a linker error
-
-template<> inline vect<int,0>::vect (const int& x, const int& y) { assert(0); }
-template<> inline vect<int,1>::vect (const int& x, const int& y) { assert(0); }
+// These functions are declared, but must not be used.
-template<> inline vect<int,0>::vect (const int& x, const int& y, const int& z) { assert(0); }
-template<> inline vect<int,1>::vect (const int& x, const int& y, const int& z) { assert(0); }
-template<> inline vect<int,2>::vect (const int& x, const int& y, const int& z) { assert(0); }
+template<> vect<int,0>::vect (const int& x, const int& y);
+template<> vect<int,1>::vect (const int& x, const int& y);
+template<> vect<int,3>::vect (const int& x, const int& y);
+template<> vect<int,4>::vect (const int& x, const int& y);
-template<> inline vect<int,0>::vect (const int& x, const int& y, const int& z, const int& t) { assert(0); }
-template<> inline vect<int,1>::vect (const int& x, const int& y, const int& z, const int& t) { assert(0); }
-template<> inline vect<int,2>::vect (const int& x, const int& y, const int& z, const int& t) { assert(0); }
-template<> inline vect<int,3>::vect (const int& x, const int& y, const int& z, const int& t) { assert(0); }
+template<> vect<int,0>::vect (const int& x, const int& y, const int& z);
+template<> vect<int,1>::vect (const int& x, const int& y, const int& z);
+template<> vect<int,2>::vect (const int& x, const int& y, const int& z);
+template<> vect<int,4>::vect (const int& x, const int& y, const int& z);
+template<> vect<int,0>::vect (const int& x, const int& y, const int& z, const int& t);
+template<> vect<int,1>::vect (const int& x, const int& y, const int& z, const int& t);
+template<> vect<int,2>::vect (const int& x, const int& y, const int& z, const int& t);
+template<> vect<int,3>::vect (const int& x, const int& y, const int& z, const int& t);
// Specialise for CCTK_REAL
template<>
-inline vect<CCTK_REAL,3>& vect<CCTK_REAL,3>::operator%=(const vect<CCTK_REAL,3>& a) {
- for (int d=0; d<3; ++d) {
+inline vect<CCTK_REAL,dim>& vect<CCTK_REAL,dim>::operator%=(const vect<CCTK_REAL,dim>& a) {
+ for (int d=0; d<dim; ++d) {
elt[d]=fmod(elt[d],a[d]);
if (elt[d]>a[d]*(CCTK_REAL)(1.0-1.0e-10)) elt[d]=(CCTK_REAL)0;
if (elt[d]<a[d]*(CCTK_REAL)( 1.0e-10)) elt[d]=(CCTK_REAL)0;
@@ -621,9 +671,9 @@ inline vect<CCTK_REAL,3>& vect<CCTK_REAL,3>::operator%=(const vect<CCTK_REAL,3>&
}
template<>
-inline vect<CCTK_REAL,3> operator%(const vect<CCTK_REAL,3>& a, const vect<CCTK_REAL,3>& b) {
- vect<CCTK_REAL,3> r;
- for (int d=0; d<3; ++d) {
+inline vect<CCTK_REAL,dim> operator%(const vect<CCTK_REAL,dim>& a, const vect<CCTK_REAL,dim>& b) {
+ vect<CCTK_REAL,dim> r;
+ for (int d=0; d<dim; ++d) {
r[d]=fmod(a[d],b[d]);
if (r[d]>b[d]*(CCTK_REAL)(1.0-1.0e-10)) r[d]=(CCTK_REAL)0;
if (r[d]<b[d]*(CCTK_REAL)( 1.0e-10)) r[d]=(CCTK_REAL)0;
diff --git a/Carpet/CarpetLib/src/vect_helpers.hh b/Carpet/CarpetLib/src/vect_helpers.hh
index afcce6e7e..471ae02ba 100644
--- a/Carpet/CarpetLib/src/vect_helpers.hh
+++ b/Carpet/CarpetLib/src/vect_helpers.hh
@@ -7,7 +7,8 @@
#define DECLARE_MEMBER_OPERATOR_0(fn,op) \
\
- vect fn () const PURE { \
+ vect fn () const CCTK_ATTRIBUTE_PURE \
+ { \
vect r; \
for (int d=0; d<D; ++d) r[d]=op elt[d]; \
return r; \
@@ -20,7 +21,8 @@
#define DECLARE_MEMBER_OPERATOR_0_RET(fn,op,R) \
\
- vect<R,D> fn () const PURE { \
+ vect<R,D> fn () const CCTK_ATTRIBUTE_PURE \
+ { \
vect<R,D> r; \
for (int d=0; d<D; ++d) r[d]=op elt[d]; \
return r; \
@@ -33,12 +35,14 @@
#define DECLARE_MEMBER_OPERATOR_1_REF(fn,op) \
\
- vect& fn (const T& x) { \
+ vect& fn (const T& x) \
+ { \
for (int d=0; d<D; ++d) elt[d] op x; \
return *this; \
} \
\
- vect& fn (const vect& a) { \
+ vect& fn (const vect& a) \
+ { \
for (int d=0; d<D; ++d) elt[d] op a[d]; \
return *this; \
}
@@ -47,25 +51,26 @@
// Declare a function which takes 1 argument and returns type R
-#define DECLARE_FUNCTION_1_RET(fn,R) \
- \
- template<typename T,int D> \
- inline vect<R,D> fn (const vect<T,D>& a) PURE; \
- template<typename T,int D> \
- inline vect<R,D> fn (const vect<T,D>& a) { \
- vect<R,D> r; \
- for (int d=0; d<D; ++d) r[d]=fn(a[d]); \
- return r; \
- } \
- \
- template<typename T,int D,int E> \
- inline vect<R,D> fn (const vect<vect<T,D>,E>& a) PURE; \
- template<typename T,int D,int E> \
- inline vect<R,D> fn (const vect<vect<T,D>,E>& a) \
- { \
- vect<R,D> r; \
- for (int e=0; e<E; ++e) r[e]=fn(a[e]); \
- return r; \
+#define DECLARE_FUNCTION_1_RET(fn,R) \
+ \
+ template<typename T,int D> \
+ inline vect<R,D> fn (const vect<T,D>& a) CCTK_ATTRIBUTE_PURE; \
+ template<typename T,int D> \
+ inline vect<R,D> fn (const vect<T,D>& a) \
+ { \
+ vect<R,D> r; \
+ for (int d=0; d<D; ++d) r[d]=fn(a[d]); \
+ return r; \
+ } \
+ \
+ template<typename T,int D,int E> \
+ inline vect<R,D> fn (const vect<vect<T,D>,E>& a) CCTK_ATTRIBUTE_PURE; \
+ template<typename T,int D,int E> \
+ inline vect<R,D> fn (const vect<vect<T,D>,E>& a) \
+ { \
+ vect<R,D> r; \
+ for (int e=0; e<E; ++e) r[e]=fn(a[e]); \
+ return r; \
}
@@ -81,36 +86,45 @@
#define DECLARE_FUNCTION_2_RET(fn,R) \
\
template<typename T,int D> \
- inline vect<R,D> fn (const vect<T,D>& a, const vect<T,D>& b) PURE; \
+ inline vect<R,D> fn (const vect<T,D>& a, const vect<T,D>& b) \
+ CCTK_ATTRIBUTE_PURE; \
template<typename T,int D> \
- inline vect<R,D> fn (const vect<T,D>& a, const vect<T,D>& b) { \
+ inline vect<R,D> fn (const vect<T,D>& a, const vect<T,D>& b) \
+ { \
vect<R,D> r; \
for (int d=0; d<D; ++d) r[d]=fn(a[d],b[d]); \
return r; \
} \
\
template<typename T,int D> \
- inline vect<R,D> fn (const T& a, const vect<T,D>& b) PURE; \
+ inline vect<R,D> fn (const T& a, const vect<T,D>& b) \
+ CCTK_ATTRIBUTE_PURE; \
template<typename T,int D> \
- inline vect<R,D> fn (const T& a, const vect<T,D>& b) { \
+ inline vect<R,D> fn (const T& a, const vect<T,D>& b) \
+ { \
vect<R,D> r; \
for (int d=0; d<D; ++d) r[d]=fn(a,b[d]); \
return r; \
} \
\
template<typename T,int D> \
- inline vect<R,D> fn (const vect<T,D>& a, const T& b) PURE; \
+ inline vect<R,D> fn (const vect<T,D>& a, const T& b) \
+ CCTK_ATTRIBUTE_PURE; \
template<typename T,int D> \
- inline vect<R,D> fn (const vect<T,D>& a, const T& b) { \
+ inline vect<R,D> fn (const vect<T,D>& a, const T& b) \
+ { \
vect<R,D> r; \
for (int d=0; d<D; ++d) r[d]=fn(a[d],b); \
return r; \
} \
\
template<typename T,int D,int E> \
- inline vect<vect<R,D>,E> fn (const vect<vect<T,D>,E>& a, const vect<vect<T,D>,E>& b) PURE; \
+ inline vect<vect<R,D>,E> fn (const vect<vect<T,D>,E>& a, \
+ const vect<vect<T,D>,E>& b) \
+ CCTK_ATTRIBUTE_PURE; \
template<typename T,int D,int E> \
- inline vect<vect<R,D>,E> fn (const vect<vect<T,D>,E>& a, const vect<vect<T,D>,E>& b) \
+ inline vect<vect<R,D>,E> fn (const vect<vect<T,D>,E>& a, \
+ const vect<vect<T,D>,E>& b) \
{ \
vect<vect<R,D>,E> r; \
for (int e=0; e<E; ++e) r[e]=fn(a[e],b[e]); \
@@ -118,7 +132,8 @@
} \
\
template<typename T,int D,int E> \
- inline vect<vect<R,D>,E> fn (const T& a, const vect<vect<T,D>,E>& b) PURE; \
+ inline vect<vect<R,D>,E> fn (const T& a, const vect<vect<T,D>,E>& b) \
+ CCTK_ATTRIBUTE_PURE; \
template<typename T,int D,int E> \
inline vect<vect<R,D>,E> fn (const T& a, const vect<vect<T,D>,E>& b) \
{ \
@@ -128,7 +143,8 @@
} \
\
template<typename T,int D,int E> \
- inline vect<vect<R,D>,E> fn (const vect<vect<T,D>,E>& a, const T& b) PURE; \
+ inline vect<vect<R,D>,E> fn (const vect<vect<T,D>,E>& a, const T& b) \
+ CCTK_ATTRIBUTE_PURE; \
template<typename T,int D,int E> \
inline vect<vect<R,D>,E> fn (const vect<vect<T,D>,E>& a, const T& b) \
{ \
@@ -150,25 +166,28 @@
#define DECLARE_OPERATOR_1_RET(fn,op,R) \
\
template<typename T,int D> \
- inline vect<R,D> fn (const vect<T,D>& a) PURE; \
+ inline vect<R,D> fn (const vect<T,D>& a) CCTK_ATTRIBUTE_PURE; \
template<typename T,int D> \
- inline vect<R,D> fn (const vect<T,D>& a) { \
+ inline vect<R,D> fn (const vect<T,D>& a) \
+ { \
vect<R,D> r; \
for (int d=0; d<D; ++d) r[d]=op a[d]; \
return r; \
} \
\
template<typename T,int D> \
- inline vect<R,D> fn (const T& a) PURE; \
+ inline vect<R,D> fn (const T& a) CCTK_ATTRIBUTE_PURE; \
template<typename T,int D> \
- inline vect<R,D> fn (const T& a) { \
+ inline vect<R,D> fn (const T& a) \
+ { \
vect<R,D> r; \
for (int d=0; d<D; ++d) r[d]=op a; \
return r; \
} \
\
template<typename T,int D,int E> \
- inline vect<vect<R,D>,E> fn (const vect<vect<T,D>,E>& a) PURE; \
+ inline vect<vect<R,D>,E> fn (const vect<vect<T,D>,E>& a) \
+ CCTK_ATTRIBUTE_PURE; \
template<typename T,int D,int E> \
inline vect<vect<R,D>,E> fn (const vect<vect<T,D>,E>& a) \
{ \
@@ -178,7 +197,7 @@
} \
\
template<typename T,int D,int E> \
- inline vect<vect<R,D>,E> fn (const T& a) PURE; \
+ inline vect<vect<R,D>,E> fn (const T& a) CCTK_ATTRIBUTE_PURE; \
template<typename T,int D,int E> \
inline vect<vect<R,D>,E> fn (const T& a) \
{ \
@@ -194,36 +213,45 @@
#define DECLARE_OPERATOR_2_RET(fn,op,R) \
\
template<typename T,int D> \
- inline vect<R,D> fn (const vect<T,D>& a, const vect<T,D>& b) PURE; \
+ inline vect<R,D> fn (const vect<T,D>& a, const vect<T,D>& b) \
+ CCTK_ATTRIBUTE_PURE; \
template<typename T,int D> \
- inline vect<R,D> fn (const vect<T,D>& a, const vect<T,D>& b) { \
+ inline vect<R,D> fn (const vect<T,D>& a, const vect<T,D>& b) \
+ { \
vect<R,D> r; \
for (int d=0; d<D; ++d) r[d]=a[d] op b[d]; \
return r; \
} \
\
template<typename T,int D> \
- inline vect<R,D> fn (const T& a, const vect<T,D>& b) PURE; \
+ inline vect<R,D> fn (const T& a, const vect<T,D>& b) \
+ CCTK_ATTRIBUTE_PURE; \
template<typename T,int D> \
- inline vect<R,D> fn (const T& a, const vect<T,D>& b) { \
+ inline vect<R,D> fn (const T& a, const vect<T,D>& b) \
+ { \
vect<R,D> r; \
for (int d=0; d<D; ++d) r[d]=a op b[d]; \
return r; \
} \
\
template<typename T,int D> \
- inline vect<R,D> fn (const vect<T,D>& a, const T& b) PURE; \
+ inline vect<R,D> fn (const vect<T,D>& a, const T& b) \
+ CCTK_ATTRIBUTE_PURE; \
template<typename T,int D> \
- inline vect<R,D> fn (const vect<T,D>& a, const T& b) { \
+ inline vect<R,D> fn (const vect<T,D>& a, const T& b) \
+ { \
vect<R,D> r; \
for (int d=0; d<D; ++d) r[d]=a[d] op b; \
return r; \
} \
\
template<typename T,int D,int E> \
- inline vect<vect<R,D>,E> fn (const vect<vect<T,D>,E>& a, const vect<vect<T,D>,E>& b) PURE; \
+ inline vect<vect<R,D>,E> fn (const vect<vect<T,D>,E>& a, \
+ const vect<vect<T,D>,E>& b) \
+ CCTK_ATTRIBUTE_PURE; \
template<typename T,int D,int E> \
- inline vect<vect<R,D>,E> fn (const vect<vect<T,D>,E>& a, const vect<vect<T,D>,E>& b) \
+ inline vect<vect<R,D>,E> fn (const vect<vect<T,D>,E>& a, \
+ const vect<vect<T,D>,E>& b) \
{ \
vect<vect<R,D>,E> r; \
for (int e=0; e<E; ++e) r[e]=a[e] op b[e]; \
@@ -231,7 +259,8 @@
} \
\
template<typename T,int D,int E> \
- inline vect<vect<R,D>,E> fn (const T& a, const vect<vect<T,D>,E>& b) PURE; \
+ inline vect<vect<R,D>,E> fn (const T& a, const vect<vect<T,D>,E>& b) \
+ CCTK_ATTRIBUTE_PURE; \
template<typename T,int D,int E> \
inline vect<vect<R,D>,E> fn (const T& a, const vect<vect<T,D>,E>& b) \
{ \
@@ -241,14 +270,15 @@
} \
\
template<typename T,int D,int E> \
- inline vect<vect<R,D>,E> fn (const vect<vect<T,D>,E>& a, const T& b) PURE; \
+ inline vect<vect<R,D>,E> fn (const vect<vect<T,D>,E>& a, const T& b) \
+ CCTK_ATTRIBUTE_PURE; \
template<typename T,int D,int E> \
inline vect<vect<R,D>,E> fn (const vect<vect<T,D>,E>& a, const T& b) \
{ \
vect<vect<R,D>,E> r; \
for (int e=0; e<E; ++e) r[e]=a[e] op b; \
return r; \
- }
+ }
@@ -264,18 +294,20 @@
#define DECLARE_REDUCTION_OPERATOR_1_T_RET(fn,init,op,final,T,R) \
\
template<typename U,int D> \
- inline vect<R,D> fn (const vect<U,D>& a) PURE; \
+ inline vect<R,D> fn (const vect<U,D>& a) CCTK_ATTRIBUTE_PURE; \
template<typename U,int D> \
- inline vect<R,D> fn (const vect<U,D>& a) { \
+ inline vect<R,D> fn (const vect<U,D>& a) \
+ { \
vect<R,D> r; \
for (int d=0; d<D; ++d) r[d]=fn(a[d]); \
return r; \
} \
\
template<int D> \
- inline R fn (const vect<T,D>& a) PURE; \
+ inline R fn (const vect<T,D>& a) CCTK_ATTRIBUTE_PURE; \
template<int D> \
- inline R fn (const vect<T,D>& a) { \
+ inline R fn (const vect<T,D>& a) \
+ { \
R r(init); \
for (int d=0; d<D; ++d) r op a[d]; \
return final(r); \
@@ -285,15 +317,16 @@
// Declare a reduction function which takes 1 argument
-#define DECLARE_REDUCTION_OPERATOR_1(fn,init,op,final) \
- \
- template<typename T,int D> \
- inline T fn (const vect<T,D>& a) PURE; \
- template<typename T,int D> \
- inline T fn (const vect<T,D>& a) { \
- T r(init); \
- for (int d=0; d<D; ++d) r op a[d]; \
- return final(r); \
+#define DECLARE_REDUCTION_OPERATOR_1(fn,init,op,final) \
+ \
+ template<typename T,int D> \
+ inline T fn (const vect<T,D>& a) CCTK_ATTRIBUTE_PURE; \
+ template<typename T,int D> \
+ inline T fn (const vect<T,D>& a) \
+ { \
+ T r(init); \
+ for (int d=0; d<D; ++d) r op a[d]; \
+ return final(r); \
}
@@ -303,9 +336,10 @@
#define DECLARE_REDUCTION_FUNCTION_1(fn,init,op,final) \
\
template<typename T,int D> \
- inline T fn (const vect<T,D>& a) PURE; \
+ inline T fn (const vect<T,D>& a) CCTK_ATTRIBUTE_PURE; \
template<typename T,int D> \
- inline T fn (const vect<T,D>& a) { \
+ inline T fn (const vect<T,D>& a) \
+ { \
T r(init); \
for (int d=0; d<D; ++d) op(r,a[d]); \
return final(r); \
@@ -315,15 +349,17 @@
// Declare a reduction function which takes 2 arguments
-#define DECLARE_REDUCTION_OPERATOR_2(fn,init,op,op2,final) \
- \
- template<typename T,int D> \
- inline T fn (const vect<T,D>& a, const vect<T,D>& b) PURE; \
- template<typename T,int D> \
- inline T fn (const vect<T,D>& a, const vect<T,D>& b) { \
- T r(init); \
- for (int d=0; d<D; ++d) r op (a[d] op2 b[d]); \
- return final(r); \
+#define DECLARE_REDUCTION_OPERATOR_2(fn,init,op,op2,final) \
+ \
+ template<typename T,int D> \
+ inline T fn (const vect<T,D>& a, const vect<T,D>& b) \
+ CCTK_ATTRIBUTE_PURE; \
+ template<typename T,int D> \
+ inline T fn (const vect<T,D>& a, const vect<T,D>& b) \
+ { \
+ T r(init); \
+ for (int d=0; d<D; ++d) r op (a[d] op2 b[d]); \
+ return final(r); \
}