diff options
Diffstat (limited to 'Carpet/CarpetLib/src')
34 files changed, 1145 insertions, 554 deletions
diff --git a/Carpet/CarpetLib/src/cacheinfo.cc b/Carpet/CarpetLib/src/cacheinfo.cc new file mode 100644 index 000000000..897cb533e --- /dev/null +++ b/Carpet/CarpetLib/src/cacheinfo.cc @@ -0,0 +1,162 @@ +#include "cacheinfo.hh" + +#include <cctk_Parameters.h> + +#include <vectors.h> + + + +template<int D> +vect<int,D> +pad_shape (bbox<int,D> const& extent) +{ + assert (all (extent.shape() >= 0)); + return pad_shape(extent.shape() / extent.stride()); +} + + + +template<int D> +vect<int,D> +pad_shape (vect<int,D> const& shape) +{ + DECLARE_CCTK_PARAMETERS; + + assert (all(shape>=0)); + + static bool have_cacheinfo = false; + static vector<cacheinfo_t> cacheinfo; + if (not have_cacheinfo) { + // Ignore L1 caches that are probably too small to be useful (e.g. + // on Intel or AMD processors) + // TODO: make this a parameter + if (D1size >= 128*1024) { + cacheinfo.push_back(cacheinfo_t(D1size, D1linesize, D1assoc)); + } +#if 0 + // TODO: this is too simplistic: + // Add page size as a cache + if (pagesize>0) { + cacheinfo.push_back(cacheinfo_t(pagesize)); + } +#endif + if (L2size>0) { + cacheinfo.push_back(cacheinfo_t(L2size, L2linesize, L2assoc)); + } + if (L3size>0) { + cacheinfo.push_back(cacheinfo_t(L3size, L3linesize, L3assoc)); + } + if (TLB_D1entries>0) { + ptrdiff_t const TLB_D1size = TLB_D1entries * TLB_D1pagesize * TLB_D1assoc; + cacheinfo.push_back(cacheinfo_t(TLB_D1size, TLB_D1pagesize, TLB_D1assoc)); + } + if (TLB_L2entries>0) { + ptrdiff_t const TLB_L2size = TLB_L2entries * TLB_L2pagesize * TLB_L2assoc; + cacheinfo.push_back(cacheinfo_t(TLB_L2size, TLB_L2pagesize, TLB_L2assoc)); + } + + // TODO: sort caches by their sizes + for (size_t n=0; n<cacheinfo.size(); ++n) { + cacheinfo_t const& ci = cacheinfo.at(n); + if (n>0) { + // Ensure that the cache size is larger than the next lower + // cache size + assert (ci.size() > cacheinfo.at(n-1).size()); + // Ensure that the cache line size is evenly divided by the + // next lower cache line size + assert (ci.linesize() % cacheinfo.at(n-1).linesize() == 0); + assert (ci.stride() > cacheinfo.at(n-1).stride()); + } + } // for cacheinfo + + have_cacheinfo = true; + } // if not have_cacheinfo + + vect<int,D> padded_shape; + int accumulated_npoints = 1; + for (int d=0; d<D; ++d) { + int npoints = shape[d]; + + if (d == 0) { +#if VECTORISE && VECTORISE_ALIGNED_ARRAYS + // Pad array to a multiple of the vector size. Note that this is + // a hard requirement, so that we can emit aligned load/store + // operations. + npoints = align_up (npoints, CCTK_REAL_VEC_SIZE); +#endif + if (vector_size > 0) { + npoints = align_up (npoints, vector_size); + } + } + + for (size_t n=0; n<cacheinfo.size(); ++n) { + cacheinfo_t const& ci = cacheinfo.at(n); + + // Pad array in this direction to a multiple of this cache line + // size + assert (ci.linesize() % sizeof(CCTK_REAL) == 0); + int const linesize = ci.linesize() / sizeof(CCTK_REAL); + assert (is_power_of_2(linesize)); + if (npoints * accumulated_npoints >= linesize) { + // The extent is at least one cache line long: round up to the + // next full cache line + npoints = align_up (npoints, linesize); + } else { +#if 0 + // The extent is less than one cache line long: Ensure that + // the array size divides the cache line size evenly by + // rounding to the next power of 2 + // NOTE: This is disabled, since this would align everything + // to powers of 2. + npoints = next_power_of_2(npoints); +#endif + } + + // Avoid multiples of the cache stride + assert (ci.stride() % sizeof(CCTK_REAL) == 0); + int const stride = ci.stride() / sizeof(CCTK_REAL); + if (npoints * accumulated_npoints % stride == 0) { + assert (linesize < stride); + npoints += linesize; + } + + } // for cacheinfo + + padded_shape[d] = npoints; + accumulated_npoints *= npoints; + } + assert (prod (padded_shape) == accumulated_npoints); + + // self-check + for (int d=0; d<D; ++d) { + assert (padded_shape[d] >= shape[d]); +#if VECTORISE && VECTORISE_ALIGNED_ARRAYS + if (d == 0) { + assert (padded_shape[d] % CCTK_REAL_VEC_SIZE == 0); + } +#endif + if (vector_size > 0) { + if (d == 0) { + assert (padded_shape[d] % vector_size == 0); + } + } + + // TODO: add self-checks for the other requirements as well + } + + if (verbose) { + ostringstream buf; + buf << "padding " << shape << " to " << padded_shape; + CCTK_INFO (buf.str().c_str()); + } + + return padded_shape; +} + + + +template vect<int,3> pad_shape (bbox<int,3> const& extent); +template vect<int,3> pad_shape (vect<int,3> const& shape); + +template vect<int,4> pad_shape (bbox<int,4> const& extent); +template vect<int,4> pad_shape (vect<int,4> const& shape); diff --git a/Carpet/CarpetLib/src/cacheinfo.hh b/Carpet/CarpetLib/src/cacheinfo.hh new file mode 100644 index 000000000..14b464a86 --- /dev/null +++ b/Carpet/CarpetLib/src/cacheinfo.hh @@ -0,0 +1,158 @@ +#ifndef CACHEINFO_HH +#define CACHEINFO_HH + +#include <cctk.h> + +#include <limits> + +#include "bbox.hh" +#include "defs.hh" +#include "vect.hh" + + + +static ptrdiff_t div_down (ptrdiff_t const x, ptrdiff_t const align) + CCTK_ATTRIBUTE_UNUSED; +static ptrdiff_t div_down (ptrdiff_t const x, ptrdiff_t const align) +{ + assert (x >= 0); + assert (align > 0); + return x / align; +} + +static ptrdiff_t div_up (ptrdiff_t const x, ptrdiff_t const align) + CCTK_ATTRIBUTE_UNUSED; +static ptrdiff_t div_up (ptrdiff_t const x, ptrdiff_t const align) +{ + assert (x >= 0); + assert (align > 0); + return (x + align - 1) / align; +} + +static ptrdiff_t align_down (ptrdiff_t const x, ptrdiff_t const align) + CCTK_ATTRIBUTE_UNUSED; +static ptrdiff_t align_down (ptrdiff_t const x, ptrdiff_t const align) +{ + assert (x >= 0); + assert (align > 0); + return div_down(x, align) * align; +} + +static ptrdiff_t align_up (ptrdiff_t const x, ptrdiff_t const align) + CCTK_ATTRIBUTE_UNUSED; +static ptrdiff_t align_up (ptrdiff_t const x, ptrdiff_t const align) +{ + assert (x >= 0); + assert (align > 0); + return div_up(x, align) * align; +} + +static ptrdiff_t next_power_of_2 (ptrdiff_t const x) + CCTK_ATTRIBUTE_UNUSED; +static ptrdiff_t next_power_of_2 (ptrdiff_t const x) +{ + assert (x > 0); + ptrdiff_t res = 1; + while (res < x) { + res *= 2; + assert (res > 0); // try to catch overflows + } + return res; +} + +static ptrdiff_t previous_power_of_2 (ptrdiff_t const x) + CCTK_ATTRIBUTE_UNUSED; +static ptrdiff_t previous_power_of_2 (ptrdiff_t const x) +{ + return next_power_of_2(x/2+1); +} + +static bool is_power_of_2 (ptrdiff_t const x) + CCTK_ATTRIBUTE_UNUSED; +static bool is_power_of_2 (ptrdiff_t const x) +{ + return x == next_power_of_2(x); +} + +#if 0 +static ptrdiff_t lcm (ptrdiff_t const x, ptrdiff_t const y) + CCTK_ATTRIBUTE_UNUSED; +static ptrdiff_t lcm (ptrdiff_t const x, ptrdiff_t const y) +{ + assert (x > 0 && y > 0); + ptrdiff_t z = x; + // TODO: improve LCM algorithm + while (z % y != 0) z += x; + assert (z % x == 0 && z % y == 0); + return z; +} +#endif + + + +class cacheinfo_t { + ptrdiff_t m_size; // bytes + ptrdiff_t m_linesize; // bytes (pagesize for TLBs) + ptrdiff_t m_associativity; +public: + bool invariant () const + { + return + is_power_of_2(m_size) and + is_power_of_2(m_linesize) and + is_power_of_2(m_associativity) and + m_size % (m_linesize * m_associativity) == 0; + } + cacheinfo_t (ptrdiff_t const a_size, + ptrdiff_t const a_linesize, + ptrdiff_t const a_associativity) + : m_size (a_size), + m_linesize (a_linesize), + m_associativity (a_associativity) + { + assert (invariant()); + } + cacheinfo_t (ptrdiff_t const a_linesize) + : m_size (previous_power_of_2(numeric_limits<ptrdiff_t>::max())), + m_linesize (a_linesize), + m_associativity (1) + { + assert (invariant()); + } + // size in bytes + ptrdiff_t size() const + { + return m_size; + } + // line size in bytes + ptrdiff_t linesize() const + { + return m_linesize; + } + // associativity + ptrdiff_t associativity() const + { + return m_associativity; + } + // number of cache elements + ptrdiff_t num_elements() const + { + return size() / (linesize() * associativity()); + } + // stride (between main memory locations that use the same cache + // element) in bytes + ptrdiff_t stride() const + { + return num_elements() * linesize(); + } +}; + + + +// These routines are apparently not pure -- don't know why +template<int D> +vect<int,D> pad_shape (bbox<int,D> const& extent) /*CCTK_ATTRIBUTE_PURE*/; +template<int D> +vect<int,D> pad_shape (vect<int,D> const& shape) /*CCTK_ATTRIBUTE_PURE*/; + +#endif // CACHEINFO_HH diff --git a/Carpet/CarpetLib/src/copy_3d.cc b/Carpet/CarpetLib/src/copy_3d.cc index 11eedc0c2..7cb4ddb80 100644 --- a/Carpet/CarpetLib/src/copy_3d.cc +++ b/Carpet/CarpetLib/src/copy_3d.cc @@ -22,9 +22,11 @@ namespace CarpetLib { #define SRCIND3(i,j,k) \ index3 (srcioff + (i), srcjoff + (j), srckoff + (k), \ + srcipadext, srcjpadext, srckpadext, \ srciext, srcjext, srckext) #define DSTIND3(i,j,k) \ index3 (dstioff + (i), dstjoff + (j), dstkoff + (k), \ + dstipadext, dstjpadext, dstkpadext, \ dstiext, dstjext, dstkext) @@ -32,8 +34,10 @@ namespace CarpetLib { template <typename T> void copy_3d (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -48,7 +52,6 @@ namespace CarpetLib { dstbbox.stride() != dstregbbox.stride() or srcregbbox1.stride() != dstregbbox.stride())) { -#pragma omp critical { cout << "copy_3d.cc:" << endl << "srcbbox=" << srcbbox << endl @@ -66,7 +69,6 @@ namespace CarpetLib { // This could be handled, but is likely to point to an error // elsewhere if (dstregbbox.empty()) { -#pragma omp critical CCTK_WARN (0, "Internal error: region extent is empty"); } @@ -79,7 +81,6 @@ namespace CarpetLib { if (not srcregbbox.is_contained_in(srcbbox) or not dstregbbox.is_contained_in(dstbbox)) { -#pragma omp critical { cout << "copy_3d.cc:" << endl << "srcbbox=" << srcbbox << endl @@ -93,13 +94,6 @@ namespace CarpetLib { } } - if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or - dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride()))) - { -#pragma omp critical - CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); - } - ivect3 const regext = dstregbbox.shape() / dstregbbox.stride(); @@ -110,6 +104,14 @@ namespace CarpetLib { + ptrdiff_t const srcipadext = srcpadext[0]; + ptrdiff_t const srcjpadext = srcpadext[1]; + ptrdiff_t const srckpadext = srcpadext[2]; + + ptrdiff_t const dstipadext = dstpadext[0]; + ptrdiff_t const dstjpadext = dstpadext[1]; + ptrdiff_t const dstkpadext = dstpadext[2]; + ptrdiff_t const srciext = srcext[0]; ptrdiff_t const srcjext = srcext[1]; ptrdiff_t const srckext = srcext[2]; @@ -152,8 +154,10 @@ namespace CarpetLib { template \ void \ copy_3d (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ diff --git a/Carpet/CarpetLib/src/copy_4d.cc b/Carpet/CarpetLib/src/copy_4d.cc index 7aed29aaf..b0cc024f4 100644 --- a/Carpet/CarpetLib/src/copy_4d.cc +++ b/Carpet/CarpetLib/src/copy_4d.cc @@ -7,7 +7,6 @@ #include <cstdlib> #include <iostream> -#include "gdata.hh" #include "operator_prototypes_4d.hh" #include "typeprops.hh" @@ -21,9 +20,11 @@ namespace CarpetLib { #define SRCIND4(i,j,k,l) \ index4 (srcioff + (i), srcjoff + (j), srckoff + (k), srcloff + (l), \ + srcipadext, srcjpadext, srckpadext, srclpadext, \ srciext, srcjext, srckext, srclext) -#define DSTIND4(i,j,k,l) \ - index4 (dstioff + (i), dstjoff + (j), dstkoff + (k), dstloff + (l), \ +#define DSTIND4(i,j,k,l) \ + index4 (dstioff + (i), dstjoff + (j), dstkoff + (k), dstloff + (l), \ + dstipadext, dstjpadext, dstkpadext, dstlpadext, \ dstiext, dstjext, dstkext, dstlext) @@ -31,8 +32,10 @@ namespace CarpetLib { template <typename T> void copy_4d (T const * restrict const src, + ivect4 const & restrict srcpadext, ivect4 const & restrict srcext, T * restrict const dst, + ivect4 const & restrict dstpadext, ivect4 const & restrict dstext, ibbox4 const & restrict srcbbox, ibbox4 const & restrict dstbbox, @@ -68,12 +71,6 @@ namespace CarpetLib { CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); } - if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or - dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride()))) - { - CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); - } - ivect4 const regext = regbbox.shape() / regbbox.stride(); @@ -84,6 +81,16 @@ namespace CarpetLib { + ptrdiff_t const srcipadext = srcpadext[0]; + ptrdiff_t const srcjpadext = srcpadext[1]; + ptrdiff_t const srckpadext = srcpadext[2]; + ptrdiff_t const srclpadext = srcpadext[3]; + + ptrdiff_t const dstipadext = dstpadext[0]; + ptrdiff_t const dstjpadext = dstpadext[1]; + ptrdiff_t const dstkpadext = dstpadext[2]; + ptrdiff_t const dstlpadext = dstpadext[3]; + ptrdiff_t const srciext = srcext[0]; ptrdiff_t const srcjext = srcext[1]; ptrdiff_t const srckext = srcext[2]; @@ -133,8 +140,10 @@ namespace CarpetLib { template \ void \ copy_4d (T const * restrict const src, \ + ivect4 const & restrict srcpadext, \ ivect4 const & restrict srcext, \ T * restrict const dst, \ + ivect4 const & restrict dstpadext, \ ivect4 const & restrict dstext, \ ibbox4 const & restrict srcbbox, \ ibbox4 const & restrict dstbbox, \ diff --git a/Carpet/CarpetLib/src/data.cc b/Carpet/CarpetLib/src/data.cc index 47cb392bf..775f5ec50 100644 --- a/Carpet/CarpetLib/src/data.cc +++ b/Carpet/CarpetLib/src/data.cc @@ -18,6 +18,7 @@ #include "bbox.hh" #include "bboxset.hh" +#include "cacheinfo.hh" #include "defs.hh" #include "dist.hh" #include "timestat.hh" @@ -37,8 +38,10 @@ static void call_operator (void (* the_operator) (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -46,8 +49,10 @@ call_operator (void ibbox3 const & restrict dstregbbox, void * const extraargs), T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -90,7 +95,7 @@ call_operator (void dstregbbox.stride()); if (not mydstregbbox.empty()) { (* the_operator) - (src, srcext, dst, dstext, srcbbox, dstbbox, + (src, srcpadext, srcext, dst, dstpadext, dstext, srcbbox, dstbbox, srcregbbox, mydstregbbox, extraargs); # ifdef CARPET_DEBUG #pragma omp critical @@ -113,8 +118,10 @@ static void call_operator (void (* the_operator) (T const * restrict const src, + ivect4 const & restrict srcpadext, ivect4 const & restrict srcext, T * restrict const dst, + ivect4 const & restrict dstpadext, ivect4 const & restrict dstext, ibbox4 const & restrict srcbbox, ibbox4 const & restrict dstbbox, @@ -122,8 +129,10 @@ call_operator (void ibbox4 const & restrict dstregbbox, void * const extraargs), T const * restrict const src, + ivect4 const & restrict srcpadext, ivect4 const & restrict srcext, T * restrict const dst, + ivect4 const & restrict dstpadext, ivect4 const & restrict dstext, ibbox4 const & restrict srcbbox, ibbox4 const & restrict dstbbox, @@ -133,7 +142,7 @@ call_operator (void { #ifndef _OPENMP (* the_operator) - (src, srcext, dst, dstext, srcbbox, dstbbox, + (src, srcpadext, srcext, dst, dstpadext, dstext, srcbbox, dstbbox, srcregbbox, dstregbbox, extraargs); #else # if ! defined (NDEBUG) && ! defined (CARPET_OPTIMISE) @@ -166,7 +175,7 @@ call_operator (void dstregbbox.stride()); if (not mydstregbbox.empty()) { (* the_operator) - (src, srcext, dst, dstext, srcbbox, dstbbox, + (src, srcpadext, srcext, dst, dstpadext, dstext, srcbbox, dstbbox, srcregbbox, mydstregbbox, extraargs); # if ! defined (NDEBUG) && ! defined (CARPET_OPTIMISE) #pragma omp critical @@ -191,8 +200,10 @@ call_operator (void template <typename T> void prolongate_3d_eno (T const * restrict const /*src*/, + ivect3 const & /*srcpadext*/, ivect3 const & /*srcext*/, T * restrict const /*dst*/, + ivect3 const & /*dstpadext*/, ivect3 const & /*dstext*/, ibbox3 const & /*srcbbox*/, ibbox3 const & /*dstbbox*/, @@ -207,8 +218,10 @@ extern "C" void CCTK_FCALL CCTK_FNAME(prolongate_3d_real8_eno) (const CCTK_REAL8* src, + const int& srcipadext, const int& srcjpadext, const int& srckpadext, const int& srciext, const int& srcjext, const int& srckext, CCTK_REAL8* dst, + const int& dstipadext, const int& dstjpadext, const int& dstkpadext, const int& dstiext, const int& dstjext, const int& dstkext, const int srcbbox[3][3], const int dstbbox[3][3], @@ -217,8 +230,10 @@ CCTK_FCALL CCTK_FNAME(prolongate_3d_real8_eno) template <> void prolongate_3d_eno (CCTK_REAL8 const * restrict const src, + ivect3 const & srcpadext, ivect3 const & srcext, CCTK_REAL8 * restrict const dst, + ivect3 const & dstpadext, ivect3 const & dstext, ibbox3 const & srcbbox, ibbox3 const & dstbbox, @@ -229,8 +244,10 @@ prolongate_3d_eno (CCTK_REAL8 const * restrict const src, assert (not extraargs); CCTK_FNAME(prolongate_3d_real8_eno) (src, + srcpadext[0], srcpadext[1], srcpadext[2], srcext[0], srcext[1], srcext[2], dst, + dstpadext[0], dstpadext[1], dstpadext[2], dstext[0], dstext[1], dstext[2], reinterpret_cast <int const (*) [3]> (& srcbbox), reinterpret_cast <int const (*) [3]> (& dstbbox), @@ -242,8 +259,10 @@ prolongate_3d_eno (CCTK_REAL8 const * restrict const src, template <typename T> void prolongate_3d_weno (T const * restrict const /*src*/, + ivect3 const & /*srcpadext*/, ivect3 const & /*srcext*/, T * restrict const /*dst*/, + ivect3 const & /*dstpadext*/, ivect3 const & /*dstext*/, ibbox3 const & /*srcbbox*/, ibbox3 const & /*dstbbox*/, @@ -258,8 +277,10 @@ extern "C" void CCTK_FCALL CCTK_FNAME(prolongate_3d_real8_weno) (const CCTK_REAL8* src, + const int& srcipadext, const int& srcjpadext, const int& srckpadext, const int& srciext, const int& srcjext, const int& srckext, CCTK_REAL8* dst, + const int& dstipadext, const int& dstjpadext, const int& dstkpadext, const int& dstiext, const int& dstjext, const int& dstkext, const int srcbbox[3][3], const int dstbbox[3][3], @@ -268,8 +289,10 @@ CCTK_FCALL CCTK_FNAME(prolongate_3d_real8_weno) template <> void prolongate_3d_weno (CCTK_REAL8 const * restrict const src, + ivect3 const & srcpadext, ivect3 const & srcext, CCTK_REAL8 * restrict const dst, + ivect3 const & dstpadext, ivect3 const & dstext, ibbox3 const & srcbbox, ibbox3 const & dstbbox, @@ -279,8 +302,10 @@ prolongate_3d_weno (CCTK_REAL8 const * restrict const src, { CCTK_FNAME(prolongate_3d_real8_weno) (src, + srcpadext[0], srcpadext[1], srcpadext[2], srcext[0], srcext[1], srcext[2], dst, + dstpadext[0], dstpadext[1], dstpadext[2], dstext[0], dstext[1], dstext[2], reinterpret_cast <int const (*) [3]> (& srcbbox), reinterpret_cast <int const (*) [3]> (& dstbbox), @@ -292,8 +317,10 @@ prolongate_3d_weno (CCTK_REAL8 const * restrict const src, template <typename T> void prolongate_3d_tvd (T const * restrict const /*src*/, + ivect3 const & /*srcpadext*/, ivect3 const & /*srcext*/, T * restrict const /*dst*/, + ivect3 const & /*dstpadext*/, ivect3 const & /*dstext*/, ibbox3 const & /*srcbbox*/, ibbox3 const & /*dstbbox*/, @@ -309,8 +336,10 @@ extern "C" void CCTK_FCALL CCTK_FNAME(prolongate_3d_real8_tvd) (const CCTK_REAL8* src, + const int& srcipadext, const int& srcjpadext, const int& srckpadext, const int& srciext, const int& srcjext, const int& srckext, CCTK_REAL8* dst, + const int& dstipadext, const int& dstjpadext, const int& dstkpadext, const int& dstiext, const int& dstjext, const int& dstkext, const int srcbbox[3][3], const int dstbbox[3][3], @@ -319,8 +348,10 @@ CCTK_FCALL CCTK_FNAME(prolongate_3d_real8_tvd) template <> void prolongate_3d_tvd (CCTK_REAL8 const * restrict const src, + ivect3 const & srcpadext, ivect3 const & srcext, CCTK_REAL8 * restrict const dst, + ivect3 const & dstpadext, ivect3 const & dstext, ibbox3 const & srcbbox, ibbox3 const & dstbbox, @@ -331,8 +362,10 @@ prolongate_3d_tvd (CCTK_REAL8 const * restrict const src, assert (not extraargs); CCTK_FNAME(prolongate_3d_real8_tvd) (src, + srcpadext[0], srcpadext[1], srcpadext[2], srcext[0], srcext[1], srcext[2], dst, + dstpadext[0], dstpadext[1], dstpadext[2], dstext[0], dstext[1], dstext[2], reinterpret_cast <int const (*) [3]> (& srcbbox), reinterpret_cast <int const (*) [3]> (& dstbbox), @@ -345,8 +378,10 @@ prolongate_3d_tvd (CCTK_REAL8 const * restrict const src, template <typename T> void prolongate_3d_cc_tvd (T const * restrict const /*src*/, + ivect3 const & /*srcpadext*/, ivect3 const & /*srcext*/, T * restrict const /*dst*/, + ivect3 const & /*dstpadext*/, ivect3 const & /*dstext*/, ibbox3 const & /*srcbbox*/, ibbox3 const & /*dstbbox*/, @@ -362,8 +397,10 @@ extern "C" void CCTK_FCALL CCTK_FNAME(prolongate_3d_cc_real8_tvd) (const CCTK_REAL8* src, + const int& srcipadext, const int& srcjpadext, const int& srckpadext, const int& srciext, const int& srcjext, const int& srckext, CCTK_REAL8* dst, + const int& dstipadext, const int& dstjpadext, const int& dstkpadext, const int& dstiext, const int& dstjext, const int& dstkext, const int srcbbox[3][3], const int dstbbox[3][3], @@ -372,8 +409,10 @@ CCTK_FCALL CCTK_FNAME(prolongate_3d_cc_real8_tvd) template <> void prolongate_3d_cc_tvd (CCTK_REAL8 const * restrict const src, + ivect3 const & srcpadext, ivect3 const & srcext, CCTK_REAL8 * restrict const dst, + ivect3 const & dstpadext, ivect3 const & dstext, ibbox3 const & srcbbox, ibbox3 const & dstbbox, @@ -384,8 +423,10 @@ prolongate_3d_cc_tvd (CCTK_REAL8 const * restrict const src, assert (not extraargs); CCTK_FNAME(prolongate_3d_cc_real8_tvd) (src, + srcpadext[0], srcpadext[1], srcpadext[2], srcext[0], srcext[1], srcext[2], dst, + dstpadext[0], dstpadext[1], dstpadext[2], dstext[0], dstext[1], dstext[2], reinterpret_cast <int const (*) [3]> (& srcbbox), reinterpret_cast <int const (*) [3]> (& dstbbox), @@ -466,16 +507,13 @@ void data<T>::allocate (const ibbox& extent_, // data _extent = extent_; - //_shape = max (ivect(0), _extent.shape() / _extent.stride()); - _shape = allocated_memory_shape (_extent); - assert (all (_shape >= max (ivect(0), _extent.shape() / _extent.stride()))); - - _size = 1; - for (int d=0; d<dim; ++d) { - _stride[d] = _size; - assert (_shape[d]==0 or _size <= numeric_limits<int>::max() / _shape[d]); - _size *= _shape[d]; + _shape = max (ivect(0), _extent.shape() / _extent.stride()); + _padded_shape = pad_shape(_shape); + _stride[0] = 1; + for (int d=1; d<dim; ++d) { + _stride[d] = _stride[d-1] * _padded_shape[d-1]; } + _size = _stride[dim-1] * _padded_shape[dim-1]; _proc = proc_; if (dist::rank() == _proc) { @@ -512,9 +550,7 @@ size_t data<T>::allocsize (const ibbox & extent_, const int proc_) const if (dist::rank() != proc_) return 0; if (vectorindex != 0) return 0; assert (not vectorleader); - ivect const shape_ = allocated_memory_shape (extent_); - //return vectorlength * extent_.size() * sizeof (T); - return vectorlength * prod(shape_) * sizeof (T); + return vectorlength * prod (pad_shape (extent_)) * sizeof (T); } @@ -541,17 +577,17 @@ copy_from_innerloop (gdata const * const gsrc, #if CARPET_DIM == 3 // Don't use call_operator, because we parallelise ourselves copy_3d(static_cast <T const *> (src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast <T *> (this->storage()), - this->shape(), + this->padded_shape(), this->shape(), srcbox, dstbox, srcregbox, dstregbox, (void*)slabinfo); #elif CARPET_DIM == 4 // Don't use call_operator, because we parallelise ourselves copy_4d(static_cast <T const *> (src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast <T *> (this->storage()), - this->shape(), + this->padded_shape(), this->shape(), srcbox, dstbox, srcregbox, dstregbox, (void*)slabinfo); #else @@ -725,8 +761,10 @@ transfer_prolongate (data const * const src, case vertex_centered: { static void (* the_operators[]) (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -762,9 +800,9 @@ transfer_prolongate (data const * const src, } call_operator<T> (the_operators[order_space], static_cast <T const *> (src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast <T *> (this->storage()), - this->shape(), + this->padded_shape(), this->shape(), src->extent(), this->extent(), srcbox, dstbox, NULL); @@ -774,9 +812,9 @@ transfer_prolongate (data const * const src, if (use_dgfe) { // Don't use call_operator, because we parallelise ourselves prolongate_3d_dgfe_rf2<T,5>(static_cast<T const *>(src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast<T *>(this->storage()), - this->shape(), + this->padded_shape(), this->shape(), src->extent(), this->extent(), srcbox, dstbox, NULL); @@ -784,8 +822,10 @@ transfer_prolongate (data const * const src, } static void (* the_operators[]) (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -806,9 +846,9 @@ transfer_prolongate (data const * const src, } call_operator<T> (the_operators[order_space], static_cast <T const *> (src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast <T *> (this->storage()), - this->shape(), + this->padded_shape(), this->shape(), src->extent(), this->extent(), srcbox, dstbox, NULL); @@ -835,9 +875,9 @@ transfer_prolongate (data const * const src, case 3: call_operator<T> (& prolongate_3d_eno, static_cast <T const *> (src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast <T *> (this->storage()), - this->shape(), + this->padded_shape(), this->shape(), src->extent(), this->extent(), srcbox, dstbox, NULL); @@ -848,9 +888,9 @@ transfer_prolongate (data const * const src, // hydro, so we cheat here. call_operator<T> (& prolongate_3d_eno, static_cast <T const *> (src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast <T *> (this->storage()), - this->shape(), + this->padded_shape(), this->shape(), src->extent(), this->extent(), srcbox, dstbox, NULL); @@ -860,13 +900,15 @@ transfer_prolongate (data const * const src, "There is no stencil for op=\"ENO\" with order_space!=3"); break; } + break; } - break; case cell_centered: { static void (* the_operators[]) (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -937,18 +979,20 @@ transfer_prolongate (data const * const src, call_operator<T> (the_operators[order_space-2], static_cast <T const *> (src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast <T *> (this->storage()), - this->shape(), + this->padded_shape(), this->shape(), src->extent(), this->extent(), srcbox, dstbox, NULL); + break; } - break; + default: + assert(0); } timer.stop (0); + break; } - break; case op_WENO: { static Timer timer ("prolongate_WENO"); timer.start (); @@ -967,9 +1011,9 @@ transfer_prolongate (data const * const src, case 5: call_operator<T> (& prolongate_3d_eno, static_cast <T const *> (src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast <T *> (this->storage()), - this->shape(), + this->padded_shape(), this->shape(), src->extent(), this->extent(), srcbox, dstbox, NULL); @@ -979,15 +1023,15 @@ transfer_prolongate (data const * const src, "There is no stencil for op=\"WENO\" with order_space!=5"); break; } - + break; } - break; case cell_centered: { CCTK_WARN (CCTK_WARN_ABORT, "There are currently no cell-centred stencils for op=\"WENO\""); break; } - break; + default: + assert(0); } timer.stop (0); } @@ -1002,9 +1046,9 @@ transfer_prolongate (data const * const src, case 1: call_operator<T> (& prolongate_3d_tvd, static_cast <T const *> (src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast <T *> (this->storage()), - this->shape(), + this->padded_shape(), this->shape(), src->extent(), this->extent(), srcbox, dstbox, NULL); @@ -1021,9 +1065,9 @@ transfer_prolongate (data const * const src, case 1: call_operator<T> (& prolongate_3d_cc_tvd, static_cast <T const *> (src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast <T *> (this->storage()), - this->shape(), + this->padded_shape(), this->shape(), src->extent(), this->extent(), srcbox, dstbox, NULL); @@ -1035,6 +1079,8 @@ transfer_prolongate (data const * const src, } break; } + default: + assert(0); } timer.stop (0); break; @@ -1055,9 +1101,9 @@ transfer_prolongate (data const * const src, case 5: call_operator<T> (& prolongate_3d_o5_monotone_rf2, static_cast <T const *> (src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast <T *> (this->storage()), - this->shape(), + this->padded_shape(), this->shape(), src->extent(), this->extent(), srcbox, dstbox, NULL); @@ -1090,9 +1136,9 @@ transfer_prolongate (data const * const src, case 1: call_operator<T> (& prolongate_4d_o1_rf2, static_cast <T const *> (src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast <T *> (this->storage()), - this->shape(), + this->padded_shape(), this->shape(), src->extent(), this->extent(), srcbox, dstbox, NULL); @@ -1164,9 +1210,9 @@ transfer_restrict (data const * const src, assert (not slabinfo); call_operator<T> (& restrict_3d_rf2, static_cast <T const *> (src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast <T *> (this->storage()), - this->shape(), + this->padded_shape(), this->shape(), src->extent(), this->extent(), srcregbox, dstregbox, NULL); @@ -1182,9 +1228,9 @@ transfer_restrict (data const * const src, if (use_dgfe) { // Don't use call_operator, because we parallelise ourselves restrict_3d_dgfe_rf2<T,5>(static_cast<T const *>(src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast<T *>(this->storage()), - this->shape(), + this->padded_shape(), this->shape(), srcbox, dstbox, srcregbox, dstregbox, NULL); @@ -1194,37 +1240,37 @@ transfer_restrict (data const * const src, transport_operator != op_WENO and transport_operator != op_ENO) { // HACK switch (restriction_order_space) { - case 1: + case 1: // Don't use call_operator, because we parallelise ourselves restrict_3d_cc_rf2(static_cast <T const *> (src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast <T *> (this->storage()), - this->shape(), + this->padded_shape(), this->shape(), srcbox, dstbox, srcregbox, dstregbox, NULL); break; - case 3: + case 3: // Don't use call_operator, because we parallelise ourselves restrict_3d_cc_o3_rf2(static_cast <T const *> (src->storage()), - src->shape(), - static_cast <T *> (this->storage()), - this->shape(), - srcbox, - dstbox, - srcregbox, dstregbox, NULL); + src->padded_shape(), src->shape(), + static_cast <T *> (this->storage()), + this->padded_shape(), this->shape(), + srcbox, + dstbox, + srcregbox, dstregbox, NULL); break; - case 5: + case 5: // Don't use call_operator, because we parallelise ourselves restrict_3d_cc_o5_rf2(static_cast <T const *> (src->storage()), - src->shape(), - static_cast <T *> (this->storage()), - this->shape(), - srcbox, - dstbox, - srcregbox, dstregbox, NULL); + src->padded_shape(), src->shape(), + static_cast <T *> (this->storage()), + this->padded_shape(), this->shape(), + srcbox, + dstbox, + srcregbox, dstregbox, NULL); break; - default: + default: CCTK_VWarn (CCTK_WARN_ABORT, __LINE__, __FILE__, CCTK_THORNSTRING, "There is no restriction stencil with restriction_order_space==%d", restriction_order_space); break; @@ -1233,36 +1279,36 @@ transfer_restrict (data const * const src, } // Don't use call_operator, because we parallelise ourselves restrict_3d_cc_rf2(static_cast <T const *> (src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast <T *> (this->storage()), - this->shape(), + this->padded_shape(), this->shape(), srcbox, dstbox, srcregbox, dstregbox, NULL); } else if (all(is_centered == ivect(0,1,1))) { // Don't use call_operator, because we parallelise ourselves restrict_3d_vc_rf2<T,0,1,1>(static_cast <T const *> (src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast <T *> (this->storage()), - this->shape(), + this->padded_shape(), this->shape(), srcbox, dstbox, srcregbox, dstregbox, NULL); } else if (all(is_centered == ivect(1,0,1))) { // Don't use call_operator, because we parallelise ourselves restrict_3d_vc_rf2<T,1,0,1>(static_cast <T const *> (src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast <T *> (this->storage()), - this->shape(), + this->padded_shape(), this->shape(), srcbox, dstbox, srcregbox, dstregbox, NULL); } else if (all(is_centered == ivect(1,1,0))) { // Don't use call_operator, because we parallelise ourselves restrict_3d_vc_rf2<T,1,1,0>(static_cast <T const *> (src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast <T *> (this->storage()), - this->shape(), + this->padded_shape(), this->shape(), srcbox, dstbox, srcregbox, dstregbox, NULL); @@ -1291,9 +1337,9 @@ transfer_restrict (data const * const src, case vertex_centered: // Don't use call_operator, because we parallelise ourselves restrict_4d_rf2(static_cast <T const *> (src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast <T *> (this->storage()), - this->shape(), + this->padded_shape(), this->shape(), src->extent(), this->extent(), srcregbox, dstregbox, NULL); @@ -1357,10 +1403,10 @@ time_interpolate (vector <data *> const & srcs, times.AT(0), static_cast <T const *> (srcs.AT(1)->storage()), times.AT(1), - srcs.AT(0)->shape(), + srcs.AT(0)->padded_shape(), srcs.AT(0)->shape(), static_cast <T *> (this->storage()), time, - this->shape(), + this->padded_shape(), this->shape(), srcs.AT(0)->extent(), this->extent(), srcbox, dstbox, NULL); @@ -1374,10 +1420,10 @@ time_interpolate (vector <data *> const & srcs, times.AT(1), static_cast <T const *> (srcs.AT(2)->storage()), times.AT(2), - srcs.AT(0)->shape(), + srcs.AT(0)->padded_shape(), srcs.AT(0)->shape(), static_cast <T *> (this->storage()), time, - this->shape(), + this->padded_shape(), this->shape(), srcs.AT(0)->extent(), this->extent(), srcbox, dstbox, NULL); @@ -1393,10 +1439,10 @@ time_interpolate (vector <data *> const & srcs, times.AT(2), static_cast <T const *> (srcs.AT(3)->storage()), times.AT(3), - srcs.AT(0)->shape(), + srcs.AT(0)->padded_shape(), srcs.AT(0)->shape(), static_cast <T *> (this->storage()), time, - this->shape(), + this->padded_shape(), this->shape(), srcs.AT(0)->extent(), this->extent(), srcbox, dstbox, NULL); @@ -1414,10 +1460,10 @@ time_interpolate (vector <data *> const & srcs, times.AT(3), static_cast <T const *> (srcs.AT(4)->storage()), times.AT(4), - srcs.AT(0)->shape(), + srcs.AT(0)->padded_shape(), srcs.AT(0)->shape(), static_cast <T *> (this->storage()), time, - this->shape(), + this->padded_shape(), this->shape(), srcs.AT(0)->extent(), this->extent(), srcbox, dstbox, NULL); @@ -1446,10 +1492,10 @@ time_interpolate (vector <data *> const & srcs, times.AT(0), static_cast <T const *> (srcs.AT(1)->storage()), times.AT(1), - srcs.AT(0)->shape(), + srcs.AT(0)->padded_shape(), srcs.AT(0)->shape(), static_cast <T *> (this->storage()), time, - this->shape(), + this->padded_shape(), this->shape(), srcs.AT(0)->extent(), this->extent(), srcbox, dstbox, NULL); @@ -1463,10 +1509,10 @@ time_interpolate (vector <data *> const & srcs, times.AT(1), static_cast <T const *> (srcs.AT(2)->storage()), times.AT(2), - srcs.AT(0)->shape(), + srcs.AT(0)->padded_shape(), srcs.AT(0)->shape(), static_cast <T *> (this->storage()), time, - this->shape(), + this->padded_shape(), this->shape(), srcs.AT(0)->extent(), this->extent(), srcbox, dstbox, NULL); @@ -1535,6 +1581,8 @@ output (ostream & os) T Tdummy; os << "data<" << typestring(Tdummy) << ">:" << "extent=" << extent() << "," + << "shape=" << shape() << "," + << "padded_shape=" << padded_shape() << "," << "stride=" << stride() << ",size=" << size(); return os; } diff --git a/Carpet/CarpetLib/src/gdata.cc b/Carpet/CarpetLib/src/gdata.cc index 156e9cbbd..67acbe453 100644 --- a/Carpet/CarpetLib/src/gdata.cc +++ b/Carpet/CarpetLib/src/gdata.cc @@ -19,6 +19,7 @@ #endif #include "bbox.hh" +#include "cacheinfo.hh" #include "commstate.hh" #include "defs.hh" #include "dist.hh" @@ -115,6 +116,7 @@ gdata::~gdata () +#if 0 // Storage management template<int D> @@ -181,6 +183,7 @@ allocated_memory_shape (vect<int,D> shape) done:; return shape; } +#endif @@ -262,8 +265,8 @@ transfer_from (comm_state & state, find_source_timelevel (times, time, order_time, my_transport_operator, timelevel0, ntimelevels); if (is_src) assert (int (srcs.size()) >= ntimelevels); - int const dstpoints = prod(allocated_memory_shape(dstbox)); - int const srcpoints = prod(allocated_memory_shape(srcbox)) * ntimelevels; + int const dstpoints = prod(pad_shape(dstbox)); + int const srcpoints = prod(pad_shape(srcbox)) * ntimelevels; bool const interp_on_src = dstpoints <= srcpoints; int const npoints = interp_on_src ? dstpoints : srcpoints; @@ -289,10 +292,10 @@ transfer_from (comm_state & state, // copy the data into the send buffer if (interp_on_src) { size_t const sendbufsize = - src->c_datatype_size() * prod(allocated_memory_shape(dstbox)); + src->c_datatype_size() * prod(pad_shape(dstbox)); void * const sendbuf = state.send_buffer (src->c_datatype(), dstproc, - prod(allocated_memory_shape(dstbox))); + prod(pad_shape(dstbox))); gdata * const buf = src->make_typed (src->varindex, src->cent, src->transport_operator); buf->allocate (dstbox, srcproc, sendbuf, sendbufsize); @@ -301,14 +304,14 @@ transfer_from (comm_state & state, time, order_space, order_time); delete buf; state.commit_send_space - (src->c_datatype(), dstproc, prod(allocated_memory_shape(dstbox))); + (src->c_datatype(), dstproc, prod(pad_shape(dstbox))); } else { for (int tl = timelevel0; tl < timelevel0 + ntimelevels; ++ tl) { size_t const sendbufsize = - src->c_datatype_size() * prod(allocated_memory_shape(srcbox)); + src->c_datatype_size() * prod(pad_shape(srcbox)); void * const sendbuf = state.send_buffer (src->c_datatype(), dstproc, - prod(allocated_memory_shape(srcbox))); + prod(pad_shape(srcbox))); gdata * const buf = src->make_typed (src->varindex, src->cent, src->transport_operator); @@ -316,7 +319,7 @@ transfer_from (comm_state & state, buf->copy_from_innerloop (srcs.AT(tl), srcbox, srcbox, NULL); delete buf; state.commit_send_space (src->c_datatype(), dstproc, - prod(allocated_memory_shape(srcbox))); + prod(pad_shape(srcbox))); } } } @@ -337,14 +340,13 @@ transfer_from (comm_state & state, // copy from the recv buffer if (interp_on_src) { size_t const recvbufsize = - c_datatype_size() * prod(allocated_memory_shape(dstbox)); + c_datatype_size() * prod(pad_shape(dstbox)); void * const recvbuf = - state.recv_buffer (c_datatype(), srcproc, - prod(allocated_memory_shape(dstbox))); + state.recv_buffer (c_datatype(), srcproc, prod(pad_shape(dstbox))); gdata * const buf = make_typed (varindex, cent, transport_operator); buf->allocate (dstbox, dstproc, recvbuf, recvbufsize); state.commit_recv_space (c_datatype(), srcproc, - prod(allocated_memory_shape(dstbox))); + prod(pad_shape(dstbox))); copy_from_innerloop (buf, dstbox, dstbox, NULL); delete buf; } else { @@ -353,14 +355,14 @@ transfer_from (comm_state & state, vector <CCTK_REAL> timebuf (ntimelevels); for (int tl = 0; tl < ntimelevels; ++ tl) { size_t const recvbufsize = - c_datatype_size() * prod(allocated_memory_shape(srcbox)); + c_datatype_size() * prod(pad_shape(srcbox)); void * const recvbuf = state.recv_buffer (c_datatype(), srcproc, - prod(allocated_memory_shape(srcbox))); + prod(pad_shape(srcbox))); gdata * const buf = make_typed (varindex, cent, transport_operator); buf->allocate (srcbox, dstproc, recvbuf, recvbufsize); state.commit_recv_space - (c_datatype(), srcproc, prod(allocated_memory_shape(srcbox))); + (c_datatype(), srcproc, prod(pad_shape(srcbox))); bufs.AT(tl) = buf; timebuf.AT(tl) = times.AT(timelevel0 + tl); } @@ -469,11 +471,3 @@ allmemory () } return mem; } - - - -template vect<int,3> gdata::allocated_memory_shape (bbox<int,3> const& extent); -template vect<int,3> gdata::allocated_memory_shape (vect<int,3> shape); - -template vect<int,4> gdata::allocated_memory_shape (bbox<int,4> const& extent); -template vect<int,4> gdata::allocated_memory_shape (vect<int,4> shape); diff --git a/Carpet/CarpetLib/src/gdata.hh b/Carpet/CarpetLib/src/gdata.hh index ea44bc6fc..129fccc1c 100644 --- a/Carpet/CarpetLib/src/gdata.hh +++ b/Carpet/CarpetLib/src/gdata.hh @@ -71,7 +71,8 @@ protected: int _proc; // stored on processor - ivect _shape, _stride; // shape and index order + ivect _shape; // shape + ivect _padded_shape, _stride; // allocated shape and index order ibbox _extent; // bbox for all data @@ -105,10 +106,6 @@ public: 0; virtual void free () = 0; virtual size_t allocsize (const ibbox& extent, const int proc) const = 0; - template<int D> - static vect<int,D> allocated_memory_shape (bbox<int,D> const& extent); - template<int D> - static vect<int,D> allocated_memory_shape (vect<int,D> shape); // Accessors bool has_storage () const { @@ -145,6 +142,11 @@ public: return _shape; } + const ivect& padded_shape () const { + assert (_has_storage); + return _padded_shape; + } + const ivect& stride () const { assert (_has_storage); return _stride; @@ -164,8 +166,10 @@ public: assert (_has_storage); assert (all((index-extent().lower()) % extent().stride() == 0)); ivect const ind = (index-extent().lower()) / extent().stride(); - assert (all(ind>=0 and ind<=shape())); - return dot(ind, stride()); + assert (all(ind>=0 and ind<shape())); + int const off = dot(ind, stride()); + assert (off>=0 and off<size()); + return off; } private: diff --git a/Carpet/CarpetLib/src/interpolate_3d_2tl.cc b/Carpet/CarpetLib/src/interpolate_3d_2tl.cc index 9590632ad..ddfe15254 100644 --- a/Carpet/CarpetLib/src/interpolate_3d_2tl.cc +++ b/Carpet/CarpetLib/src/interpolate_3d_2tl.cc @@ -8,7 +8,6 @@ #include <loopcontrol.h> -#include "gdata.hh" #include "operator_prototypes_3d.hh" #include "typeprops.hh" @@ -22,9 +21,11 @@ namespace CarpetLib { #define SRCIND3(i,j,k) \ index3 (srcioff + (i), srcjoff + (j), srckoff + (k), \ + srcipadext, srcjpadext, srckpadext, \ srciext, srcjext, srckext) #define DSTIND3(i,j,k) \ index3 (dstioff + (i), dstjoff + (j), dstkoff + (k), \ + dstipadext, dstjpadext, dstkpadext, \ dstiext, dstjext, dstkext) @@ -35,9 +36,11 @@ namespace CarpetLib { CCTK_REAL const t1, T const * restrict const src2, CCTK_REAL const t2, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, CCTK_REAL const t, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -73,12 +76,6 @@ namespace CarpetLib { CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); } - if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or - dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride()))) - { - CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); - } - ivect3 const regext = regbbox.shape() / regbbox.stride(); @@ -89,6 +86,14 @@ namespace CarpetLib { + ptrdiff_t const srcipadext = srcpadext[0]; + ptrdiff_t const srcjpadext = srcpadext[1]; + ptrdiff_t const srckpadext = srcpadext[2]; + + ptrdiff_t const dstipadext = dstpadext[0]; + ptrdiff_t const dstjpadext = dstpadext[1]; + ptrdiff_t const dstkpadext = dstpadext[2]; + ptrdiff_t const srciext = srcext[0]; ptrdiff_t const srcjext = srcext[1]; ptrdiff_t const srckext = srcext[2]; @@ -129,7 +134,8 @@ namespace CarpetLib { // Loop over region #pragma omp parallel CCTK_LOOP3(interpolate_3d_2tl, - i,j,k, 0,0,0, regiext,regjext,regkext, srciext,srcjext,srckext) + i,j,k, 0,0,0, regiext,regjext,regkext, + dstipadext,dstjpadext,dstkpadext) { dst [DSTIND3(i, j, k)] = @@ -142,21 +148,23 @@ namespace CarpetLib { -#define TYPECASE(N,T) \ - template \ - void \ - interpolate_3d_2tl (T const * restrict const src1, \ - CCTK_REAL const t1, \ - T const * restrict const src2, \ - CCTK_REAL const t2, \ - ivect3 const & restrict srcext, \ - T * restrict const dst, \ - CCTK_REAL const t, \ - ivect3 const & restrict dstext, \ - ibbox3 const & restrict srcbbox, \ - ibbox3 const & restrict dstbbox, \ - ibbox3 const & restrict, \ - ibbox3 const & restrict regbbox, \ +#define TYPECASE(N,T) \ + template \ + void \ + interpolate_3d_2tl (T const * restrict const src1, \ + CCTK_REAL const t1, \ + T const * restrict const src2, \ + CCTK_REAL const t2, \ + ivect3 const & restrict srcpadext, \ + ivect3 const & restrict srcext, \ + T * restrict const dst, \ + CCTK_REAL const t, \ + ivect3 const & restrict dstpadext, \ + ivect3 const & restrict dstext, \ + ibbox3 const & restrict srcbbox, \ + ibbox3 const & restrict dstbbox, \ + ibbox3 const & restrict, \ + ibbox3 const & restrict regbbox, \ void * extraargs); #include "typecase.hh" #undef TYPECASE diff --git a/Carpet/CarpetLib/src/interpolate_3d_3tl.cc b/Carpet/CarpetLib/src/interpolate_3d_3tl.cc index eaf24bea5..ce8249998 100644 --- a/Carpet/CarpetLib/src/interpolate_3d_3tl.cc +++ b/Carpet/CarpetLib/src/interpolate_3d_3tl.cc @@ -8,7 +8,6 @@ #include <loopcontrol.h> -#include "gdata.hh" #include "operator_prototypes_3d.hh" #include "typeprops.hh" @@ -22,9 +21,11 @@ namespace CarpetLib { #define SRCIND3(i,j,k) \ index3 (srcioff + (i), srcjoff + (j), srckoff + (k), \ + srcipadext, srcjpadext, srckpadext, \ srciext, srcjext, srckext) #define DSTIND3(i,j,k) \ index3 (dstioff + (i), dstjoff + (j), dstkoff + (k), \ + dstipadext, dstjpadext, dstkpadext, \ dstiext, dstjext, dstkext) @@ -37,9 +38,11 @@ namespace CarpetLib { CCTK_REAL const t2, T const * restrict const src3, CCTK_REAL const t3, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, CCTK_REAL const t, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -75,12 +78,6 @@ namespace CarpetLib { CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); } - if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or - dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride()))) - { - CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); - } - ivect3 const regext = regbbox.shape() / regbbox.stride(); @@ -91,6 +88,14 @@ namespace CarpetLib { + ptrdiff_t const srcipadext = srcpadext[0]; + ptrdiff_t const srcjpadext = srcpadext[1]; + ptrdiff_t const srckpadext = srcpadext[2]; + + ptrdiff_t const dstipadext = dstpadext[0]; + ptrdiff_t const dstjpadext = dstpadext[1]; + ptrdiff_t const dstkpadext = dstpadext[2]; + ptrdiff_t const srciext = srcext[0]; ptrdiff_t const srcjext = srcext[1]; ptrdiff_t const srckext = srcext[2]; @@ -133,7 +138,8 @@ namespace CarpetLib { // Loop over region #pragma omp parallel CCTK_LOOP3(interpolate_3d_3tl, - i,j,k, 0,0,0, regiext,regjext,regkext, srciext,srcjext,srckext) + i,j,k, 0,0,0, regiext,regjext,regkext, + dstipadext,dstjpadext,dstkpadext) { dst [DSTIND3(i, j, k)] = @@ -147,23 +153,25 @@ namespace CarpetLib { -#define TYPECASE(N,T) \ - template \ - void \ - interpolate_3d_3tl (T const * restrict const src1, \ - CCTK_REAL const t1, \ - T const * restrict const src2, \ - CCTK_REAL const t2, \ - T const * restrict const src3, \ - CCTK_REAL const t3, \ - ivect3 const & restrict srcext, \ - T * restrict const dst, \ - CCTK_REAL const t, \ - ivect3 const & restrict dstext, \ - ibbox3 const & restrict srcbbox, \ - ibbox3 const & restrict dstbbox, \ - ibbox3 const & restrict, \ - ibbox3 const & restrict regbbox, \ +#define TYPECASE(N,T) \ + template \ + void \ + interpolate_3d_3tl (T const * restrict const src1, \ + CCTK_REAL const t1, \ + T const * restrict const src2, \ + CCTK_REAL const t2, \ + T const * restrict const src3, \ + CCTK_REAL const t3, \ + ivect3 const & restrict srcpadext, \ + ivect3 const & restrict srcext, \ + T * restrict const dst, \ + CCTK_REAL const t, \ + ivect3 const & restrict dstpadext, \ + ivect3 const & restrict dstext, \ + ibbox3 const & restrict srcbbox, \ + ibbox3 const & restrict dstbbox, \ + ibbox3 const & restrict, \ + ibbox3 const & restrict regbbox, \ void * extraargs); #include "typecase.hh" #undef TYPECASE diff --git a/Carpet/CarpetLib/src/interpolate_3d_4tl.cc b/Carpet/CarpetLib/src/interpolate_3d_4tl.cc index 16039c7d8..74bca1837 100644 --- a/Carpet/CarpetLib/src/interpolate_3d_4tl.cc +++ b/Carpet/CarpetLib/src/interpolate_3d_4tl.cc @@ -8,7 +8,6 @@ #include <loopcontrol.h> -#include "gdata.hh" #include "operator_prototypes_3d.hh" #include "typeprops.hh" @@ -22,9 +21,11 @@ namespace CarpetLib { #define SRCIND3(i,j,k) \ index3 (srcioff + (i), srcjoff + (j), srckoff + (k), \ + srcipadext, srcjpadext, srckpadext, \ srciext, srcjext, srckext) #define DSTIND3(i,j,k) \ index3 (dstioff + (i), dstjoff + (j), dstkoff + (k), \ + dstipadext, dstjpadext, dstkpadext, \ dstiext, dstjext, dstkext) @@ -39,9 +40,11 @@ namespace CarpetLib { CCTK_REAL const t3, T const * restrict const src4, CCTK_REAL const t4, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, CCTK_REAL const t, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -77,12 +80,6 @@ namespace CarpetLib { CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); } - if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or - dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride()))) - { - CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); - } - ivect3 const regext = regbbox.shape() / regbbox.stride(); @@ -93,6 +90,14 @@ namespace CarpetLib { + ptrdiff_t const srcipadext = srcpadext[0]; + ptrdiff_t const srcjpadext = srcpadext[1]; + ptrdiff_t const srckpadext = srcpadext[2]; + + ptrdiff_t const dstipadext = dstpadext[0]; + ptrdiff_t const dstjpadext = dstpadext[1]; + ptrdiff_t const dstkpadext = dstpadext[2]; + ptrdiff_t const srciext = srcext[0]; ptrdiff_t const srcjext = srcext[1]; ptrdiff_t const srckext = srcext[2]; @@ -140,7 +145,8 @@ namespace CarpetLib { // Loop over region #pragma omp parallel CCTK_LOOP3(interpolate_3d_4tl, - i,j,k, 0,0,0, regiext,regjext,regkext, srciext,srcjext,srckext) + i,j,k, 0,0,0, regiext,regjext,regkext, + dstipadext,dstjpadext,dstkpadext) { dst [DSTIND3(i, j, k)] = @@ -155,25 +161,27 @@ namespace CarpetLib { -#define TYPECASE(N,T) \ - template \ - void \ - interpolate_3d_4tl (T const * restrict const src1, \ - CCTK_REAL const t1, \ - T const * restrict const src2, \ - CCTK_REAL const t2, \ - T const * restrict const src3, \ - CCTK_REAL const t3, \ - T const * restrict const src4, \ - CCTK_REAL const t4, \ - ivect3 const & restrict srcext, \ - T * restrict const dst, \ - CCTK_REAL const t, \ - ivect3 const & restrict dstext, \ - ibbox3 const & restrict srcbbox, \ - ibbox3 const & restrict dstbbox, \ - ibbox3 const & restrict, \ - ibbox3 const & restrict regbbox, \ +#define TYPECASE(N,T) \ + template \ + void \ + interpolate_3d_4tl (T const * restrict const src1, \ + CCTK_REAL const t1, \ + T const * restrict const src2, \ + CCTK_REAL const t2, \ + T const * restrict const src3, \ + CCTK_REAL const t3, \ + T const * restrict const src4, \ + CCTK_REAL const t4, \ + ivect3 const & restrict srcpadext, \ + ivect3 const & restrict srcext, \ + T * restrict const dst, \ + CCTK_REAL const t, \ + ivect3 const & restrict dstpadext, \ + ivect3 const & restrict dstext, \ + ibbox3 const & restrict srcbbox, \ + ibbox3 const & restrict dstbbox, \ + ibbox3 const & restrict, \ + ibbox3 const & restrict regbbox, \ void * extraargs); #include "typecase.hh" #undef TYPECASE diff --git a/Carpet/CarpetLib/src/interpolate_3d_5tl.cc b/Carpet/CarpetLib/src/interpolate_3d_5tl.cc index bb7f4d06d..74913a131 100644 --- a/Carpet/CarpetLib/src/interpolate_3d_5tl.cc +++ b/Carpet/CarpetLib/src/interpolate_3d_5tl.cc @@ -8,7 +8,6 @@ #include <loopcontrol.h> -#include "gdata.hh" #include "operator_prototypes_3d.hh" #include "typeprops.hh" @@ -22,9 +21,11 @@ namespace CarpetLib { #define SRCIND3(i,j,k) \ index3 (srcioff + (i), srcjoff + (j), srckoff + (k), \ + srcipadext, srcjpadext, srckpadext, \ srciext, srcjext, srckext) #define DSTIND3(i,j,k) \ index3 (dstioff + (i), dstjoff + (j), dstkoff + (k), \ + dstipadext, dstjpadext, dstkpadext, \ dstiext, dstjext, dstkext) @@ -41,9 +42,11 @@ namespace CarpetLib { CCTK_REAL const t4, T const * restrict const src5, CCTK_REAL const t5, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, CCTK_REAL const t, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -79,12 +82,6 @@ namespace CarpetLib { CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); } - if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or - dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride()))) - { - CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); - } - ivect3 const regext = regbbox.shape() / regbbox.stride(); @@ -95,6 +92,14 @@ namespace CarpetLib { + ptrdiff_t const srcipadext = srcpadext[0]; + ptrdiff_t const srcjpadext = srcpadext[1]; + ptrdiff_t const srckpadext = srcpadext[2]; + + ptrdiff_t const dstipadext = dstpadext[0]; + ptrdiff_t const dstjpadext = dstpadext[1]; + ptrdiff_t const dstkpadext = dstpadext[2]; + ptrdiff_t const srciext = srcext[0]; ptrdiff_t const srcjext = srcext[1]; ptrdiff_t const srckext = srcext[2]; @@ -145,7 +150,8 @@ namespace CarpetLib { // Loop over region #pragma omp parallel CCTK_LOOP3(interpolate_3d_5tl, - i,j,k, 0,0,0, regiext,regjext,regkext, srciext,srcjext,srckext) + i,j,k, 0,0,0, regiext,regjext,regkext, + dstipadext,dstjpadext,dstkpadext) { dst [DSTIND3(i, j, k)] = @@ -161,27 +167,29 @@ namespace CarpetLib { -#define TYPECASE(N,T) \ - template \ - void \ - interpolate_3d_5tl (T const * restrict const src1, \ - CCTK_REAL const t1, \ - T const * restrict const src2, \ - CCTK_REAL const t2, \ - T const * restrict const src3, \ - CCTK_REAL const t3, \ - T const * restrict const src4, \ - CCTK_REAL const t4, \ - T const * restrict const src5, \ - CCTK_REAL const t5, \ - ivect3 const & restrict srcext, \ - T * restrict const dst, \ - CCTK_REAL const t, \ - ivect3 const & restrict dstext, \ - ibbox3 const & restrict srcbbox, \ - ibbox3 const & restrict dstbbox, \ - ibbox3 const & restrict, \ - ibbox3 const & restrict regbbox, \ +#define TYPECASE(N,T) \ + template \ + void \ + interpolate_3d_5tl (T const * restrict const src1, \ + CCTK_REAL const t1, \ + T const * restrict const src2, \ + CCTK_REAL const t2, \ + T const * restrict const src3, \ + CCTK_REAL const t3, \ + T const * restrict const src4, \ + CCTK_REAL const t4, \ + T const * restrict const src5, \ + CCTK_REAL const t5, \ + ivect3 const & restrict srcpadext, \ + ivect3 const & restrict srcext, \ + T * restrict const dst, \ + CCTK_REAL const t, \ + ivect3 const & restrict dstpadext, \ + ivect3 const & restrict dstext, \ + ibbox3 const & restrict srcbbox, \ + ibbox3 const & restrict dstbbox, \ + ibbox3 const & restrict, \ + ibbox3 const & restrict regbbox, \ void * extraargs); #include "typecase.hh" #undef TYPECASE diff --git a/Carpet/CarpetLib/src/interpolate_eno_3d_3tl.cc b/Carpet/CarpetLib/src/interpolate_eno_3d_3tl.cc index 969ec4bbf..5d56ed3e8 100644 --- a/Carpet/CarpetLib/src/interpolate_eno_3d_3tl.cc +++ b/Carpet/CarpetLib/src/interpolate_eno_3d_3tl.cc @@ -8,7 +8,6 @@ #include <loopcontrol.h> -#include "gdata.hh" #include "operator_prototypes_3d.hh" #include "typeprops.hh" @@ -22,9 +21,11 @@ namespace CarpetLib { #define SRCIND3(i,j,k) \ index3 (srcioff + (i), srcjoff + (j), srckoff + (k), \ + srcipadext, srcjpadext, srckpadext, \ srciext, srcjext, srckext) #define DSTIND3(i,j,k) \ index3 (dstioff + (i), dstjoff + (j), dstkoff + (k), \ + dstipadext, dstjpadext, dstkpadext, \ dstiext, dstjext, dstkext) @@ -55,9 +56,11 @@ namespace CarpetLib { CCTK_REAL const t2, T const * restrict const src3, CCTK_REAL const t3, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, CCTK_REAL const t, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -93,12 +96,6 @@ namespace CarpetLib { CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); } - if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or - dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride()))) - { - CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); - } - ivect3 const regext = regbbox.shape() / regbbox.stride(); @@ -109,6 +106,14 @@ namespace CarpetLib { + ptrdiff_t const srcipadext = srcpadext[0]; + ptrdiff_t const srcjpadext = srcpadext[1]; + ptrdiff_t const srckpadext = srcpadext[2]; + + ptrdiff_t const dstipadext = dstpadext[0]; + ptrdiff_t const dstjpadext = dstpadext[1]; + ptrdiff_t const dstkpadext = dstpadext[2]; + ptrdiff_t const srciext = srcext[0]; ptrdiff_t const srcjext = srcext[1]; ptrdiff_t const srckext = srcext[2]; @@ -172,7 +177,8 @@ namespace CarpetLib { // Loop over region #pragma omp parallel CCTK_LOOP3 (interpolate_end_3d_3tl, - i,j,k, 0,0,0, regiext,regjext,regkext, srciext,srcjext,srckext) + i,j,k, 0,0,0, regiext,regjext,regkext, + srcipadext,srcjpadext,srckpadext) { T const s1 = src1 [SRCIND3(i, j, k)]; @@ -209,9 +215,11 @@ namespace CarpetLib { CCTK_REAL const t2, CCTK_COMPLEX8 const * restrict const src3, CCTK_REAL const t3, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, CCTK_COMPLEX8 * restrict const dst, CCTK_REAL const t, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -234,9 +242,11 @@ namespace CarpetLib { CCTK_REAL const t2, CCTK_COMPLEX16 const * restrict const src3, CCTK_REAL const t3, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, CCTK_COMPLEX16 * restrict const dst, CCTK_REAL const t, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -257,9 +267,11 @@ namespace CarpetLib { CCTK_REAL const t2, CCTK_COMPLEX32 const * restrict const src3, CCTK_REAL const t3, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, CCTK_COMPLEX32 * restrict const dst, CCTK_REAL const t, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -282,9 +294,11 @@ namespace CarpetLib { CCTK_REAL const t2, \ T const * restrict const src3, \ CCTK_REAL const t3, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ CCTK_REAL const t, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ diff --git a/Carpet/CarpetLib/src/make.code.defn b/Carpet/CarpetLib/src/make.code.defn index 0956754cc..aab0f5372 100644 --- a/Carpet/CarpetLib/src/make.code.defn +++ b/Carpet/CarpetLib/src/make.code.defn @@ -7,6 +7,7 @@ SRCS = backtrace.cc \ bboxset.cc \ bboxtree.cc \ bintree.cc \ + cacheinfo.cc \ commstate.cc \ data.cc \ defs.cc \ diff --git a/Carpet/CarpetLib/src/mem.cc b/Carpet/CarpetLib/src/mem.cc index 27f86e951..f3a4fa6ff 100644 --- a/Carpet/CarpetLib/src/mem.cc +++ b/Carpet/CarpetLib/src/mem.cc @@ -20,7 +20,12 @@ using namespace std; -double const gmem::MEGA = 1000*1000; +double const gmem::KILO = 1000.0; +double const gmem::MEGA = 1000.0*1000.0; +double const gmem::GIGA = 1000.0*1000.0*1000.0; +double const gmem::TERA = 1000.0*1000.0*1000.0*1000.0; +double const gmem::PETA = 1000.0*1000.0*1000.0*1000.0*1000.0; +double const gmem::EXA = 1000.0*1000.0*1000.0*1000.0*1000.0*1000.0; // Total number of currently allocated bytes and objects double gmem::total_allocated_bytes = 0; @@ -38,7 +43,8 @@ template<typename T> mem<T>:: mem (size_t const vectorlength, size_t const nelems, T * const memptr, size_t const memsize) - : storage_ (memptr), + : storage_base_ (memptr), + storage_ (memptr), nelems_ (nelems), vectorlength_ (vectorlength), owns_storage_ (false), @@ -62,7 +68,10 @@ mem (size_t const vectorlength, size_t const nelems, int(max_allowed_memory_MB)); } try { - storage_ = new T [vectorlength * nelems]; + // TODO: align and pad storage + size_t const padding = 0; + storage_base_ = new T [vectorlength * nelems + padding]; + storage_ = storage_base_ + padding; owns_storage_ = true; } catch (...) { T Tdummy; @@ -77,7 +86,9 @@ mem (size_t const vectorlength, size_t const nelems, total_allocated_bytes += nbytes; max_allocated_bytes = max (max_allocated_bytes, total_allocated_bytes); if (poison_new_memory) { - memset (storage_, poison_value, vectorlength * nelems * sizeof (T)); + memset (storage_base_, + poison_value, (vectorlength * nelems + + storage_ - storage_base_) * sizeof (T)); } } else { assert (memsize >= vectorlength * nelems * sizeof (T)); @@ -96,7 +107,7 @@ mem<T>:: { assert (not has_clients()); if (owns_storage_) { - delete [] storage_; + delete [] storage_base_; const double nbytes = vectorlength_ * nelems_ * sizeof (T); total_allocated_bytes -= nbytes; } @@ -144,13 +155,15 @@ memory () const { return + memoryof (storage_base_) + memoryof (storage_) + memoryof (nelems_) + memoryof (vectorlength_) + memoryof (owns_storage_) + memoryof (clients_) + memoryof (num_clients_) + - (owns_storage_ ? sizeof (T) * vectorlength_ * nelems_ : 0); + (owns_storage_ ? (vectorlength_ * nelems_ + + storage_ - storage_base_) : 0) * sizeof (T); } diff --git a/Carpet/CarpetLib/src/mem.hh b/Carpet/CarpetLib/src/mem.hh index ff7f05080..807daff70 100644 --- a/Carpet/CarpetLib/src/mem.hh +++ b/Carpet/CarpetLib/src/mem.hh @@ -19,7 +19,12 @@ class gmem { public: + static double const KILO; static double const MEGA; + static double const GIGA; + static double const TERA; + static double const PETA; + static double const EXA ; // Total number of currently allocated bytes and objects static double total_allocated_bytes; @@ -35,6 +40,7 @@ public: template<typename T> class mem: public gmem { + T * storage_base_; T * storage_; size_t nelems_; size_t vectorlength_; diff --git a/Carpet/CarpetLib/src/operator_prototypes_3d.hh b/Carpet/CarpetLib/src/operator_prototypes_3d.hh index 6b344b7c3..6d8850726 100644 --- a/Carpet/CarpetLib/src/operator_prototypes_3d.hh +++ b/Carpet/CarpetLib/src/operator_prototypes_3d.hh @@ -22,6 +22,7 @@ namespace CarpetLib { static inline size_t index3 (size_t const i, size_t const j, size_t const k, + size_t const padexti, size_t const padextj, size_t const padextk, size_t const exti, size_t const extj, size_t const extk) { #ifdef CARPET_DEBUG @@ -30,7 +31,7 @@ namespace CarpetLib { assert (static_cast <ptrdiff_t> (k) >= 0 and k < extk); #endif - return i + exti * (j + extj * k); + return i + padexti * (j + padextj * k); } @@ -56,8 +57,10 @@ namespace CarpetLib { template <typename T> void copy_3d (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -70,8 +73,10 @@ namespace CarpetLib { template <typename T, int ORDER> void prolongate_3d_rf2 (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -79,87 +84,15 @@ namespace CarpetLib { ibbox3 const & restrict dstregbbox, void * extraargs); -#if 0 - template <typename T> - void - prolongate_3d_o1_rf2 (T const * restrict const src, - ivect3 const & restrict srcext, - T * restrict const dst, - ivect3 const & restrict dstext, - ibbox3 const & restrict srcbbox, - ibbox3 const & restrict dstbbox, - ibbox3 const & restrict srcregbbox, - ibbox3 const & restrict dstregbbox, - void * extraargs); - - template <typename T> - void - prolongate_3d_o3_rf2 (T const * restrict const src, - ivect3 const & restrict srcext, - T * restrict const dst, - ivect3 const & restrict dstext, - ibbox3 const & restrict srcbbox, - ibbox3 const & restrict dstbbox, - ibbox3 const & restrict srcregbbox, - ibbox3 const & restrict dstregbbox, - void * extraargs); - - template <typename T> - void - prolongate_3d_o5_rf2 (T const * restrict const src, - ivect3 const & restrict srcext, - T * restrict const dst, - ivect3 const & restrict dstext, - ibbox3 const & restrict srcbbox, - ibbox3 const & restrict dstbbox, - ibbox3 const & restrict srcregbbox, - ibbox3 const & restrict dstregbbox, - void * extraargs); - - template <typename T> - void - prolongate_3d_o7_rf2 (T const * restrict const src, - ivect3 const & restrict srcext, - T * restrict const dst, - ivect3 const & restrict dstext, - ibbox3 const & restrict srcbbox, - ibbox3 const & restrict dstbbox, - ibbox3 const & restrict srcregbbox, - ibbox3 const & restrict dstregbbox, - void * extraargs); - - template <typename T> - void - prolongate_3d_o9_rf2 (T const * restrict const src, - ivect3 const & restrict srcext, - T * restrict const dst, - ivect3 const & restrict dstext, - ibbox3 const & restrict srcbbox, - ibbox3 const & restrict dstbbox, - ibbox3 const & restrict srcregbbox, - ibbox3 const & restrict dstregbbox, - void * extraargs); - - template <typename T> - void - prolongate_3d_o11_rf2 (T const * restrict const src, - ivect3 const & restrict srcext, - T * restrict const dst, - ivect3 const & restrict dstext, - ibbox3 const & restrict srcbbox, - ibbox3 const & restrict dstbbox, - ibbox3 const & restrict srcregbbox, - ibbox3 const & restrict dstregbbox, - void * extraargs); -#endif - template <typename T> void prolongate_3d_o5_monotone_rf2 (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -172,8 +105,10 @@ namespace CarpetLib { template <typename T, int ORDER> void prolongate_3d_cc_rf2 (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -181,49 +116,13 @@ namespace CarpetLib { ibbox3 const & restrict dstregbbox, void * extraargs); -#if 0 - template <typename T> - void - prolongate_3d_cc_o0_rf2 (T const * restrict const src, - ivect3 const & restrict srcext, - T * restrict const dst, - ivect3 const & restrict dstext, - ibbox3 const & restrict srcbbox, - ibbox3 const & restrict dstbbox, - ibbox3 const & restrict srcregbbox, - ibbox3 const & restrict dstregbbox, - void * extraargs); - - template <typename T> - void - prolongate_3d_cc_o1_rf2 (T const * restrict const src, - ivect3 const & restrict srcext, - T * restrict const dst, - ivect3 const & restrict dstext, - ibbox3 const & restrict srcbbox, - ibbox3 const & restrict dstbbox, - ibbox3 const & restrict srcregbbox, - ibbox3 const & restrict dstregbbox, - void * extraargs); - - template <typename T> - void - prolongate_3d_cc_o2_rf2 (T const * restrict const src, - ivect3 const & restrict srcext, - T * restrict const dst, - ivect3 const & restrict dstext, - ibbox3 const & restrict srcbbox, - ibbox3 const & restrict dstbbox, - ibbox3 const & restrict srcregbbox, - ibbox3 const & restrict dstregbbox, - void * extraargs); -#endif - template <typename T, int ORDER> void prolongate_3d_cc_eno_rf2 (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -235,8 +134,10 @@ namespace CarpetLib { template <typename T, int ORDER> void prolongate_3d_cc_eno_rf2 (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -260,8 +161,10 @@ namespace CarpetLib { template <typename T, int ORDER> void prolongate_3d_dgfe_rf2 (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -274,8 +177,10 @@ namespace CarpetLib { template <typename T> void restrict_3d_rf2 (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -291,9 +196,11 @@ namespace CarpetLib { CCTK_REAL const t1, T const * restrict const src2, CCTK_REAL const t2, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, CCTK_REAL const t, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -309,9 +216,11 @@ namespace CarpetLib { CCTK_REAL const t2, T const * restrict const src3, CCTK_REAL const t3, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, CCTK_REAL const t, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -329,9 +238,11 @@ namespace CarpetLib { CCTK_REAL const t3, T const * restrict const src4, CCTK_REAL const t4, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, CCTK_REAL const t, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -351,9 +262,11 @@ namespace CarpetLib { CCTK_REAL const t4, T const * restrict const src5, CCTK_REAL const t5, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, CCTK_REAL const t, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -369,9 +282,11 @@ namespace CarpetLib { CCTK_REAL const t2, T const * restrict const src3, CCTK_REAL const t3, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, CCTK_REAL const t, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -384,8 +299,10 @@ namespace CarpetLib { template <typename T> void restrict_3d_cc_rf2 (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -398,8 +315,10 @@ namespace CarpetLib { template <typename T> void restrict_3d_cc_o3_rf2 (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -412,8 +331,10 @@ namespace CarpetLib { template <typename T> void restrict_3d_cc_o5_rf2 (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -426,8 +347,10 @@ namespace CarpetLib { template <typename T, int centi, int centj, int centk> void restrict_3d_vc_rf2 (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -440,8 +363,10 @@ namespace CarpetLib { template <typename T, int ORDER> void restrict_3d_dgfe_rf2 (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, diff --git a/Carpet/CarpetLib/src/operator_prototypes_4d.hh b/Carpet/CarpetLib/src/operator_prototypes_4d.hh index 1d0a4609a..71d1e3bcd 100644 --- a/Carpet/CarpetLib/src/operator_prototypes_4d.hh +++ b/Carpet/CarpetLib/src/operator_prototypes_4d.hh @@ -22,6 +22,7 @@ namespace CarpetLib { static inline size_t index4 (size_t const i, size_t const j, size_t const k, size_t const l, + size_t const padexti, size_t const padextj, size_t const padextk, size_t const padextl, size_t const exti, size_t const extj, size_t const extk, size_t const extl) { #ifdef CARPET_DEBUG @@ -31,7 +32,7 @@ namespace CarpetLib { assert (static_cast <ptrdiff_t> (l) >= 0 and l < extl); #endif - return i + exti * (j + extj * (k + extk * l)); + return i + padexti * (j + padextj * (k + padextk * l)); } @@ -47,8 +48,10 @@ namespace CarpetLib { template <typename T> void copy_4d (T const * restrict const src, + ivect4 const & restrict srcpadext, ivect4 const & restrict srcext, T * restrict const dst, + ivect4 const & restrict dstpadext, ivect4 const & restrict dstext, ibbox4 const & restrict srcbbox, ibbox4 const & restrict dstbbox, @@ -61,8 +64,10 @@ namespace CarpetLib { template <typename T> void prolongate_4d_o1_rf2 (T const * restrict const src, + ivect4 const & restrict srcpadext, ivect4 const & restrict srcext, T * restrict const dst, + ivect4 const & restrict dstpadext, ivect4 const & restrict dstext, ibbox4 const & restrict srcbbox, ibbox4 const & restrict dstbbox, @@ -75,8 +80,10 @@ namespace CarpetLib { template <typename T> void restrict_4d_rf2 (T const * restrict const src, + ivect4 const & restrict srcpadext, ivect4 const & restrict srcext, T * restrict const dst, + ivect4 const & restrict dstpadext, ivect4 const & restrict dstext, ibbox4 const & restrict srcbbox, ibbox4 const & restrict dstbbox, diff --git a/Carpet/CarpetLib/src/prolongate_3d_cc_eno_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_cc_eno_rf2.cc index 5cd8fdd2c..7b5c6cee2 100644 --- a/Carpet/CarpetLib/src/prolongate_3d_cc_eno_rf2.cc +++ b/Carpet/CarpetLib/src/prolongate_3d_cc_eno_rf2.cc @@ -6,7 +6,6 @@ #include <cmath> #include <cstdlib> -#include "gdata.hh" #include "operator_prototypes_3d.hh" #include "typeprops.hh" @@ -43,13 +42,22 @@ using namespace std; namespace CarpetLib { - -#define SRCIND3(i,j,k) index3 (i, j, k, srciext, srcjext, srckext) -#define DSTIND3(i,j,k) index3 (i, j, k, dstiext, dstjext, dstkext) -#define SRCOFF3(i,j,k) offset3 (i, j, k, srciext, srcjext, srckext) -#define DSTOFF3(i,j,k) offset3 (i, j, k, dstiext, dstjext, dstkext) +#define SRCIND3(i,j,k) \ + index3 (i, j, k, \ + srcipadext, srcjpadext, srckpadext, \ + srciext, srcjext, srckext) +#define DSTIND3(i,j,k) \ + index3 (i, j, k, \ + dstipadext, dstjpadext, dstkpadext, \ + dstiext, dstjext, dstkext) +#define SRCOFF3(i,j,k) \ + offset3 (i, j, k, \ + srciext, srcjext, srckext) +#define DSTOFF3(i,j,k) \ + offset3 (i, j, k, \ + dstiext, dstjext, dstkext) @@ -947,8 +955,10 @@ namespace CarpetLib { template <typename T, int ORDER> void prolongate_3d_cc_eno_rf2 (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -1033,13 +1043,15 @@ namespace CarpetLib { CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); } - if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or - dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride()))) - { - CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); - } + size_t const srcipadext = srcpadext[0]; + size_t const srcjpadext = srcpadext[1]; + size_t const srckpadext = srcpadext[2]; + + size_t const dstipadext = dstpadext[0]; + size_t const dstjpadext = dstpadext[1]; + size_t const dstkpadext = dstpadext[2]; size_t const srciext = srcext[0]; size_t const srcjext = srcext[1]; @@ -1283,8 +1295,10 @@ namespace CarpetLib { template <> \ void \ prolongate_3d_cc_eno_rf2<T,2> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ @@ -1299,8 +1313,10 @@ namespace CarpetLib { template <> \ void \ prolongate_3d_cc_eno_rf2<T,3> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ @@ -1324,8 +1340,10 @@ namespace CarpetLib { template \ void \ prolongate_3d_cc_eno_rf2<T,2> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ @@ -1337,8 +1355,10 @@ namespace CarpetLib { template \ void \ prolongate_3d_cc_eno_rf2<T,3> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ diff --git a/Carpet/CarpetLib/src/prolongate_3d_cc_real8_tvd.F90 b/Carpet/CarpetLib/src/prolongate_3d_cc_real8_tvd.F90 index b39f43766..c970e41e6 100644 --- a/Carpet/CarpetLib/src/prolongate_3d_cc_real8_tvd.F90 +++ b/Carpet/CarpetLib/src/prolongate_3d_cc_real8_tvd.F90 @@ -52,18 +52,21 @@ -subroutine prolongate_3d_cc_real8_tvd (src, srciext, srcjext, & - srckext, dst, dstiext, dstjext, dstkext, srcbbox, & - dstbbox, regbbox) +subroutine prolongate_3d_cc_real8_tvd ( & + src, srcipadext, srcjpadext, srckpadext, srciext, srcjext, srckext, & + dst, dstipadext, dstjpadext, dstkpadext, dstiext, dstjext, dstkext, & + srcbbox, dstbbox, regbbox) implicit none CCTK_REAL8, parameter :: one=1, fourth=one/4 + integer srcipadext, srcjpadext, srckpadext integer srciext, srcjext, srckext - CCTK_REAL8 src(srciext,srcjext,srckext) + CCTK_REAL8 src(srcipadext,srcjpadext,srckpadext) + integer dstipadext, dstjpadext, dstkpadext integer dstiext, dstjext, dstkext - CCTK_REAL8 dst(dstiext,dstjext,dstkext) + CCTK_REAL8 dst(dstipadext,dstjpadext,dstkpadext) ! bbox(:,1) is lower boundary (inclusive) ! bbox(:,2) is upper boundary (inclusive) ! bbox(:,3) is stride diff --git a/Carpet/CarpetLib/src/prolongate_3d_cc_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_cc_rf2.cc index bf960f0aa..269b34947 100644 --- a/Carpet/CarpetLib/src/prolongate_3d_cc_rf2.cc +++ b/Carpet/CarpetLib/src/prolongate_3d_cc_rf2.cc @@ -6,7 +6,6 @@ #include <cmath> #include <cstdlib> -#include "gdata.hh" #include "operator_prototypes_3d.hh" #include "typeprops.hh" @@ -43,13 +42,22 @@ using namespace std; namespace CarpetLib { - -#define SRCIND3(i,j,k) index3 (i, j, k, srciext, srcjext, srckext) -#define DSTIND3(i,j,k) index3 (i, j, k, dstiext, dstjext, dstkext) -#define SRCOFF3(i,j,k) offset3 (i, j, k, srciext, srcjext, srckext) -#define DSTOFF3(i,j,k) offset3 (i, j, k, dstiext, dstjext, dstkext) +#define SRCIND3(i,j,k) \ + index3 (i, j, k, \ + srcipadext, srcjpadext, srckpadext, \ + srciext, srcjext, srckext) +#define DSTIND3(i,j,k) \ + index3 (i, j, k, \ + dstipadext, dstjpadext, dstkpadext, \ + dstiext, dstjext, dstkext) +#define SRCOFF3(i,j,k) \ + offset3 (i, j, k, \ + srciext, srcjext, srckext) +#define DSTOFF3(i,j,k) \ + offset3 (i, j, k, \ + dstiext, dstjext, dstkext) @@ -320,8 +328,10 @@ namespace CarpetLib { template <typename T, int ORDER> void prolongate_3d_cc_rf2 (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -406,13 +416,15 @@ namespace CarpetLib { CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); } - if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or - dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride()))) - { - CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); - } + size_t const srcipadext = srcpadext[0]; + size_t const srcjpadext = srcpadext[1]; + size_t const srckpadext = srcpadext[2]; + + size_t const dstipadext = dstpadext[0]; + size_t const dstjpadext = dstpadext[1]; + size_t const dstkpadext = dstpadext[2]; size_t const srciext = srcext[0]; size_t const srcjext = srcext[1]; @@ -656,8 +668,10 @@ namespace CarpetLib { template \ void \ prolongate_3d_cc_rf2<T,0> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ @@ -668,8 +682,10 @@ namespace CarpetLib { template \ void \ prolongate_3d_cc_rf2<T,1> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ @@ -680,8 +696,10 @@ namespace CarpetLib { template \ void \ prolongate_3d_cc_rf2<T,2> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ @@ -692,20 +710,24 @@ namespace CarpetLib { template \ void \ prolongate_3d_cc_rf2<T,3> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ - ibbox3 const & restrict, \ - ibbox3 const & restrict regbbox, \ + ibbox3 const & restrict, \ + ibbox3 const & restrict regbbox, \ void * extraargs); \ \ template \ void \ prolongate_3d_cc_rf2<T,4> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ @@ -716,8 +738,10 @@ namespace CarpetLib { template \ void \ prolongate_3d_cc_rf2<T,5> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ diff --git a/Carpet/CarpetLib/src/prolongate_3d_dgfe_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_dgfe_rf2.cc index 640b084c2..3be587551 100644 --- a/Carpet/CarpetLib/src/prolongate_3d_dgfe_rf2.cc +++ b/Carpet/CarpetLib/src/prolongate_3d_dgfe_rf2.cc @@ -20,18 +20,30 @@ using namespace hrscc; namespace CarpetLib { -#define SRCIND3(i,j,k) ptrdiff_t(index3(i, j, k, srciext, srcjext, srckext)) -#define DSTIND3(i,j,k) ptrdiff_t(index3(i, j, k, dstiext, dstjext, dstkext)) -#define SRCOFF3(i,j,k) ptrdiff_t(offset3(i, j, k, srciext, srcjext, srckext)) -#define DSTOFF3(i,j,k) ptrdiff_t(offset3(i, j, k, dstiext, dstjext, dstkext)) +#define SRCIND3(i,j,k) \ + index3 (i, j, k, \ + srcipadext, srcjpadext, srckpadext, \ + srciext, srcjext, srckext) +#define DSTIND3(i,j,k) \ + index3 (i, j, k, \ + dstipadext, dstjpadext, dstkpadext, \ + dstiext, dstjext, dstkext) +#define SRCOFF3(i,j,k) \ + offset3 (i, j, k, \ + srciext, srcjext, srckext) +#define DSTOFF3(i,j,k) \ + offset3 (i, j, k, \ + dstiext, dstjext, dstkext) template<typename T, int ORDER> void prolongate_3d_dgfe_rf2(T const *restrict const src, + ivect3 const& restrict srcpadext, ivect3 const& restrict srcext, T *restrict const dst, + ivect3 const& restrict dstpadext, ivect3 const& restrict dstext, ibbox3 const& restrict srcbbox, ibbox3 const& restrict dstbbox, @@ -98,6 +110,14 @@ namespace CarpetLib { + ptrdiff_t const srcipadext = srcpadext[0]; + ptrdiff_t const srcjpadext = srcpadext[1]; + ptrdiff_t const srckpadext = srcpadext[2]; + + ptrdiff_t const dstipadext = dstpadext[0]; + ptrdiff_t const dstjpadext = dstpadext[1]; + ptrdiff_t const dstkpadext = dstpadext[2]; + ptrdiff_t const srciext = srcext[0]; ptrdiff_t const srcjext = srcext[1]; ptrdiff_t const srckext = srcext[2]; @@ -143,8 +163,9 @@ namespace CarpetLib { #ifdef HRSCC_HH ptrdiff_t const i=0; #pragma omp parallel for //collapse(2) - for (ptrdiff_t k=0; k<regkext; k+=2*(ORDER+1)) { - for (ptrdiff_t j=0; j<regjext; j+=2*(ORDER+1)) { + // Zwicky's Intel compiler ices on ptrdiff_t + for (int k=0; k<regkext; k+=2*(ORDER+1)) { + for (int j=0; j<regjext; j+=2*(ORDER+1)) { GLLElement<ORDER>::prolongate_2D (&src[SRCIND3(srcioff+i, srcjoff+j, srckoff+k)], srcstr2d, &dst[DSTIND3(dstioff+2*i, dstjoff+2*j, dstkoff+2*k)], dststr2d); @@ -182,8 +203,9 @@ namespace CarpetLib { #ifdef HRSCC_HH ptrdiff_t const j=0; #pragma omp parallel for //collapse(2) - for (ptrdiff_t k=0; k<regkext; k+=2*(ORDER+1)) { - for (ptrdiff_t i=0; i<regiext; i+=2*(ORDER+1)) { + // Zwicky's Intel compiler ices on ptrdiff_t + for (int k=0; k<regkext; k+=2*(ORDER+1)) { + for (int i=0; i<regiext; i+=2*(ORDER+1)) { GLLElement<ORDER>::prolongate_2D (&src[SRCIND3(srcioff+i, srcjoff+j, srckoff+k)], srcstr2d, &dst[DSTIND3(dstioff+2*i, dstjoff+2*j, dstkoff+2*k)], dststr2d); @@ -221,8 +243,9 @@ namespace CarpetLib { #ifdef HRSCC_HH ptrdiff_t const k=0; #pragma omp parallel for //collapse(2) - for (ptrdiff_t j=0; j<regjext; j+=2*(ORDER+1)) { - for (ptrdiff_t i=0; i<regiext; i+=2*(ORDER+1)) { + // Zwicky's Intel compiler ices on ptrdiff_t + for (int j=0; j<regjext; j+=2*(ORDER+1)) { + for (int i=0; i<regiext; i+=2*(ORDER+1)) { GLLElement<ORDER>::prolongate_2D (&src[SRCIND3(srcioff+i, srcjoff+j, srckoff+k)], srcstr2d, &dst[DSTIND3(dstioff+2*i, dstjoff+2*j, dstkoff+2*k)], dststr2d); @@ -257,9 +280,10 @@ namespace CarpetLib { // Loop over fine region #ifdef HRSCC_HH #pragma omp parallel for //collapse(3) - for (ptrdiff_t k=0; k<regkext; k+=2*(ORDER+1)) { - for (ptrdiff_t j=0; j<regjext; j+=2*(ORDER+1)) { - for (ptrdiff_t i=0; i<regiext; i+=2*(ORDER+1)) { + // Zwicky's Intel compiler ices on ptrdiff_t + for (int k=0; k<regkext; k+=2*(ORDER+1)) { + for (int j=0; j<regjext; j+=2*(ORDER+1)) { + for (int i=0; i<regiext; i+=2*(ORDER+1)) { GLLElement<ORDER>::prolongate_full (&src[SRCIND3(srcioff+i, srcjoff+j, srckoff+k)], srcstr, &dst[DSTIND3(dstioff+2*i, dstjoff+2*j, dstkoff+2*k)], dststr); @@ -281,8 +305,10 @@ namespace CarpetLib { template \ void \ prolongate_3d_dgfe_rf2<T,5>(T const *restrict const src, \ + ivect3 const& restrict srcpadext, \ ivect3 const& restrict srcext, \ T *restrict const dst, \ + ivect3 const& restrict dstpadext, \ ivect3 const& restrict dstext, \ ibbox3 const& restrict srcbbox, \ ibbox3 const& restrict dstbbox, \ @@ -300,8 +326,10 @@ namespace CarpetLib { template<> void prolongate_3d_dgfe_rf2<CCTK_COMPLEX,5>(CCTK_COMPLEX const *restrict const src, + ivect3 const& restrict srcpadext, ivect3 const& restrict srcext, CCTK_COMPLEX *restrict const dst, + ivect3 const& restrict dstpadext, ivect3 const& restrict dstext, ibbox3 const& restrict srcbbox, ibbox3 const& restrict dstbbox, diff --git a/Carpet/CarpetLib/src/prolongate_3d_o5_monotone_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_o5_monotone_rf2.cc index adc4d0693..48fe2f31b 100644 --- a/Carpet/CarpetLib/src/prolongate_3d_o5_monotone_rf2.cc +++ b/Carpet/CarpetLib/src/prolongate_3d_o5_monotone_rf2.cc @@ -23,7 +23,6 @@ #include <cmath> #include <cstdlib> -#include "gdata.hh" #include "operator_prototypes_3d.hh" #include "typeprops.hh" @@ -37,11 +36,14 @@ namespace CarpetLib { #define SRCIND3(i,j,k) \ index3 (i, j, k, \ + srcipadext, srcjpadext, srckpadext, \ srciext, srcjext, srckext) #define DSTIND3(i,j,k) \ index3 (i, j, k, \ + dstipadext, dstjpadext, dstkpadext, \ dstiext, dstjext, dstkext) + template <typename T> inline @@ -83,8 +85,10 @@ namespace CarpetLib { template <typename T> void prolongate_3d_o5_monotone_rf2 (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -137,13 +141,15 @@ namespace CarpetLib { CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); } - if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or - dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride()))) - { - CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); - } + size_t const srcipadext = srcpadext[0]; + size_t const srcjpadext = srcpadext[1]; + size_t const srckpadext = srcpadext[2]; + + size_t const dstipadext = dstpadext[0]; + size_t const dstjpadext = dstpadext[1]; + size_t const dstkpadext = dstpadext[2]; size_t const srciext = srcext[0]; size_t const srcjext = srcext[1]; @@ -827,8 +833,10 @@ namespace CarpetLib { template <> void prolongate_3d_o5_monotone_rf2 (CCTK_COMPLEX const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, CCTK_COMPLEX * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -843,8 +851,10 @@ namespace CarpetLib { template \ void \ prolongate_3d_o5_monotone_rf2 (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ diff --git a/Carpet/CarpetLib/src/prolongate_3d_real8_eno.F90 b/Carpet/CarpetLib/src/prolongate_3d_real8_eno.F90 index fbff861c4..94a9e60f1 100644 --- a/Carpet/CarpetLib/src/prolongate_3d_real8_eno.F90 +++ b/Carpet/CarpetLib/src/prolongate_3d_real8_eno.F90 @@ -75,19 +75,22 @@ function eno1d(q) end function eno1d -subroutine prolongate_3d_real8_eno (src, srciext, srcjext, & - srckext, dst, dstiext, dstjext, dstkext, srcbbox, & - dstbbox, regbbox) +subroutine prolongate_3d_real8_eno ( & + src, srcipadext, srcjpadext, srckpadext, srciext, srcjext, srckext, & + dst, dstipadext, dstjpadext, dstkpadext, dstiext, dstjext, dstkext, & + srcbbox, dstbbox, regbbox) implicit none CCTK_REAL8 one parameter (one = 1) + integer srcipadext, srcjpadext, srckpadext integer srciext, srcjext, srckext - CCTK_REAL8 src(srciext,srcjext,srckext) + CCTK_REAL8 src(srcipadext,srcjpadext,srckpadext) + integer dstipadext, dstjpadext, dstkpadext integer dstiext, dstjext, dstkext - CCTK_REAL8 dst(dstiext,dstjext,dstkext) + CCTK_REAL8 dst(dstipadext,dstjpadext,dstkpadext) !!$ bbox(:,1) is lower boundary (inclusive) !!$ bbox(:,2) is upper boundary (inclusive) !!$ bbox(:,3) is stride diff --git a/Carpet/CarpetLib/src/prolongate_3d_real8_tvd.F90 b/Carpet/CarpetLib/src/prolongate_3d_real8_tvd.F90 index f58cabf98..230a20bd5 100644 --- a/Carpet/CarpetLib/src/prolongate_3d_real8_tvd.F90 +++ b/Carpet/CarpetLib/src/prolongate_3d_real8_tvd.F90 @@ -26,19 +26,22 @@ -subroutine prolongate_3d_real8_tvd (src, srciext, srcjext, & - srckext, dst, dstiext, dstjext, dstkext, srcbbox, & - dstbbox, regbbox) +subroutine prolongate_3d_real8_tvd ( & + src, srcipadext, srcjpadext, srckpadext, srciext, srcjext, srckext, & + dst, dstipadext, dstjpadext, dstkpadext, dstiext, dstjext, dstkext, & + srcbbox, dstbbox, regbbox) implicit none CCTK_REAL8 one parameter (one = 1) + integer srcipadext, srcjpadext, srckpadext integer srciext, srcjext, srckext - CCTK_REAL8 src(srciext,srcjext,srckext) + CCTK_REAL8 src(srcipadext,srcjpadext,srckpadext) + integer dstipadext, dstjpadext, dstkpadext integer dstiext, dstjext, dstkext - CCTK_REAL8 dst(dstiext,dstjext,dstkext) + CCTK_REAL8 dst(dstipadext,dstjpadext,dstkpadext) ! bbox(:,1) is lower boundary (inclusive) ! bbox(:,2) is upper boundary (inclusive) ! bbox(:,3) is stride diff --git a/Carpet/CarpetLib/src/prolongate_3d_real8_weno.F90 b/Carpet/CarpetLib/src/prolongate_3d_real8_weno.F90 index d5ec43427..044132a10 100644 --- a/Carpet/CarpetLib/src/prolongate_3d_real8_weno.F90 +++ b/Carpet/CarpetLib/src/prolongate_3d_real8_weno.F90 @@ -155,19 +155,22 @@ function weno1d(q) end function weno1d -subroutine prolongate_3d_real8_weno (src, srciext, srcjext, & - srckext, dst, dstiext, dstjext, dstkext, srcbbox, & - dstbbox, regbbox) +subroutine prolongate_3d_real8_weno ( & + src, srcipadext, srcjpadext, srckpadext, srciext, srcjext, srckext, & + dst, dstipadext, dstjpadext, dstkpadext, dstiext, dstjext, dstkext, & + srcbbox, dstbbox, regbbox) implicit none CCTK_REAL8 one parameter (one = 1) + integer srcipadext, srcjpadext, srckpadext integer srciext, srcjext, srckext - CCTK_REAL8 src(srciext,srcjext,srckext) + CCTK_REAL8 src(srcipadext,srcjpadext,srckpadext) + integer dstipadext, dstjpadext, dstkpadext integer dstiext, dstjext, dstkext - CCTK_REAL8 dst(dstiext,dstjext,dstkext) + CCTK_REAL8 dst(dstipadext,dstjpadext,dstkpadext) !!$ bbox(:,1) is lower boundary (inclusive) !!$ bbox(:,2) is upper boundary (inclusive) !!$ bbox(:,3) is stride diff --git a/Carpet/CarpetLib/src/prolongate_3d_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_rf2.cc index 984d68c07..cdf24a985 100644 --- a/Carpet/CarpetLib/src/prolongate_3d_rf2.cc +++ b/Carpet/CarpetLib/src/prolongate_3d_rf2.cc @@ -8,7 +8,6 @@ #include <cstdlib> #include <iostream> -#include "gdata.hh" #include "vectors.h" #include "operator_prototypes_3d.hh" #include "typeprops.hh" @@ -21,10 +20,20 @@ namespace CarpetLib { -#define SRCIND3(i,j,k) index3 (i, j, k, srciext, srcjext, srckext) -#define DSTIND3(i,j,k) index3 (i, j, k, dstiext, dstjext, dstkext) -#define SRCOFF3(i,j,k) offset3 (i, j, k, srciext, srcjext, srckext) -#define DSTOFF3(i,j,k) offset3 (i, j, k, dstiext, dstjext, dstkext) +#define SRCIND3(i,j,k) \ + index3 (i, j, k, \ + srcipadext, srcjpadext, srckpadext, \ + srciext, srcjext, srckext) +#define DSTIND3(i,j,k) \ + index3 (i, j, k, \ + dstipadext, dstjpadext, dstkpadext, \ + dstiext, dstjext, dstkext) +#define SRCOFF3(i,j,k) \ + offset3 (i, j, k, \ + srciext, srcjext, srckext) +#define DSTOFF3(i,j,k) \ + offset3 (i, j, k, \ + dstiext, dstjext, dstkext) @@ -473,8 +482,10 @@ namespace CarpetLib { template <typename T, int ORDER> void prolongate_3d_rf2 (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -550,13 +561,15 @@ namespace CarpetLib { CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); } - if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or - dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride()))) - { - CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); - } + size_t const srcipadext = srcpadext[0]; + size_t const srcjpadext = srcpadext[1]; + size_t const srckpadext = srcpadext[2]; + + size_t const dstipadext = dstpadext[0]; + size_t const dstjpadext = dstpadext[1]; + size_t const dstkpadext = dstpadext[2]; size_t const srciext = srcext[0]; size_t const srcjext = srcext[1]; @@ -801,8 +814,10 @@ namespace CarpetLib { template \ void \ prolongate_3d_rf2<T,1> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ @@ -813,8 +828,10 @@ namespace CarpetLib { template \ void \ prolongate_3d_rf2<T,3> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ @@ -825,8 +842,10 @@ namespace CarpetLib { template \ void \ prolongate_3d_rf2<T,5> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ @@ -837,8 +856,10 @@ namespace CarpetLib { template \ void \ prolongate_3d_rf2<T,7> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ @@ -849,8 +870,10 @@ namespace CarpetLib { template \ void \ prolongate_3d_rf2<T,9> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ @@ -861,8 +884,10 @@ namespace CarpetLib { template \ void \ prolongate_3d_rf2<T,11> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ diff --git a/Carpet/CarpetLib/src/prolongate_4d_o1_rf2.cc b/Carpet/CarpetLib/src/prolongate_4d_o1_rf2.cc index 1f7e8203e..58b22f719 100644 --- a/Carpet/CarpetLib/src/prolongate_4d_o1_rf2.cc +++ b/Carpet/CarpetLib/src/prolongate_4d_o1_rf2.cc @@ -6,7 +6,6 @@ #include <cmath> #include <cstdlib> -#include "gdata.hh" #include "operator_prototypes_4d.hh" #include "typeprops.hh" @@ -18,11 +17,13 @@ namespace CarpetLib { -#define SRCIND4(i,j,k,l) \ - index4 (i, j, k, l, \ +#define SRCIND4(i,j,k,l) \ + index4 (i, j, k, l, \ + srcipadext, srcjpadext, srckpadext, srclpadext, \ srciext, srcjext, srckext, srclext) -#define DSTIND4(i,j,k,l) \ - index4 (i, j, k, l, \ +#define DSTIND4(i,j,k,l) \ + index4 (i, j, k, l, \ + dstipadext, dstjpadext, dstkpadext, dstlpadext, \ dstiext, dstjext, dstkext, dstlext) @@ -30,8 +31,10 @@ namespace CarpetLib { template <typename T> void prolongate_4d_o1_rf2 (T const * restrict const src, + ivect4 const & restrict srcpadext, ivect4 const & restrict srcext, T * restrict const dst, + ivect4 const & restrict dstpadext, ivect4 const & restrict dstext, ibbox4 const & restrict srcbbox, ibbox4 const & restrict dstbbox, @@ -84,13 +87,17 @@ namespace CarpetLib { CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); } - if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or - dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride()))) - { - CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); - } + size_t const srcipadext = srcpadext[0]; + size_t const srcjpadext = srcpadext[1]; + size_t const srckpadext = srcpadext[2]; + size_t const srclpadext = srcpadext[3]; + + size_t const dstipadext = dstpadext[0]; + size_t const dstjpadext = dstpadext[1]; + size_t const dstkpadext = dstpadext[2]; + size_t const dstlpadext = dstpadext[3]; size_t const srciext = srcext[0]; size_t const srcjext = srcext[1]; @@ -593,8 +600,10 @@ namespace CarpetLib { template \ void \ prolongate_4d_o1_rf2 (T const * restrict const src, \ + ivect4 const & restrict srcpadext, \ ivect4 const & restrict srcext, \ T * restrict const dst, \ + ivect4 const & restrict dstpadext, \ ivect4 const & restrict dstext, \ ibbox4 const & restrict srcbbox, \ ibbox4 const & restrict dstbbox, \ diff --git a/Carpet/CarpetLib/src/restrict_3d_cc_o3_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_cc_o3_rf2.cc index 2f544110a..981398ae3 100644 --- a/Carpet/CarpetLib/src/restrict_3d_cc_o3_rf2.cc +++ b/Carpet/CarpetLib/src/restrict_3d_cc_o3_rf2.cc @@ -5,7 +5,6 @@ #include <cassert> #include <cmath> -#include "gdata.hh" #include "operator_prototypes_3d.hh" #include "typeprops.hh" @@ -19,9 +18,11 @@ namespace CarpetLib { #define SRCIND3(i,j,k) \ index3 (srcioff + (i), srcjoff + (j), srckoff + (k), \ + srcipadext, srcjpadext, srckpadext, \ srciext, srcjext, srckext) #define DSTIND3(i,j,k) \ index3 (dstioff + (i), dstjoff + (j), dstkoff + (k), \ + dstipadext, dstjpadext, dstkpadext, \ dstiext, dstjext, dstkext) @@ -38,14 +39,16 @@ namespace CarpetLib { template <typename T> void restrict_3d_cc_o3_rf2 (T const * restrict const src, - ivect3 const & restrict srcext, - T * restrict const dst, - ivect3 const & restrict dstext, - ibbox3 const & restrict srcbbox, - ibbox3 const & restrict dstbbox, - ibbox3 const & restrict, - ibbox3 const & restrict regbbox, - void * extraargs) + ivect3 const & restrict srcpadext, + ivect3 const & restrict srcext, + T * restrict const dst, + ivect3 const & restrict dstpadext, + ivect3 const & restrict dstext, + ibbox3 const & restrict srcbbox, + ibbox3 const & restrict dstbbox, + ibbox3 const & restrict, + ibbox3 const & restrict regbbox, + void * extraargs) { assert (not extraargs); @@ -80,12 +83,6 @@ namespace CarpetLib { CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); } - if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or - dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride()))) - { - CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); - } - ivect3 const regext = regbbox.shape() / regbbox.stride(); @@ -101,6 +98,14 @@ namespace CarpetLib { + int const srcipadext = srcpadext[0]; + int const srcjpadext = srcpadext[1]; + int const srckpadext = srcpadext[2]; + + int const dstipadext = dstpadext[0]; + int const dstjpadext = dstpadext[1]; + int const dstkpadext = dstpadext[2]; + int const srciext = srcext[0]; int const srcjext = srcext[1]; int const srckext = srcext[2]; @@ -235,18 +240,20 @@ namespace CarpetLib { -#define TYPECASE(N,T) \ - template \ - void \ - restrict_3d_cc_o3_rf2 (T const * restrict const src, \ - ivect3 const & restrict srcext, \ - T * restrict const dst, \ - ivect3 const & restrict dstext, \ - ibbox3 const & restrict srcbbox, \ - ibbox3 const & restrict dstbbox, \ - ibbox3 const & restrict, \ - ibbox3 const & restrict regbbox, \ - void * extraargs); +#define TYPECASE(N,T) \ + template \ + void \ + restrict_3d_cc_o3_rf2 (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ + ivect3 const & restrict srcext, \ + T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ + ivect3 const & restrict dstext, \ + ibbox3 const & restrict srcbbox, \ + ibbox3 const & restrict dstbbox, \ + ibbox3 const & restrict, \ + ibbox3 const & restrict regbbox, \ + void * extraargs); #include "typecase.hh" #undef TYPECASE diff --git a/Carpet/CarpetLib/src/restrict_3d_cc_o5_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_cc_o5_rf2.cc index a90ebb868..6c90daa44 100644 --- a/Carpet/CarpetLib/src/restrict_3d_cc_o5_rf2.cc +++ b/Carpet/CarpetLib/src/restrict_3d_cc_o5_rf2.cc @@ -19,9 +19,11 @@ namespace CarpetLib { #define SRCIND3(i,j,k) \ index3 (srcioff + (i), srcjoff + (j), srckoff + (k), \ + srcipadext, srcjpadext, srckpadext, \ srciext, srcjext, srckext) #define DSTIND3(i,j,k) \ index3 (dstioff + (i), dstjoff + (j), dstkoff + (k), \ + dstipadext, dstjpadext, dstkpadext, \ dstiext, dstjext, dstkext) #define SRCOFF3(i,j,k) \ offset3 (srcioff + (i), srcjoff + (j), srckoff + (k), \ @@ -107,14 +109,16 @@ namespace CarpetLib { template <typename T> void restrict_3d_cc_o5_rf2 (T const * restrict const src, - ivect3 const & restrict srcext, - T * restrict const dst, - ivect3 const & restrict dstext, - ibbox3 const & restrict srcbbox, - ibbox3 const & restrict dstbbox, - ibbox3 const & restrict, - ibbox3 const & restrict regbbox, - void * extraargs) + ivect3 const & restrict srcpadext, + ivect3 const & restrict srcext, + T * restrict const dst, + ivect3 const & restrict dstpadext, + ivect3 const & restrict dstext, + ibbox3 const & restrict srcbbox, + ibbox3 const & restrict dstbbox, + ibbox3 const & restrict, + ibbox3 const & restrict regbbox, + void * extraargs) { assert (not extraargs); @@ -149,12 +153,6 @@ namespace CarpetLib { CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); } - if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or - dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride()))) - { - CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); - } - ivect3 const regext = regbbox.shape() / regbbox.stride(); @@ -170,6 +168,14 @@ namespace CarpetLib { + int const srcipadext = srcpadext[0]; + int const srcjpadext = srcpadext[1]; + int const srckpadext = srcpadext[2]; + + int const dstipadext = dstpadext[0]; + int const dstjpadext = dstpadext[1]; + int const dstkpadext = dstpadext[2]; + int const srciext = srcext[0]; int const srcjext = srcext[1]; int const srckext = srcext[2]; @@ -239,18 +245,20 @@ namespace CarpetLib { -#define TYPECASE(N,T) \ - template \ - void \ - restrict_3d_cc_o5_rf2 (T const * restrict const src, \ - ivect3 const & restrict srcext, \ - T * restrict const dst, \ - ivect3 const & restrict dstext, \ - ibbox3 const & restrict srcbbox, \ - ibbox3 const & restrict dstbbox, \ - ibbox3 const & restrict, \ - ibbox3 const & restrict regbbox, \ - void * extraargs); +#define TYPECASE(N,T) \ + template \ + void \ + restrict_3d_cc_o5_rf2 (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ + ivect3 const & restrict srcext, \ + T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ + ivect3 const & restrict dstext, \ + ibbox3 const & restrict srcbbox, \ + ibbox3 const & restrict dstbbox, \ + ibbox3 const & restrict, \ + ibbox3 const & restrict regbbox, \ + void * extraargs); #include "typecase.hh" #undef TYPECASE diff --git a/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc index dd419cb92..13b59a7f8 100644 --- a/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc +++ b/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc @@ -5,7 +5,6 @@ #include <cassert> #include <cmath> -#include "gdata.hh" #include "operator_prototypes_3d.hh" #include "typeprops.hh" @@ -19,9 +18,11 @@ namespace CarpetLib { #define SRCIND3(i,j,k) \ index3 (srcioff + (i), srcjoff + (j), srckoff + (k), \ + srcipadext, srcjpadext, srckpadext, \ srciext, srcjext, srckext) #define DSTIND3(i,j,k) \ index3 (dstioff + (i), dstjoff + (j), dstkoff + (k), \ + dstipadext, dstjpadext, dstkpadext, \ dstiext, dstjext, dstkext) @@ -29,8 +30,10 @@ namespace CarpetLib { template <typename T> void restrict_3d_cc_rf2 (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -71,12 +74,6 @@ namespace CarpetLib { CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); } - if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or - dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride()))) - { - CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); - } - ivect3 const regext = regbbox.shape() / regbbox.stride(); @@ -92,6 +89,14 @@ namespace CarpetLib { + int const srcipadext = srcpadext[0]; + int const srcjpadext = srcpadext[1]; + int const srckpadext = srcpadext[2]; + + int const dstipadext = dstpadext[0]; + int const dstjpadext = dstpadext[1]; + int const dstkpadext = dstpadext[2]; + int const srciext = srcext[0]; int const srcjext = srcext[1]; int const srckext = srcext[2]; @@ -127,6 +132,8 @@ namespace CarpetLib { for (int j=0; j<regjext; ++j) { for (int i=0; i<regiext; ++i) { + // TODO: Introduce higher-order restriction operators (but + // don't use these for hydro!) dst [DSTIND3(i, j, k)] = + f1*f1*f1 * src [SRCIND3(2*i , 2*j , 2*k )] + f2*f1*f1 * src [SRCIND3(2*i+1, 2*j , 2*k )] @@ -145,17 +152,19 @@ namespace CarpetLib { -#define TYPECASE(N,T) \ - template \ - void \ - restrict_3d_cc_rf2 (T const * restrict const src, \ - ivect3 const & restrict srcext, \ - T * restrict const dst, \ - ivect3 const & restrict dstext, \ - ibbox3 const & restrict srcbbox, \ - ibbox3 const & restrict dstbbox, \ - ibbox3 const & restrict, \ - ibbox3 const & restrict regbbox, \ +#define TYPECASE(N,T) \ + template \ + void \ + restrict_3d_cc_rf2 (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ + ivect3 const & restrict srcext, \ + T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ + ivect3 const & restrict dstext, \ + ibbox3 const & restrict srcbbox, \ + ibbox3 const & restrict dstbbox, \ + ibbox3 const & restrict, \ + ibbox3 const & restrict regbbox, \ void * extraargs); #include "typecase.hh" #undef TYPECASE diff --git a/Carpet/CarpetLib/src/restrict_3d_dgfe_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_dgfe_rf2.cc index 4b2c0c648..e438d360b 100644 --- a/Carpet/CarpetLib/src/restrict_3d_dgfe_rf2.cc +++ b/Carpet/CarpetLib/src/restrict_3d_dgfe_rf2.cc @@ -20,18 +20,30 @@ using namespace hrscc; namespace CarpetLib { -#define SRCIND3(i,j,k) ptrdiff_t(index3(i, j, k, srciext, srcjext, srckext)) -#define DSTIND3(i,j,k) ptrdiff_t(index3(i, j, k, dstiext, dstjext, dstkext)) -#define SRCOFF3(i,j,k) ptrdiff_t(offset3(i, j, k, srciext, srcjext, srckext)) -#define DSTOFF3(i,j,k) ptrdiff_t(offset3(i, j, k, dstiext, dstjext, dstkext)) +#define SRCIND3(i,j,k) \ + index3 (i, j, k, \ + srcipadext, srcjpadext, srckpadext, \ + srciext, srcjext, srckext) +#define DSTIND3(i,j,k) \ + index3 (i, j, k, \ + dstipadext, dstjpadext, dstkpadext, \ + dstiext, dstjext, dstkext) +#define SRCOFF3(i,j,k) \ + offset3 (i, j, k, \ + srciext, srcjext, srckext) +#define DSTOFF3(i,j,k) \ + offset3 (i, j, k, \ + dstiext, dstjext, dstkext) template<typename T, int ORDER> void restrict_3d_dgfe_rf2(T const *restrict const src, + ivect3 const& restrict srcpadext, ivect3 const& restrict srcext, T *restrict const dst, + ivect3 const& restrict dstpadext, ivect3 const& restrict dstext, ibbox3 const& restrict srcbbox, ibbox3 const& restrict dstbbox, @@ -82,6 +94,14 @@ namespace CarpetLib { + ptrdiff_t const srcipadext = srcpadext[0]; + ptrdiff_t const srcjpadext = srcpadext[1]; + ptrdiff_t const srckpadext = srcpadext[2]; + + ptrdiff_t const dstipadext = dstpadext[0]; + ptrdiff_t const dstjpadext = dstpadext[1]; + ptrdiff_t const dstkpadext = dstpadext[2]; + ptrdiff_t const srciext = srcext[0]; ptrdiff_t const srcjext = srcext[1]; ptrdiff_t const srckext = srcext[2]; @@ -127,9 +147,10 @@ namespace CarpetLib { // Loop over coarse region #ifdef HRSCC_HH #pragma omp parallel for //collapse(3) - for (ptrdiff_t k=0; k<regkext; k+=ORDER+1) { - for (ptrdiff_t j=0; j<regjext; j+=ORDER+1) { - for (ptrdiff_t i=0; i<regiext; i+=ORDER+1) { + // Zwicky's Intel compiler ices on ptrdiff_t + for (int k=0; k<regkext; k+=ORDER+1) { + for (int j=0; j<regjext; j+=ORDER+1) { + for (int i=0; i<regiext; i+=ORDER+1) { GLLElement<ORDER>::restrict_full (&src[SRCIND3(srcioff+2*i, srcjoff+2*j, srckoff+2*k)], srcstr, &dst[DSTIND3(dstioff+i, dstjoff+j, dstkoff+k)], dststr); @@ -149,8 +170,10 @@ namespace CarpetLib { template \ void \ restrict_3d_dgfe_rf2<T,5>(T const *restrict const src, \ + ivect3 const& restrict srcpadext, \ ivect3 const& restrict srcext, \ T *restrict const dst, \ + ivect3 const& restrict dstpadext, \ ivect3 const& restrict dstext, \ ibbox3 const& restrict srcbbox, \ ibbox3 const& restrict dstbbox, \ @@ -168,8 +191,10 @@ namespace CarpetLib { template<> void restrict_3d_dgfe_rf2<CCTK_COMPLEX,5>(CCTK_COMPLEX const *restrict const src, + ivect3 const& restrict srcpadext, ivect3 const& restrict srcext, CCTK_COMPLEX *restrict const dst, + ivect3 const& restrict dstpadext, ivect3 const& restrict dstext, ibbox3 const& restrict srcbbox, ibbox3 const& restrict dstbbox, diff --git a/Carpet/CarpetLib/src/restrict_3d_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_rf2.cc index 5fff24a3e..c1dade7fb 100644 --- a/Carpet/CarpetLib/src/restrict_3d_rf2.cc +++ b/Carpet/CarpetLib/src/restrict_3d_rf2.cc @@ -7,7 +7,6 @@ #include <cstdlib> #include <iostream> -#include "gdata.hh" #include "operator_prototypes_3d.hh" #include "typeprops.hh" @@ -21,9 +20,11 @@ namespace CarpetLib { #define SRCIND3(i,j,k) \ index3 (srcioff + (i), srcjoff + (j), srckoff + (k), \ + srcipadext, srcjpadext, srckpadext, \ srciext, srcjext, srckext) #define DSTIND3(i,j,k) \ index3 (dstioff + (i), dstjoff + (j), dstkoff + (k), \ + dstipadext, dstjpadext, dstkpadext, \ dstiext, dstjext, dstkext) @@ -31,8 +32,10 @@ namespace CarpetLib { template <typename T> void restrict_3d_rf2 (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -67,12 +70,6 @@ namespace CarpetLib { CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); } - if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or - dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride()))) - { - CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); - } - ivect3 const regext = regbbox.shape() / regbbox.stride(); @@ -85,6 +82,14 @@ namespace CarpetLib { + int const srcipadext = srcpadext[0]; + int const srcjpadext = srcpadext[1]; + int const srckpadext = srcpadext[2]; + + int const dstipadext = dstpadext[0]; + int const dstjpadext = dstpadext[1]; + int const dstkpadext = dstpadext[2]; + int const srciext = srcext[0]; int const srcjext = srcext[1]; int const srckext = srcext[2]; @@ -126,8 +131,10 @@ namespace CarpetLib { template \ void \ restrict_3d_rf2 (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ diff --git a/Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc index 827fb5dff..44799a60f 100644 --- a/Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc +++ b/Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc @@ -5,7 +5,6 @@ #include <cassert> #include <cmath> -#include "gdata.hh" #include "operator_prototypes_3d.hh" #include "typeprops.hh" @@ -19,9 +18,11 @@ namespace CarpetLib { #define SRCIND3(i,j,k) \ index3 (srcioff + (i), srcjoff + (j), srckoff + (k), \ + srcipadext, srcjpadext, srckpadext, \ srciext, srcjext, srckext) #define DSTIND3(i,j,k) \ index3 (dstioff + (i), dstjoff + (j), dstkoff + (k), \ + dstipadext, dstjpadext, dstkpadext, \ dstiext, dstjext, dstkext) #define SRCOFF3(i,j,k) \ offset3 (srcioff + (i), srcjoff + (j), srckoff + (k), \ @@ -136,8 +137,10 @@ namespace CarpetLib { template <typename T, int centi, int centj, int centk> void restrict_3d_vc_rf2 (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -188,12 +191,6 @@ namespace CarpetLib { CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); } - if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or - dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride()))) - { - CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); - } - ivect3 const regext = regbbox.shape() / regbbox.stride(); @@ -217,6 +214,14 @@ namespace CarpetLib { + int const srcipadext = srcpadext[0]; + int const srcjpadext = srcpadext[1]; + int const srckpadext = srcpadext[2]; + + int const dstipadext = dstpadext[0]; + int const dstjpadext = dstpadext[1]; + int const dstkpadext = dstpadext[2]; + int const srciext = srcext[0]; int const srcjext = srcext[1]; int const srckext = srcext[2]; @@ -284,8 +289,10 @@ namespace CarpetLib { template \ void \ restrict_3d_vc_rf2<T,0,0,0> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ @@ -295,8 +302,10 @@ namespace CarpetLib { template \ void \ restrict_3d_vc_rf2<T,0,0,1> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ @@ -306,8 +315,10 @@ namespace CarpetLib { template \ void \ restrict_3d_vc_rf2<T,0,1,0> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ @@ -317,8 +328,10 @@ namespace CarpetLib { template \ void \ restrict_3d_vc_rf2<T,0,1,1> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ @@ -328,8 +341,10 @@ namespace CarpetLib { template \ void \ restrict_3d_vc_rf2<T,1,0,0> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ @@ -339,8 +354,10 @@ namespace CarpetLib { template \ void \ restrict_3d_vc_rf2<T,1,0,1> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ @@ -350,8 +367,10 @@ namespace CarpetLib { template \ void \ restrict_3d_vc_rf2<T,1,1,0> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ @@ -361,8 +380,10 @@ namespace CarpetLib { template \ void \ restrict_3d_vc_rf2<T,1,1,1> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ diff --git a/Carpet/CarpetLib/src/restrict_4d_rf2.cc b/Carpet/CarpetLib/src/restrict_4d_rf2.cc index 01b66ee78..bd5d7079c 100644 --- a/Carpet/CarpetLib/src/restrict_4d_rf2.cc +++ b/Carpet/CarpetLib/src/restrict_4d_rf2.cc @@ -7,7 +7,6 @@ #include <cstdlib> #include <iostream> -#include "gdata.hh" #include "operator_prototypes_4d.hh" #include "typeprops.hh" @@ -21,9 +20,11 @@ namespace CarpetLib { #define SRCIND4(i,j,k,l) \ index4 (srcioff + (i), srcjoff + (j), srckoff + (k), srcloff + (l), \ + srcipadext, srcjpadext, srckpadext, srclpadext, \ srciext, srcjext, srckext, srclext) #define DSTIND4(i,j,k,l) \ index4 (dstioff + (i), dstjoff + (j), dstkoff + (k), dstloff + (l), \ + dstipadext, dstjpadext, dstkpadext, dstlpadext, \ dstiext, dstjext, dstkext, dstlext) @@ -31,8 +32,10 @@ namespace CarpetLib { template <typename T> void restrict_4d_rf2 (T const * restrict const src, + ivect4 const & restrict srcpadext, ivect4 const & restrict srcext, T * restrict const dst, + ivect4 const & restrict dstpadext, ivect4 const & restrict dstext, ibbox4 const & restrict srcbbox, ibbox4 const & restrict dstbbox, @@ -67,12 +70,6 @@ namespace CarpetLib { CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); } - if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or - dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride()))) - { - CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); - } - ivect4 const regext = regbbox.shape() / regbbox.stride(); @@ -83,6 +80,16 @@ namespace CarpetLib { + ptrdiff_t const srcipadext = srcpadext[0]; + ptrdiff_t const srcjpadext = srcpadext[1]; + ptrdiff_t const srckpadext = srcpadext[2]; + ptrdiff_t const srclpadext = srcpadext[3]; + + ptrdiff_t const dstipadext = dstpadext[0]; + ptrdiff_t const dstjpadext = dstpadext[1]; + ptrdiff_t const dstkpadext = dstpadext[2]; + ptrdiff_t const dstlpadext = dstpadext[3]; + ptrdiff_t const srciext = srcext[0]; ptrdiff_t const srcjext = srcext[1]; ptrdiff_t const srckext = srcext[2]; @@ -132,8 +139,10 @@ namespace CarpetLib { template \ void \ restrict_4d_rf2 (T const * restrict const src, \ + ivect4 const & restrict srcpadext, \ ivect4 const & restrict srcext, \ T * restrict const dst, \ + ivect4 const & restrict dstpadext, \ ivect4 const & restrict dstext, \ ibbox4 const & restrict srcbbox, \ ibbox4 const & restrict dstbbox, \ |