aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Carpet/Carpet/interface.ccl1
-rw-r--r--Carpet/Carpet/src/modes.cc6
-rw-r--r--Carpet/CarpetIOHDF5/src/Input.cc12
-rw-r--r--Carpet/CarpetIOHDF5/src/Output.cc40
-rw-r--r--Carpet/CarpetInterp2/interface.ccl1
-rw-r--r--Carpet/CarpetInterp2/src/fasterp.cc150
-rw-r--r--Carpet/CarpetInterp2/src/fasterp.hh45
-rw-r--r--Carpet/CarpetLib/configuration.ccl2
-rw-r--r--Carpet/CarpetLib/interface.ccl1
-rw-r--r--Carpet/CarpetLib/param.ccl111
-rw-r--r--Carpet/CarpetLib/src/cacheinfo.cc162
-rw-r--r--Carpet/CarpetLib/src/cacheinfo.hh158
-rw-r--r--Carpet/CarpetLib/src/copy_3d.cc24
-rw-r--r--Carpet/CarpetLib/src/copy_4d.cc27
-rw-r--r--Carpet/CarpetLib/src/data.cc230
-rw-r--r--Carpet/CarpetLib/src/gdata.cc40
-rw-r--r--Carpet/CarpetLib/src/gdata.hh18
-rw-r--r--Carpet/CarpetLib/src/interpolate_3d_2tl.cc54
-rw-r--r--Carpet/CarpetLib/src/interpolate_3d_3tl.cc58
-rw-r--r--Carpet/CarpetLib/src/interpolate_3d_4tl.cc62
-rw-r--r--Carpet/CarpetLib/src/interpolate_3d_5tl.cc66
-rw-r--r--Carpet/CarpetLib/src/interpolate_eno_3d_3tl.cc30
-rw-r--r--Carpet/CarpetLib/src/make.code.defn1
-rw-r--r--Carpet/CarpetLib/src/mem.cc25
-rw-r--r--Carpet/CarpetLib/src/mem.hh6
-rw-r--r--Carpet/CarpetLib/src/operator_prototypes_3d.hh151
-rw-r--r--Carpet/CarpetLib/src/operator_prototypes_4d.hh9
-rw-r--r--Carpet/CarpetLib/src/prolongate_3d_cc_eno_rf2.cc42
-rw-r--r--Carpet/CarpetLib/src/prolongate_3d_cc_real8_tvd.F9013
-rw-r--r--Carpet/CarpetLib/src/prolongate_3d_cc_rf2.cc50
-rw-r--r--Carpet/CarpetLib/src/prolongate_3d_dgfe_rf2.cc54
-rw-r--r--Carpet/CarpetLib/src/prolongate_3d_o5_monotone_rf2.cc22
-rw-r--r--Carpet/CarpetLib/src/prolongate_3d_real8_eno.F9013
-rw-r--r--Carpet/CarpetLib/src/prolongate_3d_real8_tvd.F9013
-rw-r--r--Carpet/CarpetLib/src/prolongate_3d_real8_weno.F9013
-rw-r--r--Carpet/CarpetLib/src/prolongate_3d_rf2.cc45
-rw-r--r--Carpet/CarpetLib/src/prolongate_4d_o1_rf2.cc29
-rw-r--r--Carpet/CarpetLib/src/restrict_3d_cc_o3_rf2.cc61
-rw-r--r--Carpet/CarpetLib/src/restrict_3d_cc_o5_rf2.cc60
-rw-r--r--Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc45
-rw-r--r--Carpet/CarpetLib/src/restrict_3d_dgfe_rf2.cc39
-rw-r--r--Carpet/CarpetLib/src/restrict_3d_rf2.cc21
-rw-r--r--Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc35
-rw-r--r--Carpet/CarpetLib/src/restrict_4d_rf2.cc23
44 files changed, 1427 insertions, 641 deletions
diff --git a/Carpet/Carpet/interface.ccl b/Carpet/Carpet/interface.ccl
index a3a76fce2..5858a8cbf 100644
--- a/Carpet/Carpet/interface.ccl
+++ b/Carpet/Carpet/interface.ccl
@@ -13,6 +13,7 @@ uses include header: loopcontrol.h
uses include header: mpi_string.hh
+uses include header: cacheinfo.hh
uses include header: defs.hh
uses include header: dist.hh
uses include header: typecase.hh
diff --git a/Carpet/Carpet/src/modes.cc b/Carpet/Carpet/src/modes.cc
index 84b58b349..9af0039eb 100644
--- a/Carpet/Carpet/src/modes.cc
+++ b/Carpet/Carpet/src/modes.cc
@@ -4,10 +4,12 @@
#include <cstdio>
#include <cstdlib>
#include <cstring>
+#include <functional>
#include <cctk.h>
#include <cctk_Parameters.h>
+#include <cacheinfo.hh>
#include <defs.hh>
#include <gdata.hh>
#include <ggf.hh>
@@ -138,7 +140,7 @@ namespace Carpet {
ivect_ref(info.ubnd)
= (ext.upper() - baseext.lower()) / ext.stride();
ivect_ref(info.lsh) = ext.shape() / ext.stride();
- ivect_ref(info.ash) = gdata::allocated_memory_shape (ext);
+ ivect_ref(info.ash) = pad_shape(ext);
if (gp.disttype == CCTK_DISTRIB_CONSTANT) {
int const dir = gp.dim==0 ? 0 : gp.dim-1;
ivect & gsh = ivect_ref(info.gsh);
@@ -611,7 +613,7 @@ namespace Carpet {
ivect_ref(cctkGH->cctk_ubnd) =
(ext.upper() - baseext.lower()) / ext.stride();
ivect_ref(cctkGH->cctk_lsh) = ext.shape() / ext.stride();
- ivect_ref(cctkGH->cctk_ash) = gdata::allocated_memory_shape (ext);
+ ivect_ref(cctkGH->cctk_ash) = pad_shape(ext);
for (int d=0; d<dim; ++d) {
cctkGH->cctk_bbox[2*d ] = obnds[0][d];
diff --git a/Carpet/CarpetIOHDF5/src/Input.cc b/Carpet/CarpetIOHDF5/src/Input.cc
index 637bb7032..4f932adee 100644
--- a/Carpet/CarpetIOHDF5/src/Input.cc
+++ b/Carpet/CarpetIOHDF5/src/Input.cc
@@ -1391,6 +1391,13 @@ static int ReadVar (const cGH* const cctkGH,
#endif
const ibbox& exterior_membox =
data.dd->light_boxes.at(mglevel).at(reflevel).at(component).exterior;
+ int const var0 = CCTK_FirstVarIndexI(gindex);
+ assert (var0>=0 and var0<CCTK_NumVars());
+ int const var = patch->vindex - var0;
+ assert (var>=0 and var<CCTK_NumVarsInGroupI(gindex));
+ const ggf* const ff = data.data.AT(var);
+ const gdata* const processor_component = ff->data_pointer
+ (timelevel, reflevel, local_component, mglevel);
// skip this dataset if it doesn't overlap with this component's
// exterior, or if it only reads what has already been read
@@ -1424,8 +1431,9 @@ static int ReadVar (const cGH* const cctkGH,
slice_start_size_t memorigin[dim], fileorigin[dim];
hsize_t memdims[dim], count[dim];
for (int i = 0; i < patch->rank; i++) {
- memdims[patch->rank-i-1] =
- (membox.upper() - membox.lower())[i] / stride[i] + 1;
+ assert (processor_component->shape()[i] ==
+ (membox.upper() - membox.lower())[i] / stride[i] + 1);
+ memdims[patch->rank-i-1] = processor_component->padded_shape()[i];
count[patch->rank-i-1] =
(overlap.upper() - overlap.lower())[i] / stride[i] + 1;
memorigin[patch->rank-i-1] =
diff --git a/Carpet/CarpetIOHDF5/src/Output.cc b/Carpet/CarpetIOHDF5/src/Output.cc
index 7288c9bc2..a6ac668cf 100644
--- a/Carpet/CarpetIOHDF5/src/Output.cc
+++ b/Carpet/CarpetIOHDF5/src/Output.cc
@@ -237,12 +237,17 @@ int WriteVarUnchunked (const cGH* const cctkGH,
// before HDF5-1.6.4 the H5Sselect_hyperslab() function expected
// the 'start' argument to be of type 'hssize_t'
slice_start_size_t overlaporigin[dim];
+ hsize_t hyperslab_start[dim], hyperslab_count[dim];
for (int d = 0; d < group.dim; ++d) {
assert (group.dim-1-d>=0 and group.dim-1-d<dim);
overlaporigin[group.dim-1-d] =
((overlap.lower() - bbox->lower()) / overlap.stride())[d];
overlapshape[group.dim-1-d] =
- (overlap.shape() / overlap.stride())[d];
+ processor_component->padded_shape()[d];
+ assert (all (processor_component->shape() ==
+ overlap.shape() / overlap.stride()));
+ hyperslab_start[group.dim-1-d] = 0;
+ hyperslab_count[group.dim-1-d] = processor_component->shape()[d];
}
// Write the processor component as a hyperslab into the recombined
@@ -250,6 +255,9 @@ int WriteVarUnchunked (const cGH* const cctkGH,
hid_t overlap_dataspace;
HDF5_ERROR (overlap_dataspace =
H5Screate_simple (group.dim, overlapshape, NULL));
+ HDF5_ERROR (H5Sselect_hyperslab (overlap_dataspace, H5S_SELECT_SET,
+ hyperslab_start, NULL,
+ hyperslab_count, NULL));
HDF5_ERROR (H5Sselect_hyperslab (dataspace, H5S_SELECT_SET,
overlaporigin, NULL,
overlapshape, NULL));
@@ -436,10 +444,15 @@ int WriteVarChunkedSequential (const cGH* const cctkGH,
hsize_t shape[dim];
hssize_t origin[dim];
+ hsize_t hyperslab_start[dim], hyperslab_count[dim];
for (int d = 0; d < group.dim; ++d) {
assert (group.dim-1-d>=0 and group.dim-1-d<dim);
origin[group.dim-1-d] = (bbox.lower() / bbox.stride())[d];
- shape[group.dim-1-d] = (bbox.shape() / bbox.stride())[d];
+ shape[group.dim-1-d] = processor_component->padded_shape()[d];
+ assert (all (processor_component->shape() ==
+ bbox.shape() / bbox.stride()));
+ hyperslab_start[group.dim-1-d] = 0;
+ hyperslab_count[group.dim-1-d] = processor_component->shape()[d];
}
// Write the component as an individual dataset
@@ -459,6 +472,9 @@ int WriteVarChunkedSequential (const cGH* const cctkGH,
HDF5_ERROR (H5Pset_filter (plist, H5Z_FILTER_FLETCHER32, 0, 0, NULL));
}
HDF5_ERROR (dataspace = H5Screate_simple (group.dim, shape, NULL));
+ HDF5_ERROR (H5Sselect_hyperslab (dataspace, H5S_SELECT_SET,
+ hyperslab_start, NULL,
+ hyperslab_count, NULL));
HDF5_ERROR (dataset = H5Dcreate (outfile, datasetname.str().c_str(),
filedatatype, dataspace, plist));
@@ -545,10 +561,15 @@ int WriteVarChunkedParallel (const cGH* const cctkGH,
// const ibbox& bbox = (*ff) (request->timelevel, refinementlevel,
// group.disttype == CCTK_DISTRIB_CONSTANT ?
// 0 : component, mglevel)->extent();
- const dh* dd = arrdata.at(gindex).at(Carpet::map).dd;
+ const dh* const dd = arrdata.AT(gindex).AT(Carpet::map).dd;
const ibbox& bbox =
dd->light_boxes.AT(mglevel).AT(refinementlevel)
.AT(group.disttype == CCTK_DISTRIB_CONSTANT ? 0 : component).exterior;
+ const ggf* const ff = arrdata.AT(gindex).AT(Carpet::map).data.AT(var);
+ const gdata* const processor_component = ff->data_pointer
+ (request->timelevel, refinementlevel,
+ group.disttype == CCTK_DISTRIB_CONSTANT ? 0 : local_component,
+ mglevel);
// Don't create zero-sized components
if (bbox.empty()) continue;
@@ -618,10 +639,15 @@ int WriteVarChunkedParallel (const cGH* const cctkGH,
// Get the shape of the HDF5 dataset (in Fortran index order)
hsize_t shape[dim];
hssize_t origin[dim];
+ hsize_t hyperslab_start[dim], hyperslab_count[dim];
for (int d = 0; d < group.dim; ++d) {
assert (group.dim-1-d>=0 and group.dim-1-d<dim);
origin[group.dim-1-d] = (bbox.lower() / bbox.stride())[d];
- shape[group.dim-1-d] = (bbox.shape() / bbox.stride())[d];
+ shape[group.dim-1-d] = processor_component->padded_shape()[d];
+ assert (all (processor_component->shape() ==
+ bbox.shape() / bbox.stride()));
+ hyperslab_start[group.dim-1-d] = 0;
+ hyperslab_count[group.dim-1-d] = processor_component->shape()[d];
}
// Write the component as an individual dataset
@@ -641,6 +667,9 @@ int WriteVarChunkedParallel (const cGH* const cctkGH,
HDF5_ERROR (H5Pset_filter (plist, H5Z_FILTER_FLETCHER32, 0, 0, NULL));
}
HDF5_ERROR (dataspace = H5Screate_simple (group.dim, shape, NULL));
+ HDF5_ERROR (H5Sselect_hyperslab (dataspace, H5S_SELECT_SET,
+ hyperslab_start, NULL,
+ hyperslab_count, NULL));
HDF5_ERROR (dataset = H5Dcreate (outfile, datasetname.str().c_str(),
filedatatype, dataspace, plist));
@@ -827,7 +856,8 @@ static int AddAttributes (const cGH *const cctkGH, const char *fullname,
dataspace, H5P_DEFAULT));
HDF5_ERROR (H5Awrite (attr, H5T_NATIVE_INT, &ioffsetdenom[0]));
HDF5_ERROR (H5Aclose (attr));
- ivect const ioffset = ((bbox.lower() % bbox.stride()) + bbox.stride()) % bbox.stride();
+ ivect const ioffset =
+ ((bbox.lower() % bbox.stride()) + bbox.stride()) % bbox.stride();
HDF5_ERROR (attr = H5Acreate (dataset, "ioffset", H5T_NATIVE_INT,
dataspace, H5P_DEFAULT));
HDF5_ERROR (H5Awrite (attr, H5T_NATIVE_INT, &ioffset[0]));
diff --git a/Carpet/CarpetInterp2/interface.ccl b/Carpet/CarpetInterp2/interface.ccl
index 24bc256bf..529876225 100644
--- a/Carpet/CarpetInterp2/interface.ccl
+++ b/Carpet/CarpetInterp2/interface.ccl
@@ -8,6 +8,7 @@ INCLUDE HEADER: fasterp.hh IN carpetinterp2.hh
USES INCLUDE HEADER: nompi.h
+USES INCLUDE HEADER: cacheinfo.hh
USES INCLUDE HEADER: defs.hh
USES INCLUDE HEADER: typeprops.hh
USES INCLUDE HEADER: vect.hh
diff --git a/Carpet/CarpetInterp2/src/fasterp.cc b/Carpet/CarpetInterp2/src/fasterp.cc
index d5fa4fb04..fc3ac8a90 100644
--- a/Carpet/CarpetInterp2/src/fasterp.cc
+++ b/Carpet/CarpetInterp2/src/fasterp.cc
@@ -15,6 +15,7 @@
# include "nompi.h"
#endif
+#include <cacheinfo.hh>
#include <carpet.hh>
#include <vect.hh>
@@ -192,7 +193,10 @@ namespace CarpetInterp2 {
int
fasterp_src_loc_t::
calc_stencil (fasterp_iloc_t const & iloc,
+ ivect const & ash,
+#ifdef CARPETINTERP2_CHECK
ivect const & lsh,
+#endif
int const order)
{
assert (order <= max_order);
@@ -203,8 +207,8 @@ namespace CarpetInterp2 {
pn = iloc.pn;
ipos = iloc.ipos;
- // Save lsh
- saved_lsh = lsh;
+ // Save ash
+ saved_ash = ash;
#endif
#ifndef NDEBUG
@@ -290,9 +294,9 @@ namespace CarpetInterp2 {
return -1;
}
#endif
- ind3d = iloc.ind3d + index(lsh, iorigin);
+ ind3d = iloc.ind3d + index(ash, iorigin);
#ifdef CARPETINTERP2_CHECK
- assert (index(lsh,ind) == ind3d);
+ assert (index(ash,ind) == ind3d);
#endif
return 0;
}
@@ -310,26 +314,29 @@ namespace CarpetInterp2 {
template <int O0, int O1, int O2>
void
fasterp_src_loc_t::
- interpolate (ivect const & lsh,
+ interpolate (ivect const & ash,
+#ifdef CARPETINTERP2_CHECK
+ ivect const & lsh,
+#endif
vector<CCTK_REAL const *> const & varptrs,
CCTK_REAL * restrict const vals)
const
{
#ifdef CARPETINTERP2_CHECK
- assert (all (lsh == saved_lsh));
+ assert (all (ash == saved_ash));
#endif
assert (O0 == 0 or not exact[0]);
assert (O1 == 0 or not exact[1]);
assert (O2 == 0 or not exact[2]);
size_t const di = 1;
- size_t const dj = di * lsh[0];
- size_t const dk = dj * lsh[1];
+ size_t const dj = di * ash[0];
+ size_t const dk = dj * ash[1];
#ifdef CARPETINTERP2_CHECK
assert (all (ind>=0 and ind+ivect(O0,O1,O2)<lsh));
- assert (ind3d == index(lsh,ind));
- assert (int(di*ind[0] + dj*ind[1] + dk*ind[2]) == index(lsh,ind));
+ assert (ind3d == index(ash,ind));
+ assert (int(di*ind[0] + dj*ind[1] + dk*ind[2]) == index(ash,ind));
#endif
for (size_t v=0; v<varptrs.size(); ++v) {
@@ -377,7 +384,10 @@ namespace CarpetInterp2 {
template <int O>
void
fasterp_src_loc_t::
- interpolate (ivect const & lsh,
+ interpolate (ivect const & ash,
+#ifdef CARPETINTERP2_CHECK
+ ivect const & lsh,
+#endif
vector<CCTK_REAL const *> const & varptrs,
CCTK_REAL * restrict const vals)
const
@@ -386,29 +396,29 @@ namespace CarpetInterp2 {
if (exact[2]) {
if (exact[1]) {
if (exact[0]) {
- interpolate<Z,Z,Z> (lsh, varptrs, vals);
+ interpolate<Z,Z,Z> (ash, lsh, varptrs, vals);
} else {
- interpolate<O,Z,Z> (lsh, varptrs, vals);
+ interpolate<O,Z,Z> (ash, lsh, varptrs, vals);
}
} else {
if (exact[0]) {
- interpolate<Z,O,Z> (lsh, varptrs, vals);
+ interpolate<Z,O,Z> (ash, lsh, varptrs, vals);
} else {
- interpolate<O,O,Z> (lsh, varptrs, vals);
+ interpolate<O,O,Z> (ash, lsh, varptrs, vals);
}
}
} else {
if (exact[1]) {
if (exact[0]) {
- interpolate<Z,Z,O> (lsh, varptrs, vals);
+ interpolate<Z,Z,O> (ash, lsh, varptrs, vals);
} else {
- interpolate<O,Z,O> (lsh, varptrs, vals);
+ interpolate<O,Z,O> (ash, lsh, varptrs, vals);
}
} else {
if (exact[0]) {
- interpolate<Z,O,O> (lsh, varptrs, vals);
+ interpolate<Z,O,O> (ash, lsh, varptrs, vals);
} else {
- interpolate<O,O,O> (lsh, varptrs, vals);
+ interpolate<O,O,O> (ash, lsh, varptrs, vals);
}
}
}
@@ -422,25 +432,28 @@ namespace CarpetInterp2 {
// exact in each of the directions.
void
fasterp_src_loc_t::
- interpolate (ivect const & lsh,
+ interpolate (ivect const & ash,
+#ifdef CARPETINTERP2_CHECK
+ ivect const & lsh,
+#endif
int const order,
vector<CCTK_REAL const *> const & varptrs,
CCTK_REAL * restrict const vals)
const
{
switch (order) {
- case 0: interpolate< 0> (lsh, varptrs, vals); break;
- case 1: interpolate< 1> (lsh, varptrs, vals); break;
- case 2: interpolate< 2> (lsh, varptrs, vals); break;
- case 3: interpolate< 3> (lsh, varptrs, vals); break;
- case 4: interpolate< 4> (lsh, varptrs, vals); break;
- case 5: interpolate< 5> (lsh, varptrs, vals); break;
- case 6: interpolate< 6> (lsh, varptrs, vals); break;
- case 7: interpolate< 7> (lsh, varptrs, vals); break;
- case 8: interpolate< 8> (lsh, varptrs, vals); break;
- case 9: interpolate< 9> (lsh, varptrs, vals); break;
- case 10: interpolate<10> (lsh, varptrs, vals); break;
- case 11: interpolate<11> (lsh, varptrs, vals); break;
+ case 0: interpolate< 0> (ash, lsh, varptrs, vals); break;
+ case 1: interpolate< 1> (ash, lsh, varptrs, vals); break;
+ case 2: interpolate< 2> (ash, lsh, varptrs, vals); break;
+ case 3: interpolate< 3> (ash, lsh, varptrs, vals); break;
+ case 4: interpolate< 4> (ash, lsh, varptrs, vals); break;
+ case 5: interpolate< 5> (ash, lsh, varptrs, vals); break;
+ case 6: interpolate< 6> (ash, lsh, varptrs, vals); break;
+ case 7: interpolate< 7> (ash, lsh, varptrs, vals); break;
+ case 8: interpolate< 8> (ash, lsh, varptrs, vals); break;
+ case 9: interpolate< 9> (ash, lsh, varptrs, vals); break;
+ case 10: interpolate<10> (ash, lsh, varptrs, vals); break;
+ case 11: interpolate<11> (ash, lsh, varptrs, vals); break;
default:
// Add higher orders here as desired
CCTK_WARN (CCTK_WARN_ABORT,
@@ -477,7 +490,7 @@ namespace CarpetInterp2 {
#endif
os << "ind3d=" << ind3d;
#ifdef CARPETINTERP2_CHECK
- os << "," << "saved_lsh=" << saved_lsh;
+ os << "," << "saved_ash=" << saved_ash;
#endif
os << "}";
}
@@ -495,8 +508,11 @@ namespace CarpetInterp2 {
int
fasterp_eno2_src_loc_t::
calc_stencil (fasterp_iloc_t const & iloc,
+ ivect const & ash,
+#ifdef CARPETINTERP2_CHECK
ivect const & lsh,
- int const unused_order)
+#endif
+ int /*order*/)
{
//assert (order <= max_order);
CCTK_REAL const eps = 1.0e-12;
@@ -506,8 +522,8 @@ namespace CarpetInterp2 {
pn = iloc.pn;
ipos = iloc.ipos;
- // Save lsh
- saved_lsh = lsh;
+ // Save ash
+ saved_ash = ash;
#endif
// first we setup 1st order stencil!
@@ -640,9 +656,9 @@ namespace CarpetInterp2 {
return -1;
}
#endif
- ind3d = iloc.ind3d + index(lsh, iorigin);
+ ind3d = iloc.ind3d + index(ash, iorigin);
#ifdef CARPETINTERP2_CHECK
- assert (index(lsh,ind) == ind3d);
+ assert (index(ash,ind) == ind3d);
#endif
}
@@ -719,26 +735,29 @@ namespace CarpetInterp2 {
template <int O0, int O1, int O2>
void
fasterp_eno2_src_loc_t::
- interpolate (ivect const & lsh,
+ interpolate (ivect const & ash,
+#ifdef CARPETINTERP2_CHECK
+ ivect const & lsh,
+#endif
vector<CCTK_REAL const *> const & varptrs,
CCTK_REAL * restrict const vals)
const
{
#ifdef CARPETINTERP2_CHECK
- assert (all (lsh == saved_lsh));
+ assert (all (ash == saved_ash));
#endif
assert (O0 == 0 or not exact[0]);
assert (O1 == 0 or not exact[1]);
assert (O2 == 0 or not exact[2]);
size_t const di = 1;
- size_t const dj = di * lsh[0];
- size_t const dk = dj * lsh[1];
+ size_t const dj = di * ash[0];
+ size_t const dk = dj * ash[1];
#ifdef CARPETINTERP2_CHECK
assert (all (ind>=0 and ind+ivect(O0,O1,O2)<lsh));
- assert (ind3d == index(lsh,ind));
- assert (int(di*ind[0] + dj*ind[1] + dk*ind[2]) == index(lsh,ind));
+ assert (ind3d == index(ash,ind));
+ assert (int(di*ind[0] + dj*ind[1] + dk*ind[2]) == index(ash,ind));
#endif
for (size_t v=0; v<varptrs.size(); ++v) {
@@ -869,7 +888,10 @@ namespace CarpetInterp2 {
template <int O>
void
fasterp_eno2_src_loc_t::
- interpolate (ivect const & lsh,
+ interpolate (ivect const & ash,
+#ifdef CARPETINTERP2_CHECK
+ ivect const & lsh,
+#endif
vector<CCTK_REAL const *> const & varptrs,
CCTK_REAL * restrict const vals)
const
@@ -878,29 +900,29 @@ namespace CarpetInterp2 {
if (exact[2]) {
if (exact[1]) {
if (exact[0]) {
- interpolate<Z,Z,Z> (lsh, varptrs, vals);
+ interpolate<Z,Z,Z> (ash, lsh, varptrs, vals);
} else {
- interpolate<O,Z,Z> (lsh, varptrs, vals);
+ interpolate<O,Z,Z> (ash, lsh, varptrs, vals);
}
} else {
if (exact[0]) {
- interpolate<Z,O,Z> (lsh, varptrs, vals);
+ interpolate<Z,O,Z> (ash, lsh, varptrs, vals);
} else {
- interpolate<O,O,Z> (lsh, varptrs, vals);
+ interpolate<O,O,Z> (ash, lsh, varptrs, vals);
}
}
} else {
if (exact[1]) {
if (exact[0]) {
- interpolate<Z,Z,O> (lsh, varptrs, vals);
+ interpolate<Z,Z,O> (ash, lsh, varptrs, vals);
} else {
- interpolate<O,Z,O> (lsh, varptrs, vals);
+ interpolate<O,Z,O> (ash, lsh, varptrs, vals);
}
} else {
if (exact[0]) {
- interpolate<Z,O,O> (lsh, varptrs, vals);
+ interpolate<Z,O,O> (ash, lsh, varptrs, vals);
} else {
- interpolate<O,O,O> (lsh, varptrs, vals);
+ interpolate<O,O,O> (ash, lsh, varptrs, vals);
}
}
}
@@ -914,14 +936,17 @@ namespace CarpetInterp2 {
// exact in each of the directions.
void
fasterp_eno2_src_loc_t::
- interpolate (ivect const & lsh,
+ interpolate (ivect const & ash,
+#ifdef CARPETINTERP2_CHECK
+ ivect const & lsh,
+#endif
int const order,
vector<CCTK_REAL const *> const & varptrs,
CCTK_REAL * restrict const vals)
const
{
switch (order) {
- case 2: interpolate< 2> (lsh, varptrs, vals); break;
+ case 2: interpolate< 2> (ash, lsh, varptrs, vals); break;
default:
// Add higher orders here as desired
CCTK_WARN (CCTK_WARN_ABORT,
@@ -958,7 +983,7 @@ namespace CarpetInterp2 {
#endif
os << "ind3d=" << ind3d;
#ifdef CARPETINTERP2_CHECK
- os << "," << "saved_lsh=" << saved_lsh;
+ os << "," << "saved_ash=" << saved_ash;
#endif
os << "}";
}
@@ -1173,8 +1198,8 @@ namespace CarpetInterp2 {
assert (all (abs(dpos) <= rvect(0.5)));
ivect const ind = ipos - ext.lower() / ext.stride();
- ivect const lsh = ext.shape() / ext.stride();
- int const ind3d = index(lsh, ind);
+ ivect const ash = pad_shape(ext);
+ int const ind3d = index(ash, ind);
#if 0
ENTER_SINGLEMAP_MODE (cctkGH, m, CCTK_GF) {
ENTER_LOCAL_MODE (cctkGH, c, CCTK_GF) {
@@ -1426,6 +1451,7 @@ namespace CarpetInterp2 {
assert (Carpet::vhh.AT(m)->is_local(rl,c));
ibbox const & ext =
Carpet::vdd.AT(m)->light_boxes.AT(Carpet::mglevel).AT(rl).AT(c).exterior;
+ send_comp.ash = pad_shape(ext);
send_comp.lsh = ext.shape() / ext.stride();
send_comp.offset = offset;
@@ -1448,7 +1474,8 @@ namespace CarpetInterp2 {
//fasterp_src_loc_t sloc;
FASTERP sloc;
- int const ierr = sloc.calc_stencil (iloc, send_comp.lsh, order);
+ int const ierr =
+ sloc.calc_stencil (iloc, send_comp.ash, send_comp.lsh, order);
if (ierr) {
CCTK_VWarn (CCTK_WARN_ABORT, __LINE__, __FILE__, CCTK_THORNSTRING,
"Could not determine valid interpolation stencil for point %d on map %d, refinement level %d, component %d",
@@ -1482,7 +1509,7 @@ namespace CarpetInterp2 {
for (int n=0; n<send_comp.npoints; ++n) {
FASTERP const & sloc = send_comp.locs.at(n);
assert (sloc.mrc == send_comp.mrc);
- assert (all (sloc.saved_lsh == send_comp.lsh));
+ assert (all (sloc.saved_ash == send_comp.ash));
}
}
#endif
@@ -1680,7 +1707,8 @@ namespace CarpetInterp2 {
for (int n=0; n<int(send_comp.locs.size()); ++n) {
size_t const ind = (send_comp.offset + n) * nvars;
send_comp.locs.AT(n).interpolate
- (send_comp.lsh, order, varptrs, &computed_points.AT(ind));
+ (send_comp.ash, send_comp.lsh,
+ order, varptrs, &computed_points.AT(ind));
#ifdef CARPETINTERP2_CHECK
computed_pn.AT(send_comp.offset + n) = send_comp.locs.AT(n).pn;
#endif
diff --git a/Carpet/CarpetInterp2/src/fasterp.hh b/Carpet/CarpetInterp2/src/fasterp.hh
index 081b1c82d..37deea70e 100644
--- a/Carpet/CarpetInterp2/src/fasterp.hh
+++ b/Carpet/CarpetInterp2/src/fasterp.hh
@@ -191,17 +191,23 @@ namespace CarpetInterp2 {
#ifdef CARPETINTERP2_CHECK
public:
- ivect saved_lsh; // copy of lsh
+ ivect saved_ash; // copy of ash
private:
#endif
public:
int
calc_stencil (fasterp_iloc_t const & iloc,
+ ivect const & ash,
+#ifdef CARPETINTERP2_CHECK
ivect const & lsh,
+#endif
int order);
void
- interpolate (ivect const & lsh,
+ interpolate (ivect const & ash,
+#ifdef CARPETINTERP2_CHECK
+ ivect const & lsh,
+#endif
int order,
vector<CCTK_REAL const *> const & varptrs,
CCTK_REAL * restrict vals)
@@ -210,13 +216,19 @@ namespace CarpetInterp2 {
private:
template <int O>
void
- interpolate (ivect const & lsh,
+ interpolate (ivect const & ash,
+#ifdef CARPETINTERP2_CHECK
+ ivect const & lsh,
+#endif
vector<CCTK_REAL const *> const & varptrs,
CCTK_REAL * restrict vals)
const;
template <int O0, int O1, int O2>
void
- interpolate (ivect const & lsh,
+ interpolate (ivect const & ash,
+#ifdef CARPETINTERP2_CHECK
+ ivect const & lsh,
+#endif
vector<CCTK_REAL const *> const & varptrs,
CCTK_REAL * restrict vals)
const;
@@ -251,17 +263,23 @@ namespace CarpetInterp2 {
#ifdef CARPETINTERP2_CHECK
public:
- ivect saved_lsh; // copy of lsh
+ ivect saved_ash; // copy of ash
private:
#endif
public:
int
calc_stencil (fasterp_iloc_t const & iloc,
+ ivect const & ash,
+#ifdef CARPETINTERP2_CHECK
ivect const & lsh,
- int unused_order);
+#endif
+ int /*order*/);
void
- interpolate (ivect const & lsh,
+ interpolate (ivect const & ash,
+#ifdef CARPETINTERP2_CHECK
+ ivect const & lsh,
+#endif
int order,
vector<CCTK_REAL const *> const & varptrs,
CCTK_REAL * restrict vals)
@@ -270,13 +288,19 @@ namespace CarpetInterp2 {
private:
template <int O>
void
- interpolate (ivect const & lsh,
+ interpolate (ivect const & ash,
+#ifdef CARPETINTERP2_CHECK
+ ivect const & lsh,
+#endif
vector<CCTK_REAL const *> const & varptrs,
CCTK_REAL * restrict vals)
const;
template <int O0, int O1, int O2>
void
- interpolate (ivect const & lsh,
+ interpolate (ivect const & ash,
+#ifdef CARPETINTERP2_CHECK
+ ivect const & lsh,
+#endif
vector<CCTK_REAL const *> const & varptrs,
CCTK_REAL * restrict vals)
const;
@@ -317,7 +341,10 @@ namespace CarpetInterp2 {
vector<FASTERP> locs;
mrc_t mrc; // source map, refinement level, component
+ ivect ash;
+#ifdef CARPETINTERP2_CHECK
ivect lsh;
+#endif
int offset;
int npoints;
};
diff --git a/Carpet/CarpetLib/configuration.ccl b/Carpet/CarpetLib/configuration.ccl
index 17ffa1f79..a77f237fc 100644
--- a/Carpet/CarpetLib/configuration.ccl
+++ b/Carpet/CarpetLib/configuration.ccl
@@ -8,7 +8,7 @@ PROVIDES CarpetLib
REQUIRES MPI Vectors
-OPTIONAL LoopControl
+OPTIONAL LoopControl MPI
{
}
diff --git a/Carpet/CarpetLib/interface.ccl b/Carpet/CarpetLib/interface.ccl
index 12cba170c..6495fdec3 100644
--- a/Carpet/CarpetLib/interface.ccl
+++ b/Carpet/CarpetLib/interface.ccl
@@ -4,6 +4,7 @@ IMPLEMENTS: CarpetLib
includes header: mpi_string.hh in mpi_string.hh
+includes header: cacheinfo.hh in cacheinfo.hh
includes header: defs.hh in defs.hh
includes header: dist.hh in dist.hh
includes header: typecase.hh in typecase.hh
diff --git a/Carpet/CarpetLib/param.ccl b/Carpet/CarpetLib/param.ccl
index c559f1604..c13d54ba1 100644
--- a/Carpet/CarpetLib/param.ccl
+++ b/Carpet/CarpetLib/param.ccl
@@ -159,13 +159,13 @@ BOOLEAN combine_recompose "Recompose all grid functions of one refinement levels
-# Memory allocation parameters
-
-INT avoid_arraysize_bytes "Avoid array sizes that are multiples of this" STEERABLE=recover
-{
- 0 :: "don't avoid anything"
- 2:* :: ""
-} 0
+# # Memory allocation parameters
+#
+# INT avoid_arraysize_bytes "Avoid array sizes that are multiples of this" STEERABLE=recover
+# {
+# 0 :: "don't avoid anything"
+# 2:* :: ""
+# } 0
@@ -211,6 +211,103 @@ BOOLEAN use_mpi_ssend "Use MPI_Ssend instead of MPI_Isend" STEERABLE=always
+# Memory and cache information -- this is machine dependent and should
+# be determined at run time or set via simfactory
+
+CCTK_INT vector_size "vector size" STEERABLE=recover
+{
+ 0 :: "unknown"
+ 1:* :: ""
+} 0
+
+CCTK_INT D1size "level 1 data cache" STEERABLE=recover
+{
+ 0 :: "unknown"
+ 1:* :: ""
+} 0
+CCTK_INT D1linesize "level 1 data cache" STEERABLE=recover
+{
+ 0 :: "unknown"
+ 1:* :: ""
+} 0
+CCTK_INT D1assoc "level 1 data cache" STEERABLE=recover
+{
+ 0 :: "unknown"
+ 1:* :: ""
+} 0
+
+CCTK_INT L2size "level 2 unified cache" STEERABLE=recover
+{
+ 0 :: "unknown"
+ 1:* :: ""
+} 0
+CCTK_INT L2linesize "level 2 unified cache" STEERABLE=recover
+{
+ 0 :: "unknown"
+ 1:* :: ""
+} 0
+CCTK_INT L2assoc "level 2 unified cache" STEERABLE=recover
+{
+ 0 :: "unknown"
+ 1:* :: ""
+} 0
+
+CCTK_INT L3size "level 3 unified cache" STEERABLE=recover
+{
+ 0 :: "unknown"
+ 1:* :: ""
+} 0
+CCTK_INT L3linesize "level 3 unified cache" STEERABLE=recover
+{
+ 0 :: "unknown"
+ 1:* :: ""
+} 0
+CCTK_INT L3assoc "level 3 unified cache" STEERABLE=recover
+{
+ 0 :: "unknown"
+ 1:* :: ""
+} 0
+
+CCTK_INT TLB_D1entries "level 1 TLB cache" STEERABLE=recover
+{
+ 0 :: "unknown"
+ 1:* :: ""
+} 0
+CCTK_INT TLB_D1pagesize "level 1 TLB cache" STEERABLE=recover
+{
+ 0 :: "unknown"
+ 1:* :: ""
+} 0
+CCTK_INT TLB_D1assoc "level 1 TLB cache" STEERABLE=recover
+{
+ 0 :: "unknown"
+ 1:* :: ""
+} 0
+
+CCTK_INT TLB_L2entries "level 2 TLB cache" STEERABLE=recover
+{
+ 0 :: "unknown"
+ 1:* :: ""
+} 0
+CCTK_INT TLB_L2pagesize "level 2 TLB cache" STEERABLE=recover
+{
+ 0 :: "unknown"
+ 1:* :: ""
+} 0
+CCTK_INT TLB_L2assoc "level 2 TLB cache" STEERABLE=recover
+{
+ 0 :: "unknown"
+ 1:* :: ""
+} 0
+
+CCTK_INT pagesize "size of a memory page" STEERABLE=recover
+{
+ 0 :: "unknown"
+ 1:* :: ""
+} 0
+
+
+
SHARES: IO
USES STRING out_dir
diff --git a/Carpet/CarpetLib/src/cacheinfo.cc b/Carpet/CarpetLib/src/cacheinfo.cc
new file mode 100644
index 000000000..897cb533e
--- /dev/null
+++ b/Carpet/CarpetLib/src/cacheinfo.cc
@@ -0,0 +1,162 @@
+#include "cacheinfo.hh"
+
+#include <cctk_Parameters.h>
+
+#include <vectors.h>
+
+
+
+template<int D>
+vect<int,D>
+pad_shape (bbox<int,D> const& extent)
+{
+ assert (all (extent.shape() >= 0));
+ return pad_shape(extent.shape() / extent.stride());
+}
+
+
+
+template<int D>
+vect<int,D>
+pad_shape (vect<int,D> const& shape)
+{
+ DECLARE_CCTK_PARAMETERS;
+
+ assert (all(shape>=0));
+
+ static bool have_cacheinfo = false;
+ static vector<cacheinfo_t> cacheinfo;
+ if (not have_cacheinfo) {
+ // Ignore L1 caches that are probably too small to be useful (e.g.
+ // on Intel or AMD processors)
+ // TODO: make this a parameter
+ if (D1size >= 128*1024) {
+ cacheinfo.push_back(cacheinfo_t(D1size, D1linesize, D1assoc));
+ }
+#if 0
+ // TODO: this is too simplistic:
+ // Add page size as a cache
+ if (pagesize>0) {
+ cacheinfo.push_back(cacheinfo_t(pagesize));
+ }
+#endif
+ if (L2size>0) {
+ cacheinfo.push_back(cacheinfo_t(L2size, L2linesize, L2assoc));
+ }
+ if (L3size>0) {
+ cacheinfo.push_back(cacheinfo_t(L3size, L3linesize, L3assoc));
+ }
+ if (TLB_D1entries>0) {
+ ptrdiff_t const TLB_D1size = TLB_D1entries * TLB_D1pagesize * TLB_D1assoc;
+ cacheinfo.push_back(cacheinfo_t(TLB_D1size, TLB_D1pagesize, TLB_D1assoc));
+ }
+ if (TLB_L2entries>0) {
+ ptrdiff_t const TLB_L2size = TLB_L2entries * TLB_L2pagesize * TLB_L2assoc;
+ cacheinfo.push_back(cacheinfo_t(TLB_L2size, TLB_L2pagesize, TLB_L2assoc));
+ }
+
+ // TODO: sort caches by their sizes
+ for (size_t n=0; n<cacheinfo.size(); ++n) {
+ cacheinfo_t const& ci = cacheinfo.at(n);
+ if (n>0) {
+ // Ensure that the cache size is larger than the next lower
+ // cache size
+ assert (ci.size() > cacheinfo.at(n-1).size());
+ // Ensure that the cache line size is evenly divided by the
+ // next lower cache line size
+ assert (ci.linesize() % cacheinfo.at(n-1).linesize() == 0);
+ assert (ci.stride() > cacheinfo.at(n-1).stride());
+ }
+ } // for cacheinfo
+
+ have_cacheinfo = true;
+ } // if not have_cacheinfo
+
+ vect<int,D> padded_shape;
+ int accumulated_npoints = 1;
+ for (int d=0; d<D; ++d) {
+ int npoints = shape[d];
+
+ if (d == 0) {
+#if VECTORISE && VECTORISE_ALIGNED_ARRAYS
+ // Pad array to a multiple of the vector size. Note that this is
+ // a hard requirement, so that we can emit aligned load/store
+ // operations.
+ npoints = align_up (npoints, CCTK_REAL_VEC_SIZE);
+#endif
+ if (vector_size > 0) {
+ npoints = align_up (npoints, vector_size);
+ }
+ }
+
+ for (size_t n=0; n<cacheinfo.size(); ++n) {
+ cacheinfo_t const& ci = cacheinfo.at(n);
+
+ // Pad array in this direction to a multiple of this cache line
+ // size
+ assert (ci.linesize() % sizeof(CCTK_REAL) == 0);
+ int const linesize = ci.linesize() / sizeof(CCTK_REAL);
+ assert (is_power_of_2(linesize));
+ if (npoints * accumulated_npoints >= linesize) {
+ // The extent is at least one cache line long: round up to the
+ // next full cache line
+ npoints = align_up (npoints, linesize);
+ } else {
+#if 0
+ // The extent is less than one cache line long: Ensure that
+ // the array size divides the cache line size evenly by
+ // rounding to the next power of 2
+ // NOTE: This is disabled, since this would align everything
+ // to powers of 2.
+ npoints = next_power_of_2(npoints);
+#endif
+ }
+
+ // Avoid multiples of the cache stride
+ assert (ci.stride() % sizeof(CCTK_REAL) == 0);
+ int const stride = ci.stride() / sizeof(CCTK_REAL);
+ if (npoints * accumulated_npoints % stride == 0) {
+ assert (linesize < stride);
+ npoints += linesize;
+ }
+
+ } // for cacheinfo
+
+ padded_shape[d] = npoints;
+ accumulated_npoints *= npoints;
+ }
+ assert (prod (padded_shape) == accumulated_npoints);
+
+ // self-check
+ for (int d=0; d<D; ++d) {
+ assert (padded_shape[d] >= shape[d]);
+#if VECTORISE && VECTORISE_ALIGNED_ARRAYS
+ if (d == 0) {
+ assert (padded_shape[d] % CCTK_REAL_VEC_SIZE == 0);
+ }
+#endif
+ if (vector_size > 0) {
+ if (d == 0) {
+ assert (padded_shape[d] % vector_size == 0);
+ }
+ }
+
+ // TODO: add self-checks for the other requirements as well
+ }
+
+ if (verbose) {
+ ostringstream buf;
+ buf << "padding " << shape << " to " << padded_shape;
+ CCTK_INFO (buf.str().c_str());
+ }
+
+ return padded_shape;
+}
+
+
+
+template vect<int,3> pad_shape (bbox<int,3> const& extent);
+template vect<int,3> pad_shape (vect<int,3> const& shape);
+
+template vect<int,4> pad_shape (bbox<int,4> const& extent);
+template vect<int,4> pad_shape (vect<int,4> const& shape);
diff --git a/Carpet/CarpetLib/src/cacheinfo.hh b/Carpet/CarpetLib/src/cacheinfo.hh
new file mode 100644
index 000000000..14b464a86
--- /dev/null
+++ b/Carpet/CarpetLib/src/cacheinfo.hh
@@ -0,0 +1,158 @@
+#ifndef CACHEINFO_HH
+#define CACHEINFO_HH
+
+#include <cctk.h>
+
+#include <limits>
+
+#include "bbox.hh"
+#include "defs.hh"
+#include "vect.hh"
+
+
+
+static ptrdiff_t div_down (ptrdiff_t const x, ptrdiff_t const align)
+ CCTK_ATTRIBUTE_UNUSED;
+static ptrdiff_t div_down (ptrdiff_t const x, ptrdiff_t const align)
+{
+ assert (x >= 0);
+ assert (align > 0);
+ return x / align;
+}
+
+static ptrdiff_t div_up (ptrdiff_t const x, ptrdiff_t const align)
+ CCTK_ATTRIBUTE_UNUSED;
+static ptrdiff_t div_up (ptrdiff_t const x, ptrdiff_t const align)
+{
+ assert (x >= 0);
+ assert (align > 0);
+ return (x + align - 1) / align;
+}
+
+static ptrdiff_t align_down (ptrdiff_t const x, ptrdiff_t const align)
+ CCTK_ATTRIBUTE_UNUSED;
+static ptrdiff_t align_down (ptrdiff_t const x, ptrdiff_t const align)
+{
+ assert (x >= 0);
+ assert (align > 0);
+ return div_down(x, align) * align;
+}
+
+static ptrdiff_t align_up (ptrdiff_t const x, ptrdiff_t const align)
+ CCTK_ATTRIBUTE_UNUSED;
+static ptrdiff_t align_up (ptrdiff_t const x, ptrdiff_t const align)
+{
+ assert (x >= 0);
+ assert (align > 0);
+ return div_up(x, align) * align;
+}
+
+static ptrdiff_t next_power_of_2 (ptrdiff_t const x)
+ CCTK_ATTRIBUTE_UNUSED;
+static ptrdiff_t next_power_of_2 (ptrdiff_t const x)
+{
+ assert (x > 0);
+ ptrdiff_t res = 1;
+ while (res < x) {
+ res *= 2;
+ assert (res > 0); // try to catch overflows
+ }
+ return res;
+}
+
+static ptrdiff_t previous_power_of_2 (ptrdiff_t const x)
+ CCTK_ATTRIBUTE_UNUSED;
+static ptrdiff_t previous_power_of_2 (ptrdiff_t const x)
+{
+ return next_power_of_2(x/2+1);
+}
+
+static bool is_power_of_2 (ptrdiff_t const x)
+ CCTK_ATTRIBUTE_UNUSED;
+static bool is_power_of_2 (ptrdiff_t const x)
+{
+ return x == next_power_of_2(x);
+}
+
+#if 0
+static ptrdiff_t lcm (ptrdiff_t const x, ptrdiff_t const y)
+ CCTK_ATTRIBUTE_UNUSED;
+static ptrdiff_t lcm (ptrdiff_t const x, ptrdiff_t const y)
+{
+ assert (x > 0 && y > 0);
+ ptrdiff_t z = x;
+ // TODO: improve LCM algorithm
+ while (z % y != 0) z += x;
+ assert (z % x == 0 && z % y == 0);
+ return z;
+}
+#endif
+
+
+
+class cacheinfo_t {
+ ptrdiff_t m_size; // bytes
+ ptrdiff_t m_linesize; // bytes (pagesize for TLBs)
+ ptrdiff_t m_associativity;
+public:
+ bool invariant () const
+ {
+ return
+ is_power_of_2(m_size) and
+ is_power_of_2(m_linesize) and
+ is_power_of_2(m_associativity) and
+ m_size % (m_linesize * m_associativity) == 0;
+ }
+ cacheinfo_t (ptrdiff_t const a_size,
+ ptrdiff_t const a_linesize,
+ ptrdiff_t const a_associativity)
+ : m_size (a_size),
+ m_linesize (a_linesize),
+ m_associativity (a_associativity)
+ {
+ assert (invariant());
+ }
+ cacheinfo_t (ptrdiff_t const a_linesize)
+ : m_size (previous_power_of_2(numeric_limits<ptrdiff_t>::max())),
+ m_linesize (a_linesize),
+ m_associativity (1)
+ {
+ assert (invariant());
+ }
+ // size in bytes
+ ptrdiff_t size() const
+ {
+ return m_size;
+ }
+ // line size in bytes
+ ptrdiff_t linesize() const
+ {
+ return m_linesize;
+ }
+ // associativity
+ ptrdiff_t associativity() const
+ {
+ return m_associativity;
+ }
+ // number of cache elements
+ ptrdiff_t num_elements() const
+ {
+ return size() / (linesize() * associativity());
+ }
+ // stride (between main memory locations that use the same cache
+ // element) in bytes
+ ptrdiff_t stride() const
+ {
+ return num_elements() * linesize();
+ }
+};
+
+
+
+// These routines are apparently not pure -- don't know why
+template<int D>
+vect<int,D> pad_shape (bbox<int,D> const& extent) /*CCTK_ATTRIBUTE_PURE*/;
+template<int D>
+vect<int,D> pad_shape (vect<int,D> const& shape) /*CCTK_ATTRIBUTE_PURE*/;
+
+#endif // CACHEINFO_HH
diff --git a/Carpet/CarpetLib/src/copy_3d.cc b/Carpet/CarpetLib/src/copy_3d.cc
index 11eedc0c2..7cb4ddb80 100644
--- a/Carpet/CarpetLib/src/copy_3d.cc
+++ b/Carpet/CarpetLib/src/copy_3d.cc
@@ -22,9 +22,11 @@ namespace CarpetLib {
#define SRCIND3(i,j,k) \
index3 (srcioff + (i), srcjoff + (j), srckoff + (k), \
+ srcipadext, srcjpadext, srckpadext, \
srciext, srcjext, srckext)
#define DSTIND3(i,j,k) \
index3 (dstioff + (i), dstjoff + (j), dstkoff + (k), \
+ dstipadext, dstjpadext, dstkpadext, \
dstiext, dstjext, dstkext)
@@ -32,8 +34,10 @@ namespace CarpetLib {
template <typename T>
void
copy_3d (T const * restrict const src,
+ ivect3 const & restrict srcpadext,
ivect3 const & restrict srcext,
T * restrict const dst,
+ ivect3 const & restrict dstpadext,
ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
@@ -48,7 +52,6 @@ namespace CarpetLib {
dstbbox.stride() != dstregbbox.stride() or
srcregbbox1.stride() != dstregbbox.stride()))
{
-#pragma omp critical
{
cout << "copy_3d.cc:" << endl
<< "srcbbox=" << srcbbox << endl
@@ -66,7 +69,6 @@ namespace CarpetLib {
// This could be handled, but is likely to point to an error
// elsewhere
if (dstregbbox.empty()) {
-#pragma omp critical
CCTK_WARN (0, "Internal error: region extent is empty");
}
@@ -79,7 +81,6 @@ namespace CarpetLib {
if (not srcregbbox.is_contained_in(srcbbox) or
not dstregbbox.is_contained_in(dstbbox))
{
-#pragma omp critical
{
cout << "copy_3d.cc:" << endl
<< "srcbbox=" << srcbbox << endl
@@ -93,13 +94,6 @@ namespace CarpetLib {
}
}
- if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or
- dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride())))
- {
-#pragma omp critical
- CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes");
- }
-
ivect3 const regext = dstregbbox.shape() / dstregbbox.stride();
@@ -110,6 +104,14 @@ namespace CarpetLib {
+ ptrdiff_t const srcipadext = srcpadext[0];
+ ptrdiff_t const srcjpadext = srcpadext[1];
+ ptrdiff_t const srckpadext = srcpadext[2];
+
+ ptrdiff_t const dstipadext = dstpadext[0];
+ ptrdiff_t const dstjpadext = dstpadext[1];
+ ptrdiff_t const dstkpadext = dstpadext[2];
+
ptrdiff_t const srciext = srcext[0];
ptrdiff_t const srcjext = srcext[1];
ptrdiff_t const srckext = srcext[2];
@@ -152,8 +154,10 @@ namespace CarpetLib {
template \
void \
copy_3d (T const * restrict const src, \
+ ivect3 const & restrict srcpadext, \
ivect3 const & restrict srcext, \
T * restrict const dst, \
+ ivect3 const & restrict dstpadext, \
ivect3 const & restrict dstext, \
ibbox3 const & restrict srcbbox, \
ibbox3 const & restrict dstbbox, \
diff --git a/Carpet/CarpetLib/src/copy_4d.cc b/Carpet/CarpetLib/src/copy_4d.cc
index 7aed29aaf..b0cc024f4 100644
--- a/Carpet/CarpetLib/src/copy_4d.cc
+++ b/Carpet/CarpetLib/src/copy_4d.cc
@@ -7,7 +7,6 @@
#include <cstdlib>
#include <iostream>
-#include "gdata.hh"
#include "operator_prototypes_4d.hh"
#include "typeprops.hh"
@@ -21,9 +20,11 @@ namespace CarpetLib {
#define SRCIND4(i,j,k,l) \
index4 (srcioff + (i), srcjoff + (j), srckoff + (k), srcloff + (l), \
+ srcipadext, srcjpadext, srckpadext, srclpadext, \
srciext, srcjext, srckext, srclext)
-#define DSTIND4(i,j,k,l) \
- index4 (dstioff + (i), dstjoff + (j), dstkoff + (k), dstloff + (l), \
+#define DSTIND4(i,j,k,l) \
+ index4 (dstioff + (i), dstjoff + (j), dstkoff + (k), dstloff + (l), \
+ dstipadext, dstjpadext, dstkpadext, dstlpadext, \
dstiext, dstjext, dstkext, dstlext)
@@ -31,8 +32,10 @@ namespace CarpetLib {
template <typename T>
void
copy_4d (T const * restrict const src,
+ ivect4 const & restrict srcpadext,
ivect4 const & restrict srcext,
T * restrict const dst,
+ ivect4 const & restrict dstpadext,
ivect4 const & restrict dstext,
ibbox4 const & restrict srcbbox,
ibbox4 const & restrict dstbbox,
@@ -68,12 +71,6 @@ namespace CarpetLib {
CCTK_WARN (0, "Internal error: region extent is not contained in array extent");
}
- if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or
- dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride())))
- {
- CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes");
- }
-
ivect4 const regext = regbbox.shape() / regbbox.stride();
@@ -84,6 +81,16 @@ namespace CarpetLib {
+ ptrdiff_t const srcipadext = srcpadext[0];
+ ptrdiff_t const srcjpadext = srcpadext[1];
+ ptrdiff_t const srckpadext = srcpadext[2];
+ ptrdiff_t const srclpadext = srcpadext[3];
+
+ ptrdiff_t const dstipadext = dstpadext[0];
+ ptrdiff_t const dstjpadext = dstpadext[1];
+ ptrdiff_t const dstkpadext = dstpadext[2];
+ ptrdiff_t const dstlpadext = dstpadext[3];
+
ptrdiff_t const srciext = srcext[0];
ptrdiff_t const srcjext = srcext[1];
ptrdiff_t const srckext = srcext[2];
@@ -133,8 +140,10 @@ namespace CarpetLib {
template \
void \
copy_4d (T const * restrict const src, \
+ ivect4 const & restrict srcpadext, \
ivect4 const & restrict srcext, \
T * restrict const dst, \
+ ivect4 const & restrict dstpadext, \
ivect4 const & restrict dstext, \
ibbox4 const & restrict srcbbox, \
ibbox4 const & restrict dstbbox, \
diff --git a/Carpet/CarpetLib/src/data.cc b/Carpet/CarpetLib/src/data.cc
index 47cb392bf..775f5ec50 100644
--- a/Carpet/CarpetLib/src/data.cc
+++ b/Carpet/CarpetLib/src/data.cc
@@ -18,6 +18,7 @@
#include "bbox.hh"
#include "bboxset.hh"
+#include "cacheinfo.hh"
#include "defs.hh"
#include "dist.hh"
#include "timestat.hh"
@@ -37,8 +38,10 @@ static
void
call_operator (void
(* the_operator) (T const * restrict const src,
+ ivect3 const & restrict srcpadext,
ivect3 const & restrict srcext,
T * restrict const dst,
+ ivect3 const & restrict dstpadext,
ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
@@ -46,8 +49,10 @@ call_operator (void
ibbox3 const & restrict dstregbbox,
void * const extraargs),
T const * restrict const src,
+ ivect3 const & restrict srcpadext,
ivect3 const & restrict srcext,
T * restrict const dst,
+ ivect3 const & restrict dstpadext,
ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
@@ -90,7 +95,7 @@ call_operator (void
dstregbbox.stride());
if (not mydstregbbox.empty()) {
(* the_operator)
- (src, srcext, dst, dstext, srcbbox, dstbbox,
+ (src, srcpadext, srcext, dst, dstpadext, dstext, srcbbox, dstbbox,
srcregbbox, mydstregbbox, extraargs);
# ifdef CARPET_DEBUG
#pragma omp critical
@@ -113,8 +118,10 @@ static
void
call_operator (void
(* the_operator) (T const * restrict const src,
+ ivect4 const & restrict srcpadext,
ivect4 const & restrict srcext,
T * restrict const dst,
+ ivect4 const & restrict dstpadext,
ivect4 const & restrict dstext,
ibbox4 const & restrict srcbbox,
ibbox4 const & restrict dstbbox,
@@ -122,8 +129,10 @@ call_operator (void
ibbox4 const & restrict dstregbbox,
void * const extraargs),
T const * restrict const src,
+ ivect4 const & restrict srcpadext,
ivect4 const & restrict srcext,
T * restrict const dst,
+ ivect4 const & restrict dstpadext,
ivect4 const & restrict dstext,
ibbox4 const & restrict srcbbox,
ibbox4 const & restrict dstbbox,
@@ -133,7 +142,7 @@ call_operator (void
{
#ifndef _OPENMP
(* the_operator)
- (src, srcext, dst, dstext, srcbbox, dstbbox,
+ (src, srcpadext, srcext, dst, dstpadext, dstext, srcbbox, dstbbox,
srcregbbox, dstregbbox, extraargs);
#else
# if ! defined (NDEBUG) && ! defined (CARPET_OPTIMISE)
@@ -166,7 +175,7 @@ call_operator (void
dstregbbox.stride());
if (not mydstregbbox.empty()) {
(* the_operator)
- (src, srcext, dst, dstext, srcbbox, dstbbox,
+ (src, srcpadext, srcext, dst, dstpadext, dstext, srcbbox, dstbbox,
srcregbbox, mydstregbbox, extraargs);
# if ! defined (NDEBUG) && ! defined (CARPET_OPTIMISE)
#pragma omp critical
@@ -191,8 +200,10 @@ call_operator (void
template <typename T>
void
prolongate_3d_eno (T const * restrict const /*src*/,
+ ivect3 const & /*srcpadext*/,
ivect3 const & /*srcext*/,
T * restrict const /*dst*/,
+ ivect3 const & /*dstpadext*/,
ivect3 const & /*dstext*/,
ibbox3 const & /*srcbbox*/,
ibbox3 const & /*dstbbox*/,
@@ -207,8 +218,10 @@ extern "C"
void
CCTK_FCALL CCTK_FNAME(prolongate_3d_real8_eno)
(const CCTK_REAL8* src,
+ const int& srcipadext, const int& srcjpadext, const int& srckpadext,
const int& srciext, const int& srcjext, const int& srckext,
CCTK_REAL8* dst,
+ const int& dstipadext, const int& dstjpadext, const int& dstkpadext,
const int& dstiext, const int& dstjext, const int& dstkext,
const int srcbbox[3][3],
const int dstbbox[3][3],
@@ -217,8 +230,10 @@ CCTK_FCALL CCTK_FNAME(prolongate_3d_real8_eno)
template <>
void
prolongate_3d_eno (CCTK_REAL8 const * restrict const src,
+ ivect3 const & srcpadext,
ivect3 const & srcext,
CCTK_REAL8 * restrict const dst,
+ ivect3 const & dstpadext,
ivect3 const & dstext,
ibbox3 const & srcbbox,
ibbox3 const & dstbbox,
@@ -229,8 +244,10 @@ prolongate_3d_eno (CCTK_REAL8 const * restrict const src,
assert (not extraargs);
CCTK_FNAME(prolongate_3d_real8_eno)
(src,
+ srcpadext[0], srcpadext[1], srcpadext[2],
srcext[0], srcext[1], srcext[2],
dst,
+ dstpadext[0], dstpadext[1], dstpadext[2],
dstext[0], dstext[1], dstext[2],
reinterpret_cast <int const (*) [3]> (& srcbbox),
reinterpret_cast <int const (*) [3]> (& dstbbox),
@@ -242,8 +259,10 @@ prolongate_3d_eno (CCTK_REAL8 const * restrict const src,
template <typename T>
void
prolongate_3d_weno (T const * restrict const /*src*/,
+ ivect3 const & /*srcpadext*/,
ivect3 const & /*srcext*/,
T * restrict const /*dst*/,
+ ivect3 const & /*dstpadext*/,
ivect3 const & /*dstext*/,
ibbox3 const & /*srcbbox*/,
ibbox3 const & /*dstbbox*/,
@@ -258,8 +277,10 @@ extern "C"
void
CCTK_FCALL CCTK_FNAME(prolongate_3d_real8_weno)
(const CCTK_REAL8* src,
+ const int& srcipadext, const int& srcjpadext, const int& srckpadext,
const int& srciext, const int& srcjext, const int& srckext,
CCTK_REAL8* dst,
+ const int& dstipadext, const int& dstjpadext, const int& dstkpadext,
const int& dstiext, const int& dstjext, const int& dstkext,
const int srcbbox[3][3],
const int dstbbox[3][3],
@@ -268,8 +289,10 @@ CCTK_FCALL CCTK_FNAME(prolongate_3d_real8_weno)
template <>
void
prolongate_3d_weno (CCTK_REAL8 const * restrict const src,
+ ivect3 const & srcpadext,
ivect3 const & srcext,
CCTK_REAL8 * restrict const dst,
+ ivect3 const & dstpadext,
ivect3 const & dstext,
ibbox3 const & srcbbox,
ibbox3 const & dstbbox,
@@ -279,8 +302,10 @@ prolongate_3d_weno (CCTK_REAL8 const * restrict const src,
{
CCTK_FNAME(prolongate_3d_real8_weno)
(src,
+ srcpadext[0], srcpadext[1], srcpadext[2],
srcext[0], srcext[1], srcext[2],
dst,
+ dstpadext[0], dstpadext[1], dstpadext[2],
dstext[0], dstext[1], dstext[2],
reinterpret_cast <int const (*) [3]> (& srcbbox),
reinterpret_cast <int const (*) [3]> (& dstbbox),
@@ -292,8 +317,10 @@ prolongate_3d_weno (CCTK_REAL8 const * restrict const src,
template <typename T>
void
prolongate_3d_tvd (T const * restrict const /*src*/,
+ ivect3 const & /*srcpadext*/,
ivect3 const & /*srcext*/,
T * restrict const /*dst*/,
+ ivect3 const & /*dstpadext*/,
ivect3 const & /*dstext*/,
ibbox3 const & /*srcbbox*/,
ibbox3 const & /*dstbbox*/,
@@ -309,8 +336,10 @@ extern "C"
void
CCTK_FCALL CCTK_FNAME(prolongate_3d_real8_tvd)
(const CCTK_REAL8* src,
+ const int& srcipadext, const int& srcjpadext, const int& srckpadext,
const int& srciext, const int& srcjext, const int& srckext,
CCTK_REAL8* dst,
+ const int& dstipadext, const int& dstjpadext, const int& dstkpadext,
const int& dstiext, const int& dstjext, const int& dstkext,
const int srcbbox[3][3],
const int dstbbox[3][3],
@@ -319,8 +348,10 @@ CCTK_FCALL CCTK_FNAME(prolongate_3d_real8_tvd)
template <>
void
prolongate_3d_tvd (CCTK_REAL8 const * restrict const src,
+ ivect3 const & srcpadext,
ivect3 const & srcext,
CCTK_REAL8 * restrict const dst,
+ ivect3 const & dstpadext,
ivect3 const & dstext,
ibbox3 const & srcbbox,
ibbox3 const & dstbbox,
@@ -331,8 +362,10 @@ prolongate_3d_tvd (CCTK_REAL8 const * restrict const src,
assert (not extraargs);
CCTK_FNAME(prolongate_3d_real8_tvd)
(src,
+ srcpadext[0], srcpadext[1], srcpadext[2],
srcext[0], srcext[1], srcext[2],
dst,
+ dstpadext[0], dstpadext[1], dstpadext[2],
dstext[0], dstext[1], dstext[2],
reinterpret_cast <int const (*) [3]> (& srcbbox),
reinterpret_cast <int const (*) [3]> (& dstbbox),
@@ -345,8 +378,10 @@ prolongate_3d_tvd (CCTK_REAL8 const * restrict const src,
template <typename T>
void
prolongate_3d_cc_tvd (T const * restrict const /*src*/,
+ ivect3 const & /*srcpadext*/,
ivect3 const & /*srcext*/,
T * restrict const /*dst*/,
+ ivect3 const & /*dstpadext*/,
ivect3 const & /*dstext*/,
ibbox3 const & /*srcbbox*/,
ibbox3 const & /*dstbbox*/,
@@ -362,8 +397,10 @@ extern "C"
void
CCTK_FCALL CCTK_FNAME(prolongate_3d_cc_real8_tvd)
(const CCTK_REAL8* src,
+ const int& srcipadext, const int& srcjpadext, const int& srckpadext,
const int& srciext, const int& srcjext, const int& srckext,
CCTK_REAL8* dst,
+ const int& dstipadext, const int& dstjpadext, const int& dstkpadext,
const int& dstiext, const int& dstjext, const int& dstkext,
const int srcbbox[3][3],
const int dstbbox[3][3],
@@ -372,8 +409,10 @@ CCTK_FCALL CCTK_FNAME(prolongate_3d_cc_real8_tvd)
template <>
void
prolongate_3d_cc_tvd (CCTK_REAL8 const * restrict const src,
+ ivect3 const & srcpadext,
ivect3 const & srcext,
CCTK_REAL8 * restrict const dst,
+ ivect3 const & dstpadext,
ivect3 const & dstext,
ibbox3 const & srcbbox,
ibbox3 const & dstbbox,
@@ -384,8 +423,10 @@ prolongate_3d_cc_tvd (CCTK_REAL8 const * restrict const src,
assert (not extraargs);
CCTK_FNAME(prolongate_3d_cc_real8_tvd)
(src,
+ srcpadext[0], srcpadext[1], srcpadext[2],
srcext[0], srcext[1], srcext[2],
dst,
+ dstpadext[0], dstpadext[1], dstpadext[2],
dstext[0], dstext[1], dstext[2],
reinterpret_cast <int const (*) [3]> (& srcbbox),
reinterpret_cast <int const (*) [3]> (& dstbbox),
@@ -466,16 +507,13 @@ void data<T>::allocate (const ibbox& extent_,
// data
_extent = extent_;
- //_shape = max (ivect(0), _extent.shape() / _extent.stride());
- _shape = allocated_memory_shape (_extent);
- assert (all (_shape >= max (ivect(0), _extent.shape() / _extent.stride())));
-
- _size = 1;
- for (int d=0; d<dim; ++d) {
- _stride[d] = _size;
- assert (_shape[d]==0 or _size <= numeric_limits<int>::max() / _shape[d]);
- _size *= _shape[d];
+ _shape = max (ivect(0), _extent.shape() / _extent.stride());
+ _padded_shape = pad_shape(_shape);
+ _stride[0] = 1;
+ for (int d=1; d<dim; ++d) {
+ _stride[d] = _stride[d-1] * _padded_shape[d-1];
}
+ _size = _stride[dim-1] * _padded_shape[dim-1];
_proc = proc_;
if (dist::rank() == _proc) {
@@ -512,9 +550,7 @@ size_t data<T>::allocsize (const ibbox & extent_, const int proc_) const
if (dist::rank() != proc_) return 0;
if (vectorindex != 0) return 0;
assert (not vectorleader);
- ivect const shape_ = allocated_memory_shape (extent_);
- //return vectorlength * extent_.size() * sizeof (T);
- return vectorlength * prod(shape_) * sizeof (T);
+ return vectorlength * prod (pad_shape (extent_)) * sizeof (T);
}
@@ -541,17 +577,17 @@ copy_from_innerloop (gdata const * const gsrc,
#if CARPET_DIM == 3
// Don't use call_operator, because we parallelise ourselves
copy_3d(static_cast <T const *> (src->storage()),
- src->shape(),
+ src->padded_shape(), src->shape(),
static_cast <T *> (this->storage()),
- this->shape(),
+ this->padded_shape(), this->shape(),
srcbox, dstbox,
srcregbox, dstregbox, (void*)slabinfo);
#elif CARPET_DIM == 4
// Don't use call_operator, because we parallelise ourselves
copy_4d(static_cast <T const *> (src->storage()),
- src->shape(),
+ src->padded_shape(), src->shape(),
static_cast <T *> (this->storage()),
- this->shape(),
+ this->padded_shape(), this->shape(),
srcbox, dstbox,
srcregbox, dstregbox, (void*)slabinfo);
#else
@@ -725,8 +761,10 @@ transfer_prolongate (data const * const src,
case vertex_centered: {
static
void (* the_operators[]) (T const * restrict const src,
+ ivect3 const & restrict srcpadext,
ivect3 const & restrict srcext,
T * restrict const dst,
+ ivect3 const & restrict dstpadext,
ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
@@ -762,9 +800,9 @@ transfer_prolongate (data const * const src,
}
call_operator<T> (the_operators[order_space],
static_cast <T const *> (src->storage()),
- src->shape(),
+ src->padded_shape(), src->shape(),
static_cast <T *> (this->storage()),
- this->shape(),
+ this->padded_shape(), this->shape(),
src->extent(),
this->extent(),
srcbox, dstbox, NULL);
@@ -774,9 +812,9 @@ transfer_prolongate (data const * const src,
if (use_dgfe) {
// Don't use call_operator, because we parallelise ourselves
prolongate_3d_dgfe_rf2<T,5>(static_cast<T const *>(src->storage()),
- src->shape(),
+ src->padded_shape(), src->shape(),
static_cast<T *>(this->storage()),
- this->shape(),
+ this->padded_shape(), this->shape(),
src->extent(),
this->extent(),
srcbox, dstbox, NULL);
@@ -784,8 +822,10 @@ transfer_prolongate (data const * const src,
}
static
void (* the_operators[]) (T const * restrict const src,
+ ivect3 const & restrict srcpadext,
ivect3 const & restrict srcext,
T * restrict const dst,
+ ivect3 const & restrict dstpadext,
ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
@@ -806,9 +846,9 @@ transfer_prolongate (data const * const src,
}
call_operator<T> (the_operators[order_space],
static_cast <T const *> (src->storage()),
- src->shape(),
+ src->padded_shape(), src->shape(),
static_cast <T *> (this->storage()),
- this->shape(),
+ this->padded_shape(), this->shape(),
src->extent(),
this->extent(),
srcbox, dstbox, NULL);
@@ -835,9 +875,9 @@ transfer_prolongate (data const * const src,
case 3:
call_operator<T> (& prolongate_3d_eno,
static_cast <T const *> (src->storage()),
- src->shape(),
+ src->padded_shape(), src->shape(),
static_cast <T *> (this->storage()),
- this->shape(),
+ this->padded_shape(), this->shape(),
src->extent(),
this->extent(),
srcbox, dstbox, NULL);
@@ -848,9 +888,9 @@ transfer_prolongate (data const * const src,
// hydro, so we cheat here.
call_operator<T> (& prolongate_3d_eno,
static_cast <T const *> (src->storage()),
- src->shape(),
+ src->padded_shape(), src->shape(),
static_cast <T *> (this->storage()),
- this->shape(),
+ this->padded_shape(), this->shape(),
src->extent(),
this->extent(),
srcbox, dstbox, NULL);
@@ -860,13 +900,15 @@ transfer_prolongate (data const * const src,
"There is no stencil for op=\"ENO\" with order_space!=3");
break;
}
+ break;
}
- break;
case cell_centered: {
static
void (* the_operators[]) (T const * restrict const src,
+ ivect3 const & restrict srcpadext,
ivect3 const & restrict srcext,
T * restrict const dst,
+ ivect3 const & restrict dstpadext,
ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
@@ -937,18 +979,20 @@ transfer_prolongate (data const * const src,
call_operator<T> (the_operators[order_space-2],
static_cast <T const *> (src->storage()),
- src->shape(),
+ src->padded_shape(), src->shape(),
static_cast <T *> (this->storage()),
- this->shape(),
+ this->padded_shape(), this->shape(),
src->extent(),
this->extent(),
srcbox, dstbox, NULL);
+ break;
}
- break;
+ default:
+ assert(0);
}
timer.stop (0);
+ break;
}
- break;
case op_WENO: {
static Timer timer ("prolongate_WENO");
timer.start ();
@@ -967,9 +1011,9 @@ transfer_prolongate (data const * const src,
case 5:
call_operator<T> (& prolongate_3d_eno,
static_cast <T const *> (src->storage()),
- src->shape(),
+ src->padded_shape(), src->shape(),
static_cast <T *> (this->storage()),
- this->shape(),
+ this->padded_shape(), this->shape(),
src->extent(),
this->extent(),
srcbox, dstbox, NULL);
@@ -979,15 +1023,15 @@ transfer_prolongate (data const * const src,
"There is no stencil for op=\"WENO\" with order_space!=5");
break;
}
-
+ break;
}
- break;
case cell_centered: {
CCTK_WARN (CCTK_WARN_ABORT,
"There are currently no cell-centred stencils for op=\"WENO\"");
break;
}
- break;
+ default:
+ assert(0);
}
timer.stop (0);
}
@@ -1002,9 +1046,9 @@ transfer_prolongate (data const * const src,
case 1:
call_operator<T> (& prolongate_3d_tvd,
static_cast <T const *> (src->storage()),
- src->shape(),
+ src->padded_shape(), src->shape(),
static_cast <T *> (this->storage()),
- this->shape(),
+ this->padded_shape(), this->shape(),
src->extent(),
this->extent(),
srcbox, dstbox, NULL);
@@ -1021,9 +1065,9 @@ transfer_prolongate (data const * const src,
case 1:
call_operator<T> (& prolongate_3d_cc_tvd,
static_cast <T const *> (src->storage()),
- src->shape(),
+ src->padded_shape(), src->shape(),
static_cast <T *> (this->storage()),
- this->shape(),
+ this->padded_shape(), this->shape(),
src->extent(),
this->extent(),
srcbox, dstbox, NULL);
@@ -1035,6 +1079,8 @@ transfer_prolongate (data const * const src,
}
break;
}
+ default:
+ assert(0);
}
timer.stop (0);
break;
@@ -1055,9 +1101,9 @@ transfer_prolongate (data const * const src,
case 5:
call_operator<T> (& prolongate_3d_o5_monotone_rf2,
static_cast <T const *> (src->storage()),
- src->shape(),
+ src->padded_shape(), src->shape(),
static_cast <T *> (this->storage()),
- this->shape(),
+ this->padded_shape(), this->shape(),
src->extent(),
this->extent(),
srcbox, dstbox, NULL);
@@ -1090,9 +1136,9 @@ transfer_prolongate (data const * const src,
case 1:
call_operator<T> (& prolongate_4d_o1_rf2,
static_cast <T const *> (src->storage()),
- src->shape(),
+ src->padded_shape(), src->shape(),
static_cast <T *> (this->storage()),
- this->shape(),
+ this->padded_shape(), this->shape(),
src->extent(),
this->extent(),
srcbox, dstbox, NULL);
@@ -1164,9 +1210,9 @@ transfer_restrict (data const * const src,
assert (not slabinfo);
call_operator<T> (& restrict_3d_rf2,
static_cast <T const *> (src->storage()),
- src->shape(),
+ src->padded_shape(), src->shape(),
static_cast <T *> (this->storage()),
- this->shape(),
+ this->padded_shape(), this->shape(),
src->extent(),
this->extent(),
srcregbox, dstregbox, NULL);
@@ -1182,9 +1228,9 @@ transfer_restrict (data const * const src,
if (use_dgfe) {
// Don't use call_operator, because we parallelise ourselves
restrict_3d_dgfe_rf2<T,5>(static_cast<T const *>(src->storage()),
- src->shape(),
+ src->padded_shape(), src->shape(),
static_cast<T *>(this->storage()),
- this->shape(),
+ this->padded_shape(), this->shape(),
srcbox,
dstbox,
srcregbox, dstregbox, NULL);
@@ -1194,37 +1240,37 @@ transfer_restrict (data const * const src,
transport_operator != op_WENO and
transport_operator != op_ENO) { // HACK
switch (restriction_order_space) {
- case 1:
+ case 1:
// Don't use call_operator, because we parallelise ourselves
restrict_3d_cc_rf2(static_cast <T const *> (src->storage()),
- src->shape(),
+ src->padded_shape(), src->shape(),
static_cast <T *> (this->storage()),
- this->shape(),
+ this->padded_shape(), this->shape(),
srcbox,
dstbox,
srcregbox, dstregbox, NULL);
break;
- case 3:
+ case 3:
// Don't use call_operator, because we parallelise ourselves
restrict_3d_cc_o3_rf2(static_cast <T const *> (src->storage()),
- src->shape(),
- static_cast <T *> (this->storage()),
- this->shape(),
- srcbox,
- dstbox,
- srcregbox, dstregbox, NULL);
+ src->padded_shape(), src->shape(),
+ static_cast <T *> (this->storage()),
+ this->padded_shape(), this->shape(),
+ srcbox,
+ dstbox,
+ srcregbox, dstregbox, NULL);
break;
- case 5:
+ case 5:
// Don't use call_operator, because we parallelise ourselves
restrict_3d_cc_o5_rf2(static_cast <T const *> (src->storage()),
- src->shape(),
- static_cast <T *> (this->storage()),
- this->shape(),
- srcbox,
- dstbox,
- srcregbox, dstregbox, NULL);
+ src->padded_shape(), src->shape(),
+ static_cast <T *> (this->storage()),
+ this->padded_shape(), this->shape(),
+ srcbox,
+ dstbox,
+ srcregbox, dstregbox, NULL);
break;
- default:
+ default:
CCTK_VWarn (CCTK_WARN_ABORT, __LINE__, __FILE__, CCTK_THORNSTRING,
"There is no restriction stencil with restriction_order_space==%d", restriction_order_space);
break;
@@ -1233,36 +1279,36 @@ transfer_restrict (data const * const src,
}
// Don't use call_operator, because we parallelise ourselves
restrict_3d_cc_rf2(static_cast <T const *> (src->storage()),
- src->shape(),
+ src->padded_shape(), src->shape(),
static_cast <T *> (this->storage()),
- this->shape(),
+ this->padded_shape(), this->shape(),
srcbox,
dstbox,
srcregbox, dstregbox, NULL);
} else if (all(is_centered == ivect(0,1,1))) {
// Don't use call_operator, because we parallelise ourselves
restrict_3d_vc_rf2<T,0,1,1>(static_cast <T const *> (src->storage()),
- src->shape(),
+ src->padded_shape(), src->shape(),
static_cast <T *> (this->storage()),
- this->shape(),
+ this->padded_shape(), this->shape(),
srcbox,
dstbox,
srcregbox, dstregbox, NULL);
} else if (all(is_centered == ivect(1,0,1))) {
// Don't use call_operator, because we parallelise ourselves
restrict_3d_vc_rf2<T,1,0,1>(static_cast <T const *> (src->storage()),
- src->shape(),
+ src->padded_shape(), src->shape(),
static_cast <T *> (this->storage()),
- this->shape(),
+ this->padded_shape(), this->shape(),
srcbox,
dstbox,
srcregbox, dstregbox, NULL);
} else if (all(is_centered == ivect(1,1,0))) {
// Don't use call_operator, because we parallelise ourselves
restrict_3d_vc_rf2<T,1,1,0>(static_cast <T const *> (src->storage()),
- src->shape(),
+ src->padded_shape(), src->shape(),
static_cast <T *> (this->storage()),
- this->shape(),
+ this->padded_shape(), this->shape(),
srcbox,
dstbox,
srcregbox, dstregbox, NULL);
@@ -1291,9 +1337,9 @@ transfer_restrict (data const * const src,
case vertex_centered:
// Don't use call_operator, because we parallelise ourselves
restrict_4d_rf2(static_cast <T const *> (src->storage()),
- src->shape(),
+ src->padded_shape(), src->shape(),
static_cast <T *> (this->storage()),
- this->shape(),
+ this->padded_shape(), this->shape(),
src->extent(),
this->extent(),
srcregbox, dstregbox, NULL);
@@ -1357,10 +1403,10 @@ time_interpolate (vector <data *> const & srcs,
times.AT(0),
static_cast <T const *> (srcs.AT(1)->storage()),
times.AT(1),
- srcs.AT(0)->shape(),
+ srcs.AT(0)->padded_shape(), srcs.AT(0)->shape(),
static_cast <T *> (this->storage()),
time,
- this->shape(),
+ this->padded_shape(), this->shape(),
srcs.AT(0)->extent(),
this->extent(),
srcbox, dstbox, NULL);
@@ -1374,10 +1420,10 @@ time_interpolate (vector <data *> const & srcs,
times.AT(1),
static_cast <T const *> (srcs.AT(2)->storage()),
times.AT(2),
- srcs.AT(0)->shape(),
+ srcs.AT(0)->padded_shape(), srcs.AT(0)->shape(),
static_cast <T *> (this->storage()),
time,
- this->shape(),
+ this->padded_shape(), this->shape(),
srcs.AT(0)->extent(),
this->extent(),
srcbox, dstbox, NULL);
@@ -1393,10 +1439,10 @@ time_interpolate (vector <data *> const & srcs,
times.AT(2),
static_cast <T const *> (srcs.AT(3)->storage()),
times.AT(3),
- srcs.AT(0)->shape(),
+ srcs.AT(0)->padded_shape(), srcs.AT(0)->shape(),
static_cast <T *> (this->storage()),
time,
- this->shape(),
+ this->padded_shape(), this->shape(),
srcs.AT(0)->extent(),
this->extent(),
srcbox, dstbox, NULL);
@@ -1414,10 +1460,10 @@ time_interpolate (vector <data *> const & srcs,
times.AT(3),
static_cast <T const *> (srcs.AT(4)->storage()),
times.AT(4),
- srcs.AT(0)->shape(),
+ srcs.AT(0)->padded_shape(), srcs.AT(0)->shape(),
static_cast <T *> (this->storage()),
time,
- this->shape(),
+ this->padded_shape(), this->shape(),
srcs.AT(0)->extent(),
this->extent(),
srcbox, dstbox, NULL);
@@ -1446,10 +1492,10 @@ time_interpolate (vector <data *> const & srcs,
times.AT(0),
static_cast <T const *> (srcs.AT(1)->storage()),
times.AT(1),
- srcs.AT(0)->shape(),
+ srcs.AT(0)->padded_shape(), srcs.AT(0)->shape(),
static_cast <T *> (this->storage()),
time,
- this->shape(),
+ this->padded_shape(), this->shape(),
srcs.AT(0)->extent(),
this->extent(),
srcbox, dstbox, NULL);
@@ -1463,10 +1509,10 @@ time_interpolate (vector <data *> const & srcs,
times.AT(1),
static_cast <T const *> (srcs.AT(2)->storage()),
times.AT(2),
- srcs.AT(0)->shape(),
+ srcs.AT(0)->padded_shape(), srcs.AT(0)->shape(),
static_cast <T *> (this->storage()),
time,
- this->shape(),
+ this->padded_shape(), this->shape(),
srcs.AT(0)->extent(),
this->extent(),
srcbox, dstbox, NULL);
@@ -1535,6 +1581,8 @@ output (ostream & os)
T Tdummy;
os << "data<" << typestring(Tdummy) << ">:"
<< "extent=" << extent() << ","
+ << "shape=" << shape() << ","
+ << "padded_shape=" << padded_shape() << ","
<< "stride=" << stride() << ",size=" << size();
return os;
}
diff --git a/Carpet/CarpetLib/src/gdata.cc b/Carpet/CarpetLib/src/gdata.cc
index 156e9cbbd..67acbe453 100644
--- a/Carpet/CarpetLib/src/gdata.cc
+++ b/Carpet/CarpetLib/src/gdata.cc
@@ -19,6 +19,7 @@
#endif
#include "bbox.hh"
+#include "cacheinfo.hh"
#include "commstate.hh"
#include "defs.hh"
#include "dist.hh"
@@ -115,6 +116,7 @@ gdata::~gdata ()
+#if 0
// Storage management
template<int D>
@@ -181,6 +183,7 @@ allocated_memory_shape (vect<int,D> shape)
done:;
return shape;
}
+#endif
@@ -262,8 +265,8 @@ transfer_from (comm_state & state,
find_source_timelevel
(times, time, order_time, my_transport_operator, timelevel0, ntimelevels);
if (is_src) assert (int (srcs.size()) >= ntimelevels);
- int const dstpoints = prod(allocated_memory_shape(dstbox));
- int const srcpoints = prod(allocated_memory_shape(srcbox)) * ntimelevels;
+ int const dstpoints = prod(pad_shape(dstbox));
+ int const srcpoints = prod(pad_shape(srcbox)) * ntimelevels;
bool const interp_on_src = dstpoints <= srcpoints;
int const npoints = interp_on_src ? dstpoints : srcpoints;
@@ -289,10 +292,10 @@ transfer_from (comm_state & state,
// copy the data into the send buffer
if (interp_on_src) {
size_t const sendbufsize =
- src->c_datatype_size() * prod(allocated_memory_shape(dstbox));
+ src->c_datatype_size() * prod(pad_shape(dstbox));
void * const sendbuf =
state.send_buffer (src->c_datatype(), dstproc,
- prod(allocated_memory_shape(dstbox)));
+ prod(pad_shape(dstbox)));
gdata * const buf =
src->make_typed (src->varindex, src->cent, src->transport_operator);
buf->allocate (dstbox, srcproc, sendbuf, sendbufsize);
@@ -301,14 +304,14 @@ transfer_from (comm_state & state,
time, order_space, order_time);
delete buf;
state.commit_send_space
- (src->c_datatype(), dstproc, prod(allocated_memory_shape(dstbox)));
+ (src->c_datatype(), dstproc, prod(pad_shape(dstbox)));
} else {
for (int tl = timelevel0; tl < timelevel0 + ntimelevels; ++ tl) {
size_t const sendbufsize =
- src->c_datatype_size() * prod(allocated_memory_shape(srcbox));
+ src->c_datatype_size() * prod(pad_shape(srcbox));
void * const sendbuf =
state.send_buffer (src->c_datatype(), dstproc,
- prod(allocated_memory_shape(srcbox)));
+ prod(pad_shape(srcbox)));
gdata * const buf =
src->make_typed (src->varindex, src->cent,
src->transport_operator);
@@ -316,7 +319,7 @@ transfer_from (comm_state & state,
buf->copy_from_innerloop (srcs.AT(tl), srcbox, srcbox, NULL);
delete buf;
state.commit_send_space (src->c_datatype(), dstproc,
- prod(allocated_memory_shape(srcbox)));
+ prod(pad_shape(srcbox)));
}
}
}
@@ -337,14 +340,13 @@ transfer_from (comm_state & state,
// copy from the recv buffer
if (interp_on_src) {
size_t const recvbufsize =
- c_datatype_size() * prod(allocated_memory_shape(dstbox));
+ c_datatype_size() * prod(pad_shape(dstbox));
void * const recvbuf =
- state.recv_buffer (c_datatype(), srcproc,
- prod(allocated_memory_shape(dstbox)));
+ state.recv_buffer (c_datatype(), srcproc, prod(pad_shape(dstbox)));
gdata * const buf = make_typed (varindex, cent, transport_operator);
buf->allocate (dstbox, dstproc, recvbuf, recvbufsize);
state.commit_recv_space (c_datatype(), srcproc,
- prod(allocated_memory_shape(dstbox)));
+ prod(pad_shape(dstbox)));
copy_from_innerloop (buf, dstbox, dstbox, NULL);
delete buf;
} else {
@@ -353,14 +355,14 @@ transfer_from (comm_state & state,
vector <CCTK_REAL> timebuf (ntimelevels);
for (int tl = 0; tl < ntimelevels; ++ tl) {
size_t const recvbufsize =
- c_datatype_size() * prod(allocated_memory_shape(srcbox));
+ c_datatype_size() * prod(pad_shape(srcbox));
void * const recvbuf =
state.recv_buffer (c_datatype(), srcproc,
- prod(allocated_memory_shape(srcbox)));
+ prod(pad_shape(srcbox)));
gdata * const buf = make_typed (varindex, cent, transport_operator);
buf->allocate (srcbox, dstproc, recvbuf, recvbufsize);
state.commit_recv_space
- (c_datatype(), srcproc, prod(allocated_memory_shape(srcbox)));
+ (c_datatype(), srcproc, prod(pad_shape(srcbox)));
bufs.AT(tl) = buf;
timebuf.AT(tl) = times.AT(timelevel0 + tl);
}
@@ -469,11 +471,3 @@ allmemory ()
}
return mem;
}
-
-
-
-template vect<int,3> gdata::allocated_memory_shape (bbox<int,3> const& extent);
-template vect<int,3> gdata::allocated_memory_shape (vect<int,3> shape);
-
-template vect<int,4> gdata::allocated_memory_shape (bbox<int,4> const& extent);
-template vect<int,4> gdata::allocated_memory_shape (vect<int,4> shape);
diff --git a/Carpet/CarpetLib/src/gdata.hh b/Carpet/CarpetLib/src/gdata.hh
index ea44bc6fc..129fccc1c 100644
--- a/Carpet/CarpetLib/src/gdata.hh
+++ b/Carpet/CarpetLib/src/gdata.hh
@@ -71,7 +71,8 @@ protected:
int _proc; // stored on processor
- ivect _shape, _stride; // shape and index order
+ ivect _shape; // shape
+ ivect _padded_shape, _stride; // allocated shape and index order
ibbox _extent; // bbox for all data
@@ -105,10 +106,6 @@ public:
0;
virtual void free () = 0;
virtual size_t allocsize (const ibbox& extent, const int proc) const = 0;
- template<int D>
- static vect<int,D> allocated_memory_shape (bbox<int,D> const& extent);
- template<int D>
- static vect<int,D> allocated_memory_shape (vect<int,D> shape);
// Accessors
bool has_storage () const {
@@ -145,6 +142,11 @@ public:
return _shape;
}
+ const ivect& padded_shape () const {
+ assert (_has_storage);
+ return _padded_shape;
+ }
+
const ivect& stride () const {
assert (_has_storage);
return _stride;
@@ -164,8 +166,10 @@ public:
assert (_has_storage);
assert (all((index-extent().lower()) % extent().stride() == 0));
ivect const ind = (index-extent().lower()) / extent().stride();
- assert (all(ind>=0 and ind<=shape()));
- return dot(ind, stride());
+ assert (all(ind>=0 and ind<shape()));
+ int const off = dot(ind, stride());
+ assert (off>=0 and off<size());
+ return off;
}
private:
diff --git a/Carpet/CarpetLib/src/interpolate_3d_2tl.cc b/Carpet/CarpetLib/src/interpolate_3d_2tl.cc
index 9590632ad..ddfe15254 100644
--- a/Carpet/CarpetLib/src/interpolate_3d_2tl.cc
+++ b/Carpet/CarpetLib/src/interpolate_3d_2tl.cc
@@ -8,7 +8,6 @@
#include <loopcontrol.h>
-#include "gdata.hh"
#include "operator_prototypes_3d.hh"
#include "typeprops.hh"
@@ -22,9 +21,11 @@ namespace CarpetLib {
#define SRCIND3(i,j,k) \
index3 (srcioff + (i), srcjoff + (j), srckoff + (k), \
+ srcipadext, srcjpadext, srckpadext, \
srciext, srcjext, srckext)
#define DSTIND3(i,j,k) \
index3 (dstioff + (i), dstjoff + (j), dstkoff + (k), \
+ dstipadext, dstjpadext, dstkpadext, \
dstiext, dstjext, dstkext)
@@ -35,9 +36,11 @@ namespace CarpetLib {
CCTK_REAL const t1,
T const * restrict const src2,
CCTK_REAL const t2,
+ ivect3 const & restrict srcpadext,
ivect3 const & restrict srcext,
T * restrict const dst,
CCTK_REAL const t,
+ ivect3 const & restrict dstpadext,
ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
@@ -73,12 +76,6 @@ namespace CarpetLib {
CCTK_WARN (0, "Internal error: region extent is not contained in array extent");
}
- if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or
- dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride())))
- {
- CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes");
- }
-
ivect3 const regext = regbbox.shape() / regbbox.stride();
@@ -89,6 +86,14 @@ namespace CarpetLib {
+ ptrdiff_t const srcipadext = srcpadext[0];
+ ptrdiff_t const srcjpadext = srcpadext[1];
+ ptrdiff_t const srckpadext = srcpadext[2];
+
+ ptrdiff_t const dstipadext = dstpadext[0];
+ ptrdiff_t const dstjpadext = dstpadext[1];
+ ptrdiff_t const dstkpadext = dstpadext[2];
+
ptrdiff_t const srciext = srcext[0];
ptrdiff_t const srcjext = srcext[1];
ptrdiff_t const srckext = srcext[2];
@@ -129,7 +134,8 @@ namespace CarpetLib {
// Loop over region
#pragma omp parallel
CCTK_LOOP3(interpolate_3d_2tl,
- i,j,k, 0,0,0, regiext,regjext,regkext, srciext,srcjext,srckext)
+ i,j,k, 0,0,0, regiext,regjext,regkext,
+ dstipadext,dstjpadext,dstkpadext)
{
dst [DSTIND3(i, j, k)] =
@@ -142,21 +148,23 @@ namespace CarpetLib {
-#define TYPECASE(N,T) \
- template \
- void \
- interpolate_3d_2tl (T const * restrict const src1, \
- CCTK_REAL const t1, \
- T const * restrict const src2, \
- CCTK_REAL const t2, \
- ivect3 const & restrict srcext, \
- T * restrict const dst, \
- CCTK_REAL const t, \
- ivect3 const & restrict dstext, \
- ibbox3 const & restrict srcbbox, \
- ibbox3 const & restrict dstbbox, \
- ibbox3 const & restrict, \
- ibbox3 const & restrict regbbox, \
+#define TYPECASE(N,T) \
+ template \
+ void \
+ interpolate_3d_2tl (T const * restrict const src1, \
+ CCTK_REAL const t1, \
+ T const * restrict const src2, \
+ CCTK_REAL const t2, \
+ ivect3 const & restrict srcpadext, \
+ ivect3 const & restrict srcext, \
+ T * restrict const dst, \
+ CCTK_REAL const t, \
+ ivect3 const & restrict dstpadext, \
+ ivect3 const & restrict dstext, \
+ ibbox3 const & restrict srcbbox, \
+ ibbox3 const & restrict dstbbox, \
+ ibbox3 const & restrict, \
+ ibbox3 const & restrict regbbox, \
void * extraargs);
#include "typecase.hh"
#undef TYPECASE
diff --git a/Carpet/CarpetLib/src/interpolate_3d_3tl.cc b/Carpet/CarpetLib/src/interpolate_3d_3tl.cc
index eaf24bea5..ce8249998 100644
--- a/Carpet/CarpetLib/src/interpolate_3d_3tl.cc
+++ b/Carpet/CarpetLib/src/interpolate_3d_3tl.cc
@@ -8,7 +8,6 @@
#include <loopcontrol.h>
-#include "gdata.hh"
#include "operator_prototypes_3d.hh"
#include "typeprops.hh"
@@ -22,9 +21,11 @@ namespace CarpetLib {
#define SRCIND3(i,j,k) \
index3 (srcioff + (i), srcjoff + (j), srckoff + (k), \
+ srcipadext, srcjpadext, srckpadext, \
srciext, srcjext, srckext)
#define DSTIND3(i,j,k) \
index3 (dstioff + (i), dstjoff + (j), dstkoff + (k), \
+ dstipadext, dstjpadext, dstkpadext, \
dstiext, dstjext, dstkext)
@@ -37,9 +38,11 @@ namespace CarpetLib {
CCTK_REAL const t2,
T const * restrict const src3,
CCTK_REAL const t3,
+ ivect3 const & restrict srcpadext,
ivect3 const & restrict srcext,
T * restrict const dst,
CCTK_REAL const t,
+ ivect3 const & restrict dstpadext,
ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
@@ -75,12 +78,6 @@ namespace CarpetLib {
CCTK_WARN (0, "Internal error: region extent is not contained in array extent");
}
- if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or
- dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride())))
- {
- CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes");
- }
-
ivect3 const regext = regbbox.shape() / regbbox.stride();
@@ -91,6 +88,14 @@ namespace CarpetLib {
+ ptrdiff_t const srcipadext = srcpadext[0];
+ ptrdiff_t const srcjpadext = srcpadext[1];
+ ptrdiff_t const srckpadext = srcpadext[2];
+
+ ptrdiff_t const dstipadext = dstpadext[0];
+ ptrdiff_t const dstjpadext = dstpadext[1];
+ ptrdiff_t const dstkpadext = dstpadext[2];
+
ptrdiff_t const srciext = srcext[0];
ptrdiff_t const srcjext = srcext[1];
ptrdiff_t const srckext = srcext[2];
@@ -133,7 +138,8 @@ namespace CarpetLib {
// Loop over region
#pragma omp parallel
CCTK_LOOP3(interpolate_3d_3tl,
- i,j,k, 0,0,0, regiext,regjext,regkext, srciext,srcjext,srckext)
+ i,j,k, 0,0,0, regiext,regjext,regkext,
+ dstipadext,dstjpadext,dstkpadext)
{
dst [DSTIND3(i, j, k)] =
@@ -147,23 +153,25 @@ namespace CarpetLib {
-#define TYPECASE(N,T) \
- template \
- void \
- interpolate_3d_3tl (T const * restrict const src1, \
- CCTK_REAL const t1, \
- T const * restrict const src2, \
- CCTK_REAL const t2, \
- T const * restrict const src3, \
- CCTK_REAL const t3, \
- ivect3 const & restrict srcext, \
- T * restrict const dst, \
- CCTK_REAL const t, \
- ivect3 const & restrict dstext, \
- ibbox3 const & restrict srcbbox, \
- ibbox3 const & restrict dstbbox, \
- ibbox3 const & restrict, \
- ibbox3 const & restrict regbbox, \
+#define TYPECASE(N,T) \
+ template \
+ void \
+ interpolate_3d_3tl (T const * restrict const src1, \
+ CCTK_REAL const t1, \
+ T const * restrict const src2, \
+ CCTK_REAL const t2, \
+ T const * restrict const src3, \
+ CCTK_REAL const t3, \
+ ivect3 const & restrict srcpadext, \
+ ivect3 const & restrict srcext, \
+ T * restrict const dst, \
+ CCTK_REAL const t, \
+ ivect3 const & restrict dstpadext, \
+ ivect3 const & restrict dstext, \
+ ibbox3 const & restrict srcbbox, \
+ ibbox3 const & restrict dstbbox, \
+ ibbox3 const & restrict, \
+ ibbox3 const & restrict regbbox, \
void * extraargs);
#include "typecase.hh"
#undef TYPECASE
diff --git a/Carpet/CarpetLib/src/interpolate_3d_4tl.cc b/Carpet/CarpetLib/src/interpolate_3d_4tl.cc
index 16039c7d8..74bca1837 100644
--- a/Carpet/CarpetLib/src/interpolate_3d_4tl.cc
+++ b/Carpet/CarpetLib/src/interpolate_3d_4tl.cc
@@ -8,7 +8,6 @@
#include <loopcontrol.h>
-#include "gdata.hh"
#include "operator_prototypes_3d.hh"
#include "typeprops.hh"
@@ -22,9 +21,11 @@ namespace CarpetLib {
#define SRCIND3(i,j,k) \
index3 (srcioff + (i), srcjoff + (j), srckoff + (k), \
+ srcipadext, srcjpadext, srckpadext, \
srciext, srcjext, srckext)
#define DSTIND3(i,j,k) \
index3 (dstioff + (i), dstjoff + (j), dstkoff + (k), \
+ dstipadext, dstjpadext, dstkpadext, \
dstiext, dstjext, dstkext)
@@ -39,9 +40,11 @@ namespace CarpetLib {
CCTK_REAL const t3,
T const * restrict const src4,
CCTK_REAL const t4,
+ ivect3 const & restrict srcpadext,
ivect3 const & restrict srcext,
T * restrict const dst,
CCTK_REAL const t,
+ ivect3 const & restrict dstpadext,
ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
@@ -77,12 +80,6 @@ namespace CarpetLib {
CCTK_WARN (0, "Internal error: region extent is not contained in array extent");
}
- if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or
- dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride())))
- {
- CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes");
- }
-
ivect3 const regext = regbbox.shape() / regbbox.stride();
@@ -93,6 +90,14 @@ namespace CarpetLib {
+ ptrdiff_t const srcipadext = srcpadext[0];
+ ptrdiff_t const srcjpadext = srcpadext[1];
+ ptrdiff_t const srckpadext = srcpadext[2];
+
+ ptrdiff_t const dstipadext = dstpadext[0];
+ ptrdiff_t const dstjpadext = dstpadext[1];
+ ptrdiff_t const dstkpadext = dstpadext[2];
+
ptrdiff_t const srciext = srcext[0];
ptrdiff_t const srcjext = srcext[1];
ptrdiff_t const srckext = srcext[2];
@@ -140,7 +145,8 @@ namespace CarpetLib {
// Loop over region
#pragma omp parallel
CCTK_LOOP3(interpolate_3d_4tl,
- i,j,k, 0,0,0, regiext,regjext,regkext, srciext,srcjext,srckext)
+ i,j,k, 0,0,0, regiext,regjext,regkext,
+ dstipadext,dstjpadext,dstkpadext)
{
dst [DSTIND3(i, j, k)] =
@@ -155,25 +161,27 @@ namespace CarpetLib {
-#define TYPECASE(N,T) \
- template \
- void \
- interpolate_3d_4tl (T const * restrict const src1, \
- CCTK_REAL const t1, \
- T const * restrict const src2, \
- CCTK_REAL const t2, \
- T const * restrict const src3, \
- CCTK_REAL const t3, \
- T const * restrict const src4, \
- CCTK_REAL const t4, \
- ivect3 const & restrict srcext, \
- T * restrict const dst, \
- CCTK_REAL const t, \
- ivect3 const & restrict dstext, \
- ibbox3 const & restrict srcbbox, \
- ibbox3 const & restrict dstbbox, \
- ibbox3 const & restrict, \
- ibbox3 const & restrict regbbox, \
+#define TYPECASE(N,T) \
+ template \
+ void \
+ interpolate_3d_4tl (T const * restrict const src1, \
+ CCTK_REAL const t1, \
+ T const * restrict const src2, \
+ CCTK_REAL const t2, \
+ T const * restrict const src3, \
+ CCTK_REAL const t3, \
+ T const * restrict const src4, \
+ CCTK_REAL const t4, \
+ ivect3 const & restrict srcpadext, \
+ ivect3 const & restrict srcext, \
+ T * restrict const dst, \
+ CCTK_REAL const t, \
+ ivect3 const & restrict dstpadext, \
+ ivect3 const & restrict dstext, \
+ ibbox3 const & restrict srcbbox, \
+ ibbox3 const & restrict dstbbox, \
+ ibbox3 const & restrict, \
+ ibbox3 const & restrict regbbox, \
void * extraargs);
#include "typecase.hh"
#undef TYPECASE
diff --git a/Carpet/CarpetLib/src/interpolate_3d_5tl.cc b/Carpet/CarpetLib/src/interpolate_3d_5tl.cc
index bb7f4d06d..74913a131 100644
--- a/Carpet/CarpetLib/src/interpolate_3d_5tl.cc
+++ b/Carpet/CarpetLib/src/interpolate_3d_5tl.cc
@@ -8,7 +8,6 @@
#include <loopcontrol.h>
-#include "gdata.hh"
#include "operator_prototypes_3d.hh"
#include "typeprops.hh"
@@ -22,9 +21,11 @@ namespace CarpetLib {
#define SRCIND3(i,j,k) \
index3 (srcioff + (i), srcjoff + (j), srckoff + (k), \
+ srcipadext, srcjpadext, srckpadext, \
srciext, srcjext, srckext)
#define DSTIND3(i,j,k) \
index3 (dstioff + (i), dstjoff + (j), dstkoff + (k), \
+ dstipadext, dstjpadext, dstkpadext, \
dstiext, dstjext, dstkext)
@@ -41,9 +42,11 @@ namespace CarpetLib {
CCTK_REAL const t4,
T const * restrict const src5,
CCTK_REAL const t5,
+ ivect3 const & restrict srcpadext,
ivect3 const & restrict srcext,
T * restrict const dst,
CCTK_REAL const t,
+ ivect3 const & restrict dstpadext,
ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
@@ -79,12 +82,6 @@ namespace CarpetLib {
CCTK_WARN (0, "Internal error: region extent is not contained in array extent");
}
- if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or
- dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride())))
- {
- CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes");
- }
-
ivect3 const regext = regbbox.shape() / regbbox.stride();
@@ -95,6 +92,14 @@ namespace CarpetLib {
+ ptrdiff_t const srcipadext = srcpadext[0];
+ ptrdiff_t const srcjpadext = srcpadext[1];
+ ptrdiff_t const srckpadext = srcpadext[2];
+
+ ptrdiff_t const dstipadext = dstpadext[0];
+ ptrdiff_t const dstjpadext = dstpadext[1];
+ ptrdiff_t const dstkpadext = dstpadext[2];
+
ptrdiff_t const srciext = srcext[0];
ptrdiff_t const srcjext = srcext[1];
ptrdiff_t const srckext = srcext[2];
@@ -145,7 +150,8 @@ namespace CarpetLib {
// Loop over region
#pragma omp parallel
CCTK_LOOP3(interpolate_3d_5tl,
- i,j,k, 0,0,0, regiext,regjext,regkext, srciext,srcjext,srckext)
+ i,j,k, 0,0,0, regiext,regjext,regkext,
+ dstipadext,dstjpadext,dstkpadext)
{
dst [DSTIND3(i, j, k)] =
@@ -161,27 +167,29 @@ namespace CarpetLib {
-#define TYPECASE(N,T) \
- template \
- void \
- interpolate_3d_5tl (T const * restrict const src1, \
- CCTK_REAL const t1, \
- T const * restrict const src2, \
- CCTK_REAL const t2, \
- T const * restrict const src3, \
- CCTK_REAL const t3, \
- T const * restrict const src4, \
- CCTK_REAL const t4, \
- T const * restrict const src5, \
- CCTK_REAL const t5, \
- ivect3 const & restrict srcext, \
- T * restrict const dst, \
- CCTK_REAL const t, \
- ivect3 const & restrict dstext, \
- ibbox3 const & restrict srcbbox, \
- ibbox3 const & restrict dstbbox, \
- ibbox3 const & restrict, \
- ibbox3 const & restrict regbbox, \
+#define TYPECASE(N,T) \
+ template \
+ void \
+ interpolate_3d_5tl (T const * restrict const src1, \
+ CCTK_REAL const t1, \
+ T const * restrict const src2, \
+ CCTK_REAL const t2, \
+ T const * restrict const src3, \
+ CCTK_REAL const t3, \
+ T const * restrict const src4, \
+ CCTK_REAL const t4, \
+ T const * restrict const src5, \
+ CCTK_REAL const t5, \
+ ivect3 const & restrict srcpadext, \
+ ivect3 const & restrict srcext, \
+ T * restrict const dst, \
+ CCTK_REAL const t, \
+ ivect3 const & restrict dstpadext, \
+ ivect3 const & restrict dstext, \
+ ibbox3 const & restrict srcbbox, \
+ ibbox3 const & restrict dstbbox, \
+ ibbox3 const & restrict, \
+ ibbox3 const & restrict regbbox, \
void * extraargs);
#include "typecase.hh"
#undef TYPECASE
diff --git a/Carpet/CarpetLib/src/interpolate_eno_3d_3tl.cc b/Carpet/CarpetLib/src/interpolate_eno_3d_3tl.cc
index 969ec4bbf..5d56ed3e8 100644
--- a/Carpet/CarpetLib/src/interpolate_eno_3d_3tl.cc
+++ b/Carpet/CarpetLib/src/interpolate_eno_3d_3tl.cc
@@ -8,7 +8,6 @@
#include <loopcontrol.h>
-#include "gdata.hh"
#include "operator_prototypes_3d.hh"
#include "typeprops.hh"
@@ -22,9 +21,11 @@ namespace CarpetLib {
#define SRCIND3(i,j,k) \
index3 (srcioff + (i), srcjoff + (j), srckoff + (k), \
+ srcipadext, srcjpadext, srckpadext, \
srciext, srcjext, srckext)
#define DSTIND3(i,j,k) \
index3 (dstioff + (i), dstjoff + (j), dstkoff + (k), \
+ dstipadext, dstjpadext, dstkpadext, \
dstiext, dstjext, dstkext)
@@ -55,9 +56,11 @@ namespace CarpetLib {
CCTK_REAL const t2,
T const * restrict const src3,
CCTK_REAL const t3,
+ ivect3 const & restrict srcpadext,
ivect3 const & restrict srcext,
T * restrict const dst,
CCTK_REAL const t,
+ ivect3 const & restrict dstpadext,
ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
@@ -93,12 +96,6 @@ namespace CarpetLib {
CCTK_WARN (0, "Internal error: region extent is not contained in array extent");
}
- if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or
- dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride())))
- {
- CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes");
- }
-
ivect3 const regext = regbbox.shape() / regbbox.stride();
@@ -109,6 +106,14 @@ namespace CarpetLib {
+ ptrdiff_t const srcipadext = srcpadext[0];
+ ptrdiff_t const srcjpadext = srcpadext[1];
+ ptrdiff_t const srckpadext = srcpadext[2];
+
+ ptrdiff_t const dstipadext = dstpadext[0];
+ ptrdiff_t const dstjpadext = dstpadext[1];
+ ptrdiff_t const dstkpadext = dstpadext[2];
+
ptrdiff_t const srciext = srcext[0];
ptrdiff_t const srcjext = srcext[1];
ptrdiff_t const srckext = srcext[2];
@@ -172,7 +177,8 @@ namespace CarpetLib {
// Loop over region
#pragma omp parallel
CCTK_LOOP3 (interpolate_end_3d_3tl,
- i,j,k, 0,0,0, regiext,regjext,regkext, srciext,srcjext,srckext)
+ i,j,k, 0,0,0, regiext,regjext,regkext,
+ srcipadext,srcjpadext,srckpadext)
{
T const s1 = src1 [SRCIND3(i, j, k)];
@@ -209,9 +215,11 @@ namespace CarpetLib {
CCTK_REAL const t2,
CCTK_COMPLEX8 const * restrict const src3,
CCTK_REAL const t3,
+ ivect3 const & restrict srcpadext,
ivect3 const & restrict srcext,
CCTK_COMPLEX8 * restrict const dst,
CCTK_REAL const t,
+ ivect3 const & restrict dstpadext,
ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
@@ -234,9 +242,11 @@ namespace CarpetLib {
CCTK_REAL const t2,
CCTK_COMPLEX16 const * restrict const src3,
CCTK_REAL const t3,
+ ivect3 const & restrict srcpadext,
ivect3 const & restrict srcext,
CCTK_COMPLEX16 * restrict const dst,
CCTK_REAL const t,
+ ivect3 const & restrict dstpadext,
ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
@@ -257,9 +267,11 @@ namespace CarpetLib {
CCTK_REAL const t2,
CCTK_COMPLEX32 const * restrict const src3,
CCTK_REAL const t3,
+ ivect3 const & restrict srcpadext,
ivect3 const & restrict srcext,
CCTK_COMPLEX32 * restrict const dst,
CCTK_REAL const t,
+ ivect3 const & restrict dstpadext,
ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
@@ -282,9 +294,11 @@ namespace CarpetLib {
CCTK_REAL const t2, \
T const * restrict const src3, \
CCTK_REAL const t3, \
+ ivect3 const & restrict srcpadext, \
ivect3 const & restrict srcext, \
T * restrict const dst, \
CCTK_REAL const t, \
+ ivect3 const & restrict dstpadext, \
ivect3 const & restrict dstext, \
ibbox3 const & restrict srcbbox, \
ibbox3 const & restrict dstbbox, \
diff --git a/Carpet/CarpetLib/src/make.code.defn b/Carpet/CarpetLib/src/make.code.defn
index 0956754cc..aab0f5372 100644
--- a/Carpet/CarpetLib/src/make.code.defn
+++ b/Carpet/CarpetLib/src/make.code.defn
@@ -7,6 +7,7 @@ SRCS = backtrace.cc \
bboxset.cc \
bboxtree.cc \
bintree.cc \
+ cacheinfo.cc \
commstate.cc \
data.cc \
defs.cc \
diff --git a/Carpet/CarpetLib/src/mem.cc b/Carpet/CarpetLib/src/mem.cc
index 27f86e951..f3a4fa6ff 100644
--- a/Carpet/CarpetLib/src/mem.cc
+++ b/Carpet/CarpetLib/src/mem.cc
@@ -20,7 +20,12 @@ using namespace std;
-double const gmem::MEGA = 1000*1000;
+double const gmem::KILO = 1000.0;
+double const gmem::MEGA = 1000.0*1000.0;
+double const gmem::GIGA = 1000.0*1000.0*1000.0;
+double const gmem::TERA = 1000.0*1000.0*1000.0*1000.0;
+double const gmem::PETA = 1000.0*1000.0*1000.0*1000.0*1000.0;
+double const gmem::EXA = 1000.0*1000.0*1000.0*1000.0*1000.0*1000.0;
// Total number of currently allocated bytes and objects
double gmem::total_allocated_bytes = 0;
@@ -38,7 +43,8 @@ template<typename T>
mem<T>::
mem (size_t const vectorlength, size_t const nelems,
T * const memptr, size_t const memsize)
- : storage_ (memptr),
+ : storage_base_ (memptr),
+ storage_ (memptr),
nelems_ (nelems),
vectorlength_ (vectorlength),
owns_storage_ (false),
@@ -62,7 +68,10 @@ mem (size_t const vectorlength, size_t const nelems,
int(max_allowed_memory_MB));
}
try {
- storage_ = new T [vectorlength * nelems];
+ // TODO: align and pad storage
+ size_t const padding = 0;
+ storage_base_ = new T [vectorlength * nelems + padding];
+ storage_ = storage_base_ + padding;
owns_storage_ = true;
} catch (...) {
T Tdummy;
@@ -77,7 +86,9 @@ mem (size_t const vectorlength, size_t const nelems,
total_allocated_bytes += nbytes;
max_allocated_bytes = max (max_allocated_bytes, total_allocated_bytes);
if (poison_new_memory) {
- memset (storage_, poison_value, vectorlength * nelems * sizeof (T));
+ memset (storage_base_,
+ poison_value, (vectorlength * nelems +
+ storage_ - storage_base_) * sizeof (T));
}
} else {
assert (memsize >= vectorlength * nelems * sizeof (T));
@@ -96,7 +107,7 @@ mem<T>::
{
assert (not has_clients());
if (owns_storage_) {
- delete [] storage_;
+ delete [] storage_base_;
const double nbytes = vectorlength_ * nelems_ * sizeof (T);
total_allocated_bytes -= nbytes;
}
@@ -144,13 +155,15 @@ memory ()
const
{
return
+ memoryof (storage_base_) +
memoryof (storage_) +
memoryof (nelems_) +
memoryof (vectorlength_) +
memoryof (owns_storage_) +
memoryof (clients_) +
memoryof (num_clients_) +
- (owns_storage_ ? sizeof (T) * vectorlength_ * nelems_ : 0);
+ (owns_storage_ ? (vectorlength_ * nelems_ +
+ storage_ - storage_base_) : 0) * sizeof (T);
}
diff --git a/Carpet/CarpetLib/src/mem.hh b/Carpet/CarpetLib/src/mem.hh
index ff7f05080..807daff70 100644
--- a/Carpet/CarpetLib/src/mem.hh
+++ b/Carpet/CarpetLib/src/mem.hh
@@ -19,7 +19,12 @@ class gmem
{
public:
+ static double const KILO;
static double const MEGA;
+ static double const GIGA;
+ static double const TERA;
+ static double const PETA;
+ static double const EXA ;
// Total number of currently allocated bytes and objects
static double total_allocated_bytes;
@@ -35,6 +40,7 @@ public:
template<typename T>
class mem: public gmem
{
+ T * storage_base_;
T * storage_;
size_t nelems_;
size_t vectorlength_;
diff --git a/Carpet/CarpetLib/src/operator_prototypes_3d.hh b/Carpet/CarpetLib/src/operator_prototypes_3d.hh
index 6b344b7c3..6d8850726 100644
--- a/Carpet/CarpetLib/src/operator_prototypes_3d.hh
+++ b/Carpet/CarpetLib/src/operator_prototypes_3d.hh
@@ -22,6 +22,7 @@ namespace CarpetLib {
static inline
size_t
index3 (size_t const i, size_t const j, size_t const k,
+ size_t const padexti, size_t const padextj, size_t const padextk,
size_t const exti, size_t const extj, size_t const extk)
{
#ifdef CARPET_DEBUG
@@ -30,7 +31,7 @@ namespace CarpetLib {
assert (static_cast <ptrdiff_t> (k) >= 0 and k < extk);
#endif
- return i + exti * (j + extj * k);
+ return i + padexti * (j + padextj * k);
}
@@ -56,8 +57,10 @@ namespace CarpetLib {
template <typename T>
void
copy_3d (T const * restrict const src,
+ ivect3 const & restrict srcpadext,
ivect3 const & restrict srcext,
T * restrict const dst,
+ ivect3 const & restrict dstpadext,
ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
@@ -70,8 +73,10 @@ namespace CarpetLib {
template <typename T, int ORDER>
void
prolongate_3d_rf2 (T const * restrict const src,
+ ivect3 const & restrict srcpadext,
ivect3 const & restrict srcext,
T * restrict const dst,
+ ivect3 const & restrict dstpadext,
ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
@@ -79,87 +84,15 @@ namespace CarpetLib {
ibbox3 const & restrict dstregbbox,
void * extraargs);
-#if 0
- template <typename T>
- void
- prolongate_3d_o1_rf2 (T const * restrict const src,
- ivect3 const & restrict srcext,
- T * restrict const dst,
- ivect3 const & restrict dstext,
- ibbox3 const & restrict srcbbox,
- ibbox3 const & restrict dstbbox,
- ibbox3 const & restrict srcregbbox,
- ibbox3 const & restrict dstregbbox,
- void * extraargs);
-
- template <typename T>
- void
- prolongate_3d_o3_rf2 (T const * restrict const src,
- ivect3 const & restrict srcext,
- T * restrict const dst,
- ivect3 const & restrict dstext,
- ibbox3 const & restrict srcbbox,
- ibbox3 const & restrict dstbbox,
- ibbox3 const & restrict srcregbbox,
- ibbox3 const & restrict dstregbbox,
- void * extraargs);
-
- template <typename T>
- void
- prolongate_3d_o5_rf2 (T const * restrict const src,
- ivect3 const & restrict srcext,
- T * restrict const dst,
- ivect3 const & restrict dstext,
- ibbox3 const & restrict srcbbox,
- ibbox3 const & restrict dstbbox,
- ibbox3 const & restrict srcregbbox,
- ibbox3 const & restrict dstregbbox,
- void * extraargs);
-
- template <typename T>
- void
- prolongate_3d_o7_rf2 (T const * restrict const src,
- ivect3 const & restrict srcext,
- T * restrict const dst,
- ivect3 const & restrict dstext,
- ibbox3 const & restrict srcbbox,
- ibbox3 const & restrict dstbbox,
- ibbox3 const & restrict srcregbbox,
- ibbox3 const & restrict dstregbbox,
- void * extraargs);
-
- template <typename T>
- void
- prolongate_3d_o9_rf2 (T const * restrict const src,
- ivect3 const & restrict srcext,
- T * restrict const dst,
- ivect3 const & restrict dstext,
- ibbox3 const & restrict srcbbox,
- ibbox3 const & restrict dstbbox,
- ibbox3 const & restrict srcregbbox,
- ibbox3 const & restrict dstregbbox,
- void * extraargs);
-
- template <typename T>
- void
- prolongate_3d_o11_rf2 (T const * restrict const src,
- ivect3 const & restrict srcext,
- T * restrict const dst,
- ivect3 const & restrict dstext,
- ibbox3 const & restrict srcbbox,
- ibbox3 const & restrict dstbbox,
- ibbox3 const & restrict srcregbbox,
- ibbox3 const & restrict dstregbbox,
- void * extraargs);
-#endif
-
template <typename T>
void
prolongate_3d_o5_monotone_rf2 (T const * restrict const src,
+ ivect3 const & restrict srcpadext,
ivect3 const & restrict srcext,
T * restrict const dst,
+ ivect3 const & restrict dstpadext,
ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
@@ -172,8 +105,10 @@ namespace CarpetLib {
template <typename T, int ORDER>
void
prolongate_3d_cc_rf2 (T const * restrict const src,
+ ivect3 const & restrict srcpadext,
ivect3 const & restrict srcext,
T * restrict const dst,
+ ivect3 const & restrict dstpadext,
ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
@@ -181,49 +116,13 @@ namespace CarpetLib {
ibbox3 const & restrict dstregbbox,
void * extraargs);
-#if 0
- template <typename T>
- void
- prolongate_3d_cc_o0_rf2 (T const * restrict const src,
- ivect3 const & restrict srcext,
- T * restrict const dst,
- ivect3 const & restrict dstext,
- ibbox3 const & restrict srcbbox,
- ibbox3 const & restrict dstbbox,
- ibbox3 const & restrict srcregbbox,
- ibbox3 const & restrict dstregbbox,
- void * extraargs);
-
- template <typename T>
- void
- prolongate_3d_cc_o1_rf2 (T const * restrict const src,
- ivect3 const & restrict srcext,
- T * restrict const dst,
- ivect3 const & restrict dstext,
- ibbox3 const & restrict srcbbox,
- ibbox3 const & restrict dstbbox,
- ibbox3 const & restrict srcregbbox,
- ibbox3 const & restrict dstregbbox,
- void * extraargs);
-
- template <typename T>
- void
- prolongate_3d_cc_o2_rf2 (T const * restrict const src,
- ivect3 const & restrict srcext,
- T * restrict const dst,
- ivect3 const & restrict dstext,
- ibbox3 const & restrict srcbbox,
- ibbox3 const & restrict dstbbox,
- ibbox3 const & restrict srcregbbox,
- ibbox3 const & restrict dstregbbox,
- void * extraargs);
-#endif
-
template <typename T, int ORDER>
void
prolongate_3d_cc_eno_rf2 (T const * restrict const src,
+ ivect3 const & restrict srcpadext,
ivect3 const & restrict srcext,
T * restrict const dst,
+ ivect3 const & restrict dstpadext,
ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
@@ -235,8 +134,10 @@ namespace CarpetLib {
template <typename T, int ORDER>
void
prolongate_3d_cc_eno_rf2 (T const * restrict const src,
+ ivect3 const & restrict srcpadext,
ivect3 const & restrict srcext,
T * restrict const dst,
+ ivect3 const & restrict dstpadext,
ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
@@ -260,8 +161,10 @@ namespace CarpetLib {
template <typename T, int ORDER>
void
prolongate_3d_dgfe_rf2 (T const * restrict const src,
+ ivect3 const & restrict srcpadext,
ivect3 const & restrict srcext,
T * restrict const dst,
+ ivect3 const & restrict dstpadext,
ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
@@ -274,8 +177,10 @@ namespace CarpetLib {
template <typename T>
void
restrict_3d_rf2 (T const * restrict const src,
+ ivect3 const & restrict srcpadext,
ivect3 const & restrict srcext,
T * restrict const dst,
+ ivect3 const & restrict dstpadext,
ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
@@ -291,9 +196,11 @@ namespace CarpetLib {
CCTK_REAL const t1,
T const * restrict const src2,
CCTK_REAL const t2,
+ ivect3 const & restrict srcpadext,
ivect3 const & restrict srcext,
T * restrict const dst,
CCTK_REAL const t,
+ ivect3 const & restrict dstpadext,
ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
@@ -309,9 +216,11 @@ namespace CarpetLib {
CCTK_REAL const t2,
T const * restrict const src3,
CCTK_REAL const t3,
+ ivect3 const & restrict srcpadext,
ivect3 const & restrict srcext,
T * restrict const dst,
CCTK_REAL const t,
+ ivect3 const & restrict dstpadext,
ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
@@ -329,9 +238,11 @@ namespace CarpetLib {
CCTK_REAL const t3,
T const * restrict const src4,
CCTK_REAL const t4,
+ ivect3 const & restrict srcpadext,
ivect3 const & restrict srcext,
T * restrict const dst,
CCTK_REAL const t,
+ ivect3 const & restrict dstpadext,
ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
@@ -351,9 +262,11 @@ namespace CarpetLib {
CCTK_REAL const t4,
T const * restrict const src5,
CCTK_REAL const t5,
+ ivect3 const & restrict srcpadext,
ivect3 const & restrict srcext,
T * restrict const dst,
CCTK_REAL const t,
+ ivect3 const & restrict dstpadext,
ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
@@ -369,9 +282,11 @@ namespace CarpetLib {
CCTK_REAL const t2,
T const * restrict const src3,
CCTK_REAL const t3,
+ ivect3 const & restrict srcpadext,
ivect3 const & restrict srcext,
T * restrict const dst,
CCTK_REAL const t,
+ ivect3 const & restrict dstpadext,
ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
@@ -384,8 +299,10 @@ namespace CarpetLib {
template <typename T>
void
restrict_3d_cc_rf2 (T const * restrict const src,
+ ivect3 const & restrict srcpadext,
ivect3 const & restrict srcext,
T * restrict const dst,
+ ivect3 const & restrict dstpadext,
ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
@@ -398,8 +315,10 @@ namespace CarpetLib {
template <typename T>
void
restrict_3d_cc_o3_rf2 (T const * restrict const src,
+ ivect3 const & restrict srcpadext,
ivect3 const & restrict srcext,
T * restrict const dst,
+ ivect3 const & restrict dstpadext,
ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
@@ -412,8 +331,10 @@ namespace CarpetLib {
template <typename T>
void
restrict_3d_cc_o5_rf2 (T const * restrict const src,
+ ivect3 const & restrict srcpadext,
ivect3 const & restrict srcext,
T * restrict const dst,
+ ivect3 const & restrict dstpadext,
ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
@@ -426,8 +347,10 @@ namespace CarpetLib {
template <typename T, int centi, int centj, int centk>
void
restrict_3d_vc_rf2 (T const * restrict const src,
+ ivect3 const & restrict srcpadext,
ivect3 const & restrict srcext,
T * restrict const dst,
+ ivect3 const & restrict dstpadext,
ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
@@ -440,8 +363,10 @@ namespace CarpetLib {
template <typename T, int ORDER>
void
restrict_3d_dgfe_rf2 (T const * restrict const src,
+ ivect3 const & restrict srcpadext,
ivect3 const & restrict srcext,
T * restrict const dst,
+ ivect3 const & restrict dstpadext,
ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
diff --git a/Carpet/CarpetLib/src/operator_prototypes_4d.hh b/Carpet/CarpetLib/src/operator_prototypes_4d.hh
index 1d0a4609a..71d1e3bcd 100644
--- a/Carpet/CarpetLib/src/operator_prototypes_4d.hh
+++ b/Carpet/CarpetLib/src/operator_prototypes_4d.hh
@@ -22,6 +22,7 @@ namespace CarpetLib {
static inline
size_t
index4 (size_t const i, size_t const j, size_t const k, size_t const l,
+ size_t const padexti, size_t const padextj, size_t const padextk, size_t const padextl,
size_t const exti, size_t const extj, size_t const extk, size_t const extl)
{
#ifdef CARPET_DEBUG
@@ -31,7 +32,7 @@ namespace CarpetLib {
assert (static_cast <ptrdiff_t> (l) >= 0 and l < extl);
#endif
- return i + exti * (j + extj * (k + extk * l));
+ return i + padexti * (j + padextj * (k + padextk * l));
}
@@ -47,8 +48,10 @@ namespace CarpetLib {
template <typename T>
void
copy_4d (T const * restrict const src,
+ ivect4 const & restrict srcpadext,
ivect4 const & restrict srcext,
T * restrict const dst,
+ ivect4 const & restrict dstpadext,
ivect4 const & restrict dstext,
ibbox4 const & restrict srcbbox,
ibbox4 const & restrict dstbbox,
@@ -61,8 +64,10 @@ namespace CarpetLib {
template <typename T>
void
prolongate_4d_o1_rf2 (T const * restrict const src,
+ ivect4 const & restrict srcpadext,
ivect4 const & restrict srcext,
T * restrict const dst,
+ ivect4 const & restrict dstpadext,
ivect4 const & restrict dstext,
ibbox4 const & restrict srcbbox,
ibbox4 const & restrict dstbbox,
@@ -75,8 +80,10 @@ namespace CarpetLib {
template <typename T>
void
restrict_4d_rf2 (T const * restrict const src,
+ ivect4 const & restrict srcpadext,
ivect4 const & restrict srcext,
T * restrict const dst,
+ ivect4 const & restrict dstpadext,
ivect4 const & restrict dstext,
ibbox4 const & restrict srcbbox,
ibbox4 const & restrict dstbbox,
diff --git a/Carpet/CarpetLib/src/prolongate_3d_cc_eno_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_cc_eno_rf2.cc
index 5cd8fdd2c..7b5c6cee2 100644
--- a/Carpet/CarpetLib/src/prolongate_3d_cc_eno_rf2.cc
+++ b/Carpet/CarpetLib/src/prolongate_3d_cc_eno_rf2.cc
@@ -6,7 +6,6 @@
#include <cmath>
#include <cstdlib>
-#include "gdata.hh"
#include "operator_prototypes_3d.hh"
#include "typeprops.hh"
@@ -43,13 +42,22 @@ using namespace std;
namespace CarpetLib {
-
-#define SRCIND3(i,j,k) index3 (i, j, k, srciext, srcjext, srckext)
-#define DSTIND3(i,j,k) index3 (i, j, k, dstiext, dstjext, dstkext)
-#define SRCOFF3(i,j,k) offset3 (i, j, k, srciext, srcjext, srckext)
-#define DSTOFF3(i,j,k) offset3 (i, j, k, dstiext, dstjext, dstkext)
+#define SRCIND3(i,j,k) \
+ index3 (i, j, k, \
+ srcipadext, srcjpadext, srckpadext, \
+ srciext, srcjext, srckext)
+#define DSTIND3(i,j,k) \
+ index3 (i, j, k, \
+ dstipadext, dstjpadext, dstkpadext, \
+ dstiext, dstjext, dstkext)
+#define SRCOFF3(i,j,k) \
+ offset3 (i, j, k, \
+ srciext, srcjext, srckext)
+#define DSTOFF3(i,j,k) \
+ offset3 (i, j, k, \
+ dstiext, dstjext, dstkext)
@@ -947,8 +955,10 @@ namespace CarpetLib {
template <typename T, int ORDER>
void
prolongate_3d_cc_eno_rf2 (T const * restrict const src,
+ ivect3 const & restrict srcpadext,
ivect3 const & restrict srcext,
T * restrict const dst,
+ ivect3 const & restrict dstpadext,
ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
@@ -1033,13 +1043,15 @@ namespace CarpetLib {
CCTK_WARN (0, "Internal error: region extent is not contained in array extent");
}
- if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or
- dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride())))
- {
- CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes");
- }
+ size_t const srcipadext = srcpadext[0];
+ size_t const srcjpadext = srcpadext[1];
+ size_t const srckpadext = srcpadext[2];
+
+ size_t const dstipadext = dstpadext[0];
+ size_t const dstjpadext = dstpadext[1];
+ size_t const dstkpadext = dstpadext[2];
size_t const srciext = srcext[0];
size_t const srcjext = srcext[1];
@@ -1283,8 +1295,10 @@ namespace CarpetLib {
template <> \
void \
prolongate_3d_cc_eno_rf2<T,2> (T const * restrict const src, \
+ ivect3 const & restrict srcpadext, \
ivect3 const & restrict srcext, \
T * restrict const dst, \
+ ivect3 const & restrict dstpadext, \
ivect3 const & restrict dstext, \
ibbox3 const & restrict srcbbox, \
ibbox3 const & restrict dstbbox, \
@@ -1299,8 +1313,10 @@ namespace CarpetLib {
template <> \
void \
prolongate_3d_cc_eno_rf2<T,3> (T const * restrict const src, \
+ ivect3 const & restrict srcpadext, \
ivect3 const & restrict srcext, \
T * restrict const dst, \
+ ivect3 const & restrict dstpadext, \
ivect3 const & restrict dstext, \
ibbox3 const & restrict srcbbox, \
ibbox3 const & restrict dstbbox, \
@@ -1324,8 +1340,10 @@ namespace CarpetLib {
template \
void \
prolongate_3d_cc_eno_rf2<T,2> (T const * restrict const src, \
+ ivect3 const & restrict srcpadext, \
ivect3 const & restrict srcext, \
T * restrict const dst, \
+ ivect3 const & restrict dstpadext, \
ivect3 const & restrict dstext, \
ibbox3 const & restrict srcbbox, \
ibbox3 const & restrict dstbbox, \
@@ -1337,8 +1355,10 @@ namespace CarpetLib {
template \
void \
prolongate_3d_cc_eno_rf2<T,3> (T const * restrict const src, \
+ ivect3 const & restrict srcpadext, \
ivect3 const & restrict srcext, \
T * restrict const dst, \
+ ivect3 const & restrict dstpadext, \
ivect3 const & restrict dstext, \
ibbox3 const & restrict srcbbox, \
ibbox3 const & restrict dstbbox, \
diff --git a/Carpet/CarpetLib/src/prolongate_3d_cc_real8_tvd.F90 b/Carpet/CarpetLib/src/prolongate_3d_cc_real8_tvd.F90
index b39f43766..c970e41e6 100644
--- a/Carpet/CarpetLib/src/prolongate_3d_cc_real8_tvd.F90
+++ b/Carpet/CarpetLib/src/prolongate_3d_cc_real8_tvd.F90
@@ -52,18 +52,21 @@
-subroutine prolongate_3d_cc_real8_tvd (src, srciext, srcjext, &
- srckext, dst, dstiext, dstjext, dstkext, srcbbox, &
- dstbbox, regbbox)
+subroutine prolongate_3d_cc_real8_tvd ( &
+ src, srcipadext, srcjpadext, srckpadext, srciext, srcjext, srckext, &
+ dst, dstipadext, dstjpadext, dstkpadext, dstiext, dstjext, dstkext, &
+ srcbbox, dstbbox, regbbox)
implicit none
CCTK_REAL8, parameter :: one=1, fourth=one/4
+ integer srcipadext, srcjpadext, srckpadext
integer srciext, srcjext, srckext
- CCTK_REAL8 src(srciext,srcjext,srckext)
+ CCTK_REAL8 src(srcipadext,srcjpadext,srckpadext)
+ integer dstipadext, dstjpadext, dstkpadext
integer dstiext, dstjext, dstkext
- CCTK_REAL8 dst(dstiext,dstjext,dstkext)
+ CCTK_REAL8 dst(dstipadext,dstjpadext,dstkpadext)
! bbox(:,1) is lower boundary (inclusive)
! bbox(:,2) is upper boundary (inclusive)
! bbox(:,3) is stride
diff --git a/Carpet/CarpetLib/src/prolongate_3d_cc_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_cc_rf2.cc
index bf960f0aa..269b34947 100644
--- a/Carpet/CarpetLib/src/prolongate_3d_cc_rf2.cc
+++ b/Carpet/CarpetLib/src/prolongate_3d_cc_rf2.cc
@@ -6,7 +6,6 @@
#include <cmath>
#include <cstdlib>
-#include "gdata.hh"
#include "operator_prototypes_3d.hh"
#include "typeprops.hh"
@@ -43,13 +42,22 @@ using namespace std;
namespace CarpetLib {
-
-#define SRCIND3(i,j,k) index3 (i, j, k, srciext, srcjext, srckext)
-#define DSTIND3(i,j,k) index3 (i, j, k, dstiext, dstjext, dstkext)
-#define SRCOFF3(i,j,k) offset3 (i, j, k, srciext, srcjext, srckext)
-#define DSTOFF3(i,j,k) offset3 (i, j, k, dstiext, dstjext, dstkext)
+#define SRCIND3(i,j,k) \
+ index3 (i, j, k, \
+ srcipadext, srcjpadext, srckpadext, \
+ srciext, srcjext, srckext)
+#define DSTIND3(i,j,k) \
+ index3 (i, j, k, \
+ dstipadext, dstjpadext, dstkpadext, \
+ dstiext, dstjext, dstkext)
+#define SRCOFF3(i,j,k) \
+ offset3 (i, j, k, \
+ srciext, srcjext, srckext)
+#define DSTOFF3(i,j,k) \
+ offset3 (i, j, k, \
+ dstiext, dstjext, dstkext)
@@ -320,8 +328,10 @@ namespace CarpetLib {
template <typename T, int ORDER>
void
prolongate_3d_cc_rf2 (T const * restrict const src,
+ ivect3 const & restrict srcpadext,
ivect3 const & restrict srcext,
T * restrict const dst,
+ ivect3 const & restrict dstpadext,
ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
@@ -406,13 +416,15 @@ namespace CarpetLib {
CCTK_WARN (0, "Internal error: region extent is not contained in array extent");
}
- if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or
- dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride())))
- {
- CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes");
- }
+ size_t const srcipadext = srcpadext[0];
+ size_t const srcjpadext = srcpadext[1];
+ size_t const srckpadext = srcpadext[2];
+
+ size_t const dstipadext = dstpadext[0];
+ size_t const dstjpadext = dstpadext[1];
+ size_t const dstkpadext = dstpadext[2];
size_t const srciext = srcext[0];
size_t const srcjext = srcext[1];
@@ -656,8 +668,10 @@ namespace CarpetLib {
template \
void \
prolongate_3d_cc_rf2<T,0> (T const * restrict const src, \
+ ivect3 const & restrict srcpadext, \
ivect3 const & restrict srcext, \
T * restrict const dst, \
+ ivect3 const & restrict dstpadext, \
ivect3 const & restrict dstext, \
ibbox3 const & restrict srcbbox, \
ibbox3 const & restrict dstbbox, \
@@ -668,8 +682,10 @@ namespace CarpetLib {
template \
void \
prolongate_3d_cc_rf2<T,1> (T const * restrict const src, \
+ ivect3 const & restrict srcpadext, \
ivect3 const & restrict srcext, \
T * restrict const dst, \
+ ivect3 const & restrict dstpadext, \
ivect3 const & restrict dstext, \
ibbox3 const & restrict srcbbox, \
ibbox3 const & restrict dstbbox, \
@@ -680,8 +696,10 @@ namespace CarpetLib {
template \
void \
prolongate_3d_cc_rf2<T,2> (T const * restrict const src, \
+ ivect3 const & restrict srcpadext, \
ivect3 const & restrict srcext, \
T * restrict const dst, \
+ ivect3 const & restrict dstpadext, \
ivect3 const & restrict dstext, \
ibbox3 const & restrict srcbbox, \
ibbox3 const & restrict dstbbox, \
@@ -692,20 +710,24 @@ namespace CarpetLib {
template \
void \
prolongate_3d_cc_rf2<T,3> (T const * restrict const src, \
+ ivect3 const & restrict srcpadext, \
ivect3 const & restrict srcext, \
T * restrict const dst, \
+ ivect3 const & restrict dstpadext, \
ivect3 const & restrict dstext, \
ibbox3 const & restrict srcbbox, \
ibbox3 const & restrict dstbbox, \
- ibbox3 const & restrict, \
- ibbox3 const & restrict regbbox, \
+ ibbox3 const & restrict, \
+ ibbox3 const & restrict regbbox, \
void * extraargs); \
\
template \
void \
prolongate_3d_cc_rf2<T,4> (T const * restrict const src, \
+ ivect3 const & restrict srcpadext, \
ivect3 const & restrict srcext, \
T * restrict const dst, \
+ ivect3 const & restrict dstpadext, \
ivect3 const & restrict dstext, \
ibbox3 const & restrict srcbbox, \
ibbox3 const & restrict dstbbox, \
@@ -716,8 +738,10 @@ namespace CarpetLib {
template \
void \
prolongate_3d_cc_rf2<T,5> (T const * restrict const src, \
+ ivect3 const & restrict srcpadext, \
ivect3 const & restrict srcext, \
T * restrict const dst, \
+ ivect3 const & restrict dstpadext, \
ivect3 const & restrict dstext, \
ibbox3 const & restrict srcbbox, \
ibbox3 const & restrict dstbbox, \
diff --git a/Carpet/CarpetLib/src/prolongate_3d_dgfe_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_dgfe_rf2.cc
index 640b084c2..3be587551 100644
--- a/Carpet/CarpetLib/src/prolongate_3d_dgfe_rf2.cc
+++ b/Carpet/CarpetLib/src/prolongate_3d_dgfe_rf2.cc
@@ -20,18 +20,30 @@ using namespace hrscc;
namespace CarpetLib {
-#define SRCIND3(i,j,k) ptrdiff_t(index3(i, j, k, srciext, srcjext, srckext))
-#define DSTIND3(i,j,k) ptrdiff_t(index3(i, j, k, dstiext, dstjext, dstkext))
-#define SRCOFF3(i,j,k) ptrdiff_t(offset3(i, j, k, srciext, srcjext, srckext))
-#define DSTOFF3(i,j,k) ptrdiff_t(offset3(i, j, k, dstiext, dstjext, dstkext))
+#define SRCIND3(i,j,k) \
+ index3 (i, j, k, \
+ srcipadext, srcjpadext, srckpadext, \
+ srciext, srcjext, srckext)
+#define DSTIND3(i,j,k) \
+ index3 (i, j, k, \
+ dstipadext, dstjpadext, dstkpadext, \
+ dstiext, dstjext, dstkext)
+#define SRCOFF3(i,j,k) \
+ offset3 (i, j, k, \
+ srciext, srcjext, srckext)
+#define DSTOFF3(i,j,k) \
+ offset3 (i, j, k, \
+ dstiext, dstjext, dstkext)
template<typename T, int ORDER>
void
prolongate_3d_dgfe_rf2(T const *restrict const src,
+ ivect3 const& restrict srcpadext,
ivect3 const& restrict srcext,
T *restrict const dst,
+ ivect3 const& restrict dstpadext,
ivect3 const& restrict dstext,
ibbox3 const& restrict srcbbox,
ibbox3 const& restrict dstbbox,
@@ -98,6 +110,14 @@ namespace CarpetLib {
+ ptrdiff_t const srcipadext = srcpadext[0];
+ ptrdiff_t const srcjpadext = srcpadext[1];
+ ptrdiff_t const srckpadext = srcpadext[2];
+
+ ptrdiff_t const dstipadext = dstpadext[0];
+ ptrdiff_t const dstjpadext = dstpadext[1];
+ ptrdiff_t const dstkpadext = dstpadext[2];
+
ptrdiff_t const srciext = srcext[0];
ptrdiff_t const srcjext = srcext[1];
ptrdiff_t const srckext = srcext[2];
@@ -143,8 +163,9 @@ namespace CarpetLib {
#ifdef HRSCC_HH
ptrdiff_t const i=0;
#pragma omp parallel for //collapse(2)
- for (ptrdiff_t k=0; k<regkext; k+=2*(ORDER+1)) {
- for (ptrdiff_t j=0; j<regjext; j+=2*(ORDER+1)) {
+ // Zwicky's Intel compiler ices on ptrdiff_t
+ for (int k=0; k<regkext; k+=2*(ORDER+1)) {
+ for (int j=0; j<regjext; j+=2*(ORDER+1)) {
GLLElement<ORDER>::prolongate_2D
(&src[SRCIND3(srcioff+i, srcjoff+j, srckoff+k)], srcstr2d,
&dst[DSTIND3(dstioff+2*i, dstjoff+2*j, dstkoff+2*k)], dststr2d);
@@ -182,8 +203,9 @@ namespace CarpetLib {
#ifdef HRSCC_HH
ptrdiff_t const j=0;
#pragma omp parallel for //collapse(2)
- for (ptrdiff_t k=0; k<regkext; k+=2*(ORDER+1)) {
- for (ptrdiff_t i=0; i<regiext; i+=2*(ORDER+1)) {
+ // Zwicky's Intel compiler ices on ptrdiff_t
+ for (int k=0; k<regkext; k+=2*(ORDER+1)) {
+ for (int i=0; i<regiext; i+=2*(ORDER+1)) {
GLLElement<ORDER>::prolongate_2D
(&src[SRCIND3(srcioff+i, srcjoff+j, srckoff+k)], srcstr2d,
&dst[DSTIND3(dstioff+2*i, dstjoff+2*j, dstkoff+2*k)], dststr2d);
@@ -221,8 +243,9 @@ namespace CarpetLib {
#ifdef HRSCC_HH
ptrdiff_t const k=0;
#pragma omp parallel for //collapse(2)
- for (ptrdiff_t j=0; j<regjext; j+=2*(ORDER+1)) {
- for (ptrdiff_t i=0; i<regiext; i+=2*(ORDER+1)) {
+ // Zwicky's Intel compiler ices on ptrdiff_t
+ for (int j=0; j<regjext; j+=2*(ORDER+1)) {
+ for (int i=0; i<regiext; i+=2*(ORDER+1)) {
GLLElement<ORDER>::prolongate_2D
(&src[SRCIND3(srcioff+i, srcjoff+j, srckoff+k)], srcstr2d,
&dst[DSTIND3(dstioff+2*i, dstjoff+2*j, dstkoff+2*k)], dststr2d);
@@ -257,9 +280,10 @@ namespace CarpetLib {
// Loop over fine region
#ifdef HRSCC_HH
#pragma omp parallel for //collapse(3)
- for (ptrdiff_t k=0; k<regkext; k+=2*(ORDER+1)) {
- for (ptrdiff_t j=0; j<regjext; j+=2*(ORDER+1)) {
- for (ptrdiff_t i=0; i<regiext; i+=2*(ORDER+1)) {
+ // Zwicky's Intel compiler ices on ptrdiff_t
+ for (int k=0; k<regkext; k+=2*(ORDER+1)) {
+ for (int j=0; j<regjext; j+=2*(ORDER+1)) {
+ for (int i=0; i<regiext; i+=2*(ORDER+1)) {
GLLElement<ORDER>::prolongate_full
(&src[SRCIND3(srcioff+i, srcjoff+j, srckoff+k)], srcstr,
&dst[DSTIND3(dstioff+2*i, dstjoff+2*j, dstkoff+2*k)], dststr);
@@ -281,8 +305,10 @@ namespace CarpetLib {
template \
void \
prolongate_3d_dgfe_rf2<T,5>(T const *restrict const src, \
+ ivect3 const& restrict srcpadext, \
ivect3 const& restrict srcext, \
T *restrict const dst, \
+ ivect3 const& restrict dstpadext, \
ivect3 const& restrict dstext, \
ibbox3 const& restrict srcbbox, \
ibbox3 const& restrict dstbbox, \
@@ -300,8 +326,10 @@ namespace CarpetLib {
template<>
void
prolongate_3d_dgfe_rf2<CCTK_COMPLEX,5>(CCTK_COMPLEX const *restrict const src,
+ ivect3 const& restrict srcpadext,
ivect3 const& restrict srcext,
CCTK_COMPLEX *restrict const dst,
+ ivect3 const& restrict dstpadext,
ivect3 const& restrict dstext,
ibbox3 const& restrict srcbbox,
ibbox3 const& restrict dstbbox,
diff --git a/Carpet/CarpetLib/src/prolongate_3d_o5_monotone_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_o5_monotone_rf2.cc
index adc4d0693..48fe2f31b 100644
--- a/Carpet/CarpetLib/src/prolongate_3d_o5_monotone_rf2.cc
+++ b/Carpet/CarpetLib/src/prolongate_3d_o5_monotone_rf2.cc
@@ -23,7 +23,6 @@
#include <cmath>
#include <cstdlib>
-#include "gdata.hh"
#include "operator_prototypes_3d.hh"
#include "typeprops.hh"
@@ -37,11 +36,14 @@ namespace CarpetLib {
#define SRCIND3(i,j,k) \
index3 (i, j, k, \
+ srcipadext, srcjpadext, srckpadext, \
srciext, srcjext, srckext)
#define DSTIND3(i,j,k) \
index3 (i, j, k, \
+ dstipadext, dstjpadext, dstkpadext, \
dstiext, dstjext, dstkext)
+
template <typename T>
inline
@@ -83,8 +85,10 @@ namespace CarpetLib {
template <typename T>
void
prolongate_3d_o5_monotone_rf2 (T const * restrict const src,
+ ivect3 const & restrict srcpadext,
ivect3 const & restrict srcext,
T * restrict const dst,
+ ivect3 const & restrict dstpadext,
ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
@@ -137,13 +141,15 @@ namespace CarpetLib {
CCTK_WARN (0, "Internal error: region extent is not contained in array extent");
}
- if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or
- dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride())))
- {
- CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes");
- }
+ size_t const srcipadext = srcpadext[0];
+ size_t const srcjpadext = srcpadext[1];
+ size_t const srckpadext = srcpadext[2];
+
+ size_t const dstipadext = dstpadext[0];
+ size_t const dstjpadext = dstpadext[1];
+ size_t const dstkpadext = dstpadext[2];
size_t const srciext = srcext[0];
size_t const srcjext = srcext[1];
@@ -827,8 +833,10 @@ namespace CarpetLib {
template <>
void
prolongate_3d_o5_monotone_rf2 (CCTK_COMPLEX const * restrict const src,
+ ivect3 const & restrict srcpadext,
ivect3 const & restrict srcext,
CCTK_COMPLEX * restrict const dst,
+ ivect3 const & restrict dstpadext,
ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
@@ -843,8 +851,10 @@ namespace CarpetLib {
template \
void \
prolongate_3d_o5_monotone_rf2 (T const * restrict const src, \
+ ivect3 const & restrict srcpadext, \
ivect3 const & restrict srcext, \
T * restrict const dst, \
+ ivect3 const & restrict dstpadext, \
ivect3 const & restrict dstext, \
ibbox3 const & restrict srcbbox, \
ibbox3 const & restrict dstbbox, \
diff --git a/Carpet/CarpetLib/src/prolongate_3d_real8_eno.F90 b/Carpet/CarpetLib/src/prolongate_3d_real8_eno.F90
index fbff861c4..94a9e60f1 100644
--- a/Carpet/CarpetLib/src/prolongate_3d_real8_eno.F90
+++ b/Carpet/CarpetLib/src/prolongate_3d_real8_eno.F90
@@ -75,19 +75,22 @@ function eno1d(q)
end function eno1d
-subroutine prolongate_3d_real8_eno (src, srciext, srcjext, &
- srckext, dst, dstiext, dstjext, dstkext, srcbbox, &
- dstbbox, regbbox)
+subroutine prolongate_3d_real8_eno ( &
+ src, srcipadext, srcjpadext, srckpadext, srciext, srcjext, srckext, &
+ dst, dstipadext, dstjpadext, dstkpadext, dstiext, dstjext, dstkext, &
+ srcbbox, dstbbox, regbbox)
implicit none
CCTK_REAL8 one
parameter (one = 1)
+ integer srcipadext, srcjpadext, srckpadext
integer srciext, srcjext, srckext
- CCTK_REAL8 src(srciext,srcjext,srckext)
+ CCTK_REAL8 src(srcipadext,srcjpadext,srckpadext)
+ integer dstipadext, dstjpadext, dstkpadext
integer dstiext, dstjext, dstkext
- CCTK_REAL8 dst(dstiext,dstjext,dstkext)
+ CCTK_REAL8 dst(dstipadext,dstjpadext,dstkpadext)
!!$ bbox(:,1) is lower boundary (inclusive)
!!$ bbox(:,2) is upper boundary (inclusive)
!!$ bbox(:,3) is stride
diff --git a/Carpet/CarpetLib/src/prolongate_3d_real8_tvd.F90 b/Carpet/CarpetLib/src/prolongate_3d_real8_tvd.F90
index f58cabf98..230a20bd5 100644
--- a/Carpet/CarpetLib/src/prolongate_3d_real8_tvd.F90
+++ b/Carpet/CarpetLib/src/prolongate_3d_real8_tvd.F90
@@ -26,19 +26,22 @@
-subroutine prolongate_3d_real8_tvd (src, srciext, srcjext, &
- srckext, dst, dstiext, dstjext, dstkext, srcbbox, &
- dstbbox, regbbox)
+subroutine prolongate_3d_real8_tvd ( &
+ src, srcipadext, srcjpadext, srckpadext, srciext, srcjext, srckext, &
+ dst, dstipadext, dstjpadext, dstkpadext, dstiext, dstjext, dstkext, &
+ srcbbox, dstbbox, regbbox)
implicit none
CCTK_REAL8 one
parameter (one = 1)
+ integer srcipadext, srcjpadext, srckpadext
integer srciext, srcjext, srckext
- CCTK_REAL8 src(srciext,srcjext,srckext)
+ CCTK_REAL8 src(srcipadext,srcjpadext,srckpadext)
+ integer dstipadext, dstjpadext, dstkpadext
integer dstiext, dstjext, dstkext
- CCTK_REAL8 dst(dstiext,dstjext,dstkext)
+ CCTK_REAL8 dst(dstipadext,dstjpadext,dstkpadext)
! bbox(:,1) is lower boundary (inclusive)
! bbox(:,2) is upper boundary (inclusive)
! bbox(:,3) is stride
diff --git a/Carpet/CarpetLib/src/prolongate_3d_real8_weno.F90 b/Carpet/CarpetLib/src/prolongate_3d_real8_weno.F90
index d5ec43427..044132a10 100644
--- a/Carpet/CarpetLib/src/prolongate_3d_real8_weno.F90
+++ b/Carpet/CarpetLib/src/prolongate_3d_real8_weno.F90
@@ -155,19 +155,22 @@ function weno1d(q)
end function weno1d
-subroutine prolongate_3d_real8_weno (src, srciext, srcjext, &
- srckext, dst, dstiext, dstjext, dstkext, srcbbox, &
- dstbbox, regbbox)
+subroutine prolongate_3d_real8_weno ( &
+ src, srcipadext, srcjpadext, srckpadext, srciext, srcjext, srckext, &
+ dst, dstipadext, dstjpadext, dstkpadext, dstiext, dstjext, dstkext, &
+ srcbbox, dstbbox, regbbox)
implicit none
CCTK_REAL8 one
parameter (one = 1)
+ integer srcipadext, srcjpadext, srckpadext
integer srciext, srcjext, srckext
- CCTK_REAL8 src(srciext,srcjext,srckext)
+ CCTK_REAL8 src(srcipadext,srcjpadext,srckpadext)
+ integer dstipadext, dstjpadext, dstkpadext
integer dstiext, dstjext, dstkext
- CCTK_REAL8 dst(dstiext,dstjext,dstkext)
+ CCTK_REAL8 dst(dstipadext,dstjpadext,dstkpadext)
!!$ bbox(:,1) is lower boundary (inclusive)
!!$ bbox(:,2) is upper boundary (inclusive)
!!$ bbox(:,3) is stride
diff --git a/Carpet/CarpetLib/src/prolongate_3d_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_rf2.cc
index 984d68c07..cdf24a985 100644
--- a/Carpet/CarpetLib/src/prolongate_3d_rf2.cc
+++ b/Carpet/CarpetLib/src/prolongate_3d_rf2.cc
@@ -8,7 +8,6 @@
#include <cstdlib>
#include <iostream>
-#include "gdata.hh"
#include "vectors.h"
#include "operator_prototypes_3d.hh"
#include "typeprops.hh"
@@ -21,10 +20,20 @@ namespace CarpetLib {
-#define SRCIND3(i,j,k) index3 (i, j, k, srciext, srcjext, srckext)
-#define DSTIND3(i,j,k) index3 (i, j, k, dstiext, dstjext, dstkext)
-#define SRCOFF3(i,j,k) offset3 (i, j, k, srciext, srcjext, srckext)
-#define DSTOFF3(i,j,k) offset3 (i, j, k, dstiext, dstjext, dstkext)
+#define SRCIND3(i,j,k) \
+ index3 (i, j, k, \
+ srcipadext, srcjpadext, srckpadext, \
+ srciext, srcjext, srckext)
+#define DSTIND3(i,j,k) \
+ index3 (i, j, k, \
+ dstipadext, dstjpadext, dstkpadext, \
+ dstiext, dstjext, dstkext)
+#define SRCOFF3(i,j,k) \
+ offset3 (i, j, k, \
+ srciext, srcjext, srckext)
+#define DSTOFF3(i,j,k) \
+ offset3 (i, j, k, \
+ dstiext, dstjext, dstkext)
@@ -473,8 +482,10 @@ namespace CarpetLib {
template <typename T, int ORDER>
void
prolongate_3d_rf2 (T const * restrict const src,
+ ivect3 const & restrict srcpadext,
ivect3 const & restrict srcext,
T * restrict const dst,
+ ivect3 const & restrict dstpadext,
ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
@@ -550,13 +561,15 @@ namespace CarpetLib {
CCTK_WARN (0, "Internal error: region extent is not contained in array extent");
}
- if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or
- dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride())))
- {
- CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes");
- }
+ size_t const srcipadext = srcpadext[0];
+ size_t const srcjpadext = srcpadext[1];
+ size_t const srckpadext = srcpadext[2];
+
+ size_t const dstipadext = dstpadext[0];
+ size_t const dstjpadext = dstpadext[1];
+ size_t const dstkpadext = dstpadext[2];
size_t const srciext = srcext[0];
size_t const srcjext = srcext[1];
@@ -801,8 +814,10 @@ namespace CarpetLib {
template \
void \
prolongate_3d_rf2<T,1> (T const * restrict const src, \
+ ivect3 const & restrict srcpadext, \
ivect3 const & restrict srcext, \
T * restrict const dst, \
+ ivect3 const & restrict dstpadext, \
ivect3 const & restrict dstext, \
ibbox3 const & restrict srcbbox, \
ibbox3 const & restrict dstbbox, \
@@ -813,8 +828,10 @@ namespace CarpetLib {
template \
void \
prolongate_3d_rf2<T,3> (T const * restrict const src, \
+ ivect3 const & restrict srcpadext, \
ivect3 const & restrict srcext, \
T * restrict const dst, \
+ ivect3 const & restrict dstpadext, \
ivect3 const & restrict dstext, \
ibbox3 const & restrict srcbbox, \
ibbox3 const & restrict dstbbox, \
@@ -825,8 +842,10 @@ namespace CarpetLib {
template \
void \
prolongate_3d_rf2<T,5> (T const * restrict const src, \
+ ivect3 const & restrict srcpadext, \
ivect3 const & restrict srcext, \
T * restrict const dst, \
+ ivect3 const & restrict dstpadext, \
ivect3 const & restrict dstext, \
ibbox3 const & restrict srcbbox, \
ibbox3 const & restrict dstbbox, \
@@ -837,8 +856,10 @@ namespace CarpetLib {
template \
void \
prolongate_3d_rf2<T,7> (T const * restrict const src, \
+ ivect3 const & restrict srcpadext, \
ivect3 const & restrict srcext, \
T * restrict const dst, \
+ ivect3 const & restrict dstpadext, \
ivect3 const & restrict dstext, \
ibbox3 const & restrict srcbbox, \
ibbox3 const & restrict dstbbox, \
@@ -849,8 +870,10 @@ namespace CarpetLib {
template \
void \
prolongate_3d_rf2<T,9> (T const * restrict const src, \
+ ivect3 const & restrict srcpadext, \
ivect3 const & restrict srcext, \
T * restrict const dst, \
+ ivect3 const & restrict dstpadext, \
ivect3 const & restrict dstext, \
ibbox3 const & restrict srcbbox, \
ibbox3 const & restrict dstbbox, \
@@ -861,8 +884,10 @@ namespace CarpetLib {
template \
void \
prolongate_3d_rf2<T,11> (T const * restrict const src, \
+ ivect3 const & restrict srcpadext, \
ivect3 const & restrict srcext, \
T * restrict const dst, \
+ ivect3 const & restrict dstpadext, \
ivect3 const & restrict dstext, \
ibbox3 const & restrict srcbbox, \
ibbox3 const & restrict dstbbox, \
diff --git a/Carpet/CarpetLib/src/prolongate_4d_o1_rf2.cc b/Carpet/CarpetLib/src/prolongate_4d_o1_rf2.cc
index 1f7e8203e..58b22f719 100644
--- a/Carpet/CarpetLib/src/prolongate_4d_o1_rf2.cc
+++ b/Carpet/CarpetLib/src/prolongate_4d_o1_rf2.cc
@@ -6,7 +6,6 @@
#include <cmath>
#include <cstdlib>
-#include "gdata.hh"
#include "operator_prototypes_4d.hh"
#include "typeprops.hh"
@@ -18,11 +17,13 @@ namespace CarpetLib {
-#define SRCIND4(i,j,k,l) \
- index4 (i, j, k, l, \
+#define SRCIND4(i,j,k,l) \
+ index4 (i, j, k, l, \
+ srcipadext, srcjpadext, srckpadext, srclpadext, \
srciext, srcjext, srckext, srclext)
-#define DSTIND4(i,j,k,l) \
- index4 (i, j, k, l, \
+#define DSTIND4(i,j,k,l) \
+ index4 (i, j, k, l, \
+ dstipadext, dstjpadext, dstkpadext, dstlpadext, \
dstiext, dstjext, dstkext, dstlext)
@@ -30,8 +31,10 @@ namespace CarpetLib {
template <typename T>
void
prolongate_4d_o1_rf2 (T const * restrict const src,
+ ivect4 const & restrict srcpadext,
ivect4 const & restrict srcext,
T * restrict const dst,
+ ivect4 const & restrict dstpadext,
ivect4 const & restrict dstext,
ibbox4 const & restrict srcbbox,
ibbox4 const & restrict dstbbox,
@@ -84,13 +87,17 @@ namespace CarpetLib {
CCTK_WARN (0, "Internal error: region extent is not contained in array extent");
}
- if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or
- dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride())))
- {
- CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes");
- }
+ size_t const srcipadext = srcpadext[0];
+ size_t const srcjpadext = srcpadext[1];
+ size_t const srckpadext = srcpadext[2];
+ size_t const srclpadext = srcpadext[3];
+
+ size_t const dstipadext = dstpadext[0];
+ size_t const dstjpadext = dstpadext[1];
+ size_t const dstkpadext = dstpadext[2];
+ size_t const dstlpadext = dstpadext[3];
size_t const srciext = srcext[0];
size_t const srcjext = srcext[1];
@@ -593,8 +600,10 @@ namespace CarpetLib {
template \
void \
prolongate_4d_o1_rf2 (T const * restrict const src, \
+ ivect4 const & restrict srcpadext, \
ivect4 const & restrict srcext, \
T * restrict const dst, \
+ ivect4 const & restrict dstpadext, \
ivect4 const & restrict dstext, \
ibbox4 const & restrict srcbbox, \
ibbox4 const & restrict dstbbox, \
diff --git a/Carpet/CarpetLib/src/restrict_3d_cc_o3_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_cc_o3_rf2.cc
index 2f544110a..981398ae3 100644
--- a/Carpet/CarpetLib/src/restrict_3d_cc_o3_rf2.cc
+++ b/Carpet/CarpetLib/src/restrict_3d_cc_o3_rf2.cc
@@ -5,7 +5,6 @@
#include <cassert>
#include <cmath>
-#include "gdata.hh"
#include "operator_prototypes_3d.hh"
#include "typeprops.hh"
@@ -19,9 +18,11 @@ namespace CarpetLib {
#define SRCIND3(i,j,k) \
index3 (srcioff + (i), srcjoff + (j), srckoff + (k), \
+ srcipadext, srcjpadext, srckpadext, \
srciext, srcjext, srckext)
#define DSTIND3(i,j,k) \
index3 (dstioff + (i), dstjoff + (j), dstkoff + (k), \
+ dstipadext, dstjpadext, dstkpadext, \
dstiext, dstjext, dstkext)
@@ -38,14 +39,16 @@ namespace CarpetLib {
template <typename T>
void
restrict_3d_cc_o3_rf2 (T const * restrict const src,
- ivect3 const & restrict srcext,
- T * restrict const dst,
- ivect3 const & restrict dstext,
- ibbox3 const & restrict srcbbox,
- ibbox3 const & restrict dstbbox,
- ibbox3 const & restrict,
- ibbox3 const & restrict regbbox,
- void * extraargs)
+ ivect3 const & restrict srcpadext,
+ ivect3 const & restrict srcext,
+ T * restrict const dst,
+ ivect3 const & restrict dstpadext,
+ ivect3 const & restrict dstext,
+ ibbox3 const & restrict srcbbox,
+ ibbox3 const & restrict dstbbox,
+ ibbox3 const & restrict,
+ ibbox3 const & restrict regbbox,
+ void * extraargs)
{
assert (not extraargs);
@@ -80,12 +83,6 @@ namespace CarpetLib {
CCTK_WARN (0, "Internal error: region extent is not contained in array extent");
}
- if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or
- dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride())))
- {
- CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes");
- }
-
ivect3 const regext = regbbox.shape() / regbbox.stride();
@@ -101,6 +98,14 @@ namespace CarpetLib {
+ int const srcipadext = srcpadext[0];
+ int const srcjpadext = srcpadext[1];
+ int const srckpadext = srcpadext[2];
+
+ int const dstipadext = dstpadext[0];
+ int const dstjpadext = dstpadext[1];
+ int const dstkpadext = dstpadext[2];
+
int const srciext = srcext[0];
int const srcjext = srcext[1];
int const srckext = srcext[2];
@@ -235,18 +240,20 @@ namespace CarpetLib {
-#define TYPECASE(N,T) \
- template \
- void \
- restrict_3d_cc_o3_rf2 (T const * restrict const src, \
- ivect3 const & restrict srcext, \
- T * restrict const dst, \
- ivect3 const & restrict dstext, \
- ibbox3 const & restrict srcbbox, \
- ibbox3 const & restrict dstbbox, \
- ibbox3 const & restrict, \
- ibbox3 const & restrict regbbox, \
- void * extraargs);
+#define TYPECASE(N,T) \
+ template \
+ void \
+ restrict_3d_cc_o3_rf2 (T const * restrict const src, \
+ ivect3 const & restrict srcpadext, \
+ ivect3 const & restrict srcext, \
+ T * restrict const dst, \
+ ivect3 const & restrict dstpadext, \
+ ivect3 const & restrict dstext, \
+ ibbox3 const & restrict srcbbox, \
+ ibbox3 const & restrict dstbbox, \
+ ibbox3 const & restrict, \
+ ibbox3 const & restrict regbbox, \
+ void * extraargs);
#include "typecase.hh"
#undef TYPECASE
diff --git a/Carpet/CarpetLib/src/restrict_3d_cc_o5_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_cc_o5_rf2.cc
index a90ebb868..6c90daa44 100644
--- a/Carpet/CarpetLib/src/restrict_3d_cc_o5_rf2.cc
+++ b/Carpet/CarpetLib/src/restrict_3d_cc_o5_rf2.cc
@@ -19,9 +19,11 @@ namespace CarpetLib {
#define SRCIND3(i,j,k) \
index3 (srcioff + (i), srcjoff + (j), srckoff + (k), \
+ srcipadext, srcjpadext, srckpadext, \
srciext, srcjext, srckext)
#define DSTIND3(i,j,k) \
index3 (dstioff + (i), dstjoff + (j), dstkoff + (k), \
+ dstipadext, dstjpadext, dstkpadext, \
dstiext, dstjext, dstkext)
#define SRCOFF3(i,j,k) \
offset3 (srcioff + (i), srcjoff + (j), srckoff + (k), \
@@ -107,14 +109,16 @@ namespace CarpetLib {
template <typename T>
void
restrict_3d_cc_o5_rf2 (T const * restrict const src,
- ivect3 const & restrict srcext,
- T * restrict const dst,
- ivect3 const & restrict dstext,
- ibbox3 const & restrict srcbbox,
- ibbox3 const & restrict dstbbox,
- ibbox3 const & restrict,
- ibbox3 const & restrict regbbox,
- void * extraargs)
+ ivect3 const & restrict srcpadext,
+ ivect3 const & restrict srcext,
+ T * restrict const dst,
+ ivect3 const & restrict dstpadext,
+ ivect3 const & restrict dstext,
+ ibbox3 const & restrict srcbbox,
+ ibbox3 const & restrict dstbbox,
+ ibbox3 const & restrict,
+ ibbox3 const & restrict regbbox,
+ void * extraargs)
{
assert (not extraargs);
@@ -149,12 +153,6 @@ namespace CarpetLib {
CCTK_WARN (0, "Internal error: region extent is not contained in array extent");
}
- if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or
- dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride())))
- {
- CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes");
- }
-
ivect3 const regext = regbbox.shape() / regbbox.stride();
@@ -170,6 +168,14 @@ namespace CarpetLib {
+ int const srcipadext = srcpadext[0];
+ int const srcjpadext = srcpadext[1];
+ int const srckpadext = srcpadext[2];
+
+ int const dstipadext = dstpadext[0];
+ int const dstjpadext = dstpadext[1];
+ int const dstkpadext = dstpadext[2];
+
int const srciext = srcext[0];
int const srcjext = srcext[1];
int const srckext = srcext[2];
@@ -239,18 +245,20 @@ namespace CarpetLib {
-#define TYPECASE(N,T) \
- template \
- void \
- restrict_3d_cc_o5_rf2 (T const * restrict const src, \
- ivect3 const & restrict srcext, \
- T * restrict const dst, \
- ivect3 const & restrict dstext, \
- ibbox3 const & restrict srcbbox, \
- ibbox3 const & restrict dstbbox, \
- ibbox3 const & restrict, \
- ibbox3 const & restrict regbbox, \
- void * extraargs);
+#define TYPECASE(N,T) \
+ template \
+ void \
+ restrict_3d_cc_o5_rf2 (T const * restrict const src, \
+ ivect3 const & restrict srcpadext, \
+ ivect3 const & restrict srcext, \
+ T * restrict const dst, \
+ ivect3 const & restrict dstpadext, \
+ ivect3 const & restrict dstext, \
+ ibbox3 const & restrict srcbbox, \
+ ibbox3 const & restrict dstbbox, \
+ ibbox3 const & restrict, \
+ ibbox3 const & restrict regbbox, \
+ void * extraargs);
#include "typecase.hh"
#undef TYPECASE
diff --git a/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc
index dd419cb92..13b59a7f8 100644
--- a/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc
+++ b/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc
@@ -5,7 +5,6 @@
#include <cassert>
#include <cmath>
-#include "gdata.hh"
#include "operator_prototypes_3d.hh"
#include "typeprops.hh"
@@ -19,9 +18,11 @@ namespace CarpetLib {
#define SRCIND3(i,j,k) \
index3 (srcioff + (i), srcjoff + (j), srckoff + (k), \
+ srcipadext, srcjpadext, srckpadext, \
srciext, srcjext, srckext)
#define DSTIND3(i,j,k) \
index3 (dstioff + (i), dstjoff + (j), dstkoff + (k), \
+ dstipadext, dstjpadext, dstkpadext, \
dstiext, dstjext, dstkext)
@@ -29,8 +30,10 @@ namespace CarpetLib {
template <typename T>
void
restrict_3d_cc_rf2 (T const * restrict const src,
+ ivect3 const & restrict srcpadext,
ivect3 const & restrict srcext,
T * restrict const dst,
+ ivect3 const & restrict dstpadext,
ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
@@ -71,12 +74,6 @@ namespace CarpetLib {
CCTK_WARN (0, "Internal error: region extent is not contained in array extent");
}
- if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or
- dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride())))
- {
- CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes");
- }
-
ivect3 const regext = regbbox.shape() / regbbox.stride();
@@ -92,6 +89,14 @@ namespace CarpetLib {
+ int const srcipadext = srcpadext[0];
+ int const srcjpadext = srcpadext[1];
+ int const srckpadext = srcpadext[2];
+
+ int const dstipadext = dstpadext[0];
+ int const dstjpadext = dstpadext[1];
+ int const dstkpadext = dstpadext[2];
+
int const srciext = srcext[0];
int const srcjext = srcext[1];
int const srckext = srcext[2];
@@ -127,6 +132,8 @@ namespace CarpetLib {
for (int j=0; j<regjext; ++j) {
for (int i=0; i<regiext; ++i) {
+ // TODO: Introduce higher-order restriction operators (but
+ // don't use these for hydro!)
dst [DSTIND3(i, j, k)] =
+ f1*f1*f1 * src [SRCIND3(2*i , 2*j , 2*k )]
+ f2*f1*f1 * src [SRCIND3(2*i+1, 2*j , 2*k )]
@@ -145,17 +152,19 @@ namespace CarpetLib {
-#define TYPECASE(N,T) \
- template \
- void \
- restrict_3d_cc_rf2 (T const * restrict const src, \
- ivect3 const & restrict srcext, \
- T * restrict const dst, \
- ivect3 const & restrict dstext, \
- ibbox3 const & restrict srcbbox, \
- ibbox3 const & restrict dstbbox, \
- ibbox3 const & restrict, \
- ibbox3 const & restrict regbbox, \
+#define TYPECASE(N,T) \
+ template \
+ void \
+ restrict_3d_cc_rf2 (T const * restrict const src, \
+ ivect3 const & restrict srcpadext, \
+ ivect3 const & restrict srcext, \
+ T * restrict const dst, \
+ ivect3 const & restrict dstpadext, \
+ ivect3 const & restrict dstext, \
+ ibbox3 const & restrict srcbbox, \
+ ibbox3 const & restrict dstbbox, \
+ ibbox3 const & restrict, \
+ ibbox3 const & restrict regbbox, \
void * extraargs);
#include "typecase.hh"
#undef TYPECASE
diff --git a/Carpet/CarpetLib/src/restrict_3d_dgfe_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_dgfe_rf2.cc
index 4b2c0c648..e438d360b 100644
--- a/Carpet/CarpetLib/src/restrict_3d_dgfe_rf2.cc
+++ b/Carpet/CarpetLib/src/restrict_3d_dgfe_rf2.cc
@@ -20,18 +20,30 @@ using namespace hrscc;
namespace CarpetLib {
-#define SRCIND3(i,j,k) ptrdiff_t(index3(i, j, k, srciext, srcjext, srckext))
-#define DSTIND3(i,j,k) ptrdiff_t(index3(i, j, k, dstiext, dstjext, dstkext))
-#define SRCOFF3(i,j,k) ptrdiff_t(offset3(i, j, k, srciext, srcjext, srckext))
-#define DSTOFF3(i,j,k) ptrdiff_t(offset3(i, j, k, dstiext, dstjext, dstkext))
+#define SRCIND3(i,j,k) \
+ index3 (i, j, k, \
+ srcipadext, srcjpadext, srckpadext, \
+ srciext, srcjext, srckext)
+#define DSTIND3(i,j,k) \
+ index3 (i, j, k, \
+ dstipadext, dstjpadext, dstkpadext, \
+ dstiext, dstjext, dstkext)
+#define SRCOFF3(i,j,k) \
+ offset3 (i, j, k, \
+ srciext, srcjext, srckext)
+#define DSTOFF3(i,j,k) \
+ offset3 (i, j, k, \
+ dstiext, dstjext, dstkext)
template<typename T, int ORDER>
void
restrict_3d_dgfe_rf2(T const *restrict const src,
+ ivect3 const& restrict srcpadext,
ivect3 const& restrict srcext,
T *restrict const dst,
+ ivect3 const& restrict dstpadext,
ivect3 const& restrict dstext,
ibbox3 const& restrict srcbbox,
ibbox3 const& restrict dstbbox,
@@ -82,6 +94,14 @@ namespace CarpetLib {
+ ptrdiff_t const srcipadext = srcpadext[0];
+ ptrdiff_t const srcjpadext = srcpadext[1];
+ ptrdiff_t const srckpadext = srcpadext[2];
+
+ ptrdiff_t const dstipadext = dstpadext[0];
+ ptrdiff_t const dstjpadext = dstpadext[1];
+ ptrdiff_t const dstkpadext = dstpadext[2];
+
ptrdiff_t const srciext = srcext[0];
ptrdiff_t const srcjext = srcext[1];
ptrdiff_t const srckext = srcext[2];
@@ -127,9 +147,10 @@ namespace CarpetLib {
// Loop over coarse region
#ifdef HRSCC_HH
#pragma omp parallel for //collapse(3)
- for (ptrdiff_t k=0; k<regkext; k+=ORDER+1) {
- for (ptrdiff_t j=0; j<regjext; j+=ORDER+1) {
- for (ptrdiff_t i=0; i<regiext; i+=ORDER+1) {
+ // Zwicky's Intel compiler ices on ptrdiff_t
+ for (int k=0; k<regkext; k+=ORDER+1) {
+ for (int j=0; j<regjext; j+=ORDER+1) {
+ for (int i=0; i<regiext; i+=ORDER+1) {
GLLElement<ORDER>::restrict_full
(&src[SRCIND3(srcioff+2*i, srcjoff+2*j, srckoff+2*k)], srcstr,
&dst[DSTIND3(dstioff+i, dstjoff+j, dstkoff+k)], dststr);
@@ -149,8 +170,10 @@ namespace CarpetLib {
template \
void \
restrict_3d_dgfe_rf2<T,5>(T const *restrict const src, \
+ ivect3 const& restrict srcpadext, \
ivect3 const& restrict srcext, \
T *restrict const dst, \
+ ivect3 const& restrict dstpadext, \
ivect3 const& restrict dstext, \
ibbox3 const& restrict srcbbox, \
ibbox3 const& restrict dstbbox, \
@@ -168,8 +191,10 @@ namespace CarpetLib {
template<>
void
restrict_3d_dgfe_rf2<CCTK_COMPLEX,5>(CCTK_COMPLEX const *restrict const src,
+ ivect3 const& restrict srcpadext,
ivect3 const& restrict srcext,
CCTK_COMPLEX *restrict const dst,
+ ivect3 const& restrict dstpadext,
ivect3 const& restrict dstext,
ibbox3 const& restrict srcbbox,
ibbox3 const& restrict dstbbox,
diff --git a/Carpet/CarpetLib/src/restrict_3d_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_rf2.cc
index 5fff24a3e..c1dade7fb 100644
--- a/Carpet/CarpetLib/src/restrict_3d_rf2.cc
+++ b/Carpet/CarpetLib/src/restrict_3d_rf2.cc
@@ -7,7 +7,6 @@
#include <cstdlib>
#include <iostream>
-#include "gdata.hh"
#include "operator_prototypes_3d.hh"
#include "typeprops.hh"
@@ -21,9 +20,11 @@ namespace CarpetLib {
#define SRCIND3(i,j,k) \
index3 (srcioff + (i), srcjoff + (j), srckoff + (k), \
+ srcipadext, srcjpadext, srckpadext, \
srciext, srcjext, srckext)
#define DSTIND3(i,j,k) \
index3 (dstioff + (i), dstjoff + (j), dstkoff + (k), \
+ dstipadext, dstjpadext, dstkpadext, \
dstiext, dstjext, dstkext)
@@ -31,8 +32,10 @@ namespace CarpetLib {
template <typename T>
void
restrict_3d_rf2 (T const * restrict const src,
+ ivect3 const & restrict srcpadext,
ivect3 const & restrict srcext,
T * restrict const dst,
+ ivect3 const & restrict dstpadext,
ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
@@ -67,12 +70,6 @@ namespace CarpetLib {
CCTK_WARN (0, "Internal error: region extent is not contained in array extent");
}
- if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or
- dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride())))
- {
- CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes");
- }
-
ivect3 const regext = regbbox.shape() / regbbox.stride();
@@ -85,6 +82,14 @@ namespace CarpetLib {
+ int const srcipadext = srcpadext[0];
+ int const srcjpadext = srcpadext[1];
+ int const srckpadext = srcpadext[2];
+
+ int const dstipadext = dstpadext[0];
+ int const dstjpadext = dstpadext[1];
+ int const dstkpadext = dstpadext[2];
+
int const srciext = srcext[0];
int const srcjext = srcext[1];
int const srckext = srcext[2];
@@ -126,8 +131,10 @@ namespace CarpetLib {
template \
void \
restrict_3d_rf2 (T const * restrict const src, \
+ ivect3 const & restrict srcpadext, \
ivect3 const & restrict srcext, \
T * restrict const dst, \
+ ivect3 const & restrict dstpadext, \
ivect3 const & restrict dstext, \
ibbox3 const & restrict srcbbox, \
ibbox3 const & restrict dstbbox, \
diff --git a/Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc
index 827fb5dff..44799a60f 100644
--- a/Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc
+++ b/Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc
@@ -5,7 +5,6 @@
#include <cassert>
#include <cmath>
-#include "gdata.hh"
#include "operator_prototypes_3d.hh"
#include "typeprops.hh"
@@ -19,9 +18,11 @@ namespace CarpetLib {
#define SRCIND3(i,j,k) \
index3 (srcioff + (i), srcjoff + (j), srckoff + (k), \
+ srcipadext, srcjpadext, srckpadext, \
srciext, srcjext, srckext)
#define DSTIND3(i,j,k) \
index3 (dstioff + (i), dstjoff + (j), dstkoff + (k), \
+ dstipadext, dstjpadext, dstkpadext, \
dstiext, dstjext, dstkext)
#define SRCOFF3(i,j,k) \
offset3 (srcioff + (i), srcjoff + (j), srckoff + (k), \
@@ -136,8 +137,10 @@ namespace CarpetLib {
template <typename T, int centi, int centj, int centk>
void
restrict_3d_vc_rf2 (T const * restrict const src,
+ ivect3 const & restrict srcpadext,
ivect3 const & restrict srcext,
T * restrict const dst,
+ ivect3 const & restrict dstpadext,
ivect3 const & restrict dstext,
ibbox3 const & restrict srcbbox,
ibbox3 const & restrict dstbbox,
@@ -188,12 +191,6 @@ namespace CarpetLib {
CCTK_WARN (0, "Internal error: region extent is not contained in array extent");
}
- if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or
- dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride())))
- {
- CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes");
- }
-
ivect3 const regext = regbbox.shape() / regbbox.stride();
@@ -217,6 +214,14 @@ namespace CarpetLib {
+ int const srcipadext = srcpadext[0];
+ int const srcjpadext = srcpadext[1];
+ int const srckpadext = srcpadext[2];
+
+ int const dstipadext = dstpadext[0];
+ int const dstjpadext = dstpadext[1];
+ int const dstkpadext = dstpadext[2];
+
int const srciext = srcext[0];
int const srcjext = srcext[1];
int const srckext = srcext[2];
@@ -284,8 +289,10 @@ namespace CarpetLib {
template \
void \
restrict_3d_vc_rf2<T,0,0,0> (T const * restrict const src, \
+ ivect3 const & restrict srcpadext, \
ivect3 const & restrict srcext, \
T * restrict const dst, \
+ ivect3 const & restrict dstpadext, \
ivect3 const & restrict dstext, \
ibbox3 const & restrict srcbbox, \
ibbox3 const & restrict dstbbox, \
@@ -295,8 +302,10 @@ namespace CarpetLib {
template \
void \
restrict_3d_vc_rf2<T,0,0,1> (T const * restrict const src, \
+ ivect3 const & restrict srcpadext, \
ivect3 const & restrict srcext, \
T * restrict const dst, \
+ ivect3 const & restrict dstpadext, \
ivect3 const & restrict dstext, \
ibbox3 const & restrict srcbbox, \
ibbox3 const & restrict dstbbox, \
@@ -306,8 +315,10 @@ namespace CarpetLib {
template \
void \
restrict_3d_vc_rf2<T,0,1,0> (T const * restrict const src, \
+ ivect3 const & restrict srcpadext, \
ivect3 const & restrict srcext, \
T * restrict const dst, \
+ ivect3 const & restrict dstpadext, \
ivect3 const & restrict dstext, \
ibbox3 const & restrict srcbbox, \
ibbox3 const & restrict dstbbox, \
@@ -317,8 +328,10 @@ namespace CarpetLib {
template \
void \
restrict_3d_vc_rf2<T,0,1,1> (T const * restrict const src, \
+ ivect3 const & restrict srcpadext, \
ivect3 const & restrict srcext, \
T * restrict const dst, \
+ ivect3 const & restrict dstpadext, \
ivect3 const & restrict dstext, \
ibbox3 const & restrict srcbbox, \
ibbox3 const & restrict dstbbox, \
@@ -328,8 +341,10 @@ namespace CarpetLib {
template \
void \
restrict_3d_vc_rf2<T,1,0,0> (T const * restrict const src, \
+ ivect3 const & restrict srcpadext, \
ivect3 const & restrict srcext, \
T * restrict const dst, \
+ ivect3 const & restrict dstpadext, \
ivect3 const & restrict dstext, \
ibbox3 const & restrict srcbbox, \
ibbox3 const & restrict dstbbox, \
@@ -339,8 +354,10 @@ namespace CarpetLib {
template \
void \
restrict_3d_vc_rf2<T,1,0,1> (T const * restrict const src, \
+ ivect3 const & restrict srcpadext, \
ivect3 const & restrict srcext, \
T * restrict const dst, \
+ ivect3 const & restrict dstpadext, \
ivect3 const & restrict dstext, \
ibbox3 const & restrict srcbbox, \
ibbox3 const & restrict dstbbox, \
@@ -350,8 +367,10 @@ namespace CarpetLib {
template \
void \
restrict_3d_vc_rf2<T,1,1,0> (T const * restrict const src, \
+ ivect3 const & restrict srcpadext, \
ivect3 const & restrict srcext, \
T * restrict const dst, \
+ ivect3 const & restrict dstpadext, \
ivect3 const & restrict dstext, \
ibbox3 const & restrict srcbbox, \
ibbox3 const & restrict dstbbox, \
@@ -361,8 +380,10 @@ namespace CarpetLib {
template \
void \
restrict_3d_vc_rf2<T,1,1,1> (T const * restrict const src, \
+ ivect3 const & restrict srcpadext, \
ivect3 const & restrict srcext, \
T * restrict const dst, \
+ ivect3 const & restrict dstpadext, \
ivect3 const & restrict dstext, \
ibbox3 const & restrict srcbbox, \
ibbox3 const & restrict dstbbox, \
diff --git a/Carpet/CarpetLib/src/restrict_4d_rf2.cc b/Carpet/CarpetLib/src/restrict_4d_rf2.cc
index 01b66ee78..bd5d7079c 100644
--- a/Carpet/CarpetLib/src/restrict_4d_rf2.cc
+++ b/Carpet/CarpetLib/src/restrict_4d_rf2.cc
@@ -7,7 +7,6 @@
#include <cstdlib>
#include <iostream>
-#include "gdata.hh"
#include "operator_prototypes_4d.hh"
#include "typeprops.hh"
@@ -21,9 +20,11 @@ namespace CarpetLib {
#define SRCIND4(i,j,k,l) \
index4 (srcioff + (i), srcjoff + (j), srckoff + (k), srcloff + (l), \
+ srcipadext, srcjpadext, srckpadext, srclpadext, \
srciext, srcjext, srckext, srclext)
#define DSTIND4(i,j,k,l) \
index4 (dstioff + (i), dstjoff + (j), dstkoff + (k), dstloff + (l), \
+ dstipadext, dstjpadext, dstkpadext, dstlpadext, \
dstiext, dstjext, dstkext, dstlext)
@@ -31,8 +32,10 @@ namespace CarpetLib {
template <typename T>
void
restrict_4d_rf2 (T const * restrict const src,
+ ivect4 const & restrict srcpadext,
ivect4 const & restrict srcext,
T * restrict const dst,
+ ivect4 const & restrict dstpadext,
ivect4 const & restrict dstext,
ibbox4 const & restrict srcbbox,
ibbox4 const & restrict dstbbox,
@@ -67,12 +70,6 @@ namespace CarpetLib {
CCTK_WARN (0, "Internal error: region extent is not contained in array extent");
}
- if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or
- dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride())))
- {
- CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes");
- }
-
ivect4 const regext = regbbox.shape() / regbbox.stride();
@@ -83,6 +80,16 @@ namespace CarpetLib {
+ ptrdiff_t const srcipadext = srcpadext[0];
+ ptrdiff_t const srcjpadext = srcpadext[1];
+ ptrdiff_t const srckpadext = srcpadext[2];
+ ptrdiff_t const srclpadext = srcpadext[3];
+
+ ptrdiff_t const dstipadext = dstpadext[0];
+ ptrdiff_t const dstjpadext = dstpadext[1];
+ ptrdiff_t const dstkpadext = dstpadext[2];
+ ptrdiff_t const dstlpadext = dstpadext[3];
+
ptrdiff_t const srciext = srcext[0];
ptrdiff_t const srcjext = srcext[1];
ptrdiff_t const srckext = srcext[2];
@@ -132,8 +139,10 @@ namespace CarpetLib {
template \
void \
restrict_4d_rf2 (T const * restrict const src, \
+ ivect4 const & restrict srcpadext, \
ivect4 const & restrict srcext, \
T * restrict const dst, \
+ ivect4 const & restrict dstpadext, \
ivect4 const & restrict dstext, \
ibbox4 const & restrict srcbbox, \
ibbox4 const & restrict dstbbox, \