diff options
44 files changed, 1427 insertions, 641 deletions
diff --git a/Carpet/Carpet/interface.ccl b/Carpet/Carpet/interface.ccl index a3a76fce2..5858a8cbf 100644 --- a/Carpet/Carpet/interface.ccl +++ b/Carpet/Carpet/interface.ccl @@ -13,6 +13,7 @@ uses include header: loopcontrol.h uses include header: mpi_string.hh +uses include header: cacheinfo.hh uses include header: defs.hh uses include header: dist.hh uses include header: typecase.hh diff --git a/Carpet/Carpet/src/modes.cc b/Carpet/Carpet/src/modes.cc index 84b58b349..9af0039eb 100644 --- a/Carpet/Carpet/src/modes.cc +++ b/Carpet/Carpet/src/modes.cc @@ -4,10 +4,12 @@ #include <cstdio> #include <cstdlib> #include <cstring> +#include <functional> #include <cctk.h> #include <cctk_Parameters.h> +#include <cacheinfo.hh> #include <defs.hh> #include <gdata.hh> #include <ggf.hh> @@ -138,7 +140,7 @@ namespace Carpet { ivect_ref(info.ubnd) = (ext.upper() - baseext.lower()) / ext.stride(); ivect_ref(info.lsh) = ext.shape() / ext.stride(); - ivect_ref(info.ash) = gdata::allocated_memory_shape (ext); + ivect_ref(info.ash) = pad_shape(ext); if (gp.disttype == CCTK_DISTRIB_CONSTANT) { int const dir = gp.dim==0 ? 0 : gp.dim-1; ivect & gsh = ivect_ref(info.gsh); @@ -611,7 +613,7 @@ namespace Carpet { ivect_ref(cctkGH->cctk_ubnd) = (ext.upper() - baseext.lower()) / ext.stride(); ivect_ref(cctkGH->cctk_lsh) = ext.shape() / ext.stride(); - ivect_ref(cctkGH->cctk_ash) = gdata::allocated_memory_shape (ext); + ivect_ref(cctkGH->cctk_ash) = pad_shape(ext); for (int d=0; d<dim; ++d) { cctkGH->cctk_bbox[2*d ] = obnds[0][d]; diff --git a/Carpet/CarpetIOHDF5/src/Input.cc b/Carpet/CarpetIOHDF5/src/Input.cc index 637bb7032..4f932adee 100644 --- a/Carpet/CarpetIOHDF5/src/Input.cc +++ b/Carpet/CarpetIOHDF5/src/Input.cc @@ -1391,6 +1391,13 @@ static int ReadVar (const cGH* const cctkGH, #endif const ibbox& exterior_membox = data.dd->light_boxes.at(mglevel).at(reflevel).at(component).exterior; + int const var0 = CCTK_FirstVarIndexI(gindex); + assert (var0>=0 and var0<CCTK_NumVars()); + int const var = patch->vindex - var0; + assert (var>=0 and var<CCTK_NumVarsInGroupI(gindex)); + const ggf* const ff = data.data.AT(var); + const gdata* const processor_component = ff->data_pointer + (timelevel, reflevel, local_component, mglevel); // skip this dataset if it doesn't overlap with this component's // exterior, or if it only reads what has already been read @@ -1424,8 +1431,9 @@ static int ReadVar (const cGH* const cctkGH, slice_start_size_t memorigin[dim], fileorigin[dim]; hsize_t memdims[dim], count[dim]; for (int i = 0; i < patch->rank; i++) { - memdims[patch->rank-i-1] = - (membox.upper() - membox.lower())[i] / stride[i] + 1; + assert (processor_component->shape()[i] == + (membox.upper() - membox.lower())[i] / stride[i] + 1); + memdims[patch->rank-i-1] = processor_component->padded_shape()[i]; count[patch->rank-i-1] = (overlap.upper() - overlap.lower())[i] / stride[i] + 1; memorigin[patch->rank-i-1] = diff --git a/Carpet/CarpetIOHDF5/src/Output.cc b/Carpet/CarpetIOHDF5/src/Output.cc index 7288c9bc2..a6ac668cf 100644 --- a/Carpet/CarpetIOHDF5/src/Output.cc +++ b/Carpet/CarpetIOHDF5/src/Output.cc @@ -237,12 +237,17 @@ int WriteVarUnchunked (const cGH* const cctkGH, // before HDF5-1.6.4 the H5Sselect_hyperslab() function expected // the 'start' argument to be of type 'hssize_t' slice_start_size_t overlaporigin[dim]; + hsize_t hyperslab_start[dim], hyperslab_count[dim]; for (int d = 0; d < group.dim; ++d) { assert (group.dim-1-d>=0 and group.dim-1-d<dim); overlaporigin[group.dim-1-d] = ((overlap.lower() - bbox->lower()) / overlap.stride())[d]; overlapshape[group.dim-1-d] = - (overlap.shape() / overlap.stride())[d]; + processor_component->padded_shape()[d]; + assert (all (processor_component->shape() == + overlap.shape() / overlap.stride())); + hyperslab_start[group.dim-1-d] = 0; + hyperslab_count[group.dim-1-d] = processor_component->shape()[d]; } // Write the processor component as a hyperslab into the recombined @@ -250,6 +255,9 @@ int WriteVarUnchunked (const cGH* const cctkGH, hid_t overlap_dataspace; HDF5_ERROR (overlap_dataspace = H5Screate_simple (group.dim, overlapshape, NULL)); + HDF5_ERROR (H5Sselect_hyperslab (overlap_dataspace, H5S_SELECT_SET, + hyperslab_start, NULL, + hyperslab_count, NULL)); HDF5_ERROR (H5Sselect_hyperslab (dataspace, H5S_SELECT_SET, overlaporigin, NULL, overlapshape, NULL)); @@ -436,10 +444,15 @@ int WriteVarChunkedSequential (const cGH* const cctkGH, hsize_t shape[dim]; hssize_t origin[dim]; + hsize_t hyperslab_start[dim], hyperslab_count[dim]; for (int d = 0; d < group.dim; ++d) { assert (group.dim-1-d>=0 and group.dim-1-d<dim); origin[group.dim-1-d] = (bbox.lower() / bbox.stride())[d]; - shape[group.dim-1-d] = (bbox.shape() / bbox.stride())[d]; + shape[group.dim-1-d] = processor_component->padded_shape()[d]; + assert (all (processor_component->shape() == + bbox.shape() / bbox.stride())); + hyperslab_start[group.dim-1-d] = 0; + hyperslab_count[group.dim-1-d] = processor_component->shape()[d]; } // Write the component as an individual dataset @@ -459,6 +472,9 @@ int WriteVarChunkedSequential (const cGH* const cctkGH, HDF5_ERROR (H5Pset_filter (plist, H5Z_FILTER_FLETCHER32, 0, 0, NULL)); } HDF5_ERROR (dataspace = H5Screate_simple (group.dim, shape, NULL)); + HDF5_ERROR (H5Sselect_hyperslab (dataspace, H5S_SELECT_SET, + hyperslab_start, NULL, + hyperslab_count, NULL)); HDF5_ERROR (dataset = H5Dcreate (outfile, datasetname.str().c_str(), filedatatype, dataspace, plist)); @@ -545,10 +561,15 @@ int WriteVarChunkedParallel (const cGH* const cctkGH, // const ibbox& bbox = (*ff) (request->timelevel, refinementlevel, // group.disttype == CCTK_DISTRIB_CONSTANT ? // 0 : component, mglevel)->extent(); - const dh* dd = arrdata.at(gindex).at(Carpet::map).dd; + const dh* const dd = arrdata.AT(gindex).AT(Carpet::map).dd; const ibbox& bbox = dd->light_boxes.AT(mglevel).AT(refinementlevel) .AT(group.disttype == CCTK_DISTRIB_CONSTANT ? 0 : component).exterior; + const ggf* const ff = arrdata.AT(gindex).AT(Carpet::map).data.AT(var); + const gdata* const processor_component = ff->data_pointer + (request->timelevel, refinementlevel, + group.disttype == CCTK_DISTRIB_CONSTANT ? 0 : local_component, + mglevel); // Don't create zero-sized components if (bbox.empty()) continue; @@ -618,10 +639,15 @@ int WriteVarChunkedParallel (const cGH* const cctkGH, // Get the shape of the HDF5 dataset (in Fortran index order) hsize_t shape[dim]; hssize_t origin[dim]; + hsize_t hyperslab_start[dim], hyperslab_count[dim]; for (int d = 0; d < group.dim; ++d) { assert (group.dim-1-d>=0 and group.dim-1-d<dim); origin[group.dim-1-d] = (bbox.lower() / bbox.stride())[d]; - shape[group.dim-1-d] = (bbox.shape() / bbox.stride())[d]; + shape[group.dim-1-d] = processor_component->padded_shape()[d]; + assert (all (processor_component->shape() == + bbox.shape() / bbox.stride())); + hyperslab_start[group.dim-1-d] = 0; + hyperslab_count[group.dim-1-d] = processor_component->shape()[d]; } // Write the component as an individual dataset @@ -641,6 +667,9 @@ int WriteVarChunkedParallel (const cGH* const cctkGH, HDF5_ERROR (H5Pset_filter (plist, H5Z_FILTER_FLETCHER32, 0, 0, NULL)); } HDF5_ERROR (dataspace = H5Screate_simple (group.dim, shape, NULL)); + HDF5_ERROR (H5Sselect_hyperslab (dataspace, H5S_SELECT_SET, + hyperslab_start, NULL, + hyperslab_count, NULL)); HDF5_ERROR (dataset = H5Dcreate (outfile, datasetname.str().c_str(), filedatatype, dataspace, plist)); @@ -827,7 +856,8 @@ static int AddAttributes (const cGH *const cctkGH, const char *fullname, dataspace, H5P_DEFAULT)); HDF5_ERROR (H5Awrite (attr, H5T_NATIVE_INT, &ioffsetdenom[0])); HDF5_ERROR (H5Aclose (attr)); - ivect const ioffset = ((bbox.lower() % bbox.stride()) + bbox.stride()) % bbox.stride(); + ivect const ioffset = + ((bbox.lower() % bbox.stride()) + bbox.stride()) % bbox.stride(); HDF5_ERROR (attr = H5Acreate (dataset, "ioffset", H5T_NATIVE_INT, dataspace, H5P_DEFAULT)); HDF5_ERROR (H5Awrite (attr, H5T_NATIVE_INT, &ioffset[0])); diff --git a/Carpet/CarpetInterp2/interface.ccl b/Carpet/CarpetInterp2/interface.ccl index 24bc256bf..529876225 100644 --- a/Carpet/CarpetInterp2/interface.ccl +++ b/Carpet/CarpetInterp2/interface.ccl @@ -8,6 +8,7 @@ INCLUDE HEADER: fasterp.hh IN carpetinterp2.hh USES INCLUDE HEADER: nompi.h +USES INCLUDE HEADER: cacheinfo.hh USES INCLUDE HEADER: defs.hh USES INCLUDE HEADER: typeprops.hh USES INCLUDE HEADER: vect.hh diff --git a/Carpet/CarpetInterp2/src/fasterp.cc b/Carpet/CarpetInterp2/src/fasterp.cc index d5fa4fb04..fc3ac8a90 100644 --- a/Carpet/CarpetInterp2/src/fasterp.cc +++ b/Carpet/CarpetInterp2/src/fasterp.cc @@ -15,6 +15,7 @@ # include "nompi.h" #endif +#include <cacheinfo.hh> #include <carpet.hh> #include <vect.hh> @@ -192,7 +193,10 @@ namespace CarpetInterp2 { int fasterp_src_loc_t:: calc_stencil (fasterp_iloc_t const & iloc, + ivect const & ash, +#ifdef CARPETINTERP2_CHECK ivect const & lsh, +#endif int const order) { assert (order <= max_order); @@ -203,8 +207,8 @@ namespace CarpetInterp2 { pn = iloc.pn; ipos = iloc.ipos; - // Save lsh - saved_lsh = lsh; + // Save ash + saved_ash = ash; #endif #ifndef NDEBUG @@ -290,9 +294,9 @@ namespace CarpetInterp2 { return -1; } #endif - ind3d = iloc.ind3d + index(lsh, iorigin); + ind3d = iloc.ind3d + index(ash, iorigin); #ifdef CARPETINTERP2_CHECK - assert (index(lsh,ind) == ind3d); + assert (index(ash,ind) == ind3d); #endif return 0; } @@ -310,26 +314,29 @@ namespace CarpetInterp2 { template <int O0, int O1, int O2> void fasterp_src_loc_t:: - interpolate (ivect const & lsh, + interpolate (ivect const & ash, +#ifdef CARPETINTERP2_CHECK + ivect const & lsh, +#endif vector<CCTK_REAL const *> const & varptrs, CCTK_REAL * restrict const vals) const { #ifdef CARPETINTERP2_CHECK - assert (all (lsh == saved_lsh)); + assert (all (ash == saved_ash)); #endif assert (O0 == 0 or not exact[0]); assert (O1 == 0 or not exact[1]); assert (O2 == 0 or not exact[2]); size_t const di = 1; - size_t const dj = di * lsh[0]; - size_t const dk = dj * lsh[1]; + size_t const dj = di * ash[0]; + size_t const dk = dj * ash[1]; #ifdef CARPETINTERP2_CHECK assert (all (ind>=0 and ind+ivect(O0,O1,O2)<lsh)); - assert (ind3d == index(lsh,ind)); - assert (int(di*ind[0] + dj*ind[1] + dk*ind[2]) == index(lsh,ind)); + assert (ind3d == index(ash,ind)); + assert (int(di*ind[0] + dj*ind[1] + dk*ind[2]) == index(ash,ind)); #endif for (size_t v=0; v<varptrs.size(); ++v) { @@ -377,7 +384,10 @@ namespace CarpetInterp2 { template <int O> void fasterp_src_loc_t:: - interpolate (ivect const & lsh, + interpolate (ivect const & ash, +#ifdef CARPETINTERP2_CHECK + ivect const & lsh, +#endif vector<CCTK_REAL const *> const & varptrs, CCTK_REAL * restrict const vals) const @@ -386,29 +396,29 @@ namespace CarpetInterp2 { if (exact[2]) { if (exact[1]) { if (exact[0]) { - interpolate<Z,Z,Z> (lsh, varptrs, vals); + interpolate<Z,Z,Z> (ash, lsh, varptrs, vals); } else { - interpolate<O,Z,Z> (lsh, varptrs, vals); + interpolate<O,Z,Z> (ash, lsh, varptrs, vals); } } else { if (exact[0]) { - interpolate<Z,O,Z> (lsh, varptrs, vals); + interpolate<Z,O,Z> (ash, lsh, varptrs, vals); } else { - interpolate<O,O,Z> (lsh, varptrs, vals); + interpolate<O,O,Z> (ash, lsh, varptrs, vals); } } } else { if (exact[1]) { if (exact[0]) { - interpolate<Z,Z,O> (lsh, varptrs, vals); + interpolate<Z,Z,O> (ash, lsh, varptrs, vals); } else { - interpolate<O,Z,O> (lsh, varptrs, vals); + interpolate<O,Z,O> (ash, lsh, varptrs, vals); } } else { if (exact[0]) { - interpolate<Z,O,O> (lsh, varptrs, vals); + interpolate<Z,O,O> (ash, lsh, varptrs, vals); } else { - interpolate<O,O,O> (lsh, varptrs, vals); + interpolate<O,O,O> (ash, lsh, varptrs, vals); } } } @@ -422,25 +432,28 @@ namespace CarpetInterp2 { // exact in each of the directions. void fasterp_src_loc_t:: - interpolate (ivect const & lsh, + interpolate (ivect const & ash, +#ifdef CARPETINTERP2_CHECK + ivect const & lsh, +#endif int const order, vector<CCTK_REAL const *> const & varptrs, CCTK_REAL * restrict const vals) const { switch (order) { - case 0: interpolate< 0> (lsh, varptrs, vals); break; - case 1: interpolate< 1> (lsh, varptrs, vals); break; - case 2: interpolate< 2> (lsh, varptrs, vals); break; - case 3: interpolate< 3> (lsh, varptrs, vals); break; - case 4: interpolate< 4> (lsh, varptrs, vals); break; - case 5: interpolate< 5> (lsh, varptrs, vals); break; - case 6: interpolate< 6> (lsh, varptrs, vals); break; - case 7: interpolate< 7> (lsh, varptrs, vals); break; - case 8: interpolate< 8> (lsh, varptrs, vals); break; - case 9: interpolate< 9> (lsh, varptrs, vals); break; - case 10: interpolate<10> (lsh, varptrs, vals); break; - case 11: interpolate<11> (lsh, varptrs, vals); break; + case 0: interpolate< 0> (ash, lsh, varptrs, vals); break; + case 1: interpolate< 1> (ash, lsh, varptrs, vals); break; + case 2: interpolate< 2> (ash, lsh, varptrs, vals); break; + case 3: interpolate< 3> (ash, lsh, varptrs, vals); break; + case 4: interpolate< 4> (ash, lsh, varptrs, vals); break; + case 5: interpolate< 5> (ash, lsh, varptrs, vals); break; + case 6: interpolate< 6> (ash, lsh, varptrs, vals); break; + case 7: interpolate< 7> (ash, lsh, varptrs, vals); break; + case 8: interpolate< 8> (ash, lsh, varptrs, vals); break; + case 9: interpolate< 9> (ash, lsh, varptrs, vals); break; + case 10: interpolate<10> (ash, lsh, varptrs, vals); break; + case 11: interpolate<11> (ash, lsh, varptrs, vals); break; default: // Add higher orders here as desired CCTK_WARN (CCTK_WARN_ABORT, @@ -477,7 +490,7 @@ namespace CarpetInterp2 { #endif os << "ind3d=" << ind3d; #ifdef CARPETINTERP2_CHECK - os << "," << "saved_lsh=" << saved_lsh; + os << "," << "saved_ash=" << saved_ash; #endif os << "}"; } @@ -495,8 +508,11 @@ namespace CarpetInterp2 { int fasterp_eno2_src_loc_t:: calc_stencil (fasterp_iloc_t const & iloc, + ivect const & ash, +#ifdef CARPETINTERP2_CHECK ivect const & lsh, - int const unused_order) +#endif + int /*order*/) { //assert (order <= max_order); CCTK_REAL const eps = 1.0e-12; @@ -506,8 +522,8 @@ namespace CarpetInterp2 { pn = iloc.pn; ipos = iloc.ipos; - // Save lsh - saved_lsh = lsh; + // Save ash + saved_ash = ash; #endif // first we setup 1st order stencil! @@ -640,9 +656,9 @@ namespace CarpetInterp2 { return -1; } #endif - ind3d = iloc.ind3d + index(lsh, iorigin); + ind3d = iloc.ind3d + index(ash, iorigin); #ifdef CARPETINTERP2_CHECK - assert (index(lsh,ind) == ind3d); + assert (index(ash,ind) == ind3d); #endif } @@ -719,26 +735,29 @@ namespace CarpetInterp2 { template <int O0, int O1, int O2> void fasterp_eno2_src_loc_t:: - interpolate (ivect const & lsh, + interpolate (ivect const & ash, +#ifdef CARPETINTERP2_CHECK + ivect const & lsh, +#endif vector<CCTK_REAL const *> const & varptrs, CCTK_REAL * restrict const vals) const { #ifdef CARPETINTERP2_CHECK - assert (all (lsh == saved_lsh)); + assert (all (ash == saved_ash)); #endif assert (O0 == 0 or not exact[0]); assert (O1 == 0 or not exact[1]); assert (O2 == 0 or not exact[2]); size_t const di = 1; - size_t const dj = di * lsh[0]; - size_t const dk = dj * lsh[1]; + size_t const dj = di * ash[0]; + size_t const dk = dj * ash[1]; #ifdef CARPETINTERP2_CHECK assert (all (ind>=0 and ind+ivect(O0,O1,O2)<lsh)); - assert (ind3d == index(lsh,ind)); - assert (int(di*ind[0] + dj*ind[1] + dk*ind[2]) == index(lsh,ind)); + assert (ind3d == index(ash,ind)); + assert (int(di*ind[0] + dj*ind[1] + dk*ind[2]) == index(ash,ind)); #endif for (size_t v=0; v<varptrs.size(); ++v) { @@ -869,7 +888,10 @@ namespace CarpetInterp2 { template <int O> void fasterp_eno2_src_loc_t:: - interpolate (ivect const & lsh, + interpolate (ivect const & ash, +#ifdef CARPETINTERP2_CHECK + ivect const & lsh, +#endif vector<CCTK_REAL const *> const & varptrs, CCTK_REAL * restrict const vals) const @@ -878,29 +900,29 @@ namespace CarpetInterp2 { if (exact[2]) { if (exact[1]) { if (exact[0]) { - interpolate<Z,Z,Z> (lsh, varptrs, vals); + interpolate<Z,Z,Z> (ash, lsh, varptrs, vals); } else { - interpolate<O,Z,Z> (lsh, varptrs, vals); + interpolate<O,Z,Z> (ash, lsh, varptrs, vals); } } else { if (exact[0]) { - interpolate<Z,O,Z> (lsh, varptrs, vals); + interpolate<Z,O,Z> (ash, lsh, varptrs, vals); } else { - interpolate<O,O,Z> (lsh, varptrs, vals); + interpolate<O,O,Z> (ash, lsh, varptrs, vals); } } } else { if (exact[1]) { if (exact[0]) { - interpolate<Z,Z,O> (lsh, varptrs, vals); + interpolate<Z,Z,O> (ash, lsh, varptrs, vals); } else { - interpolate<O,Z,O> (lsh, varptrs, vals); + interpolate<O,Z,O> (ash, lsh, varptrs, vals); } } else { if (exact[0]) { - interpolate<Z,O,O> (lsh, varptrs, vals); + interpolate<Z,O,O> (ash, lsh, varptrs, vals); } else { - interpolate<O,O,O> (lsh, varptrs, vals); + interpolate<O,O,O> (ash, lsh, varptrs, vals); } } } @@ -914,14 +936,17 @@ namespace CarpetInterp2 { // exact in each of the directions. void fasterp_eno2_src_loc_t:: - interpolate (ivect const & lsh, + interpolate (ivect const & ash, +#ifdef CARPETINTERP2_CHECK + ivect const & lsh, +#endif int const order, vector<CCTK_REAL const *> const & varptrs, CCTK_REAL * restrict const vals) const { switch (order) { - case 2: interpolate< 2> (lsh, varptrs, vals); break; + case 2: interpolate< 2> (ash, lsh, varptrs, vals); break; default: // Add higher orders here as desired CCTK_WARN (CCTK_WARN_ABORT, @@ -958,7 +983,7 @@ namespace CarpetInterp2 { #endif os << "ind3d=" << ind3d; #ifdef CARPETINTERP2_CHECK - os << "," << "saved_lsh=" << saved_lsh; + os << "," << "saved_ash=" << saved_ash; #endif os << "}"; } @@ -1173,8 +1198,8 @@ namespace CarpetInterp2 { assert (all (abs(dpos) <= rvect(0.5))); ivect const ind = ipos - ext.lower() / ext.stride(); - ivect const lsh = ext.shape() / ext.stride(); - int const ind3d = index(lsh, ind); + ivect const ash = pad_shape(ext); + int const ind3d = index(ash, ind); #if 0 ENTER_SINGLEMAP_MODE (cctkGH, m, CCTK_GF) { ENTER_LOCAL_MODE (cctkGH, c, CCTK_GF) { @@ -1426,6 +1451,7 @@ namespace CarpetInterp2 { assert (Carpet::vhh.AT(m)->is_local(rl,c)); ibbox const & ext = Carpet::vdd.AT(m)->light_boxes.AT(Carpet::mglevel).AT(rl).AT(c).exterior; + send_comp.ash = pad_shape(ext); send_comp.lsh = ext.shape() / ext.stride(); send_comp.offset = offset; @@ -1448,7 +1474,8 @@ namespace CarpetInterp2 { //fasterp_src_loc_t sloc; FASTERP sloc; - int const ierr = sloc.calc_stencil (iloc, send_comp.lsh, order); + int const ierr = + sloc.calc_stencil (iloc, send_comp.ash, send_comp.lsh, order); if (ierr) { CCTK_VWarn (CCTK_WARN_ABORT, __LINE__, __FILE__, CCTK_THORNSTRING, "Could not determine valid interpolation stencil for point %d on map %d, refinement level %d, component %d", @@ -1482,7 +1509,7 @@ namespace CarpetInterp2 { for (int n=0; n<send_comp.npoints; ++n) { FASTERP const & sloc = send_comp.locs.at(n); assert (sloc.mrc == send_comp.mrc); - assert (all (sloc.saved_lsh == send_comp.lsh)); + assert (all (sloc.saved_ash == send_comp.ash)); } } #endif @@ -1680,7 +1707,8 @@ namespace CarpetInterp2 { for (int n=0; n<int(send_comp.locs.size()); ++n) { size_t const ind = (send_comp.offset + n) * nvars; send_comp.locs.AT(n).interpolate - (send_comp.lsh, order, varptrs, &computed_points.AT(ind)); + (send_comp.ash, send_comp.lsh, + order, varptrs, &computed_points.AT(ind)); #ifdef CARPETINTERP2_CHECK computed_pn.AT(send_comp.offset + n) = send_comp.locs.AT(n).pn; #endif diff --git a/Carpet/CarpetInterp2/src/fasterp.hh b/Carpet/CarpetInterp2/src/fasterp.hh index 081b1c82d..37deea70e 100644 --- a/Carpet/CarpetInterp2/src/fasterp.hh +++ b/Carpet/CarpetInterp2/src/fasterp.hh @@ -191,17 +191,23 @@ namespace CarpetInterp2 { #ifdef CARPETINTERP2_CHECK public: - ivect saved_lsh; // copy of lsh + ivect saved_ash; // copy of ash private: #endif public: int calc_stencil (fasterp_iloc_t const & iloc, + ivect const & ash, +#ifdef CARPETINTERP2_CHECK ivect const & lsh, +#endif int order); void - interpolate (ivect const & lsh, + interpolate (ivect const & ash, +#ifdef CARPETINTERP2_CHECK + ivect const & lsh, +#endif int order, vector<CCTK_REAL const *> const & varptrs, CCTK_REAL * restrict vals) @@ -210,13 +216,19 @@ namespace CarpetInterp2 { private: template <int O> void - interpolate (ivect const & lsh, + interpolate (ivect const & ash, +#ifdef CARPETINTERP2_CHECK + ivect const & lsh, +#endif vector<CCTK_REAL const *> const & varptrs, CCTK_REAL * restrict vals) const; template <int O0, int O1, int O2> void - interpolate (ivect const & lsh, + interpolate (ivect const & ash, +#ifdef CARPETINTERP2_CHECK + ivect const & lsh, +#endif vector<CCTK_REAL const *> const & varptrs, CCTK_REAL * restrict vals) const; @@ -251,17 +263,23 @@ namespace CarpetInterp2 { #ifdef CARPETINTERP2_CHECK public: - ivect saved_lsh; // copy of lsh + ivect saved_ash; // copy of ash private: #endif public: int calc_stencil (fasterp_iloc_t const & iloc, + ivect const & ash, +#ifdef CARPETINTERP2_CHECK ivect const & lsh, - int unused_order); +#endif + int /*order*/); void - interpolate (ivect const & lsh, + interpolate (ivect const & ash, +#ifdef CARPETINTERP2_CHECK + ivect const & lsh, +#endif int order, vector<CCTK_REAL const *> const & varptrs, CCTK_REAL * restrict vals) @@ -270,13 +288,19 @@ namespace CarpetInterp2 { private: template <int O> void - interpolate (ivect const & lsh, + interpolate (ivect const & ash, +#ifdef CARPETINTERP2_CHECK + ivect const & lsh, +#endif vector<CCTK_REAL const *> const & varptrs, CCTK_REAL * restrict vals) const; template <int O0, int O1, int O2> void - interpolate (ivect const & lsh, + interpolate (ivect const & ash, +#ifdef CARPETINTERP2_CHECK + ivect const & lsh, +#endif vector<CCTK_REAL const *> const & varptrs, CCTK_REAL * restrict vals) const; @@ -317,7 +341,10 @@ namespace CarpetInterp2 { vector<FASTERP> locs; mrc_t mrc; // source map, refinement level, component + ivect ash; +#ifdef CARPETINTERP2_CHECK ivect lsh; +#endif int offset; int npoints; }; diff --git a/Carpet/CarpetLib/configuration.ccl b/Carpet/CarpetLib/configuration.ccl index 17ffa1f79..a77f237fc 100644 --- a/Carpet/CarpetLib/configuration.ccl +++ b/Carpet/CarpetLib/configuration.ccl @@ -8,7 +8,7 @@ PROVIDES CarpetLib REQUIRES MPI Vectors -OPTIONAL LoopControl +OPTIONAL LoopControl MPI { } diff --git a/Carpet/CarpetLib/interface.ccl b/Carpet/CarpetLib/interface.ccl index 12cba170c..6495fdec3 100644 --- a/Carpet/CarpetLib/interface.ccl +++ b/Carpet/CarpetLib/interface.ccl @@ -4,6 +4,7 @@ IMPLEMENTS: CarpetLib includes header: mpi_string.hh in mpi_string.hh +includes header: cacheinfo.hh in cacheinfo.hh includes header: defs.hh in defs.hh includes header: dist.hh in dist.hh includes header: typecase.hh in typecase.hh diff --git a/Carpet/CarpetLib/param.ccl b/Carpet/CarpetLib/param.ccl index c559f1604..c13d54ba1 100644 --- a/Carpet/CarpetLib/param.ccl +++ b/Carpet/CarpetLib/param.ccl @@ -159,13 +159,13 @@ BOOLEAN combine_recompose "Recompose all grid functions of one refinement levels -# Memory allocation parameters - -INT avoid_arraysize_bytes "Avoid array sizes that are multiples of this" STEERABLE=recover -{ - 0 :: "don't avoid anything" - 2:* :: "" -} 0 +# # Memory allocation parameters +# +# INT avoid_arraysize_bytes "Avoid array sizes that are multiples of this" STEERABLE=recover +# { +# 0 :: "don't avoid anything" +# 2:* :: "" +# } 0 @@ -211,6 +211,103 @@ BOOLEAN use_mpi_ssend "Use MPI_Ssend instead of MPI_Isend" STEERABLE=always +# Memory and cache information -- this is machine dependent and should +# be determined at run time or set via simfactory + +CCTK_INT vector_size "vector size" STEERABLE=recover +{ + 0 :: "unknown" + 1:* :: "" +} 0 + +CCTK_INT D1size "level 1 data cache" STEERABLE=recover +{ + 0 :: "unknown" + 1:* :: "" +} 0 +CCTK_INT D1linesize "level 1 data cache" STEERABLE=recover +{ + 0 :: "unknown" + 1:* :: "" +} 0 +CCTK_INT D1assoc "level 1 data cache" STEERABLE=recover +{ + 0 :: "unknown" + 1:* :: "" +} 0 + +CCTK_INT L2size "level 2 unified cache" STEERABLE=recover +{ + 0 :: "unknown" + 1:* :: "" +} 0 +CCTK_INT L2linesize "level 2 unified cache" STEERABLE=recover +{ + 0 :: "unknown" + 1:* :: "" +} 0 +CCTK_INT L2assoc "level 2 unified cache" STEERABLE=recover +{ + 0 :: "unknown" + 1:* :: "" +} 0 + +CCTK_INT L3size "level 3 unified cache" STEERABLE=recover +{ + 0 :: "unknown" + 1:* :: "" +} 0 +CCTK_INT L3linesize "level 3 unified cache" STEERABLE=recover +{ + 0 :: "unknown" + 1:* :: "" +} 0 +CCTK_INT L3assoc "level 3 unified cache" STEERABLE=recover +{ + 0 :: "unknown" + 1:* :: "" +} 0 + +CCTK_INT TLB_D1entries "level 1 TLB cache" STEERABLE=recover +{ + 0 :: "unknown" + 1:* :: "" +} 0 +CCTK_INT TLB_D1pagesize "level 1 TLB cache" STEERABLE=recover +{ + 0 :: "unknown" + 1:* :: "" +} 0 +CCTK_INT TLB_D1assoc "level 1 TLB cache" STEERABLE=recover +{ + 0 :: "unknown" + 1:* :: "" +} 0 + +CCTK_INT TLB_L2entries "level 2 TLB cache" STEERABLE=recover +{ + 0 :: "unknown" + 1:* :: "" +} 0 +CCTK_INT TLB_L2pagesize "level 2 TLB cache" STEERABLE=recover +{ + 0 :: "unknown" + 1:* :: "" +} 0 +CCTK_INT TLB_L2assoc "level 2 TLB cache" STEERABLE=recover +{ + 0 :: "unknown" + 1:* :: "" +} 0 + +CCTK_INT pagesize "size of a memory page" STEERABLE=recover +{ + 0 :: "unknown" + 1:* :: "" +} 0 + + + SHARES: IO USES STRING out_dir diff --git a/Carpet/CarpetLib/src/cacheinfo.cc b/Carpet/CarpetLib/src/cacheinfo.cc new file mode 100644 index 000000000..897cb533e --- /dev/null +++ b/Carpet/CarpetLib/src/cacheinfo.cc @@ -0,0 +1,162 @@ +#include "cacheinfo.hh" + +#include <cctk_Parameters.h> + +#include <vectors.h> + + + +template<int D> +vect<int,D> +pad_shape (bbox<int,D> const& extent) +{ + assert (all (extent.shape() >= 0)); + return pad_shape(extent.shape() / extent.stride()); +} + + + +template<int D> +vect<int,D> +pad_shape (vect<int,D> const& shape) +{ + DECLARE_CCTK_PARAMETERS; + + assert (all(shape>=0)); + + static bool have_cacheinfo = false; + static vector<cacheinfo_t> cacheinfo; + if (not have_cacheinfo) { + // Ignore L1 caches that are probably too small to be useful (e.g. + // on Intel or AMD processors) + // TODO: make this a parameter + if (D1size >= 128*1024) { + cacheinfo.push_back(cacheinfo_t(D1size, D1linesize, D1assoc)); + } +#if 0 + // TODO: this is too simplistic: + // Add page size as a cache + if (pagesize>0) { + cacheinfo.push_back(cacheinfo_t(pagesize)); + } +#endif + if (L2size>0) { + cacheinfo.push_back(cacheinfo_t(L2size, L2linesize, L2assoc)); + } + if (L3size>0) { + cacheinfo.push_back(cacheinfo_t(L3size, L3linesize, L3assoc)); + } + if (TLB_D1entries>0) { + ptrdiff_t const TLB_D1size = TLB_D1entries * TLB_D1pagesize * TLB_D1assoc; + cacheinfo.push_back(cacheinfo_t(TLB_D1size, TLB_D1pagesize, TLB_D1assoc)); + } + if (TLB_L2entries>0) { + ptrdiff_t const TLB_L2size = TLB_L2entries * TLB_L2pagesize * TLB_L2assoc; + cacheinfo.push_back(cacheinfo_t(TLB_L2size, TLB_L2pagesize, TLB_L2assoc)); + } + + // TODO: sort caches by their sizes + for (size_t n=0; n<cacheinfo.size(); ++n) { + cacheinfo_t const& ci = cacheinfo.at(n); + if (n>0) { + // Ensure that the cache size is larger than the next lower + // cache size + assert (ci.size() > cacheinfo.at(n-1).size()); + // Ensure that the cache line size is evenly divided by the + // next lower cache line size + assert (ci.linesize() % cacheinfo.at(n-1).linesize() == 0); + assert (ci.stride() > cacheinfo.at(n-1).stride()); + } + } // for cacheinfo + + have_cacheinfo = true; + } // if not have_cacheinfo + + vect<int,D> padded_shape; + int accumulated_npoints = 1; + for (int d=0; d<D; ++d) { + int npoints = shape[d]; + + if (d == 0) { +#if VECTORISE && VECTORISE_ALIGNED_ARRAYS + // Pad array to a multiple of the vector size. Note that this is + // a hard requirement, so that we can emit aligned load/store + // operations. + npoints = align_up (npoints, CCTK_REAL_VEC_SIZE); +#endif + if (vector_size > 0) { + npoints = align_up (npoints, vector_size); + } + } + + for (size_t n=0; n<cacheinfo.size(); ++n) { + cacheinfo_t const& ci = cacheinfo.at(n); + + // Pad array in this direction to a multiple of this cache line + // size + assert (ci.linesize() % sizeof(CCTK_REAL) == 0); + int const linesize = ci.linesize() / sizeof(CCTK_REAL); + assert (is_power_of_2(linesize)); + if (npoints * accumulated_npoints >= linesize) { + // The extent is at least one cache line long: round up to the + // next full cache line + npoints = align_up (npoints, linesize); + } else { +#if 0 + // The extent is less than one cache line long: Ensure that + // the array size divides the cache line size evenly by + // rounding to the next power of 2 + // NOTE: This is disabled, since this would align everything + // to powers of 2. + npoints = next_power_of_2(npoints); +#endif + } + + // Avoid multiples of the cache stride + assert (ci.stride() % sizeof(CCTK_REAL) == 0); + int const stride = ci.stride() / sizeof(CCTK_REAL); + if (npoints * accumulated_npoints % stride == 0) { + assert (linesize < stride); + npoints += linesize; + } + + } // for cacheinfo + + padded_shape[d] = npoints; + accumulated_npoints *= npoints; + } + assert (prod (padded_shape) == accumulated_npoints); + + // self-check + for (int d=0; d<D; ++d) { + assert (padded_shape[d] >= shape[d]); +#if VECTORISE && VECTORISE_ALIGNED_ARRAYS + if (d == 0) { + assert (padded_shape[d] % CCTK_REAL_VEC_SIZE == 0); + } +#endif + if (vector_size > 0) { + if (d == 0) { + assert (padded_shape[d] % vector_size == 0); + } + } + + // TODO: add self-checks for the other requirements as well + } + + if (verbose) { + ostringstream buf; + buf << "padding " << shape << " to " << padded_shape; + CCTK_INFO (buf.str().c_str()); + } + + return padded_shape; +} + + + +template vect<int,3> pad_shape (bbox<int,3> const& extent); +template vect<int,3> pad_shape (vect<int,3> const& shape); + +template vect<int,4> pad_shape (bbox<int,4> const& extent); +template vect<int,4> pad_shape (vect<int,4> const& shape); diff --git a/Carpet/CarpetLib/src/cacheinfo.hh b/Carpet/CarpetLib/src/cacheinfo.hh new file mode 100644 index 000000000..14b464a86 --- /dev/null +++ b/Carpet/CarpetLib/src/cacheinfo.hh @@ -0,0 +1,158 @@ +#ifndef CACHEINFO_HH +#define CACHEINFO_HH + +#include <cctk.h> + +#include <limits> + +#include "bbox.hh" +#include "defs.hh" +#include "vect.hh" + + + +static ptrdiff_t div_down (ptrdiff_t const x, ptrdiff_t const align) + CCTK_ATTRIBUTE_UNUSED; +static ptrdiff_t div_down (ptrdiff_t const x, ptrdiff_t const align) +{ + assert (x >= 0); + assert (align > 0); + return x / align; +} + +static ptrdiff_t div_up (ptrdiff_t const x, ptrdiff_t const align) + CCTK_ATTRIBUTE_UNUSED; +static ptrdiff_t div_up (ptrdiff_t const x, ptrdiff_t const align) +{ + assert (x >= 0); + assert (align > 0); + return (x + align - 1) / align; +} + +static ptrdiff_t align_down (ptrdiff_t const x, ptrdiff_t const align) + CCTK_ATTRIBUTE_UNUSED; +static ptrdiff_t align_down (ptrdiff_t const x, ptrdiff_t const align) +{ + assert (x >= 0); + assert (align > 0); + return div_down(x, align) * align; +} + +static ptrdiff_t align_up (ptrdiff_t const x, ptrdiff_t const align) + CCTK_ATTRIBUTE_UNUSED; +static ptrdiff_t align_up (ptrdiff_t const x, ptrdiff_t const align) +{ + assert (x >= 0); + assert (align > 0); + return div_up(x, align) * align; +} + +static ptrdiff_t next_power_of_2 (ptrdiff_t const x) + CCTK_ATTRIBUTE_UNUSED; +static ptrdiff_t next_power_of_2 (ptrdiff_t const x) +{ + assert (x > 0); + ptrdiff_t res = 1; + while (res < x) { + res *= 2; + assert (res > 0); // try to catch overflows + } + return res; +} + +static ptrdiff_t previous_power_of_2 (ptrdiff_t const x) + CCTK_ATTRIBUTE_UNUSED; +static ptrdiff_t previous_power_of_2 (ptrdiff_t const x) +{ + return next_power_of_2(x/2+1); +} + +static bool is_power_of_2 (ptrdiff_t const x) + CCTK_ATTRIBUTE_UNUSED; +static bool is_power_of_2 (ptrdiff_t const x) +{ + return x == next_power_of_2(x); +} + +#if 0 +static ptrdiff_t lcm (ptrdiff_t const x, ptrdiff_t const y) + CCTK_ATTRIBUTE_UNUSED; +static ptrdiff_t lcm (ptrdiff_t const x, ptrdiff_t const y) +{ + assert (x > 0 && y > 0); + ptrdiff_t z = x; + // TODO: improve LCM algorithm + while (z % y != 0) z += x; + assert (z % x == 0 && z % y == 0); + return z; +} +#endif + + + +class cacheinfo_t { + ptrdiff_t m_size; // bytes + ptrdiff_t m_linesize; // bytes (pagesize for TLBs) + ptrdiff_t m_associativity; +public: + bool invariant () const + { + return + is_power_of_2(m_size) and + is_power_of_2(m_linesize) and + is_power_of_2(m_associativity) and + m_size % (m_linesize * m_associativity) == 0; + } + cacheinfo_t (ptrdiff_t const a_size, + ptrdiff_t const a_linesize, + ptrdiff_t const a_associativity) + : m_size (a_size), + m_linesize (a_linesize), + m_associativity (a_associativity) + { + assert (invariant()); + } + cacheinfo_t (ptrdiff_t const a_linesize) + : m_size (previous_power_of_2(numeric_limits<ptrdiff_t>::max())), + m_linesize (a_linesize), + m_associativity (1) + { + assert (invariant()); + } + // size in bytes + ptrdiff_t size() const + { + return m_size; + } + // line size in bytes + ptrdiff_t linesize() const + { + return m_linesize; + } + // associativity + ptrdiff_t associativity() const + { + return m_associativity; + } + // number of cache elements + ptrdiff_t num_elements() const + { + return size() / (linesize() * associativity()); + } + // stride (between main memory locations that use the same cache + // element) in bytes + ptrdiff_t stride() const + { + return num_elements() * linesize(); + } +}; + + + +// These routines are apparently not pure -- don't know why +template<int D> +vect<int,D> pad_shape (bbox<int,D> const& extent) /*CCTK_ATTRIBUTE_PURE*/; +template<int D> +vect<int,D> pad_shape (vect<int,D> const& shape) /*CCTK_ATTRIBUTE_PURE*/; + +#endif // CACHEINFO_HH diff --git a/Carpet/CarpetLib/src/copy_3d.cc b/Carpet/CarpetLib/src/copy_3d.cc index 11eedc0c2..7cb4ddb80 100644 --- a/Carpet/CarpetLib/src/copy_3d.cc +++ b/Carpet/CarpetLib/src/copy_3d.cc @@ -22,9 +22,11 @@ namespace CarpetLib { #define SRCIND3(i,j,k) \ index3 (srcioff + (i), srcjoff + (j), srckoff + (k), \ + srcipadext, srcjpadext, srckpadext, \ srciext, srcjext, srckext) #define DSTIND3(i,j,k) \ index3 (dstioff + (i), dstjoff + (j), dstkoff + (k), \ + dstipadext, dstjpadext, dstkpadext, \ dstiext, dstjext, dstkext) @@ -32,8 +34,10 @@ namespace CarpetLib { template <typename T> void copy_3d (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -48,7 +52,6 @@ namespace CarpetLib { dstbbox.stride() != dstregbbox.stride() or srcregbbox1.stride() != dstregbbox.stride())) { -#pragma omp critical { cout << "copy_3d.cc:" << endl << "srcbbox=" << srcbbox << endl @@ -66,7 +69,6 @@ namespace CarpetLib { // This could be handled, but is likely to point to an error // elsewhere if (dstregbbox.empty()) { -#pragma omp critical CCTK_WARN (0, "Internal error: region extent is empty"); } @@ -79,7 +81,6 @@ namespace CarpetLib { if (not srcregbbox.is_contained_in(srcbbox) or not dstregbbox.is_contained_in(dstbbox)) { -#pragma omp critical { cout << "copy_3d.cc:" << endl << "srcbbox=" << srcbbox << endl @@ -93,13 +94,6 @@ namespace CarpetLib { } } - if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or - dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride()))) - { -#pragma omp critical - CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); - } - ivect3 const regext = dstregbbox.shape() / dstregbbox.stride(); @@ -110,6 +104,14 @@ namespace CarpetLib { + ptrdiff_t const srcipadext = srcpadext[0]; + ptrdiff_t const srcjpadext = srcpadext[1]; + ptrdiff_t const srckpadext = srcpadext[2]; + + ptrdiff_t const dstipadext = dstpadext[0]; + ptrdiff_t const dstjpadext = dstpadext[1]; + ptrdiff_t const dstkpadext = dstpadext[2]; + ptrdiff_t const srciext = srcext[0]; ptrdiff_t const srcjext = srcext[1]; ptrdiff_t const srckext = srcext[2]; @@ -152,8 +154,10 @@ namespace CarpetLib { template \ void \ copy_3d (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ diff --git a/Carpet/CarpetLib/src/copy_4d.cc b/Carpet/CarpetLib/src/copy_4d.cc index 7aed29aaf..b0cc024f4 100644 --- a/Carpet/CarpetLib/src/copy_4d.cc +++ b/Carpet/CarpetLib/src/copy_4d.cc @@ -7,7 +7,6 @@ #include <cstdlib> #include <iostream> -#include "gdata.hh" #include "operator_prototypes_4d.hh" #include "typeprops.hh" @@ -21,9 +20,11 @@ namespace CarpetLib { #define SRCIND4(i,j,k,l) \ index4 (srcioff + (i), srcjoff + (j), srckoff + (k), srcloff + (l), \ + srcipadext, srcjpadext, srckpadext, srclpadext, \ srciext, srcjext, srckext, srclext) -#define DSTIND4(i,j,k,l) \ - index4 (dstioff + (i), dstjoff + (j), dstkoff + (k), dstloff + (l), \ +#define DSTIND4(i,j,k,l) \ + index4 (dstioff + (i), dstjoff + (j), dstkoff + (k), dstloff + (l), \ + dstipadext, dstjpadext, dstkpadext, dstlpadext, \ dstiext, dstjext, dstkext, dstlext) @@ -31,8 +32,10 @@ namespace CarpetLib { template <typename T> void copy_4d (T const * restrict const src, + ivect4 const & restrict srcpadext, ivect4 const & restrict srcext, T * restrict const dst, + ivect4 const & restrict dstpadext, ivect4 const & restrict dstext, ibbox4 const & restrict srcbbox, ibbox4 const & restrict dstbbox, @@ -68,12 +71,6 @@ namespace CarpetLib { CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); } - if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or - dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride()))) - { - CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); - } - ivect4 const regext = regbbox.shape() / regbbox.stride(); @@ -84,6 +81,16 @@ namespace CarpetLib { + ptrdiff_t const srcipadext = srcpadext[0]; + ptrdiff_t const srcjpadext = srcpadext[1]; + ptrdiff_t const srckpadext = srcpadext[2]; + ptrdiff_t const srclpadext = srcpadext[3]; + + ptrdiff_t const dstipadext = dstpadext[0]; + ptrdiff_t const dstjpadext = dstpadext[1]; + ptrdiff_t const dstkpadext = dstpadext[2]; + ptrdiff_t const dstlpadext = dstpadext[3]; + ptrdiff_t const srciext = srcext[0]; ptrdiff_t const srcjext = srcext[1]; ptrdiff_t const srckext = srcext[2]; @@ -133,8 +140,10 @@ namespace CarpetLib { template \ void \ copy_4d (T const * restrict const src, \ + ivect4 const & restrict srcpadext, \ ivect4 const & restrict srcext, \ T * restrict const dst, \ + ivect4 const & restrict dstpadext, \ ivect4 const & restrict dstext, \ ibbox4 const & restrict srcbbox, \ ibbox4 const & restrict dstbbox, \ diff --git a/Carpet/CarpetLib/src/data.cc b/Carpet/CarpetLib/src/data.cc index 47cb392bf..775f5ec50 100644 --- a/Carpet/CarpetLib/src/data.cc +++ b/Carpet/CarpetLib/src/data.cc @@ -18,6 +18,7 @@ #include "bbox.hh" #include "bboxset.hh" +#include "cacheinfo.hh" #include "defs.hh" #include "dist.hh" #include "timestat.hh" @@ -37,8 +38,10 @@ static void call_operator (void (* the_operator) (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -46,8 +49,10 @@ call_operator (void ibbox3 const & restrict dstregbbox, void * const extraargs), T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -90,7 +95,7 @@ call_operator (void dstregbbox.stride()); if (not mydstregbbox.empty()) { (* the_operator) - (src, srcext, dst, dstext, srcbbox, dstbbox, + (src, srcpadext, srcext, dst, dstpadext, dstext, srcbbox, dstbbox, srcregbbox, mydstregbbox, extraargs); # ifdef CARPET_DEBUG #pragma omp critical @@ -113,8 +118,10 @@ static void call_operator (void (* the_operator) (T const * restrict const src, + ivect4 const & restrict srcpadext, ivect4 const & restrict srcext, T * restrict const dst, + ivect4 const & restrict dstpadext, ivect4 const & restrict dstext, ibbox4 const & restrict srcbbox, ibbox4 const & restrict dstbbox, @@ -122,8 +129,10 @@ call_operator (void ibbox4 const & restrict dstregbbox, void * const extraargs), T const * restrict const src, + ivect4 const & restrict srcpadext, ivect4 const & restrict srcext, T * restrict const dst, + ivect4 const & restrict dstpadext, ivect4 const & restrict dstext, ibbox4 const & restrict srcbbox, ibbox4 const & restrict dstbbox, @@ -133,7 +142,7 @@ call_operator (void { #ifndef _OPENMP (* the_operator) - (src, srcext, dst, dstext, srcbbox, dstbbox, + (src, srcpadext, srcext, dst, dstpadext, dstext, srcbbox, dstbbox, srcregbbox, dstregbbox, extraargs); #else # if ! defined (NDEBUG) && ! defined (CARPET_OPTIMISE) @@ -166,7 +175,7 @@ call_operator (void dstregbbox.stride()); if (not mydstregbbox.empty()) { (* the_operator) - (src, srcext, dst, dstext, srcbbox, dstbbox, + (src, srcpadext, srcext, dst, dstpadext, dstext, srcbbox, dstbbox, srcregbbox, mydstregbbox, extraargs); # if ! defined (NDEBUG) && ! defined (CARPET_OPTIMISE) #pragma omp critical @@ -191,8 +200,10 @@ call_operator (void template <typename T> void prolongate_3d_eno (T const * restrict const /*src*/, + ivect3 const & /*srcpadext*/, ivect3 const & /*srcext*/, T * restrict const /*dst*/, + ivect3 const & /*dstpadext*/, ivect3 const & /*dstext*/, ibbox3 const & /*srcbbox*/, ibbox3 const & /*dstbbox*/, @@ -207,8 +218,10 @@ extern "C" void CCTK_FCALL CCTK_FNAME(prolongate_3d_real8_eno) (const CCTK_REAL8* src, + const int& srcipadext, const int& srcjpadext, const int& srckpadext, const int& srciext, const int& srcjext, const int& srckext, CCTK_REAL8* dst, + const int& dstipadext, const int& dstjpadext, const int& dstkpadext, const int& dstiext, const int& dstjext, const int& dstkext, const int srcbbox[3][3], const int dstbbox[3][3], @@ -217,8 +230,10 @@ CCTK_FCALL CCTK_FNAME(prolongate_3d_real8_eno) template <> void prolongate_3d_eno (CCTK_REAL8 const * restrict const src, + ivect3 const & srcpadext, ivect3 const & srcext, CCTK_REAL8 * restrict const dst, + ivect3 const & dstpadext, ivect3 const & dstext, ibbox3 const & srcbbox, ibbox3 const & dstbbox, @@ -229,8 +244,10 @@ prolongate_3d_eno (CCTK_REAL8 const * restrict const src, assert (not extraargs); CCTK_FNAME(prolongate_3d_real8_eno) (src, + srcpadext[0], srcpadext[1], srcpadext[2], srcext[0], srcext[1], srcext[2], dst, + dstpadext[0], dstpadext[1], dstpadext[2], dstext[0], dstext[1], dstext[2], reinterpret_cast <int const (*) [3]> (& srcbbox), reinterpret_cast <int const (*) [3]> (& dstbbox), @@ -242,8 +259,10 @@ prolongate_3d_eno (CCTK_REAL8 const * restrict const src, template <typename T> void prolongate_3d_weno (T const * restrict const /*src*/, + ivect3 const & /*srcpadext*/, ivect3 const & /*srcext*/, T * restrict const /*dst*/, + ivect3 const & /*dstpadext*/, ivect3 const & /*dstext*/, ibbox3 const & /*srcbbox*/, ibbox3 const & /*dstbbox*/, @@ -258,8 +277,10 @@ extern "C" void CCTK_FCALL CCTK_FNAME(prolongate_3d_real8_weno) (const CCTK_REAL8* src, + const int& srcipadext, const int& srcjpadext, const int& srckpadext, const int& srciext, const int& srcjext, const int& srckext, CCTK_REAL8* dst, + const int& dstipadext, const int& dstjpadext, const int& dstkpadext, const int& dstiext, const int& dstjext, const int& dstkext, const int srcbbox[3][3], const int dstbbox[3][3], @@ -268,8 +289,10 @@ CCTK_FCALL CCTK_FNAME(prolongate_3d_real8_weno) template <> void prolongate_3d_weno (CCTK_REAL8 const * restrict const src, + ivect3 const & srcpadext, ivect3 const & srcext, CCTK_REAL8 * restrict const dst, + ivect3 const & dstpadext, ivect3 const & dstext, ibbox3 const & srcbbox, ibbox3 const & dstbbox, @@ -279,8 +302,10 @@ prolongate_3d_weno (CCTK_REAL8 const * restrict const src, { CCTK_FNAME(prolongate_3d_real8_weno) (src, + srcpadext[0], srcpadext[1], srcpadext[2], srcext[0], srcext[1], srcext[2], dst, + dstpadext[0], dstpadext[1], dstpadext[2], dstext[0], dstext[1], dstext[2], reinterpret_cast <int const (*) [3]> (& srcbbox), reinterpret_cast <int const (*) [3]> (& dstbbox), @@ -292,8 +317,10 @@ prolongate_3d_weno (CCTK_REAL8 const * restrict const src, template <typename T> void prolongate_3d_tvd (T const * restrict const /*src*/, + ivect3 const & /*srcpadext*/, ivect3 const & /*srcext*/, T * restrict const /*dst*/, + ivect3 const & /*dstpadext*/, ivect3 const & /*dstext*/, ibbox3 const & /*srcbbox*/, ibbox3 const & /*dstbbox*/, @@ -309,8 +336,10 @@ extern "C" void CCTK_FCALL CCTK_FNAME(prolongate_3d_real8_tvd) (const CCTK_REAL8* src, + const int& srcipadext, const int& srcjpadext, const int& srckpadext, const int& srciext, const int& srcjext, const int& srckext, CCTK_REAL8* dst, + const int& dstipadext, const int& dstjpadext, const int& dstkpadext, const int& dstiext, const int& dstjext, const int& dstkext, const int srcbbox[3][3], const int dstbbox[3][3], @@ -319,8 +348,10 @@ CCTK_FCALL CCTK_FNAME(prolongate_3d_real8_tvd) template <> void prolongate_3d_tvd (CCTK_REAL8 const * restrict const src, + ivect3 const & srcpadext, ivect3 const & srcext, CCTK_REAL8 * restrict const dst, + ivect3 const & dstpadext, ivect3 const & dstext, ibbox3 const & srcbbox, ibbox3 const & dstbbox, @@ -331,8 +362,10 @@ prolongate_3d_tvd (CCTK_REAL8 const * restrict const src, assert (not extraargs); CCTK_FNAME(prolongate_3d_real8_tvd) (src, + srcpadext[0], srcpadext[1], srcpadext[2], srcext[0], srcext[1], srcext[2], dst, + dstpadext[0], dstpadext[1], dstpadext[2], dstext[0], dstext[1], dstext[2], reinterpret_cast <int const (*) [3]> (& srcbbox), reinterpret_cast <int const (*) [3]> (& dstbbox), @@ -345,8 +378,10 @@ prolongate_3d_tvd (CCTK_REAL8 const * restrict const src, template <typename T> void prolongate_3d_cc_tvd (T const * restrict const /*src*/, + ivect3 const & /*srcpadext*/, ivect3 const & /*srcext*/, T * restrict const /*dst*/, + ivect3 const & /*dstpadext*/, ivect3 const & /*dstext*/, ibbox3 const & /*srcbbox*/, ibbox3 const & /*dstbbox*/, @@ -362,8 +397,10 @@ extern "C" void CCTK_FCALL CCTK_FNAME(prolongate_3d_cc_real8_tvd) (const CCTK_REAL8* src, + const int& srcipadext, const int& srcjpadext, const int& srckpadext, const int& srciext, const int& srcjext, const int& srckext, CCTK_REAL8* dst, + const int& dstipadext, const int& dstjpadext, const int& dstkpadext, const int& dstiext, const int& dstjext, const int& dstkext, const int srcbbox[3][3], const int dstbbox[3][3], @@ -372,8 +409,10 @@ CCTK_FCALL CCTK_FNAME(prolongate_3d_cc_real8_tvd) template <> void prolongate_3d_cc_tvd (CCTK_REAL8 const * restrict const src, + ivect3 const & srcpadext, ivect3 const & srcext, CCTK_REAL8 * restrict const dst, + ivect3 const & dstpadext, ivect3 const & dstext, ibbox3 const & srcbbox, ibbox3 const & dstbbox, @@ -384,8 +423,10 @@ prolongate_3d_cc_tvd (CCTK_REAL8 const * restrict const src, assert (not extraargs); CCTK_FNAME(prolongate_3d_cc_real8_tvd) (src, + srcpadext[0], srcpadext[1], srcpadext[2], srcext[0], srcext[1], srcext[2], dst, + dstpadext[0], dstpadext[1], dstpadext[2], dstext[0], dstext[1], dstext[2], reinterpret_cast <int const (*) [3]> (& srcbbox), reinterpret_cast <int const (*) [3]> (& dstbbox), @@ -466,16 +507,13 @@ void data<T>::allocate (const ibbox& extent_, // data _extent = extent_; - //_shape = max (ivect(0), _extent.shape() / _extent.stride()); - _shape = allocated_memory_shape (_extent); - assert (all (_shape >= max (ivect(0), _extent.shape() / _extent.stride()))); - - _size = 1; - for (int d=0; d<dim; ++d) { - _stride[d] = _size; - assert (_shape[d]==0 or _size <= numeric_limits<int>::max() / _shape[d]); - _size *= _shape[d]; + _shape = max (ivect(0), _extent.shape() / _extent.stride()); + _padded_shape = pad_shape(_shape); + _stride[0] = 1; + for (int d=1; d<dim; ++d) { + _stride[d] = _stride[d-1] * _padded_shape[d-1]; } + _size = _stride[dim-1] * _padded_shape[dim-1]; _proc = proc_; if (dist::rank() == _proc) { @@ -512,9 +550,7 @@ size_t data<T>::allocsize (const ibbox & extent_, const int proc_) const if (dist::rank() != proc_) return 0; if (vectorindex != 0) return 0; assert (not vectorleader); - ivect const shape_ = allocated_memory_shape (extent_); - //return vectorlength * extent_.size() * sizeof (T); - return vectorlength * prod(shape_) * sizeof (T); + return vectorlength * prod (pad_shape (extent_)) * sizeof (T); } @@ -541,17 +577,17 @@ copy_from_innerloop (gdata const * const gsrc, #if CARPET_DIM == 3 // Don't use call_operator, because we parallelise ourselves copy_3d(static_cast <T const *> (src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast <T *> (this->storage()), - this->shape(), + this->padded_shape(), this->shape(), srcbox, dstbox, srcregbox, dstregbox, (void*)slabinfo); #elif CARPET_DIM == 4 // Don't use call_operator, because we parallelise ourselves copy_4d(static_cast <T const *> (src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast <T *> (this->storage()), - this->shape(), + this->padded_shape(), this->shape(), srcbox, dstbox, srcregbox, dstregbox, (void*)slabinfo); #else @@ -725,8 +761,10 @@ transfer_prolongate (data const * const src, case vertex_centered: { static void (* the_operators[]) (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -762,9 +800,9 @@ transfer_prolongate (data const * const src, } call_operator<T> (the_operators[order_space], static_cast <T const *> (src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast <T *> (this->storage()), - this->shape(), + this->padded_shape(), this->shape(), src->extent(), this->extent(), srcbox, dstbox, NULL); @@ -774,9 +812,9 @@ transfer_prolongate (data const * const src, if (use_dgfe) { // Don't use call_operator, because we parallelise ourselves prolongate_3d_dgfe_rf2<T,5>(static_cast<T const *>(src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast<T *>(this->storage()), - this->shape(), + this->padded_shape(), this->shape(), src->extent(), this->extent(), srcbox, dstbox, NULL); @@ -784,8 +822,10 @@ transfer_prolongate (data const * const src, } static void (* the_operators[]) (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -806,9 +846,9 @@ transfer_prolongate (data const * const src, } call_operator<T> (the_operators[order_space], static_cast <T const *> (src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast <T *> (this->storage()), - this->shape(), + this->padded_shape(), this->shape(), src->extent(), this->extent(), srcbox, dstbox, NULL); @@ -835,9 +875,9 @@ transfer_prolongate (data const * const src, case 3: call_operator<T> (& prolongate_3d_eno, static_cast <T const *> (src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast <T *> (this->storage()), - this->shape(), + this->padded_shape(), this->shape(), src->extent(), this->extent(), srcbox, dstbox, NULL); @@ -848,9 +888,9 @@ transfer_prolongate (data const * const src, // hydro, so we cheat here. call_operator<T> (& prolongate_3d_eno, static_cast <T const *> (src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast <T *> (this->storage()), - this->shape(), + this->padded_shape(), this->shape(), src->extent(), this->extent(), srcbox, dstbox, NULL); @@ -860,13 +900,15 @@ transfer_prolongate (data const * const src, "There is no stencil for op=\"ENO\" with order_space!=3"); break; } + break; } - break; case cell_centered: { static void (* the_operators[]) (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -937,18 +979,20 @@ transfer_prolongate (data const * const src, call_operator<T> (the_operators[order_space-2], static_cast <T const *> (src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast <T *> (this->storage()), - this->shape(), + this->padded_shape(), this->shape(), src->extent(), this->extent(), srcbox, dstbox, NULL); + break; } - break; + default: + assert(0); } timer.stop (0); + break; } - break; case op_WENO: { static Timer timer ("prolongate_WENO"); timer.start (); @@ -967,9 +1011,9 @@ transfer_prolongate (data const * const src, case 5: call_operator<T> (& prolongate_3d_eno, static_cast <T const *> (src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast <T *> (this->storage()), - this->shape(), + this->padded_shape(), this->shape(), src->extent(), this->extent(), srcbox, dstbox, NULL); @@ -979,15 +1023,15 @@ transfer_prolongate (data const * const src, "There is no stencil for op=\"WENO\" with order_space!=5"); break; } - + break; } - break; case cell_centered: { CCTK_WARN (CCTK_WARN_ABORT, "There are currently no cell-centred stencils for op=\"WENO\""); break; } - break; + default: + assert(0); } timer.stop (0); } @@ -1002,9 +1046,9 @@ transfer_prolongate (data const * const src, case 1: call_operator<T> (& prolongate_3d_tvd, static_cast <T const *> (src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast <T *> (this->storage()), - this->shape(), + this->padded_shape(), this->shape(), src->extent(), this->extent(), srcbox, dstbox, NULL); @@ -1021,9 +1065,9 @@ transfer_prolongate (data const * const src, case 1: call_operator<T> (& prolongate_3d_cc_tvd, static_cast <T const *> (src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast <T *> (this->storage()), - this->shape(), + this->padded_shape(), this->shape(), src->extent(), this->extent(), srcbox, dstbox, NULL); @@ -1035,6 +1079,8 @@ transfer_prolongate (data const * const src, } break; } + default: + assert(0); } timer.stop (0); break; @@ -1055,9 +1101,9 @@ transfer_prolongate (data const * const src, case 5: call_operator<T> (& prolongate_3d_o5_monotone_rf2, static_cast <T const *> (src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast <T *> (this->storage()), - this->shape(), + this->padded_shape(), this->shape(), src->extent(), this->extent(), srcbox, dstbox, NULL); @@ -1090,9 +1136,9 @@ transfer_prolongate (data const * const src, case 1: call_operator<T> (& prolongate_4d_o1_rf2, static_cast <T const *> (src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast <T *> (this->storage()), - this->shape(), + this->padded_shape(), this->shape(), src->extent(), this->extent(), srcbox, dstbox, NULL); @@ -1164,9 +1210,9 @@ transfer_restrict (data const * const src, assert (not slabinfo); call_operator<T> (& restrict_3d_rf2, static_cast <T const *> (src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast <T *> (this->storage()), - this->shape(), + this->padded_shape(), this->shape(), src->extent(), this->extent(), srcregbox, dstregbox, NULL); @@ -1182,9 +1228,9 @@ transfer_restrict (data const * const src, if (use_dgfe) { // Don't use call_operator, because we parallelise ourselves restrict_3d_dgfe_rf2<T,5>(static_cast<T const *>(src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast<T *>(this->storage()), - this->shape(), + this->padded_shape(), this->shape(), srcbox, dstbox, srcregbox, dstregbox, NULL); @@ -1194,37 +1240,37 @@ transfer_restrict (data const * const src, transport_operator != op_WENO and transport_operator != op_ENO) { // HACK switch (restriction_order_space) { - case 1: + case 1: // Don't use call_operator, because we parallelise ourselves restrict_3d_cc_rf2(static_cast <T const *> (src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast <T *> (this->storage()), - this->shape(), + this->padded_shape(), this->shape(), srcbox, dstbox, srcregbox, dstregbox, NULL); break; - case 3: + case 3: // Don't use call_operator, because we parallelise ourselves restrict_3d_cc_o3_rf2(static_cast <T const *> (src->storage()), - src->shape(), - static_cast <T *> (this->storage()), - this->shape(), - srcbox, - dstbox, - srcregbox, dstregbox, NULL); + src->padded_shape(), src->shape(), + static_cast <T *> (this->storage()), + this->padded_shape(), this->shape(), + srcbox, + dstbox, + srcregbox, dstregbox, NULL); break; - case 5: + case 5: // Don't use call_operator, because we parallelise ourselves restrict_3d_cc_o5_rf2(static_cast <T const *> (src->storage()), - src->shape(), - static_cast <T *> (this->storage()), - this->shape(), - srcbox, - dstbox, - srcregbox, dstregbox, NULL); + src->padded_shape(), src->shape(), + static_cast <T *> (this->storage()), + this->padded_shape(), this->shape(), + srcbox, + dstbox, + srcregbox, dstregbox, NULL); break; - default: + default: CCTK_VWarn (CCTK_WARN_ABORT, __LINE__, __FILE__, CCTK_THORNSTRING, "There is no restriction stencil with restriction_order_space==%d", restriction_order_space); break; @@ -1233,36 +1279,36 @@ transfer_restrict (data const * const src, } // Don't use call_operator, because we parallelise ourselves restrict_3d_cc_rf2(static_cast <T const *> (src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast <T *> (this->storage()), - this->shape(), + this->padded_shape(), this->shape(), srcbox, dstbox, srcregbox, dstregbox, NULL); } else if (all(is_centered == ivect(0,1,1))) { // Don't use call_operator, because we parallelise ourselves restrict_3d_vc_rf2<T,0,1,1>(static_cast <T const *> (src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast <T *> (this->storage()), - this->shape(), + this->padded_shape(), this->shape(), srcbox, dstbox, srcregbox, dstregbox, NULL); } else if (all(is_centered == ivect(1,0,1))) { // Don't use call_operator, because we parallelise ourselves restrict_3d_vc_rf2<T,1,0,1>(static_cast <T const *> (src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast <T *> (this->storage()), - this->shape(), + this->padded_shape(), this->shape(), srcbox, dstbox, srcregbox, dstregbox, NULL); } else if (all(is_centered == ivect(1,1,0))) { // Don't use call_operator, because we parallelise ourselves restrict_3d_vc_rf2<T,1,1,0>(static_cast <T const *> (src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast <T *> (this->storage()), - this->shape(), + this->padded_shape(), this->shape(), srcbox, dstbox, srcregbox, dstregbox, NULL); @@ -1291,9 +1337,9 @@ transfer_restrict (data const * const src, case vertex_centered: // Don't use call_operator, because we parallelise ourselves restrict_4d_rf2(static_cast <T const *> (src->storage()), - src->shape(), + src->padded_shape(), src->shape(), static_cast <T *> (this->storage()), - this->shape(), + this->padded_shape(), this->shape(), src->extent(), this->extent(), srcregbox, dstregbox, NULL); @@ -1357,10 +1403,10 @@ time_interpolate (vector <data *> const & srcs, times.AT(0), static_cast <T const *> (srcs.AT(1)->storage()), times.AT(1), - srcs.AT(0)->shape(), + srcs.AT(0)->padded_shape(), srcs.AT(0)->shape(), static_cast <T *> (this->storage()), time, - this->shape(), + this->padded_shape(), this->shape(), srcs.AT(0)->extent(), this->extent(), srcbox, dstbox, NULL); @@ -1374,10 +1420,10 @@ time_interpolate (vector <data *> const & srcs, times.AT(1), static_cast <T const *> (srcs.AT(2)->storage()), times.AT(2), - srcs.AT(0)->shape(), + srcs.AT(0)->padded_shape(), srcs.AT(0)->shape(), static_cast <T *> (this->storage()), time, - this->shape(), + this->padded_shape(), this->shape(), srcs.AT(0)->extent(), this->extent(), srcbox, dstbox, NULL); @@ -1393,10 +1439,10 @@ time_interpolate (vector <data *> const & srcs, times.AT(2), static_cast <T const *> (srcs.AT(3)->storage()), times.AT(3), - srcs.AT(0)->shape(), + srcs.AT(0)->padded_shape(), srcs.AT(0)->shape(), static_cast <T *> (this->storage()), time, - this->shape(), + this->padded_shape(), this->shape(), srcs.AT(0)->extent(), this->extent(), srcbox, dstbox, NULL); @@ -1414,10 +1460,10 @@ time_interpolate (vector <data *> const & srcs, times.AT(3), static_cast <T const *> (srcs.AT(4)->storage()), times.AT(4), - srcs.AT(0)->shape(), + srcs.AT(0)->padded_shape(), srcs.AT(0)->shape(), static_cast <T *> (this->storage()), time, - this->shape(), + this->padded_shape(), this->shape(), srcs.AT(0)->extent(), this->extent(), srcbox, dstbox, NULL); @@ -1446,10 +1492,10 @@ time_interpolate (vector <data *> const & srcs, times.AT(0), static_cast <T const *> (srcs.AT(1)->storage()), times.AT(1), - srcs.AT(0)->shape(), + srcs.AT(0)->padded_shape(), srcs.AT(0)->shape(), static_cast <T *> (this->storage()), time, - this->shape(), + this->padded_shape(), this->shape(), srcs.AT(0)->extent(), this->extent(), srcbox, dstbox, NULL); @@ -1463,10 +1509,10 @@ time_interpolate (vector <data *> const & srcs, times.AT(1), static_cast <T const *> (srcs.AT(2)->storage()), times.AT(2), - srcs.AT(0)->shape(), + srcs.AT(0)->padded_shape(), srcs.AT(0)->shape(), static_cast <T *> (this->storage()), time, - this->shape(), + this->padded_shape(), this->shape(), srcs.AT(0)->extent(), this->extent(), srcbox, dstbox, NULL); @@ -1535,6 +1581,8 @@ output (ostream & os) T Tdummy; os << "data<" << typestring(Tdummy) << ">:" << "extent=" << extent() << "," + << "shape=" << shape() << "," + << "padded_shape=" << padded_shape() << "," << "stride=" << stride() << ",size=" << size(); return os; } diff --git a/Carpet/CarpetLib/src/gdata.cc b/Carpet/CarpetLib/src/gdata.cc index 156e9cbbd..67acbe453 100644 --- a/Carpet/CarpetLib/src/gdata.cc +++ b/Carpet/CarpetLib/src/gdata.cc @@ -19,6 +19,7 @@ #endif #include "bbox.hh" +#include "cacheinfo.hh" #include "commstate.hh" #include "defs.hh" #include "dist.hh" @@ -115,6 +116,7 @@ gdata::~gdata () +#if 0 // Storage management template<int D> @@ -181,6 +183,7 @@ allocated_memory_shape (vect<int,D> shape) done:; return shape; } +#endif @@ -262,8 +265,8 @@ transfer_from (comm_state & state, find_source_timelevel (times, time, order_time, my_transport_operator, timelevel0, ntimelevels); if (is_src) assert (int (srcs.size()) >= ntimelevels); - int const dstpoints = prod(allocated_memory_shape(dstbox)); - int const srcpoints = prod(allocated_memory_shape(srcbox)) * ntimelevels; + int const dstpoints = prod(pad_shape(dstbox)); + int const srcpoints = prod(pad_shape(srcbox)) * ntimelevels; bool const interp_on_src = dstpoints <= srcpoints; int const npoints = interp_on_src ? dstpoints : srcpoints; @@ -289,10 +292,10 @@ transfer_from (comm_state & state, // copy the data into the send buffer if (interp_on_src) { size_t const sendbufsize = - src->c_datatype_size() * prod(allocated_memory_shape(dstbox)); + src->c_datatype_size() * prod(pad_shape(dstbox)); void * const sendbuf = state.send_buffer (src->c_datatype(), dstproc, - prod(allocated_memory_shape(dstbox))); + prod(pad_shape(dstbox))); gdata * const buf = src->make_typed (src->varindex, src->cent, src->transport_operator); buf->allocate (dstbox, srcproc, sendbuf, sendbufsize); @@ -301,14 +304,14 @@ transfer_from (comm_state & state, time, order_space, order_time); delete buf; state.commit_send_space - (src->c_datatype(), dstproc, prod(allocated_memory_shape(dstbox))); + (src->c_datatype(), dstproc, prod(pad_shape(dstbox))); } else { for (int tl = timelevel0; tl < timelevel0 + ntimelevels; ++ tl) { size_t const sendbufsize = - src->c_datatype_size() * prod(allocated_memory_shape(srcbox)); + src->c_datatype_size() * prod(pad_shape(srcbox)); void * const sendbuf = state.send_buffer (src->c_datatype(), dstproc, - prod(allocated_memory_shape(srcbox))); + prod(pad_shape(srcbox))); gdata * const buf = src->make_typed (src->varindex, src->cent, src->transport_operator); @@ -316,7 +319,7 @@ transfer_from (comm_state & state, buf->copy_from_innerloop (srcs.AT(tl), srcbox, srcbox, NULL); delete buf; state.commit_send_space (src->c_datatype(), dstproc, - prod(allocated_memory_shape(srcbox))); + prod(pad_shape(srcbox))); } } } @@ -337,14 +340,13 @@ transfer_from (comm_state & state, // copy from the recv buffer if (interp_on_src) { size_t const recvbufsize = - c_datatype_size() * prod(allocated_memory_shape(dstbox)); + c_datatype_size() * prod(pad_shape(dstbox)); void * const recvbuf = - state.recv_buffer (c_datatype(), srcproc, - prod(allocated_memory_shape(dstbox))); + state.recv_buffer (c_datatype(), srcproc, prod(pad_shape(dstbox))); gdata * const buf = make_typed (varindex, cent, transport_operator); buf->allocate (dstbox, dstproc, recvbuf, recvbufsize); state.commit_recv_space (c_datatype(), srcproc, - prod(allocated_memory_shape(dstbox))); + prod(pad_shape(dstbox))); copy_from_innerloop (buf, dstbox, dstbox, NULL); delete buf; } else { @@ -353,14 +355,14 @@ transfer_from (comm_state & state, vector <CCTK_REAL> timebuf (ntimelevels); for (int tl = 0; tl < ntimelevels; ++ tl) { size_t const recvbufsize = - c_datatype_size() * prod(allocated_memory_shape(srcbox)); + c_datatype_size() * prod(pad_shape(srcbox)); void * const recvbuf = state.recv_buffer (c_datatype(), srcproc, - prod(allocated_memory_shape(srcbox))); + prod(pad_shape(srcbox))); gdata * const buf = make_typed (varindex, cent, transport_operator); buf->allocate (srcbox, dstproc, recvbuf, recvbufsize); state.commit_recv_space - (c_datatype(), srcproc, prod(allocated_memory_shape(srcbox))); + (c_datatype(), srcproc, prod(pad_shape(srcbox))); bufs.AT(tl) = buf; timebuf.AT(tl) = times.AT(timelevel0 + tl); } @@ -469,11 +471,3 @@ allmemory () } return mem; } - - - -template vect<int,3> gdata::allocated_memory_shape (bbox<int,3> const& extent); -template vect<int,3> gdata::allocated_memory_shape (vect<int,3> shape); - -template vect<int,4> gdata::allocated_memory_shape (bbox<int,4> const& extent); -template vect<int,4> gdata::allocated_memory_shape (vect<int,4> shape); diff --git a/Carpet/CarpetLib/src/gdata.hh b/Carpet/CarpetLib/src/gdata.hh index ea44bc6fc..129fccc1c 100644 --- a/Carpet/CarpetLib/src/gdata.hh +++ b/Carpet/CarpetLib/src/gdata.hh @@ -71,7 +71,8 @@ protected: int _proc; // stored on processor - ivect _shape, _stride; // shape and index order + ivect _shape; // shape + ivect _padded_shape, _stride; // allocated shape and index order ibbox _extent; // bbox for all data @@ -105,10 +106,6 @@ public: 0; virtual void free () = 0; virtual size_t allocsize (const ibbox& extent, const int proc) const = 0; - template<int D> - static vect<int,D> allocated_memory_shape (bbox<int,D> const& extent); - template<int D> - static vect<int,D> allocated_memory_shape (vect<int,D> shape); // Accessors bool has_storage () const { @@ -145,6 +142,11 @@ public: return _shape; } + const ivect& padded_shape () const { + assert (_has_storage); + return _padded_shape; + } + const ivect& stride () const { assert (_has_storage); return _stride; @@ -164,8 +166,10 @@ public: assert (_has_storage); assert (all((index-extent().lower()) % extent().stride() == 0)); ivect const ind = (index-extent().lower()) / extent().stride(); - assert (all(ind>=0 and ind<=shape())); - return dot(ind, stride()); + assert (all(ind>=0 and ind<shape())); + int const off = dot(ind, stride()); + assert (off>=0 and off<size()); + return off; } private: diff --git a/Carpet/CarpetLib/src/interpolate_3d_2tl.cc b/Carpet/CarpetLib/src/interpolate_3d_2tl.cc index 9590632ad..ddfe15254 100644 --- a/Carpet/CarpetLib/src/interpolate_3d_2tl.cc +++ b/Carpet/CarpetLib/src/interpolate_3d_2tl.cc @@ -8,7 +8,6 @@ #include <loopcontrol.h> -#include "gdata.hh" #include "operator_prototypes_3d.hh" #include "typeprops.hh" @@ -22,9 +21,11 @@ namespace CarpetLib { #define SRCIND3(i,j,k) \ index3 (srcioff + (i), srcjoff + (j), srckoff + (k), \ + srcipadext, srcjpadext, srckpadext, \ srciext, srcjext, srckext) #define DSTIND3(i,j,k) \ index3 (dstioff + (i), dstjoff + (j), dstkoff + (k), \ + dstipadext, dstjpadext, dstkpadext, \ dstiext, dstjext, dstkext) @@ -35,9 +36,11 @@ namespace CarpetLib { CCTK_REAL const t1, T const * restrict const src2, CCTK_REAL const t2, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, CCTK_REAL const t, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -73,12 +76,6 @@ namespace CarpetLib { CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); } - if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or - dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride()))) - { - CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); - } - ivect3 const regext = regbbox.shape() / regbbox.stride(); @@ -89,6 +86,14 @@ namespace CarpetLib { + ptrdiff_t const srcipadext = srcpadext[0]; + ptrdiff_t const srcjpadext = srcpadext[1]; + ptrdiff_t const srckpadext = srcpadext[2]; + + ptrdiff_t const dstipadext = dstpadext[0]; + ptrdiff_t const dstjpadext = dstpadext[1]; + ptrdiff_t const dstkpadext = dstpadext[2]; + ptrdiff_t const srciext = srcext[0]; ptrdiff_t const srcjext = srcext[1]; ptrdiff_t const srckext = srcext[2]; @@ -129,7 +134,8 @@ namespace CarpetLib { // Loop over region #pragma omp parallel CCTK_LOOP3(interpolate_3d_2tl, - i,j,k, 0,0,0, regiext,regjext,regkext, srciext,srcjext,srckext) + i,j,k, 0,0,0, regiext,regjext,regkext, + dstipadext,dstjpadext,dstkpadext) { dst [DSTIND3(i, j, k)] = @@ -142,21 +148,23 @@ namespace CarpetLib { -#define TYPECASE(N,T) \ - template \ - void \ - interpolate_3d_2tl (T const * restrict const src1, \ - CCTK_REAL const t1, \ - T const * restrict const src2, \ - CCTK_REAL const t2, \ - ivect3 const & restrict srcext, \ - T * restrict const dst, \ - CCTK_REAL const t, \ - ivect3 const & restrict dstext, \ - ibbox3 const & restrict srcbbox, \ - ibbox3 const & restrict dstbbox, \ - ibbox3 const & restrict, \ - ibbox3 const & restrict regbbox, \ +#define TYPECASE(N,T) \ + template \ + void \ + interpolate_3d_2tl (T const * restrict const src1, \ + CCTK_REAL const t1, \ + T const * restrict const src2, \ + CCTK_REAL const t2, \ + ivect3 const & restrict srcpadext, \ + ivect3 const & restrict srcext, \ + T * restrict const dst, \ + CCTK_REAL const t, \ + ivect3 const & restrict dstpadext, \ + ivect3 const & restrict dstext, \ + ibbox3 const & restrict srcbbox, \ + ibbox3 const & restrict dstbbox, \ + ibbox3 const & restrict, \ + ibbox3 const & restrict regbbox, \ void * extraargs); #include "typecase.hh" #undef TYPECASE diff --git a/Carpet/CarpetLib/src/interpolate_3d_3tl.cc b/Carpet/CarpetLib/src/interpolate_3d_3tl.cc index eaf24bea5..ce8249998 100644 --- a/Carpet/CarpetLib/src/interpolate_3d_3tl.cc +++ b/Carpet/CarpetLib/src/interpolate_3d_3tl.cc @@ -8,7 +8,6 @@ #include <loopcontrol.h> -#include "gdata.hh" #include "operator_prototypes_3d.hh" #include "typeprops.hh" @@ -22,9 +21,11 @@ namespace CarpetLib { #define SRCIND3(i,j,k) \ index3 (srcioff + (i), srcjoff + (j), srckoff + (k), \ + srcipadext, srcjpadext, srckpadext, \ srciext, srcjext, srckext) #define DSTIND3(i,j,k) \ index3 (dstioff + (i), dstjoff + (j), dstkoff + (k), \ + dstipadext, dstjpadext, dstkpadext, \ dstiext, dstjext, dstkext) @@ -37,9 +38,11 @@ namespace CarpetLib { CCTK_REAL const t2, T const * restrict const src3, CCTK_REAL const t3, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, CCTK_REAL const t, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -75,12 +78,6 @@ namespace CarpetLib { CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); } - if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or - dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride()))) - { - CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); - } - ivect3 const regext = regbbox.shape() / regbbox.stride(); @@ -91,6 +88,14 @@ namespace CarpetLib { + ptrdiff_t const srcipadext = srcpadext[0]; + ptrdiff_t const srcjpadext = srcpadext[1]; + ptrdiff_t const srckpadext = srcpadext[2]; + + ptrdiff_t const dstipadext = dstpadext[0]; + ptrdiff_t const dstjpadext = dstpadext[1]; + ptrdiff_t const dstkpadext = dstpadext[2]; + ptrdiff_t const srciext = srcext[0]; ptrdiff_t const srcjext = srcext[1]; ptrdiff_t const srckext = srcext[2]; @@ -133,7 +138,8 @@ namespace CarpetLib { // Loop over region #pragma omp parallel CCTK_LOOP3(interpolate_3d_3tl, - i,j,k, 0,0,0, regiext,regjext,regkext, srciext,srcjext,srckext) + i,j,k, 0,0,0, regiext,regjext,regkext, + dstipadext,dstjpadext,dstkpadext) { dst [DSTIND3(i, j, k)] = @@ -147,23 +153,25 @@ namespace CarpetLib { -#define TYPECASE(N,T) \ - template \ - void \ - interpolate_3d_3tl (T const * restrict const src1, \ - CCTK_REAL const t1, \ - T const * restrict const src2, \ - CCTK_REAL const t2, \ - T const * restrict const src3, \ - CCTK_REAL const t3, \ - ivect3 const & restrict srcext, \ - T * restrict const dst, \ - CCTK_REAL const t, \ - ivect3 const & restrict dstext, \ - ibbox3 const & restrict srcbbox, \ - ibbox3 const & restrict dstbbox, \ - ibbox3 const & restrict, \ - ibbox3 const & restrict regbbox, \ +#define TYPECASE(N,T) \ + template \ + void \ + interpolate_3d_3tl (T const * restrict const src1, \ + CCTK_REAL const t1, \ + T const * restrict const src2, \ + CCTK_REAL const t2, \ + T const * restrict const src3, \ + CCTK_REAL const t3, \ + ivect3 const & restrict srcpadext, \ + ivect3 const & restrict srcext, \ + T * restrict const dst, \ + CCTK_REAL const t, \ + ivect3 const & restrict dstpadext, \ + ivect3 const & restrict dstext, \ + ibbox3 const & restrict srcbbox, \ + ibbox3 const & restrict dstbbox, \ + ibbox3 const & restrict, \ + ibbox3 const & restrict regbbox, \ void * extraargs); #include "typecase.hh" #undef TYPECASE diff --git a/Carpet/CarpetLib/src/interpolate_3d_4tl.cc b/Carpet/CarpetLib/src/interpolate_3d_4tl.cc index 16039c7d8..74bca1837 100644 --- a/Carpet/CarpetLib/src/interpolate_3d_4tl.cc +++ b/Carpet/CarpetLib/src/interpolate_3d_4tl.cc @@ -8,7 +8,6 @@ #include <loopcontrol.h> -#include "gdata.hh" #include "operator_prototypes_3d.hh" #include "typeprops.hh" @@ -22,9 +21,11 @@ namespace CarpetLib { #define SRCIND3(i,j,k) \ index3 (srcioff + (i), srcjoff + (j), srckoff + (k), \ + srcipadext, srcjpadext, srckpadext, \ srciext, srcjext, srckext) #define DSTIND3(i,j,k) \ index3 (dstioff + (i), dstjoff + (j), dstkoff + (k), \ + dstipadext, dstjpadext, dstkpadext, \ dstiext, dstjext, dstkext) @@ -39,9 +40,11 @@ namespace CarpetLib { CCTK_REAL const t3, T const * restrict const src4, CCTK_REAL const t4, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, CCTK_REAL const t, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -77,12 +80,6 @@ namespace CarpetLib { CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); } - if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or - dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride()))) - { - CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); - } - ivect3 const regext = regbbox.shape() / regbbox.stride(); @@ -93,6 +90,14 @@ namespace CarpetLib { + ptrdiff_t const srcipadext = srcpadext[0]; + ptrdiff_t const srcjpadext = srcpadext[1]; + ptrdiff_t const srckpadext = srcpadext[2]; + + ptrdiff_t const dstipadext = dstpadext[0]; + ptrdiff_t const dstjpadext = dstpadext[1]; + ptrdiff_t const dstkpadext = dstpadext[2]; + ptrdiff_t const srciext = srcext[0]; ptrdiff_t const srcjext = srcext[1]; ptrdiff_t const srckext = srcext[2]; @@ -140,7 +145,8 @@ namespace CarpetLib { // Loop over region #pragma omp parallel CCTK_LOOP3(interpolate_3d_4tl, - i,j,k, 0,0,0, regiext,regjext,regkext, srciext,srcjext,srckext) + i,j,k, 0,0,0, regiext,regjext,regkext, + dstipadext,dstjpadext,dstkpadext) { dst [DSTIND3(i, j, k)] = @@ -155,25 +161,27 @@ namespace CarpetLib { -#define TYPECASE(N,T) \ - template \ - void \ - interpolate_3d_4tl (T const * restrict const src1, \ - CCTK_REAL const t1, \ - T const * restrict const src2, \ - CCTK_REAL const t2, \ - T const * restrict const src3, \ - CCTK_REAL const t3, \ - T const * restrict const src4, \ - CCTK_REAL const t4, \ - ivect3 const & restrict srcext, \ - T * restrict const dst, \ - CCTK_REAL const t, \ - ivect3 const & restrict dstext, \ - ibbox3 const & restrict srcbbox, \ - ibbox3 const & restrict dstbbox, \ - ibbox3 const & restrict, \ - ibbox3 const & restrict regbbox, \ +#define TYPECASE(N,T) \ + template \ + void \ + interpolate_3d_4tl (T const * restrict const src1, \ + CCTK_REAL const t1, \ + T const * restrict const src2, \ + CCTK_REAL const t2, \ + T const * restrict const src3, \ + CCTK_REAL const t3, \ + T const * restrict const src4, \ + CCTK_REAL const t4, \ + ivect3 const & restrict srcpadext, \ + ivect3 const & restrict srcext, \ + T * restrict const dst, \ + CCTK_REAL const t, \ + ivect3 const & restrict dstpadext, \ + ivect3 const & restrict dstext, \ + ibbox3 const & restrict srcbbox, \ + ibbox3 const & restrict dstbbox, \ + ibbox3 const & restrict, \ + ibbox3 const & restrict regbbox, \ void * extraargs); #include "typecase.hh" #undef TYPECASE diff --git a/Carpet/CarpetLib/src/interpolate_3d_5tl.cc b/Carpet/CarpetLib/src/interpolate_3d_5tl.cc index bb7f4d06d..74913a131 100644 --- a/Carpet/CarpetLib/src/interpolate_3d_5tl.cc +++ b/Carpet/CarpetLib/src/interpolate_3d_5tl.cc @@ -8,7 +8,6 @@ #include <loopcontrol.h> -#include "gdata.hh" #include "operator_prototypes_3d.hh" #include "typeprops.hh" @@ -22,9 +21,11 @@ namespace CarpetLib { #define SRCIND3(i,j,k) \ index3 (srcioff + (i), srcjoff + (j), srckoff + (k), \ + srcipadext, srcjpadext, srckpadext, \ srciext, srcjext, srckext) #define DSTIND3(i,j,k) \ index3 (dstioff + (i), dstjoff + (j), dstkoff + (k), \ + dstipadext, dstjpadext, dstkpadext, \ dstiext, dstjext, dstkext) @@ -41,9 +42,11 @@ namespace CarpetLib { CCTK_REAL const t4, T const * restrict const src5, CCTK_REAL const t5, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, CCTK_REAL const t, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -79,12 +82,6 @@ namespace CarpetLib { CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); } - if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or - dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride()))) - { - CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); - } - ivect3 const regext = regbbox.shape() / regbbox.stride(); @@ -95,6 +92,14 @@ namespace CarpetLib { + ptrdiff_t const srcipadext = srcpadext[0]; + ptrdiff_t const srcjpadext = srcpadext[1]; + ptrdiff_t const srckpadext = srcpadext[2]; + + ptrdiff_t const dstipadext = dstpadext[0]; + ptrdiff_t const dstjpadext = dstpadext[1]; + ptrdiff_t const dstkpadext = dstpadext[2]; + ptrdiff_t const srciext = srcext[0]; ptrdiff_t const srcjext = srcext[1]; ptrdiff_t const srckext = srcext[2]; @@ -145,7 +150,8 @@ namespace CarpetLib { // Loop over region #pragma omp parallel CCTK_LOOP3(interpolate_3d_5tl, - i,j,k, 0,0,0, regiext,regjext,regkext, srciext,srcjext,srckext) + i,j,k, 0,0,0, regiext,regjext,regkext, + dstipadext,dstjpadext,dstkpadext) { dst [DSTIND3(i, j, k)] = @@ -161,27 +167,29 @@ namespace CarpetLib { -#define TYPECASE(N,T) \ - template \ - void \ - interpolate_3d_5tl (T const * restrict const src1, \ - CCTK_REAL const t1, \ - T const * restrict const src2, \ - CCTK_REAL const t2, \ - T const * restrict const src3, \ - CCTK_REAL const t3, \ - T const * restrict const src4, \ - CCTK_REAL const t4, \ - T const * restrict const src5, \ - CCTK_REAL const t5, \ - ivect3 const & restrict srcext, \ - T * restrict const dst, \ - CCTK_REAL const t, \ - ivect3 const & restrict dstext, \ - ibbox3 const & restrict srcbbox, \ - ibbox3 const & restrict dstbbox, \ - ibbox3 const & restrict, \ - ibbox3 const & restrict regbbox, \ +#define TYPECASE(N,T) \ + template \ + void \ + interpolate_3d_5tl (T const * restrict const src1, \ + CCTK_REAL const t1, \ + T const * restrict const src2, \ + CCTK_REAL const t2, \ + T const * restrict const src3, \ + CCTK_REAL const t3, \ + T const * restrict const src4, \ + CCTK_REAL const t4, \ + T const * restrict const src5, \ + CCTK_REAL const t5, \ + ivect3 const & restrict srcpadext, \ + ivect3 const & restrict srcext, \ + T * restrict const dst, \ + CCTK_REAL const t, \ + ivect3 const & restrict dstpadext, \ + ivect3 const & restrict dstext, \ + ibbox3 const & restrict srcbbox, \ + ibbox3 const & restrict dstbbox, \ + ibbox3 const & restrict, \ + ibbox3 const & restrict regbbox, \ void * extraargs); #include "typecase.hh" #undef TYPECASE diff --git a/Carpet/CarpetLib/src/interpolate_eno_3d_3tl.cc b/Carpet/CarpetLib/src/interpolate_eno_3d_3tl.cc index 969ec4bbf..5d56ed3e8 100644 --- a/Carpet/CarpetLib/src/interpolate_eno_3d_3tl.cc +++ b/Carpet/CarpetLib/src/interpolate_eno_3d_3tl.cc @@ -8,7 +8,6 @@ #include <loopcontrol.h> -#include "gdata.hh" #include "operator_prototypes_3d.hh" #include "typeprops.hh" @@ -22,9 +21,11 @@ namespace CarpetLib { #define SRCIND3(i,j,k) \ index3 (srcioff + (i), srcjoff + (j), srckoff + (k), \ + srcipadext, srcjpadext, srckpadext, \ srciext, srcjext, srckext) #define DSTIND3(i,j,k) \ index3 (dstioff + (i), dstjoff + (j), dstkoff + (k), \ + dstipadext, dstjpadext, dstkpadext, \ dstiext, dstjext, dstkext) @@ -55,9 +56,11 @@ namespace CarpetLib { CCTK_REAL const t2, T const * restrict const src3, CCTK_REAL const t3, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, CCTK_REAL const t, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -93,12 +96,6 @@ namespace CarpetLib { CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); } - if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or - dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride()))) - { - CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); - } - ivect3 const regext = regbbox.shape() / regbbox.stride(); @@ -109,6 +106,14 @@ namespace CarpetLib { + ptrdiff_t const srcipadext = srcpadext[0]; + ptrdiff_t const srcjpadext = srcpadext[1]; + ptrdiff_t const srckpadext = srcpadext[2]; + + ptrdiff_t const dstipadext = dstpadext[0]; + ptrdiff_t const dstjpadext = dstpadext[1]; + ptrdiff_t const dstkpadext = dstpadext[2]; + ptrdiff_t const srciext = srcext[0]; ptrdiff_t const srcjext = srcext[1]; ptrdiff_t const srckext = srcext[2]; @@ -172,7 +177,8 @@ namespace CarpetLib { // Loop over region #pragma omp parallel CCTK_LOOP3 (interpolate_end_3d_3tl, - i,j,k, 0,0,0, regiext,regjext,regkext, srciext,srcjext,srckext) + i,j,k, 0,0,0, regiext,regjext,regkext, + srcipadext,srcjpadext,srckpadext) { T const s1 = src1 [SRCIND3(i, j, k)]; @@ -209,9 +215,11 @@ namespace CarpetLib { CCTK_REAL const t2, CCTK_COMPLEX8 const * restrict const src3, CCTK_REAL const t3, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, CCTK_COMPLEX8 * restrict const dst, CCTK_REAL const t, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -234,9 +242,11 @@ namespace CarpetLib { CCTK_REAL const t2, CCTK_COMPLEX16 const * restrict const src3, CCTK_REAL const t3, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, CCTK_COMPLEX16 * restrict const dst, CCTK_REAL const t, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -257,9 +267,11 @@ namespace CarpetLib { CCTK_REAL const t2, CCTK_COMPLEX32 const * restrict const src3, CCTK_REAL const t3, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, CCTK_COMPLEX32 * restrict const dst, CCTK_REAL const t, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -282,9 +294,11 @@ namespace CarpetLib { CCTK_REAL const t2, \ T const * restrict const src3, \ CCTK_REAL const t3, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ CCTK_REAL const t, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ diff --git a/Carpet/CarpetLib/src/make.code.defn b/Carpet/CarpetLib/src/make.code.defn index 0956754cc..aab0f5372 100644 --- a/Carpet/CarpetLib/src/make.code.defn +++ b/Carpet/CarpetLib/src/make.code.defn @@ -7,6 +7,7 @@ SRCS = backtrace.cc \ bboxset.cc \ bboxtree.cc \ bintree.cc \ + cacheinfo.cc \ commstate.cc \ data.cc \ defs.cc \ diff --git a/Carpet/CarpetLib/src/mem.cc b/Carpet/CarpetLib/src/mem.cc index 27f86e951..f3a4fa6ff 100644 --- a/Carpet/CarpetLib/src/mem.cc +++ b/Carpet/CarpetLib/src/mem.cc @@ -20,7 +20,12 @@ using namespace std; -double const gmem::MEGA = 1000*1000; +double const gmem::KILO = 1000.0; +double const gmem::MEGA = 1000.0*1000.0; +double const gmem::GIGA = 1000.0*1000.0*1000.0; +double const gmem::TERA = 1000.0*1000.0*1000.0*1000.0; +double const gmem::PETA = 1000.0*1000.0*1000.0*1000.0*1000.0; +double const gmem::EXA = 1000.0*1000.0*1000.0*1000.0*1000.0*1000.0; // Total number of currently allocated bytes and objects double gmem::total_allocated_bytes = 0; @@ -38,7 +43,8 @@ template<typename T> mem<T>:: mem (size_t const vectorlength, size_t const nelems, T * const memptr, size_t const memsize) - : storage_ (memptr), + : storage_base_ (memptr), + storage_ (memptr), nelems_ (nelems), vectorlength_ (vectorlength), owns_storage_ (false), @@ -62,7 +68,10 @@ mem (size_t const vectorlength, size_t const nelems, int(max_allowed_memory_MB)); } try { - storage_ = new T [vectorlength * nelems]; + // TODO: align and pad storage + size_t const padding = 0; + storage_base_ = new T [vectorlength * nelems + padding]; + storage_ = storage_base_ + padding; owns_storage_ = true; } catch (...) { T Tdummy; @@ -77,7 +86,9 @@ mem (size_t const vectorlength, size_t const nelems, total_allocated_bytes += nbytes; max_allocated_bytes = max (max_allocated_bytes, total_allocated_bytes); if (poison_new_memory) { - memset (storage_, poison_value, vectorlength * nelems * sizeof (T)); + memset (storage_base_, + poison_value, (vectorlength * nelems + + storage_ - storage_base_) * sizeof (T)); } } else { assert (memsize >= vectorlength * nelems * sizeof (T)); @@ -96,7 +107,7 @@ mem<T>:: { assert (not has_clients()); if (owns_storage_) { - delete [] storage_; + delete [] storage_base_; const double nbytes = vectorlength_ * nelems_ * sizeof (T); total_allocated_bytes -= nbytes; } @@ -144,13 +155,15 @@ memory () const { return + memoryof (storage_base_) + memoryof (storage_) + memoryof (nelems_) + memoryof (vectorlength_) + memoryof (owns_storage_) + memoryof (clients_) + memoryof (num_clients_) + - (owns_storage_ ? sizeof (T) * vectorlength_ * nelems_ : 0); + (owns_storage_ ? (vectorlength_ * nelems_ + + storage_ - storage_base_) : 0) * sizeof (T); } diff --git a/Carpet/CarpetLib/src/mem.hh b/Carpet/CarpetLib/src/mem.hh index ff7f05080..807daff70 100644 --- a/Carpet/CarpetLib/src/mem.hh +++ b/Carpet/CarpetLib/src/mem.hh @@ -19,7 +19,12 @@ class gmem { public: + static double const KILO; static double const MEGA; + static double const GIGA; + static double const TERA; + static double const PETA; + static double const EXA ; // Total number of currently allocated bytes and objects static double total_allocated_bytes; @@ -35,6 +40,7 @@ public: template<typename T> class mem: public gmem { + T * storage_base_; T * storage_; size_t nelems_; size_t vectorlength_; diff --git a/Carpet/CarpetLib/src/operator_prototypes_3d.hh b/Carpet/CarpetLib/src/operator_prototypes_3d.hh index 6b344b7c3..6d8850726 100644 --- a/Carpet/CarpetLib/src/operator_prototypes_3d.hh +++ b/Carpet/CarpetLib/src/operator_prototypes_3d.hh @@ -22,6 +22,7 @@ namespace CarpetLib { static inline size_t index3 (size_t const i, size_t const j, size_t const k, + size_t const padexti, size_t const padextj, size_t const padextk, size_t const exti, size_t const extj, size_t const extk) { #ifdef CARPET_DEBUG @@ -30,7 +31,7 @@ namespace CarpetLib { assert (static_cast <ptrdiff_t> (k) >= 0 and k < extk); #endif - return i + exti * (j + extj * k); + return i + padexti * (j + padextj * k); } @@ -56,8 +57,10 @@ namespace CarpetLib { template <typename T> void copy_3d (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -70,8 +73,10 @@ namespace CarpetLib { template <typename T, int ORDER> void prolongate_3d_rf2 (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -79,87 +84,15 @@ namespace CarpetLib { ibbox3 const & restrict dstregbbox, void * extraargs); -#if 0 - template <typename T> - void - prolongate_3d_o1_rf2 (T const * restrict const src, - ivect3 const & restrict srcext, - T * restrict const dst, - ivect3 const & restrict dstext, - ibbox3 const & restrict srcbbox, - ibbox3 const & restrict dstbbox, - ibbox3 const & restrict srcregbbox, - ibbox3 const & restrict dstregbbox, - void * extraargs); - - template <typename T> - void - prolongate_3d_o3_rf2 (T const * restrict const src, - ivect3 const & restrict srcext, - T * restrict const dst, - ivect3 const & restrict dstext, - ibbox3 const & restrict srcbbox, - ibbox3 const & restrict dstbbox, - ibbox3 const & restrict srcregbbox, - ibbox3 const & restrict dstregbbox, - void * extraargs); - - template <typename T> - void - prolongate_3d_o5_rf2 (T const * restrict const src, - ivect3 const & restrict srcext, - T * restrict const dst, - ivect3 const & restrict dstext, - ibbox3 const & restrict srcbbox, - ibbox3 const & restrict dstbbox, - ibbox3 const & restrict srcregbbox, - ibbox3 const & restrict dstregbbox, - void * extraargs); - - template <typename T> - void - prolongate_3d_o7_rf2 (T const * restrict const src, - ivect3 const & restrict srcext, - T * restrict const dst, - ivect3 const & restrict dstext, - ibbox3 const & restrict srcbbox, - ibbox3 const & restrict dstbbox, - ibbox3 const & restrict srcregbbox, - ibbox3 const & restrict dstregbbox, - void * extraargs); - - template <typename T> - void - prolongate_3d_o9_rf2 (T const * restrict const src, - ivect3 const & restrict srcext, - T * restrict const dst, - ivect3 const & restrict dstext, - ibbox3 const & restrict srcbbox, - ibbox3 const & restrict dstbbox, - ibbox3 const & restrict srcregbbox, - ibbox3 const & restrict dstregbbox, - void * extraargs); - - template <typename T> - void - prolongate_3d_o11_rf2 (T const * restrict const src, - ivect3 const & restrict srcext, - T * restrict const dst, - ivect3 const & restrict dstext, - ibbox3 const & restrict srcbbox, - ibbox3 const & restrict dstbbox, - ibbox3 const & restrict srcregbbox, - ibbox3 const & restrict dstregbbox, - void * extraargs); -#endif - template <typename T> void prolongate_3d_o5_monotone_rf2 (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -172,8 +105,10 @@ namespace CarpetLib { template <typename T, int ORDER> void prolongate_3d_cc_rf2 (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -181,49 +116,13 @@ namespace CarpetLib { ibbox3 const & restrict dstregbbox, void * extraargs); -#if 0 - template <typename T> - void - prolongate_3d_cc_o0_rf2 (T const * restrict const src, - ivect3 const & restrict srcext, - T * restrict const dst, - ivect3 const & restrict dstext, - ibbox3 const & restrict srcbbox, - ibbox3 const & restrict dstbbox, - ibbox3 const & restrict srcregbbox, - ibbox3 const & restrict dstregbbox, - void * extraargs); - - template <typename T> - void - prolongate_3d_cc_o1_rf2 (T const * restrict const src, - ivect3 const & restrict srcext, - T * restrict const dst, - ivect3 const & restrict dstext, - ibbox3 const & restrict srcbbox, - ibbox3 const & restrict dstbbox, - ibbox3 const & restrict srcregbbox, - ibbox3 const & restrict dstregbbox, - void * extraargs); - - template <typename T> - void - prolongate_3d_cc_o2_rf2 (T const * restrict const src, - ivect3 const & restrict srcext, - T * restrict const dst, - ivect3 const & restrict dstext, - ibbox3 const & restrict srcbbox, - ibbox3 const & restrict dstbbox, - ibbox3 const & restrict srcregbbox, - ibbox3 const & restrict dstregbbox, - void * extraargs); -#endif - template <typename T, int ORDER> void prolongate_3d_cc_eno_rf2 (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -235,8 +134,10 @@ namespace CarpetLib { template <typename T, int ORDER> void prolongate_3d_cc_eno_rf2 (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -260,8 +161,10 @@ namespace CarpetLib { template <typename T, int ORDER> void prolongate_3d_dgfe_rf2 (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -274,8 +177,10 @@ namespace CarpetLib { template <typename T> void restrict_3d_rf2 (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -291,9 +196,11 @@ namespace CarpetLib { CCTK_REAL const t1, T const * restrict const src2, CCTK_REAL const t2, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, CCTK_REAL const t, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -309,9 +216,11 @@ namespace CarpetLib { CCTK_REAL const t2, T const * restrict const src3, CCTK_REAL const t3, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, CCTK_REAL const t, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -329,9 +238,11 @@ namespace CarpetLib { CCTK_REAL const t3, T const * restrict const src4, CCTK_REAL const t4, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, CCTK_REAL const t, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -351,9 +262,11 @@ namespace CarpetLib { CCTK_REAL const t4, T const * restrict const src5, CCTK_REAL const t5, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, CCTK_REAL const t, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -369,9 +282,11 @@ namespace CarpetLib { CCTK_REAL const t2, T const * restrict const src3, CCTK_REAL const t3, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, CCTK_REAL const t, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -384,8 +299,10 @@ namespace CarpetLib { template <typename T> void restrict_3d_cc_rf2 (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -398,8 +315,10 @@ namespace CarpetLib { template <typename T> void restrict_3d_cc_o3_rf2 (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -412,8 +331,10 @@ namespace CarpetLib { template <typename T> void restrict_3d_cc_o5_rf2 (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -426,8 +347,10 @@ namespace CarpetLib { template <typename T, int centi, int centj, int centk> void restrict_3d_vc_rf2 (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -440,8 +363,10 @@ namespace CarpetLib { template <typename T, int ORDER> void restrict_3d_dgfe_rf2 (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, diff --git a/Carpet/CarpetLib/src/operator_prototypes_4d.hh b/Carpet/CarpetLib/src/operator_prototypes_4d.hh index 1d0a4609a..71d1e3bcd 100644 --- a/Carpet/CarpetLib/src/operator_prototypes_4d.hh +++ b/Carpet/CarpetLib/src/operator_prototypes_4d.hh @@ -22,6 +22,7 @@ namespace CarpetLib { static inline size_t index4 (size_t const i, size_t const j, size_t const k, size_t const l, + size_t const padexti, size_t const padextj, size_t const padextk, size_t const padextl, size_t const exti, size_t const extj, size_t const extk, size_t const extl) { #ifdef CARPET_DEBUG @@ -31,7 +32,7 @@ namespace CarpetLib { assert (static_cast <ptrdiff_t> (l) >= 0 and l < extl); #endif - return i + exti * (j + extj * (k + extk * l)); + return i + padexti * (j + padextj * (k + padextk * l)); } @@ -47,8 +48,10 @@ namespace CarpetLib { template <typename T> void copy_4d (T const * restrict const src, + ivect4 const & restrict srcpadext, ivect4 const & restrict srcext, T * restrict const dst, + ivect4 const & restrict dstpadext, ivect4 const & restrict dstext, ibbox4 const & restrict srcbbox, ibbox4 const & restrict dstbbox, @@ -61,8 +64,10 @@ namespace CarpetLib { template <typename T> void prolongate_4d_o1_rf2 (T const * restrict const src, + ivect4 const & restrict srcpadext, ivect4 const & restrict srcext, T * restrict const dst, + ivect4 const & restrict dstpadext, ivect4 const & restrict dstext, ibbox4 const & restrict srcbbox, ibbox4 const & restrict dstbbox, @@ -75,8 +80,10 @@ namespace CarpetLib { template <typename T> void restrict_4d_rf2 (T const * restrict const src, + ivect4 const & restrict srcpadext, ivect4 const & restrict srcext, T * restrict const dst, + ivect4 const & restrict dstpadext, ivect4 const & restrict dstext, ibbox4 const & restrict srcbbox, ibbox4 const & restrict dstbbox, diff --git a/Carpet/CarpetLib/src/prolongate_3d_cc_eno_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_cc_eno_rf2.cc index 5cd8fdd2c..7b5c6cee2 100644 --- a/Carpet/CarpetLib/src/prolongate_3d_cc_eno_rf2.cc +++ b/Carpet/CarpetLib/src/prolongate_3d_cc_eno_rf2.cc @@ -6,7 +6,6 @@ #include <cmath> #include <cstdlib> -#include "gdata.hh" #include "operator_prototypes_3d.hh" #include "typeprops.hh" @@ -43,13 +42,22 @@ using namespace std; namespace CarpetLib { - -#define SRCIND3(i,j,k) index3 (i, j, k, srciext, srcjext, srckext) -#define DSTIND3(i,j,k) index3 (i, j, k, dstiext, dstjext, dstkext) -#define SRCOFF3(i,j,k) offset3 (i, j, k, srciext, srcjext, srckext) -#define DSTOFF3(i,j,k) offset3 (i, j, k, dstiext, dstjext, dstkext) +#define SRCIND3(i,j,k) \ + index3 (i, j, k, \ + srcipadext, srcjpadext, srckpadext, \ + srciext, srcjext, srckext) +#define DSTIND3(i,j,k) \ + index3 (i, j, k, \ + dstipadext, dstjpadext, dstkpadext, \ + dstiext, dstjext, dstkext) +#define SRCOFF3(i,j,k) \ + offset3 (i, j, k, \ + srciext, srcjext, srckext) +#define DSTOFF3(i,j,k) \ + offset3 (i, j, k, \ + dstiext, dstjext, dstkext) @@ -947,8 +955,10 @@ namespace CarpetLib { template <typename T, int ORDER> void prolongate_3d_cc_eno_rf2 (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -1033,13 +1043,15 @@ namespace CarpetLib { CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); } - if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or - dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride()))) - { - CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); - } + size_t const srcipadext = srcpadext[0]; + size_t const srcjpadext = srcpadext[1]; + size_t const srckpadext = srcpadext[2]; + + size_t const dstipadext = dstpadext[0]; + size_t const dstjpadext = dstpadext[1]; + size_t const dstkpadext = dstpadext[2]; size_t const srciext = srcext[0]; size_t const srcjext = srcext[1]; @@ -1283,8 +1295,10 @@ namespace CarpetLib { template <> \ void \ prolongate_3d_cc_eno_rf2<T,2> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ @@ -1299,8 +1313,10 @@ namespace CarpetLib { template <> \ void \ prolongate_3d_cc_eno_rf2<T,3> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ @@ -1324,8 +1340,10 @@ namespace CarpetLib { template \ void \ prolongate_3d_cc_eno_rf2<T,2> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ @@ -1337,8 +1355,10 @@ namespace CarpetLib { template \ void \ prolongate_3d_cc_eno_rf2<T,3> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ diff --git a/Carpet/CarpetLib/src/prolongate_3d_cc_real8_tvd.F90 b/Carpet/CarpetLib/src/prolongate_3d_cc_real8_tvd.F90 index b39f43766..c970e41e6 100644 --- a/Carpet/CarpetLib/src/prolongate_3d_cc_real8_tvd.F90 +++ b/Carpet/CarpetLib/src/prolongate_3d_cc_real8_tvd.F90 @@ -52,18 +52,21 @@ -subroutine prolongate_3d_cc_real8_tvd (src, srciext, srcjext, & - srckext, dst, dstiext, dstjext, dstkext, srcbbox, & - dstbbox, regbbox) +subroutine prolongate_3d_cc_real8_tvd ( & + src, srcipadext, srcjpadext, srckpadext, srciext, srcjext, srckext, & + dst, dstipadext, dstjpadext, dstkpadext, dstiext, dstjext, dstkext, & + srcbbox, dstbbox, regbbox) implicit none CCTK_REAL8, parameter :: one=1, fourth=one/4 + integer srcipadext, srcjpadext, srckpadext integer srciext, srcjext, srckext - CCTK_REAL8 src(srciext,srcjext,srckext) + CCTK_REAL8 src(srcipadext,srcjpadext,srckpadext) + integer dstipadext, dstjpadext, dstkpadext integer dstiext, dstjext, dstkext - CCTK_REAL8 dst(dstiext,dstjext,dstkext) + CCTK_REAL8 dst(dstipadext,dstjpadext,dstkpadext) ! bbox(:,1) is lower boundary (inclusive) ! bbox(:,2) is upper boundary (inclusive) ! bbox(:,3) is stride diff --git a/Carpet/CarpetLib/src/prolongate_3d_cc_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_cc_rf2.cc index bf960f0aa..269b34947 100644 --- a/Carpet/CarpetLib/src/prolongate_3d_cc_rf2.cc +++ b/Carpet/CarpetLib/src/prolongate_3d_cc_rf2.cc @@ -6,7 +6,6 @@ #include <cmath> #include <cstdlib> -#include "gdata.hh" #include "operator_prototypes_3d.hh" #include "typeprops.hh" @@ -43,13 +42,22 @@ using namespace std; namespace CarpetLib { - -#define SRCIND3(i,j,k) index3 (i, j, k, srciext, srcjext, srckext) -#define DSTIND3(i,j,k) index3 (i, j, k, dstiext, dstjext, dstkext) -#define SRCOFF3(i,j,k) offset3 (i, j, k, srciext, srcjext, srckext) -#define DSTOFF3(i,j,k) offset3 (i, j, k, dstiext, dstjext, dstkext) +#define SRCIND3(i,j,k) \ + index3 (i, j, k, \ + srcipadext, srcjpadext, srckpadext, \ + srciext, srcjext, srckext) +#define DSTIND3(i,j,k) \ + index3 (i, j, k, \ + dstipadext, dstjpadext, dstkpadext, \ + dstiext, dstjext, dstkext) +#define SRCOFF3(i,j,k) \ + offset3 (i, j, k, \ + srciext, srcjext, srckext) +#define DSTOFF3(i,j,k) \ + offset3 (i, j, k, \ + dstiext, dstjext, dstkext) @@ -320,8 +328,10 @@ namespace CarpetLib { template <typename T, int ORDER> void prolongate_3d_cc_rf2 (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -406,13 +416,15 @@ namespace CarpetLib { CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); } - if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or - dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride()))) - { - CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); - } + size_t const srcipadext = srcpadext[0]; + size_t const srcjpadext = srcpadext[1]; + size_t const srckpadext = srcpadext[2]; + + size_t const dstipadext = dstpadext[0]; + size_t const dstjpadext = dstpadext[1]; + size_t const dstkpadext = dstpadext[2]; size_t const srciext = srcext[0]; size_t const srcjext = srcext[1]; @@ -656,8 +668,10 @@ namespace CarpetLib { template \ void \ prolongate_3d_cc_rf2<T,0> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ @@ -668,8 +682,10 @@ namespace CarpetLib { template \ void \ prolongate_3d_cc_rf2<T,1> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ @@ -680,8 +696,10 @@ namespace CarpetLib { template \ void \ prolongate_3d_cc_rf2<T,2> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ @@ -692,20 +710,24 @@ namespace CarpetLib { template \ void \ prolongate_3d_cc_rf2<T,3> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ - ibbox3 const & restrict, \ - ibbox3 const & restrict regbbox, \ + ibbox3 const & restrict, \ + ibbox3 const & restrict regbbox, \ void * extraargs); \ \ template \ void \ prolongate_3d_cc_rf2<T,4> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ @@ -716,8 +738,10 @@ namespace CarpetLib { template \ void \ prolongate_3d_cc_rf2<T,5> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ diff --git a/Carpet/CarpetLib/src/prolongate_3d_dgfe_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_dgfe_rf2.cc index 640b084c2..3be587551 100644 --- a/Carpet/CarpetLib/src/prolongate_3d_dgfe_rf2.cc +++ b/Carpet/CarpetLib/src/prolongate_3d_dgfe_rf2.cc @@ -20,18 +20,30 @@ using namespace hrscc; namespace CarpetLib { -#define SRCIND3(i,j,k) ptrdiff_t(index3(i, j, k, srciext, srcjext, srckext)) -#define DSTIND3(i,j,k) ptrdiff_t(index3(i, j, k, dstiext, dstjext, dstkext)) -#define SRCOFF3(i,j,k) ptrdiff_t(offset3(i, j, k, srciext, srcjext, srckext)) -#define DSTOFF3(i,j,k) ptrdiff_t(offset3(i, j, k, dstiext, dstjext, dstkext)) +#define SRCIND3(i,j,k) \ + index3 (i, j, k, \ + srcipadext, srcjpadext, srckpadext, \ + srciext, srcjext, srckext) +#define DSTIND3(i,j,k) \ + index3 (i, j, k, \ + dstipadext, dstjpadext, dstkpadext, \ + dstiext, dstjext, dstkext) +#define SRCOFF3(i,j,k) \ + offset3 (i, j, k, \ + srciext, srcjext, srckext) +#define DSTOFF3(i,j,k) \ + offset3 (i, j, k, \ + dstiext, dstjext, dstkext) template<typename T, int ORDER> void prolongate_3d_dgfe_rf2(T const *restrict const src, + ivect3 const& restrict srcpadext, ivect3 const& restrict srcext, T *restrict const dst, + ivect3 const& restrict dstpadext, ivect3 const& restrict dstext, ibbox3 const& restrict srcbbox, ibbox3 const& restrict dstbbox, @@ -98,6 +110,14 @@ namespace CarpetLib { + ptrdiff_t const srcipadext = srcpadext[0]; + ptrdiff_t const srcjpadext = srcpadext[1]; + ptrdiff_t const srckpadext = srcpadext[2]; + + ptrdiff_t const dstipadext = dstpadext[0]; + ptrdiff_t const dstjpadext = dstpadext[1]; + ptrdiff_t const dstkpadext = dstpadext[2]; + ptrdiff_t const srciext = srcext[0]; ptrdiff_t const srcjext = srcext[1]; ptrdiff_t const srckext = srcext[2]; @@ -143,8 +163,9 @@ namespace CarpetLib { #ifdef HRSCC_HH ptrdiff_t const i=0; #pragma omp parallel for //collapse(2) - for (ptrdiff_t k=0; k<regkext; k+=2*(ORDER+1)) { - for (ptrdiff_t j=0; j<regjext; j+=2*(ORDER+1)) { + // Zwicky's Intel compiler ices on ptrdiff_t + for (int k=0; k<regkext; k+=2*(ORDER+1)) { + for (int j=0; j<regjext; j+=2*(ORDER+1)) { GLLElement<ORDER>::prolongate_2D (&src[SRCIND3(srcioff+i, srcjoff+j, srckoff+k)], srcstr2d, &dst[DSTIND3(dstioff+2*i, dstjoff+2*j, dstkoff+2*k)], dststr2d); @@ -182,8 +203,9 @@ namespace CarpetLib { #ifdef HRSCC_HH ptrdiff_t const j=0; #pragma omp parallel for //collapse(2) - for (ptrdiff_t k=0; k<regkext; k+=2*(ORDER+1)) { - for (ptrdiff_t i=0; i<regiext; i+=2*(ORDER+1)) { + // Zwicky's Intel compiler ices on ptrdiff_t + for (int k=0; k<regkext; k+=2*(ORDER+1)) { + for (int i=0; i<regiext; i+=2*(ORDER+1)) { GLLElement<ORDER>::prolongate_2D (&src[SRCIND3(srcioff+i, srcjoff+j, srckoff+k)], srcstr2d, &dst[DSTIND3(dstioff+2*i, dstjoff+2*j, dstkoff+2*k)], dststr2d); @@ -221,8 +243,9 @@ namespace CarpetLib { #ifdef HRSCC_HH ptrdiff_t const k=0; #pragma omp parallel for //collapse(2) - for (ptrdiff_t j=0; j<regjext; j+=2*(ORDER+1)) { - for (ptrdiff_t i=0; i<regiext; i+=2*(ORDER+1)) { + // Zwicky's Intel compiler ices on ptrdiff_t + for (int j=0; j<regjext; j+=2*(ORDER+1)) { + for (int i=0; i<regiext; i+=2*(ORDER+1)) { GLLElement<ORDER>::prolongate_2D (&src[SRCIND3(srcioff+i, srcjoff+j, srckoff+k)], srcstr2d, &dst[DSTIND3(dstioff+2*i, dstjoff+2*j, dstkoff+2*k)], dststr2d); @@ -257,9 +280,10 @@ namespace CarpetLib { // Loop over fine region #ifdef HRSCC_HH #pragma omp parallel for //collapse(3) - for (ptrdiff_t k=0; k<regkext; k+=2*(ORDER+1)) { - for (ptrdiff_t j=0; j<regjext; j+=2*(ORDER+1)) { - for (ptrdiff_t i=0; i<regiext; i+=2*(ORDER+1)) { + // Zwicky's Intel compiler ices on ptrdiff_t + for (int k=0; k<regkext; k+=2*(ORDER+1)) { + for (int j=0; j<regjext; j+=2*(ORDER+1)) { + for (int i=0; i<regiext; i+=2*(ORDER+1)) { GLLElement<ORDER>::prolongate_full (&src[SRCIND3(srcioff+i, srcjoff+j, srckoff+k)], srcstr, &dst[DSTIND3(dstioff+2*i, dstjoff+2*j, dstkoff+2*k)], dststr); @@ -281,8 +305,10 @@ namespace CarpetLib { template \ void \ prolongate_3d_dgfe_rf2<T,5>(T const *restrict const src, \ + ivect3 const& restrict srcpadext, \ ivect3 const& restrict srcext, \ T *restrict const dst, \ + ivect3 const& restrict dstpadext, \ ivect3 const& restrict dstext, \ ibbox3 const& restrict srcbbox, \ ibbox3 const& restrict dstbbox, \ @@ -300,8 +326,10 @@ namespace CarpetLib { template<> void prolongate_3d_dgfe_rf2<CCTK_COMPLEX,5>(CCTK_COMPLEX const *restrict const src, + ivect3 const& restrict srcpadext, ivect3 const& restrict srcext, CCTK_COMPLEX *restrict const dst, + ivect3 const& restrict dstpadext, ivect3 const& restrict dstext, ibbox3 const& restrict srcbbox, ibbox3 const& restrict dstbbox, diff --git a/Carpet/CarpetLib/src/prolongate_3d_o5_monotone_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_o5_monotone_rf2.cc index adc4d0693..48fe2f31b 100644 --- a/Carpet/CarpetLib/src/prolongate_3d_o5_monotone_rf2.cc +++ b/Carpet/CarpetLib/src/prolongate_3d_o5_monotone_rf2.cc @@ -23,7 +23,6 @@ #include <cmath> #include <cstdlib> -#include "gdata.hh" #include "operator_prototypes_3d.hh" #include "typeprops.hh" @@ -37,11 +36,14 @@ namespace CarpetLib { #define SRCIND3(i,j,k) \ index3 (i, j, k, \ + srcipadext, srcjpadext, srckpadext, \ srciext, srcjext, srckext) #define DSTIND3(i,j,k) \ index3 (i, j, k, \ + dstipadext, dstjpadext, dstkpadext, \ dstiext, dstjext, dstkext) + template <typename T> inline @@ -83,8 +85,10 @@ namespace CarpetLib { template <typename T> void prolongate_3d_o5_monotone_rf2 (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -137,13 +141,15 @@ namespace CarpetLib { CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); } - if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or - dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride()))) - { - CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); - } + size_t const srcipadext = srcpadext[0]; + size_t const srcjpadext = srcpadext[1]; + size_t const srckpadext = srcpadext[2]; + + size_t const dstipadext = dstpadext[0]; + size_t const dstjpadext = dstpadext[1]; + size_t const dstkpadext = dstpadext[2]; size_t const srciext = srcext[0]; size_t const srcjext = srcext[1]; @@ -827,8 +833,10 @@ namespace CarpetLib { template <> void prolongate_3d_o5_monotone_rf2 (CCTK_COMPLEX const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, CCTK_COMPLEX * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -843,8 +851,10 @@ namespace CarpetLib { template \ void \ prolongate_3d_o5_monotone_rf2 (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ diff --git a/Carpet/CarpetLib/src/prolongate_3d_real8_eno.F90 b/Carpet/CarpetLib/src/prolongate_3d_real8_eno.F90 index fbff861c4..94a9e60f1 100644 --- a/Carpet/CarpetLib/src/prolongate_3d_real8_eno.F90 +++ b/Carpet/CarpetLib/src/prolongate_3d_real8_eno.F90 @@ -75,19 +75,22 @@ function eno1d(q) end function eno1d -subroutine prolongate_3d_real8_eno (src, srciext, srcjext, & - srckext, dst, dstiext, dstjext, dstkext, srcbbox, & - dstbbox, regbbox) +subroutine prolongate_3d_real8_eno ( & + src, srcipadext, srcjpadext, srckpadext, srciext, srcjext, srckext, & + dst, dstipadext, dstjpadext, dstkpadext, dstiext, dstjext, dstkext, & + srcbbox, dstbbox, regbbox) implicit none CCTK_REAL8 one parameter (one = 1) + integer srcipadext, srcjpadext, srckpadext integer srciext, srcjext, srckext - CCTK_REAL8 src(srciext,srcjext,srckext) + CCTK_REAL8 src(srcipadext,srcjpadext,srckpadext) + integer dstipadext, dstjpadext, dstkpadext integer dstiext, dstjext, dstkext - CCTK_REAL8 dst(dstiext,dstjext,dstkext) + CCTK_REAL8 dst(dstipadext,dstjpadext,dstkpadext) !!$ bbox(:,1) is lower boundary (inclusive) !!$ bbox(:,2) is upper boundary (inclusive) !!$ bbox(:,3) is stride diff --git a/Carpet/CarpetLib/src/prolongate_3d_real8_tvd.F90 b/Carpet/CarpetLib/src/prolongate_3d_real8_tvd.F90 index f58cabf98..230a20bd5 100644 --- a/Carpet/CarpetLib/src/prolongate_3d_real8_tvd.F90 +++ b/Carpet/CarpetLib/src/prolongate_3d_real8_tvd.F90 @@ -26,19 +26,22 @@ -subroutine prolongate_3d_real8_tvd (src, srciext, srcjext, & - srckext, dst, dstiext, dstjext, dstkext, srcbbox, & - dstbbox, regbbox) +subroutine prolongate_3d_real8_tvd ( & + src, srcipadext, srcjpadext, srckpadext, srciext, srcjext, srckext, & + dst, dstipadext, dstjpadext, dstkpadext, dstiext, dstjext, dstkext, & + srcbbox, dstbbox, regbbox) implicit none CCTK_REAL8 one parameter (one = 1) + integer srcipadext, srcjpadext, srckpadext integer srciext, srcjext, srckext - CCTK_REAL8 src(srciext,srcjext,srckext) + CCTK_REAL8 src(srcipadext,srcjpadext,srckpadext) + integer dstipadext, dstjpadext, dstkpadext integer dstiext, dstjext, dstkext - CCTK_REAL8 dst(dstiext,dstjext,dstkext) + CCTK_REAL8 dst(dstipadext,dstjpadext,dstkpadext) ! bbox(:,1) is lower boundary (inclusive) ! bbox(:,2) is upper boundary (inclusive) ! bbox(:,3) is stride diff --git a/Carpet/CarpetLib/src/prolongate_3d_real8_weno.F90 b/Carpet/CarpetLib/src/prolongate_3d_real8_weno.F90 index d5ec43427..044132a10 100644 --- a/Carpet/CarpetLib/src/prolongate_3d_real8_weno.F90 +++ b/Carpet/CarpetLib/src/prolongate_3d_real8_weno.F90 @@ -155,19 +155,22 @@ function weno1d(q) end function weno1d -subroutine prolongate_3d_real8_weno (src, srciext, srcjext, & - srckext, dst, dstiext, dstjext, dstkext, srcbbox, & - dstbbox, regbbox) +subroutine prolongate_3d_real8_weno ( & + src, srcipadext, srcjpadext, srckpadext, srciext, srcjext, srckext, & + dst, dstipadext, dstjpadext, dstkpadext, dstiext, dstjext, dstkext, & + srcbbox, dstbbox, regbbox) implicit none CCTK_REAL8 one parameter (one = 1) + integer srcipadext, srcjpadext, srckpadext integer srciext, srcjext, srckext - CCTK_REAL8 src(srciext,srcjext,srckext) + CCTK_REAL8 src(srcipadext,srcjpadext,srckpadext) + integer dstipadext, dstjpadext, dstkpadext integer dstiext, dstjext, dstkext - CCTK_REAL8 dst(dstiext,dstjext,dstkext) + CCTK_REAL8 dst(dstipadext,dstjpadext,dstkpadext) !!$ bbox(:,1) is lower boundary (inclusive) !!$ bbox(:,2) is upper boundary (inclusive) !!$ bbox(:,3) is stride diff --git a/Carpet/CarpetLib/src/prolongate_3d_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_rf2.cc index 984d68c07..cdf24a985 100644 --- a/Carpet/CarpetLib/src/prolongate_3d_rf2.cc +++ b/Carpet/CarpetLib/src/prolongate_3d_rf2.cc @@ -8,7 +8,6 @@ #include <cstdlib> #include <iostream> -#include "gdata.hh" #include "vectors.h" #include "operator_prototypes_3d.hh" #include "typeprops.hh" @@ -21,10 +20,20 @@ namespace CarpetLib { -#define SRCIND3(i,j,k) index3 (i, j, k, srciext, srcjext, srckext) -#define DSTIND3(i,j,k) index3 (i, j, k, dstiext, dstjext, dstkext) -#define SRCOFF3(i,j,k) offset3 (i, j, k, srciext, srcjext, srckext) -#define DSTOFF3(i,j,k) offset3 (i, j, k, dstiext, dstjext, dstkext) +#define SRCIND3(i,j,k) \ + index3 (i, j, k, \ + srcipadext, srcjpadext, srckpadext, \ + srciext, srcjext, srckext) +#define DSTIND3(i,j,k) \ + index3 (i, j, k, \ + dstipadext, dstjpadext, dstkpadext, \ + dstiext, dstjext, dstkext) +#define SRCOFF3(i,j,k) \ + offset3 (i, j, k, \ + srciext, srcjext, srckext) +#define DSTOFF3(i,j,k) \ + offset3 (i, j, k, \ + dstiext, dstjext, dstkext) @@ -473,8 +482,10 @@ namespace CarpetLib { template <typename T, int ORDER> void prolongate_3d_rf2 (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -550,13 +561,15 @@ namespace CarpetLib { CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); } - if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or - dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride()))) - { - CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); - } + size_t const srcipadext = srcpadext[0]; + size_t const srcjpadext = srcpadext[1]; + size_t const srckpadext = srcpadext[2]; + + size_t const dstipadext = dstpadext[0]; + size_t const dstjpadext = dstpadext[1]; + size_t const dstkpadext = dstpadext[2]; size_t const srciext = srcext[0]; size_t const srcjext = srcext[1]; @@ -801,8 +814,10 @@ namespace CarpetLib { template \ void \ prolongate_3d_rf2<T,1> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ @@ -813,8 +828,10 @@ namespace CarpetLib { template \ void \ prolongate_3d_rf2<T,3> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ @@ -825,8 +842,10 @@ namespace CarpetLib { template \ void \ prolongate_3d_rf2<T,5> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ @@ -837,8 +856,10 @@ namespace CarpetLib { template \ void \ prolongate_3d_rf2<T,7> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ @@ -849,8 +870,10 @@ namespace CarpetLib { template \ void \ prolongate_3d_rf2<T,9> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ @@ -861,8 +884,10 @@ namespace CarpetLib { template \ void \ prolongate_3d_rf2<T,11> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ diff --git a/Carpet/CarpetLib/src/prolongate_4d_o1_rf2.cc b/Carpet/CarpetLib/src/prolongate_4d_o1_rf2.cc index 1f7e8203e..58b22f719 100644 --- a/Carpet/CarpetLib/src/prolongate_4d_o1_rf2.cc +++ b/Carpet/CarpetLib/src/prolongate_4d_o1_rf2.cc @@ -6,7 +6,6 @@ #include <cmath> #include <cstdlib> -#include "gdata.hh" #include "operator_prototypes_4d.hh" #include "typeprops.hh" @@ -18,11 +17,13 @@ namespace CarpetLib { -#define SRCIND4(i,j,k,l) \ - index4 (i, j, k, l, \ +#define SRCIND4(i,j,k,l) \ + index4 (i, j, k, l, \ + srcipadext, srcjpadext, srckpadext, srclpadext, \ srciext, srcjext, srckext, srclext) -#define DSTIND4(i,j,k,l) \ - index4 (i, j, k, l, \ +#define DSTIND4(i,j,k,l) \ + index4 (i, j, k, l, \ + dstipadext, dstjpadext, dstkpadext, dstlpadext, \ dstiext, dstjext, dstkext, dstlext) @@ -30,8 +31,10 @@ namespace CarpetLib { template <typename T> void prolongate_4d_o1_rf2 (T const * restrict const src, + ivect4 const & restrict srcpadext, ivect4 const & restrict srcext, T * restrict const dst, + ivect4 const & restrict dstpadext, ivect4 const & restrict dstext, ibbox4 const & restrict srcbbox, ibbox4 const & restrict dstbbox, @@ -84,13 +87,17 @@ namespace CarpetLib { CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); } - if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or - dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride()))) - { - CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); - } + size_t const srcipadext = srcpadext[0]; + size_t const srcjpadext = srcpadext[1]; + size_t const srckpadext = srcpadext[2]; + size_t const srclpadext = srcpadext[3]; + + size_t const dstipadext = dstpadext[0]; + size_t const dstjpadext = dstpadext[1]; + size_t const dstkpadext = dstpadext[2]; + size_t const dstlpadext = dstpadext[3]; size_t const srciext = srcext[0]; size_t const srcjext = srcext[1]; @@ -593,8 +600,10 @@ namespace CarpetLib { template \ void \ prolongate_4d_o1_rf2 (T const * restrict const src, \ + ivect4 const & restrict srcpadext, \ ivect4 const & restrict srcext, \ T * restrict const dst, \ + ivect4 const & restrict dstpadext, \ ivect4 const & restrict dstext, \ ibbox4 const & restrict srcbbox, \ ibbox4 const & restrict dstbbox, \ diff --git a/Carpet/CarpetLib/src/restrict_3d_cc_o3_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_cc_o3_rf2.cc index 2f544110a..981398ae3 100644 --- a/Carpet/CarpetLib/src/restrict_3d_cc_o3_rf2.cc +++ b/Carpet/CarpetLib/src/restrict_3d_cc_o3_rf2.cc @@ -5,7 +5,6 @@ #include <cassert> #include <cmath> -#include "gdata.hh" #include "operator_prototypes_3d.hh" #include "typeprops.hh" @@ -19,9 +18,11 @@ namespace CarpetLib { #define SRCIND3(i,j,k) \ index3 (srcioff + (i), srcjoff + (j), srckoff + (k), \ + srcipadext, srcjpadext, srckpadext, \ srciext, srcjext, srckext) #define DSTIND3(i,j,k) \ index3 (dstioff + (i), dstjoff + (j), dstkoff + (k), \ + dstipadext, dstjpadext, dstkpadext, \ dstiext, dstjext, dstkext) @@ -38,14 +39,16 @@ namespace CarpetLib { template <typename T> void restrict_3d_cc_o3_rf2 (T const * restrict const src, - ivect3 const & restrict srcext, - T * restrict const dst, - ivect3 const & restrict dstext, - ibbox3 const & restrict srcbbox, - ibbox3 const & restrict dstbbox, - ibbox3 const & restrict, - ibbox3 const & restrict regbbox, - void * extraargs) + ivect3 const & restrict srcpadext, + ivect3 const & restrict srcext, + T * restrict const dst, + ivect3 const & restrict dstpadext, + ivect3 const & restrict dstext, + ibbox3 const & restrict srcbbox, + ibbox3 const & restrict dstbbox, + ibbox3 const & restrict, + ibbox3 const & restrict regbbox, + void * extraargs) { assert (not extraargs); @@ -80,12 +83,6 @@ namespace CarpetLib { CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); } - if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or - dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride()))) - { - CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); - } - ivect3 const regext = regbbox.shape() / regbbox.stride(); @@ -101,6 +98,14 @@ namespace CarpetLib { + int const srcipadext = srcpadext[0]; + int const srcjpadext = srcpadext[1]; + int const srckpadext = srcpadext[2]; + + int const dstipadext = dstpadext[0]; + int const dstjpadext = dstpadext[1]; + int const dstkpadext = dstpadext[2]; + int const srciext = srcext[0]; int const srcjext = srcext[1]; int const srckext = srcext[2]; @@ -235,18 +240,20 @@ namespace CarpetLib { -#define TYPECASE(N,T) \ - template \ - void \ - restrict_3d_cc_o3_rf2 (T const * restrict const src, \ - ivect3 const & restrict srcext, \ - T * restrict const dst, \ - ivect3 const & restrict dstext, \ - ibbox3 const & restrict srcbbox, \ - ibbox3 const & restrict dstbbox, \ - ibbox3 const & restrict, \ - ibbox3 const & restrict regbbox, \ - void * extraargs); +#define TYPECASE(N,T) \ + template \ + void \ + restrict_3d_cc_o3_rf2 (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ + ivect3 const & restrict srcext, \ + T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ + ivect3 const & restrict dstext, \ + ibbox3 const & restrict srcbbox, \ + ibbox3 const & restrict dstbbox, \ + ibbox3 const & restrict, \ + ibbox3 const & restrict regbbox, \ + void * extraargs); #include "typecase.hh" #undef TYPECASE diff --git a/Carpet/CarpetLib/src/restrict_3d_cc_o5_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_cc_o5_rf2.cc index a90ebb868..6c90daa44 100644 --- a/Carpet/CarpetLib/src/restrict_3d_cc_o5_rf2.cc +++ b/Carpet/CarpetLib/src/restrict_3d_cc_o5_rf2.cc @@ -19,9 +19,11 @@ namespace CarpetLib { #define SRCIND3(i,j,k) \ index3 (srcioff + (i), srcjoff + (j), srckoff + (k), \ + srcipadext, srcjpadext, srckpadext, \ srciext, srcjext, srckext) #define DSTIND3(i,j,k) \ index3 (dstioff + (i), dstjoff + (j), dstkoff + (k), \ + dstipadext, dstjpadext, dstkpadext, \ dstiext, dstjext, dstkext) #define SRCOFF3(i,j,k) \ offset3 (srcioff + (i), srcjoff + (j), srckoff + (k), \ @@ -107,14 +109,16 @@ namespace CarpetLib { template <typename T> void restrict_3d_cc_o5_rf2 (T const * restrict const src, - ivect3 const & restrict srcext, - T * restrict const dst, - ivect3 const & restrict dstext, - ibbox3 const & restrict srcbbox, - ibbox3 const & restrict dstbbox, - ibbox3 const & restrict, - ibbox3 const & restrict regbbox, - void * extraargs) + ivect3 const & restrict srcpadext, + ivect3 const & restrict srcext, + T * restrict const dst, + ivect3 const & restrict dstpadext, + ivect3 const & restrict dstext, + ibbox3 const & restrict srcbbox, + ibbox3 const & restrict dstbbox, + ibbox3 const & restrict, + ibbox3 const & restrict regbbox, + void * extraargs) { assert (not extraargs); @@ -149,12 +153,6 @@ namespace CarpetLib { CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); } - if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or - dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride()))) - { - CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); - } - ivect3 const regext = regbbox.shape() / regbbox.stride(); @@ -170,6 +168,14 @@ namespace CarpetLib { + int const srcipadext = srcpadext[0]; + int const srcjpadext = srcpadext[1]; + int const srckpadext = srcpadext[2]; + + int const dstipadext = dstpadext[0]; + int const dstjpadext = dstpadext[1]; + int const dstkpadext = dstpadext[2]; + int const srciext = srcext[0]; int const srcjext = srcext[1]; int const srckext = srcext[2]; @@ -239,18 +245,20 @@ namespace CarpetLib { -#define TYPECASE(N,T) \ - template \ - void \ - restrict_3d_cc_o5_rf2 (T const * restrict const src, \ - ivect3 const & restrict srcext, \ - T * restrict const dst, \ - ivect3 const & restrict dstext, \ - ibbox3 const & restrict srcbbox, \ - ibbox3 const & restrict dstbbox, \ - ibbox3 const & restrict, \ - ibbox3 const & restrict regbbox, \ - void * extraargs); +#define TYPECASE(N,T) \ + template \ + void \ + restrict_3d_cc_o5_rf2 (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ + ivect3 const & restrict srcext, \ + T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ + ivect3 const & restrict dstext, \ + ibbox3 const & restrict srcbbox, \ + ibbox3 const & restrict dstbbox, \ + ibbox3 const & restrict, \ + ibbox3 const & restrict regbbox, \ + void * extraargs); #include "typecase.hh" #undef TYPECASE diff --git a/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc index dd419cb92..13b59a7f8 100644 --- a/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc +++ b/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc @@ -5,7 +5,6 @@ #include <cassert> #include <cmath> -#include "gdata.hh" #include "operator_prototypes_3d.hh" #include "typeprops.hh" @@ -19,9 +18,11 @@ namespace CarpetLib { #define SRCIND3(i,j,k) \ index3 (srcioff + (i), srcjoff + (j), srckoff + (k), \ + srcipadext, srcjpadext, srckpadext, \ srciext, srcjext, srckext) #define DSTIND3(i,j,k) \ index3 (dstioff + (i), dstjoff + (j), dstkoff + (k), \ + dstipadext, dstjpadext, dstkpadext, \ dstiext, dstjext, dstkext) @@ -29,8 +30,10 @@ namespace CarpetLib { template <typename T> void restrict_3d_cc_rf2 (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -71,12 +74,6 @@ namespace CarpetLib { CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); } - if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or - dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride()))) - { - CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); - } - ivect3 const regext = regbbox.shape() / regbbox.stride(); @@ -92,6 +89,14 @@ namespace CarpetLib { + int const srcipadext = srcpadext[0]; + int const srcjpadext = srcpadext[1]; + int const srckpadext = srcpadext[2]; + + int const dstipadext = dstpadext[0]; + int const dstjpadext = dstpadext[1]; + int const dstkpadext = dstpadext[2]; + int const srciext = srcext[0]; int const srcjext = srcext[1]; int const srckext = srcext[2]; @@ -127,6 +132,8 @@ namespace CarpetLib { for (int j=0; j<regjext; ++j) { for (int i=0; i<regiext; ++i) { + // TODO: Introduce higher-order restriction operators (but + // don't use these for hydro!) dst [DSTIND3(i, j, k)] = + f1*f1*f1 * src [SRCIND3(2*i , 2*j , 2*k )] + f2*f1*f1 * src [SRCIND3(2*i+1, 2*j , 2*k )] @@ -145,17 +152,19 @@ namespace CarpetLib { -#define TYPECASE(N,T) \ - template \ - void \ - restrict_3d_cc_rf2 (T const * restrict const src, \ - ivect3 const & restrict srcext, \ - T * restrict const dst, \ - ivect3 const & restrict dstext, \ - ibbox3 const & restrict srcbbox, \ - ibbox3 const & restrict dstbbox, \ - ibbox3 const & restrict, \ - ibbox3 const & restrict regbbox, \ +#define TYPECASE(N,T) \ + template \ + void \ + restrict_3d_cc_rf2 (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ + ivect3 const & restrict srcext, \ + T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ + ivect3 const & restrict dstext, \ + ibbox3 const & restrict srcbbox, \ + ibbox3 const & restrict dstbbox, \ + ibbox3 const & restrict, \ + ibbox3 const & restrict regbbox, \ void * extraargs); #include "typecase.hh" #undef TYPECASE diff --git a/Carpet/CarpetLib/src/restrict_3d_dgfe_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_dgfe_rf2.cc index 4b2c0c648..e438d360b 100644 --- a/Carpet/CarpetLib/src/restrict_3d_dgfe_rf2.cc +++ b/Carpet/CarpetLib/src/restrict_3d_dgfe_rf2.cc @@ -20,18 +20,30 @@ using namespace hrscc; namespace CarpetLib { -#define SRCIND3(i,j,k) ptrdiff_t(index3(i, j, k, srciext, srcjext, srckext)) -#define DSTIND3(i,j,k) ptrdiff_t(index3(i, j, k, dstiext, dstjext, dstkext)) -#define SRCOFF3(i,j,k) ptrdiff_t(offset3(i, j, k, srciext, srcjext, srckext)) -#define DSTOFF3(i,j,k) ptrdiff_t(offset3(i, j, k, dstiext, dstjext, dstkext)) +#define SRCIND3(i,j,k) \ + index3 (i, j, k, \ + srcipadext, srcjpadext, srckpadext, \ + srciext, srcjext, srckext) +#define DSTIND3(i,j,k) \ + index3 (i, j, k, \ + dstipadext, dstjpadext, dstkpadext, \ + dstiext, dstjext, dstkext) +#define SRCOFF3(i,j,k) \ + offset3 (i, j, k, \ + srciext, srcjext, srckext) +#define DSTOFF3(i,j,k) \ + offset3 (i, j, k, \ + dstiext, dstjext, dstkext) template<typename T, int ORDER> void restrict_3d_dgfe_rf2(T const *restrict const src, + ivect3 const& restrict srcpadext, ivect3 const& restrict srcext, T *restrict const dst, + ivect3 const& restrict dstpadext, ivect3 const& restrict dstext, ibbox3 const& restrict srcbbox, ibbox3 const& restrict dstbbox, @@ -82,6 +94,14 @@ namespace CarpetLib { + ptrdiff_t const srcipadext = srcpadext[0]; + ptrdiff_t const srcjpadext = srcpadext[1]; + ptrdiff_t const srckpadext = srcpadext[2]; + + ptrdiff_t const dstipadext = dstpadext[0]; + ptrdiff_t const dstjpadext = dstpadext[1]; + ptrdiff_t const dstkpadext = dstpadext[2]; + ptrdiff_t const srciext = srcext[0]; ptrdiff_t const srcjext = srcext[1]; ptrdiff_t const srckext = srcext[2]; @@ -127,9 +147,10 @@ namespace CarpetLib { // Loop over coarse region #ifdef HRSCC_HH #pragma omp parallel for //collapse(3) - for (ptrdiff_t k=0; k<regkext; k+=ORDER+1) { - for (ptrdiff_t j=0; j<regjext; j+=ORDER+1) { - for (ptrdiff_t i=0; i<regiext; i+=ORDER+1) { + // Zwicky's Intel compiler ices on ptrdiff_t + for (int k=0; k<regkext; k+=ORDER+1) { + for (int j=0; j<regjext; j+=ORDER+1) { + for (int i=0; i<regiext; i+=ORDER+1) { GLLElement<ORDER>::restrict_full (&src[SRCIND3(srcioff+2*i, srcjoff+2*j, srckoff+2*k)], srcstr, &dst[DSTIND3(dstioff+i, dstjoff+j, dstkoff+k)], dststr); @@ -149,8 +170,10 @@ namespace CarpetLib { template \ void \ restrict_3d_dgfe_rf2<T,5>(T const *restrict const src, \ + ivect3 const& restrict srcpadext, \ ivect3 const& restrict srcext, \ T *restrict const dst, \ + ivect3 const& restrict dstpadext, \ ivect3 const& restrict dstext, \ ibbox3 const& restrict srcbbox, \ ibbox3 const& restrict dstbbox, \ @@ -168,8 +191,10 @@ namespace CarpetLib { template<> void restrict_3d_dgfe_rf2<CCTK_COMPLEX,5>(CCTK_COMPLEX const *restrict const src, + ivect3 const& restrict srcpadext, ivect3 const& restrict srcext, CCTK_COMPLEX *restrict const dst, + ivect3 const& restrict dstpadext, ivect3 const& restrict dstext, ibbox3 const& restrict srcbbox, ibbox3 const& restrict dstbbox, diff --git a/Carpet/CarpetLib/src/restrict_3d_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_rf2.cc index 5fff24a3e..c1dade7fb 100644 --- a/Carpet/CarpetLib/src/restrict_3d_rf2.cc +++ b/Carpet/CarpetLib/src/restrict_3d_rf2.cc @@ -7,7 +7,6 @@ #include <cstdlib> #include <iostream> -#include "gdata.hh" #include "operator_prototypes_3d.hh" #include "typeprops.hh" @@ -21,9 +20,11 @@ namespace CarpetLib { #define SRCIND3(i,j,k) \ index3 (srcioff + (i), srcjoff + (j), srckoff + (k), \ + srcipadext, srcjpadext, srckpadext, \ srciext, srcjext, srckext) #define DSTIND3(i,j,k) \ index3 (dstioff + (i), dstjoff + (j), dstkoff + (k), \ + dstipadext, dstjpadext, dstkpadext, \ dstiext, dstjext, dstkext) @@ -31,8 +32,10 @@ namespace CarpetLib { template <typename T> void restrict_3d_rf2 (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -67,12 +70,6 @@ namespace CarpetLib { CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); } - if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or - dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride()))) - { - CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); - } - ivect3 const regext = regbbox.shape() / regbbox.stride(); @@ -85,6 +82,14 @@ namespace CarpetLib { + int const srcipadext = srcpadext[0]; + int const srcjpadext = srcpadext[1]; + int const srckpadext = srcpadext[2]; + + int const dstipadext = dstpadext[0]; + int const dstjpadext = dstpadext[1]; + int const dstkpadext = dstpadext[2]; + int const srciext = srcext[0]; int const srcjext = srcext[1]; int const srckext = srcext[2]; @@ -126,8 +131,10 @@ namespace CarpetLib { template \ void \ restrict_3d_rf2 (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ diff --git a/Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc index 827fb5dff..44799a60f 100644 --- a/Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc +++ b/Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc @@ -5,7 +5,6 @@ #include <cassert> #include <cmath> -#include "gdata.hh" #include "operator_prototypes_3d.hh" #include "typeprops.hh" @@ -19,9 +18,11 @@ namespace CarpetLib { #define SRCIND3(i,j,k) \ index3 (srcioff + (i), srcjoff + (j), srckoff + (k), \ + srcipadext, srcjpadext, srckpadext, \ srciext, srcjext, srckext) #define DSTIND3(i,j,k) \ index3 (dstioff + (i), dstjoff + (j), dstkoff + (k), \ + dstipadext, dstjpadext, dstkpadext, \ dstiext, dstjext, dstkext) #define SRCOFF3(i,j,k) \ offset3 (srcioff + (i), srcjoff + (j), srckoff + (k), \ @@ -136,8 +137,10 @@ namespace CarpetLib { template <typename T, int centi, int centj, int centk> void restrict_3d_vc_rf2 (T const * restrict const src, + ivect3 const & restrict srcpadext, ivect3 const & restrict srcext, T * restrict const dst, + ivect3 const & restrict dstpadext, ivect3 const & restrict dstext, ibbox3 const & restrict srcbbox, ibbox3 const & restrict dstbbox, @@ -188,12 +191,6 @@ namespace CarpetLib { CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); } - if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or - dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride()))) - { - CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); - } - ivect3 const regext = regbbox.shape() / regbbox.stride(); @@ -217,6 +214,14 @@ namespace CarpetLib { + int const srcipadext = srcpadext[0]; + int const srcjpadext = srcpadext[1]; + int const srckpadext = srcpadext[2]; + + int const dstipadext = dstpadext[0]; + int const dstjpadext = dstpadext[1]; + int const dstkpadext = dstpadext[2]; + int const srciext = srcext[0]; int const srcjext = srcext[1]; int const srckext = srcext[2]; @@ -284,8 +289,10 @@ namespace CarpetLib { template \ void \ restrict_3d_vc_rf2<T,0,0,0> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ @@ -295,8 +302,10 @@ namespace CarpetLib { template \ void \ restrict_3d_vc_rf2<T,0,0,1> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ @@ -306,8 +315,10 @@ namespace CarpetLib { template \ void \ restrict_3d_vc_rf2<T,0,1,0> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ @@ -317,8 +328,10 @@ namespace CarpetLib { template \ void \ restrict_3d_vc_rf2<T,0,1,1> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ @@ -328,8 +341,10 @@ namespace CarpetLib { template \ void \ restrict_3d_vc_rf2<T,1,0,0> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ @@ -339,8 +354,10 @@ namespace CarpetLib { template \ void \ restrict_3d_vc_rf2<T,1,0,1> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ @@ -350,8 +367,10 @@ namespace CarpetLib { template \ void \ restrict_3d_vc_rf2<T,1,1,0> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ @@ -361,8 +380,10 @@ namespace CarpetLib { template \ void \ restrict_3d_vc_rf2<T,1,1,1> (T const * restrict const src, \ + ivect3 const & restrict srcpadext, \ ivect3 const & restrict srcext, \ T * restrict const dst, \ + ivect3 const & restrict dstpadext, \ ivect3 const & restrict dstext, \ ibbox3 const & restrict srcbbox, \ ibbox3 const & restrict dstbbox, \ diff --git a/Carpet/CarpetLib/src/restrict_4d_rf2.cc b/Carpet/CarpetLib/src/restrict_4d_rf2.cc index 01b66ee78..bd5d7079c 100644 --- a/Carpet/CarpetLib/src/restrict_4d_rf2.cc +++ b/Carpet/CarpetLib/src/restrict_4d_rf2.cc @@ -7,7 +7,6 @@ #include <cstdlib> #include <iostream> -#include "gdata.hh" #include "operator_prototypes_4d.hh" #include "typeprops.hh" @@ -21,9 +20,11 @@ namespace CarpetLib { #define SRCIND4(i,j,k,l) \ index4 (srcioff + (i), srcjoff + (j), srckoff + (k), srcloff + (l), \ + srcipadext, srcjpadext, srckpadext, srclpadext, \ srciext, srcjext, srckext, srclext) #define DSTIND4(i,j,k,l) \ index4 (dstioff + (i), dstjoff + (j), dstkoff + (k), dstloff + (l), \ + dstipadext, dstjpadext, dstkpadext, dstlpadext, \ dstiext, dstjext, dstkext, dstlext) @@ -31,8 +32,10 @@ namespace CarpetLib { template <typename T> void restrict_4d_rf2 (T const * restrict const src, + ivect4 const & restrict srcpadext, ivect4 const & restrict srcext, T * restrict const dst, + ivect4 const & restrict dstpadext, ivect4 const & restrict dstext, ibbox4 const & restrict srcbbox, ibbox4 const & restrict dstbbox, @@ -67,12 +70,6 @@ namespace CarpetLib { CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); } - if (any (srcext != gdata::allocated_memory_shape(srcbbox.shape() / srcbbox.stride()) or - dstext != gdata::allocated_memory_shape(dstbbox.shape() / dstbbox.stride()))) - { - CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); - } - ivect4 const regext = regbbox.shape() / regbbox.stride(); @@ -83,6 +80,16 @@ namespace CarpetLib { + ptrdiff_t const srcipadext = srcpadext[0]; + ptrdiff_t const srcjpadext = srcpadext[1]; + ptrdiff_t const srckpadext = srcpadext[2]; + ptrdiff_t const srclpadext = srcpadext[3]; + + ptrdiff_t const dstipadext = dstpadext[0]; + ptrdiff_t const dstjpadext = dstpadext[1]; + ptrdiff_t const dstkpadext = dstpadext[2]; + ptrdiff_t const dstlpadext = dstpadext[3]; + ptrdiff_t const srciext = srcext[0]; ptrdiff_t const srcjext = srcext[1]; ptrdiff_t const srckext = srcext[2]; @@ -132,8 +139,10 @@ namespace CarpetLib { template \ void \ restrict_4d_rf2 (T const * restrict const src, \ + ivect4 const & restrict srcpadext, \ ivect4 const & restrict srcext, \ T * restrict const dst, \ + ivect4 const & restrict dstpadext, \ ivect4 const & restrict dstext, \ ibbox4 const & restrict srcbbox, \ ibbox4 const & restrict dstbbox, \ |