diff options
Diffstat (limited to 'Carpet/CarpetLib/src/prolongate_3d_dgfe_rf2.cc')
-rw-r--r-- | Carpet/CarpetLib/src/prolongate_3d_dgfe_rf2.cc | 54 |
1 files changed, 41 insertions, 13 deletions
diff --git a/Carpet/CarpetLib/src/prolongate_3d_dgfe_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_dgfe_rf2.cc index 640b084c2..3be587551 100644 --- a/Carpet/CarpetLib/src/prolongate_3d_dgfe_rf2.cc +++ b/Carpet/CarpetLib/src/prolongate_3d_dgfe_rf2.cc @@ -20,18 +20,30 @@ using namespace hrscc; namespace CarpetLib { -#define SRCIND3(i,j,k) ptrdiff_t(index3(i, j, k, srciext, srcjext, srckext)) -#define DSTIND3(i,j,k) ptrdiff_t(index3(i, j, k, dstiext, dstjext, dstkext)) -#define SRCOFF3(i,j,k) ptrdiff_t(offset3(i, j, k, srciext, srcjext, srckext)) -#define DSTOFF3(i,j,k) ptrdiff_t(offset3(i, j, k, dstiext, dstjext, dstkext)) +#define SRCIND3(i,j,k) \ + index3 (i, j, k, \ + srcipadext, srcjpadext, srckpadext, \ + srciext, srcjext, srckext) +#define DSTIND3(i,j,k) \ + index3 (i, j, k, \ + dstipadext, dstjpadext, dstkpadext, \ + dstiext, dstjext, dstkext) +#define SRCOFF3(i,j,k) \ + offset3 (i, j, k, \ + srciext, srcjext, srckext) +#define DSTOFF3(i,j,k) \ + offset3 (i, j, k, \ + dstiext, dstjext, dstkext) template<typename T, int ORDER> void prolongate_3d_dgfe_rf2(T const *restrict const src, + ivect3 const& restrict srcpadext, ivect3 const& restrict srcext, T *restrict const dst, + ivect3 const& restrict dstpadext, ivect3 const& restrict dstext, ibbox3 const& restrict srcbbox, ibbox3 const& restrict dstbbox, @@ -98,6 +110,14 @@ namespace CarpetLib { + ptrdiff_t const srcipadext = srcpadext[0]; + ptrdiff_t const srcjpadext = srcpadext[1]; + ptrdiff_t const srckpadext = srcpadext[2]; + + ptrdiff_t const dstipadext = dstpadext[0]; + ptrdiff_t const dstjpadext = dstpadext[1]; + ptrdiff_t const dstkpadext = dstpadext[2]; + ptrdiff_t const srciext = srcext[0]; ptrdiff_t const srcjext = srcext[1]; ptrdiff_t const srckext = srcext[2]; @@ -143,8 +163,9 @@ namespace CarpetLib { #ifdef HRSCC_HH ptrdiff_t const i=0; #pragma omp parallel for //collapse(2) - for (ptrdiff_t k=0; k<regkext; k+=2*(ORDER+1)) { - for (ptrdiff_t j=0; j<regjext; j+=2*(ORDER+1)) { + // Zwicky's Intel compiler ices on ptrdiff_t + for (int k=0; k<regkext; k+=2*(ORDER+1)) { + for (int j=0; j<regjext; j+=2*(ORDER+1)) { GLLElement<ORDER>::prolongate_2D (&src[SRCIND3(srcioff+i, srcjoff+j, srckoff+k)], srcstr2d, &dst[DSTIND3(dstioff+2*i, dstjoff+2*j, dstkoff+2*k)], dststr2d); @@ -182,8 +203,9 @@ namespace CarpetLib { #ifdef HRSCC_HH ptrdiff_t const j=0; #pragma omp parallel for //collapse(2) - for (ptrdiff_t k=0; k<regkext; k+=2*(ORDER+1)) { - for (ptrdiff_t i=0; i<regiext; i+=2*(ORDER+1)) { + // Zwicky's Intel compiler ices on ptrdiff_t + for (int k=0; k<regkext; k+=2*(ORDER+1)) { + for (int i=0; i<regiext; i+=2*(ORDER+1)) { GLLElement<ORDER>::prolongate_2D (&src[SRCIND3(srcioff+i, srcjoff+j, srckoff+k)], srcstr2d, &dst[DSTIND3(dstioff+2*i, dstjoff+2*j, dstkoff+2*k)], dststr2d); @@ -221,8 +243,9 @@ namespace CarpetLib { #ifdef HRSCC_HH ptrdiff_t const k=0; #pragma omp parallel for //collapse(2) - for (ptrdiff_t j=0; j<regjext; j+=2*(ORDER+1)) { - for (ptrdiff_t i=0; i<regiext; i+=2*(ORDER+1)) { + // Zwicky's Intel compiler ices on ptrdiff_t + for (int j=0; j<regjext; j+=2*(ORDER+1)) { + for (int i=0; i<regiext; i+=2*(ORDER+1)) { GLLElement<ORDER>::prolongate_2D (&src[SRCIND3(srcioff+i, srcjoff+j, srckoff+k)], srcstr2d, &dst[DSTIND3(dstioff+2*i, dstjoff+2*j, dstkoff+2*k)], dststr2d); @@ -257,9 +280,10 @@ namespace CarpetLib { // Loop over fine region #ifdef HRSCC_HH #pragma omp parallel for //collapse(3) - for (ptrdiff_t k=0; k<regkext; k+=2*(ORDER+1)) { - for (ptrdiff_t j=0; j<regjext; j+=2*(ORDER+1)) { - for (ptrdiff_t i=0; i<regiext; i+=2*(ORDER+1)) { + // Zwicky's Intel compiler ices on ptrdiff_t + for (int k=0; k<regkext; k+=2*(ORDER+1)) { + for (int j=0; j<regjext; j+=2*(ORDER+1)) { + for (int i=0; i<regiext; i+=2*(ORDER+1)) { GLLElement<ORDER>::prolongate_full (&src[SRCIND3(srcioff+i, srcjoff+j, srckoff+k)], srcstr, &dst[DSTIND3(dstioff+2*i, dstjoff+2*j, dstkoff+2*k)], dststr); @@ -281,8 +305,10 @@ namespace CarpetLib { template \ void \ prolongate_3d_dgfe_rf2<T,5>(T const *restrict const src, \ + ivect3 const& restrict srcpadext, \ ivect3 const& restrict srcext, \ T *restrict const dst, \ + ivect3 const& restrict dstpadext, \ ivect3 const& restrict dstext, \ ibbox3 const& restrict srcbbox, \ ibbox3 const& restrict dstbbox, \ @@ -300,8 +326,10 @@ namespace CarpetLib { template<> void prolongate_3d_dgfe_rf2<CCTK_COMPLEX,5>(CCTK_COMPLEX const *restrict const src, + ivect3 const& restrict srcpadext, ivect3 const& restrict srcext, CCTK_COMPLEX *restrict const dst, + ivect3 const& restrict dstpadext, ivect3 const& restrict dstext, ibbox3 const& restrict srcbbox, ibbox3 const& restrict dstbbox, |