diff options
Diffstat (limited to 'Carpet/CarpetLib/src/prolongate_4d_o1_rf2.cc')
-rw-r--r-- | Carpet/CarpetLib/src/prolongate_4d_o1_rf2.cc | 602 |
1 files changed, 602 insertions, 0 deletions
diff --git a/Carpet/CarpetLib/src/prolongate_4d_o1_rf2.cc b/Carpet/CarpetLib/src/prolongate_4d_o1_rf2.cc new file mode 100644 index 000000000..4c8022916 --- /dev/null +++ b/Carpet/CarpetLib/src/prolongate_4d_o1_rf2.cc @@ -0,0 +1,602 @@ +#include <algorithm> +#include <cassert> +#include <cmath> +#include <cstdlib> + +#include <cctk.h> +#include <cctk_Parameters.h> + +#include "operator_prototypes_4d.hh" +#include "typeprops.hh" + +using namespace std; + + + +namespace CarpetLib { + + + +#define SRCIND4(i,j,k,l) \ + index4 (i, j, k, l, \ + srciext, srcjext, srckext, srclext) +#define DSTIND4(i,j,k,l) \ + index4 (i, j, k, l, \ + dstiext, dstjext, dstkext, dstlext) + + + + template <typename T> + void + prolongate_4d_o1_rf2 (T const * restrict const src, + ivect4 const & restrict srcext, + T * restrict const dst, + ivect4 const & restrict dstext, + ibbox4 const & restrict srcbbox, + ibbox4 const & restrict dstbbox, + ibbox4 const & restrict regbbox) + { + typedef typename typeprops<T>::real RT; + + + + if (any (srcbbox.stride() <= regbbox.stride() or + dstbbox.stride() != regbbox.stride())) + { + CCTK_WARN (0, "Internal error: strides disagree"); + } + + if (any (srcbbox.stride() != reffact2 * dstbbox.stride())) { + CCTK_WARN (0, "Internal error: source strides are not twice the destination strides"); + } + + // This could be handled, but is likely to point to an error + // elsewhere + if (regbbox.empty()) { + CCTK_WARN (0, "Internal error: region extent is empty"); + } + + + + ivect4 const regext = regbbox.shape() / regbbox.stride(); + assert (all ((regbbox.lower() - srcbbox.lower()) % regbbox.stride() == 0)); + ivect4 const srcoff = (regbbox.lower() - srcbbox.lower()) / regbbox.stride(); + assert (all ((regbbox.lower() - dstbbox.lower()) % regbbox.stride() == 0)); + ivect4 const dstoff = (regbbox.lower() - dstbbox.lower()) / regbbox.stride(); + + + + bvect4 const needoffsetlo = srcoff % reffact2 != 0 or regext > 1; + bvect4 const needoffsethi = (srcoff + regext - 1) % reffact2 != 0 or regext > 1; + ivect4 const offsetlo = either (needoffsetlo, 1, 0); + ivect4 const offsethi = either (needoffsethi, 1, 0); + + + + if (not regbbox.expand(offsetlo, offsethi).is_contained_in(srcbbox) or + not regbbox .is_contained_in(dstbbox)) + { + CCTK_WARN (0, "Internal error: region extent is not contained in array extent"); + } + + if (any (srcext != srcbbox.shape() / srcbbox.stride() or + dstext != dstbbox.shape() / dstbbox.stride())) + { + CCTK_WARN (0, "Internal error: array sizes don't agree with bounding boxes"); + } + + + + size_t const srciext = srcext[0]; + size_t const srcjext = srcext[1]; + size_t const srckext = srcext[2]; + size_t const srclext = srcext[3]; + + size_t const dstiext = dstext[0]; + size_t const dstjext = dstext[1]; + size_t const dstkext = dstext[2]; + size_t const dstlext = dstext[3]; + + size_t const regiext = regext[0]; + size_t const regjext = regext[1]; + size_t const regkext = regext[2]; + size_t const reglext = regext[3]; + + size_t const srcioff = srcoff[0]; + size_t const srcjoff = srcoff[1]; + size_t const srckoff = srcoff[2]; + size_t const srcloff = srcoff[3]; + + size_t const dstioff = dstoff[0]; + size_t const dstjoff = dstoff[1]; + size_t const dstkoff = dstoff[2]; + size_t const dstloff = dstoff[3]; + + + + size_t const fi = srcioff % 2; + size_t const fj = srcjoff % 2; + size_t const fk = srckoff % 2; + size_t const fl = srcloff % 2; + + size_t const i0 = srcioff / 2; + size_t const j0 = srcjoff / 2; + size_t const k0 = srckoff / 2; + size_t const l0 = srcloff / 2; + + RT const one = 1; + + RT const f1 = one/2; + RT const f2 = one/2; + + + + // Loop over fine region + // Label scheme: l 8 fl fk fj fi + + size_t is, js, ks, ls; + size_t id, jd, kd, ld; + size_t i, j, k, l; + + // begin l loop + l = 0; + ls = l0; + ld = dstloff; + if (fl == 0) goto l80; + goto l81; + + // begin k loop + l80: + k = 0; + ks = k0; + kd = dstkoff; + if (fk == 0) goto l800; + goto l801; + + // begin j loop + l800: + j = 0; + js = j0; + jd = dstjoff; + if (fj == 0) goto l8000; + goto l8001; + + // begin i loop + l8000: + i = 0; + is = i0; + id = dstioff; + if (fi == 0) goto l80000; + goto l80001; + + // kernel + l80000: + dst[DSTIND4(id,jd,kd,ld)] = + + src[SRCIND4(is,js,ks,ls)]; + i = i+1; + id = id+1; + if (i < regiext) goto l80001; + goto l9000; + + // kernel + l80001: + dst[DSTIND4(id,jd,kd,ld)] = + + f1 * src[SRCIND4(is ,js,ks,ls)] + + f2 * src[SRCIND4(is+1,js,ks,ls)]; + i = i+1; + id = id+1; + is = is+1; + if (i < regiext) goto l80000; + goto l9000; + + // end i loop + l9000: + j = j+1; + jd = jd+1; + if (j < regjext) goto l8001; + goto l900; + + // begin i loop + l8001: + i = 0; + is = i0; + id = dstioff; + if (fi == 0) goto l80010; + goto l80011; + + // kernel + l80010: + dst[DSTIND4(id,jd,kd,ld)] = + + f1 * src[SRCIND4(is,js ,ks,ls)] + + f2 * src[SRCIND4(is,js+1,ks,ls)]; + i = i+1; + id = id+1; + if (i < regiext) goto l80011; + goto l9001; + + // kernel + l80011: + dst[DSTIND4(id,jd,kd,ld)] = + + f1*f1 * src[SRCIND4(is ,js ,ks,ls)] + + f2*f1 * src[SRCIND4(is+1,js ,ks,ls)] + + f1*f2 * src[SRCIND4(is ,js+1,ks,ls)] + + f2*f2 * src[SRCIND4(is+1,js+1,ks,ls)]; + i = i+1; + id = id+1; + is = is+1; + if (i < regiext) goto l80010; + goto l9001; + + // end i loop + l9001: + j = j+1; + jd = jd+1; + js = js+1; + if (j < regjext) goto l8000; + goto l900; + + // end j loop + l900: + k = k+1; + kd = kd+1; + if (k < regkext) goto l800; + goto l90; + + // begin j loop + l801: + j = 0; + js = j0; + jd = dstjoff; + if (fj == 0) goto l8010; + goto l8011; + + // begin i loop + l8010: + i = 0; + is = i0; + id = dstioff; + if (fi == 0) goto l80100; + goto l80101; + + // kernel + l80100: + dst[DSTIND4(id,jd,kd,ld)] = + + f1 * src[SRCIND4(is,js,ks ,ls)] + + f2 * src[SRCIND4(is,js,ks+1,ls)]; + i = i+1; + id = id+1; + if (i < regiext) goto l80101; + goto l9010; + + // kernel + l80101: + dst[DSTIND4(id,jd,kd,ld)] = + + f1*f1 * src[SRCIND4(is ,js,ks ,ls)] + + f2*f1 * src[SRCIND4(is+1,js,ks ,ls)] + + f1*f2 * src[SRCIND4(is ,js,ks+1,ls)] + + f2*f2 * src[SRCIND4(is+1,js,ks+1,ls)]; + i = i+1; + id = id+1; + is = is+1; + if (i < regiext) goto l80100; + goto l9010; + + // end i loop + l9010: + j = j+1; + jd = jd+1; + if (j < regjext) goto l8011; + goto l901; + + // begin i loop + l8011: + i = 0; + is = i0; + id = dstioff; + if (fi == 0) goto l80110; + goto l80111; + + // kernel + l80110: + dst[DSTIND4(id,jd,kd,ld)] = + + f1*f1 * src[SRCIND4(is,js ,ks ,ls)] + + f2*f1 * src[SRCIND4(is,js+1,ks ,ls)] + + f1*f2 * src[SRCIND4(is,js ,ks+1,ls)] + + f2*f2 * src[SRCIND4(is,js+1,ks+1,ls)]; + i = i+1; + id = id+1; + if (i < regiext) goto l80111; + goto l9011; + + // kernel + l80111: + dst[DSTIND4(id,jd,kd,ld)] = + + f1*f1*f1 * src[SRCIND4(is ,js ,ks ,ls)] + + f2*f1*f1 * src[SRCIND4(is+1,js ,ks ,ls)] + + f1*f2*f1 * src[SRCIND4(is ,js+1,ks ,ls)] + + f2*f2*f1 * src[SRCIND4(is+1,js+1,ks ,ls)] + + f1*f1*f2 * src[SRCIND4(is ,js ,ks+1,ls)] + + f2*f1*f2 * src[SRCIND4(is+1,js ,ks+1,ls)] + + f1*f2*f2 * src[SRCIND4(is ,js+1,ks+1,ls)] + + f2*f2*f2 * src[SRCIND4(is+1,js+1,ks+1,ls)]; + i = i+1; + id = id+1; + is = is+1; + if (i < regiext) goto l80110; + goto l9011; + + // end i loop + l9011: + j = j+1; + jd = jd+1; + js = js+1; + if (j < regjext) goto l8010; + goto l901; + + // end j loop + l901: + k = k+1; + kd = kd+1; + ks = ks+1; + if (k < regkext) goto l800; + goto l90; + + // end k loop + l90: + l = l+1; + ld = ld+1; + ls = ls+1; + if (l < reglext) goto l81; + goto l80; + + // begin k loop + l81: + k = 0; + ks = k0; + kd = dstkoff; + if (fk == 0) goto l810; + goto l811; + + // begin j loop + l810: + j = 0; + js = j0; + jd = dstjoff; + if (fj == 0) goto l8100; + goto l8101; + + // begin i loop + l8100: + i = 0; + is = i0; + id = dstioff; + if (fi == 0) goto l81000; + goto l81001; + + // kernel + l81000: + dst[DSTIND4(id,jd,kd,ld)] = + + f1 * src[SRCIND4(is,js,ks,ls )] + + f2 * src[SRCIND4(is,js,ks,ls+1)]; + i = i+1; + id = id+1; + if (i < regiext) goto l81001; + goto l9100; + + // kernel + l81001: + dst[DSTIND4(id,jd,kd,ld)] = + + f1*f1 * src[SRCIND4(is ,js,ks,ls )] + + f2*f1 * src[SRCIND4(is+1,js,ks,ls )] + + f1*f2 * src[SRCIND4(is ,js,ks,ls+1)] + + f2*f2 * src[SRCIND4(is+1,js,ks,ls+1)]; + i = i+1; + id = id+1; + is = is+1; + if (i < regiext) goto l81000; + goto l9100; + + // end i loop + l9100: + j = j+1; + jd = jd+1; + if (j < regjext) goto l8101; + goto l910; + + // begin i loop + l8101: + i = 0; + is = i0; + id = dstioff; + if (fi == 0) goto l81010; + goto l81011; + + // kernel + l81010: + dst[DSTIND4(id,jd,kd,ld)] = + + f1*f1 * src[SRCIND4(is,js ,ks,ls )] + + f2*f1 * src[SRCIND4(is,js+1,ks,ls )] + + f1*f2 * src[SRCIND4(is,js ,ks,ls+1)] + + f2*f2 * src[SRCIND4(is,js+1,ks,ls+1)]; + i = i+1; + id = id+1; + if (i < regiext) goto l81011; + goto l9101; + + // kernel + l81011: + dst[DSTIND4(id,jd,kd,ld)] = + + f1*f1*f1 * src[SRCIND4(is ,js ,ks,ls )] + + f2*f1*f1 * src[SRCIND4(is+1,js ,ks,ls )] + + f1*f2*f1 * src[SRCIND4(is ,js+1,ks,ls )] + + f2*f2*f1 * src[SRCIND4(is+1,js+1,ks,ls )] + + f1*f1*f2 * src[SRCIND4(is ,js ,ks,ls+1)] + + f2*f1*f2 * src[SRCIND4(is+1,js ,ks,ls+1)] + + f1*f2*f2 * src[SRCIND4(is ,js+1,ks,ls+1)] + + f2*f2*f2 * src[SRCIND4(is+1,js+1,ks,ls+1)]; + i = i+1; + id = id+1; + is = is+1; + if (i < regiext) goto l81010; + goto l9101; + + // end i loop + l9101: + j = j+1; + jd = jd+1; + js = js+1; + if (j < regjext) goto l8100; + goto l910; + + // end j loop + l910: + k = k+1; + kd = kd+1; + if (k < regkext) goto l810; + goto l91; + + // begin j loop + l811: + j = 0; + js = j0; + jd = dstjoff; + if (fj == 0) goto l8110; + goto l8111; + + // begin i loop + l8110: + i = 0; + is = i0; + id = dstioff; + if (fi == 0) goto l81100; + goto l81101; + + // kernel + l81100: + dst[DSTIND4(id,jd,kd,ld)] = + + f1*f1 * src[SRCIND4(is,js,ks ,ls )] + + f2*f1 * src[SRCIND4(is,js,ks+1,ls )] + + f1*f2 * src[SRCIND4(is,js,ks ,ls+1)] + + f2*f2 * src[SRCIND4(is,js,ks+1,ls+1)]; + i = i+1; + id = id+1; + if (i < regiext) goto l81101; + goto l9110; + + // kernel + l81101: + dst[DSTIND4(id,jd,kd,ld)] = + + f1*f1*f1 * src[SRCIND4(is ,js,ks ,ls )] + + f2*f1*f1 * src[SRCIND4(is+1,js,ks ,ls )] + + f1*f2*f1 * src[SRCIND4(is ,js,ks+1,ls )] + + f2*f2*f1 * src[SRCIND4(is+1,js,ks+1,ls )] + + f1*f1*f2 * src[SRCIND4(is ,js,ks ,ls+1)] + + f2*f1*f2 * src[SRCIND4(is+1,js,ks ,ls+1)] + + f1*f2*f2 * src[SRCIND4(is ,js,ks+1,ls+1)] + + f2*f2*f2 * src[SRCIND4(is+1,js,ks+1,ls+1)]; + i = i+1; + id = id+1; + is = is+1; + if (i < regiext) goto l81100; + goto l9110; + + // end i loop + l9110: + j = j+1; + jd = jd+1; + if (j < regjext) goto l8111; + goto l911; + + // begin i loop + l8111: + i = 0; + is = i0; + id = dstioff; + if (fi == 0) goto l81110; + goto l81111; + + // kernel + l81110: + dst[DSTIND4(id,jd,kd,ld)] = + + f1*f1*f1*f1 * src[SRCIND4(is,js ,ks ,ls )] + + f2*f1*f1*f1 * src[SRCIND4(is,js+1,ks ,ls )] + + f1*f2*f2*f1 * src[SRCIND4(is,js ,ks+1,ls )] + + f2*f2*f2*f1 * src[SRCIND4(is,js+1,ks+1,ls )] + + f1*f1*f1*f2 * src[SRCIND4(is,js ,ks ,ls+1)] + + f2*f1*f1*f2 * src[SRCIND4(is,js+1,ks ,ls+1)] + + f1*f2*f2*f2 * src[SRCIND4(is,js ,ks+1,ls+1)] + + f2*f2*f2*f2 * src[SRCIND4(is,js+1,ks+1,ls+1)]; + i = i+1; + id = id+1; + if (i < regiext) goto l81111; + goto l9111; + + // kernel + l81111: + dst[DSTIND4(id,jd,kd,ld)] = + + f1*f1*f1*f1 * src[SRCIND4(is ,js ,ks ,ls )] + + f2*f1*f1*f1 * src[SRCIND4(is+1,js ,ks ,ls )] + + f1*f2*f1*f1 * src[SRCIND4(is ,js+1,ks ,ls )] + + f2*f2*f1*f1 * src[SRCIND4(is+1,js+1,ks ,ls )] + + f1*f1*f2*f1 * src[SRCIND4(is ,js ,ks+1,ls )] + + f2*f1*f2*f1 * src[SRCIND4(is+1,js ,ks+1,ls )] + + f1*f2*f2*f1 * src[SRCIND4(is ,js+1,ks+1,ls )] + + f2*f2*f2*f1 * src[SRCIND4(is+1,js+1,ks+1,ls )] + + f1*f1*f1*f2 * src[SRCIND4(is ,js ,ks ,ls+1)] + + f2*f1*f1*f2 * src[SRCIND4(is+1,js ,ks ,ls+1)] + + f1*f2*f1*f2 * src[SRCIND4(is ,js+1,ks ,ls+1)] + + f2*f2*f1*f2 * src[SRCIND4(is+1,js+1,ks ,ls+1)] + + f1*f1*f2*f2 * src[SRCIND4(is ,js ,ks+1,ls+1)] + + f2*f1*f2*f2 * src[SRCIND4(is+1,js ,ks+1,ls+1)] + + f1*f2*f2*f2 * src[SRCIND4(is ,js+1,ks+1,ls+1)] + + f2*f2*f2*f2 * src[SRCIND4(is+1,js+1,ks+1,ls+1)]; + i = i+1; + id = id+1; + is = is+1; + if (i < regiext) goto l81110; + goto l9111; + + // end i loop + l9111: + j = j+1; + jd = jd+1; + js = js+1; + if (j < regjext) goto l8110; + goto l911; + + // end j loop + l911: + k = k+1; + kd = kd+1; + ks = ks+1; + if (k < regkext) goto l810; + goto l91; + + // end k loop + l91: + l = l+1; + ld = ld+1; + ls = ls+1; + if (l < reglext) goto l81; + goto l81; + + } + + + +#define INSTANTIATE(T) \ + template \ + void \ + prolongate_4d_o1_rf2 (T const * restrict const src, \ + ivect4 const & restrict srcext, \ + T * restrict const dst, \ + ivect4 const & restrict dstext, \ + ibbox4 const & restrict srcbbox, \ + ibbox4 const & restrict dstbbox, \ + ibbox4 const & restrict regbbox); +#include "instantiate" +#undef INSTANTIATE + + + +} // CarpetLib |