diff options
author | Erik Schnetter <schnetter@gmail.com> | 2013-07-29 15:49:11 -0700 |
---|---|---|
committer | Erik Schnetter <schnetter@gmail.com> | 2013-07-29 15:49:11 -0700 |
commit | 88e0d330988697021405cfd1034b99193cb5807c (patch) | |
tree | 6f34c48ca7ca86775d2e653cd4d126514723643a | |
parent | e3e9d9c11e6658fbc57b306d8881579a48d6f69e (diff) |
CarpetLib: Do not use LoopControl for restriction operators when parallelizing via gdata.cc
-rw-r--r-- | Carpet/CarpetLib/src/restrict_3d_cc_o3_rf2.cc | 105 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/restrict_3d_cc_o5_rf2.cc | 43 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc | 28 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/restrict_3d_rf2.cc | 35 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc | 39 |
5 files changed, 242 insertions, 8 deletions
diff --git a/Carpet/CarpetLib/src/restrict_3d_cc_o3_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_cc_o3_rf2.cc index 3d1ce9acf..7f5875630 100644 --- a/Carpet/CarpetLib/src/restrict_3d_cc_o3_rf2.cc +++ b/Carpet/CarpetLib/src/restrict_3d_cc_o3_rf2.cc @@ -139,6 +139,109 @@ namespace CarpetLib { + if (not use_loopcontrol_in_operators) { + + // Loop over coarse region +#pragma omp parallel for collapse(3) + for (int k=0; k<regkext; ++k) { + for (int j=0; j<regjext; ++j) { + for (int i=0; i<regiext; ++i) { + +#ifdef CARPET_DEBUG + if(not (2 * k + 2 + srckoff < srckext and + 2 * j + 2 + srcjoff < srcjext and + 2 * i + 2 + srcioff < srciext and + 2 * k - 1 + srckoff >= 0 and + 2 * j - 1 + srcjoff >= 0 and + 2 * i - 1 + srcioff >= 0)) + { + cout << "restrict_3d_cc_o3_rf2.cc\n"; + cout << "regext " << regext << "\n"; + cout << "srcext " << srcext << "\n"; + cout << "srcbbox=" << srcbbox << "\n"; + cout << "dstbbox=" << dstbbox << "\n"; + cout << "regbbox=" << regbbox << "\n"; + cout << "i,j,k=" << i << " " << j << " " << k << "\n"; + assert(2 * k + 2 + srckoff < srckext); + assert(2 * j + 2 + srcjoff < srcjext); + assert(2 * i + 2 + srcioff < srciext); + assert(2 * k - 1 + srckoff >= 0); + assert(2 * j - 1 + srcjoff >= 0); + assert(2 * i - 1 + srcioff >= 0); + } +#endif + dst [DSTIND3(i, j, k)] = + - f1 * src [SRCIND3(2*i-1, 2*j-1, 2*k-1)] + + f2 * src [SRCIND3(2*i , 2*j-1, 2*k-1)] + + f2 * src [SRCIND3(2*i+1, 2*j-1, 2*k-1)] + - f1 * src [SRCIND3(2*i+2, 2*j-1, 2*k-1)] + + f2 * src [SRCIND3(2*i-1, 2*j , 2*k-1)] + - f3 * src [SRCIND3(2*i , 2*j , 2*k-1)] + - f3 * src [SRCIND3(2*i+1, 2*j , 2*k-1)] + + f2 * src [SRCIND3(2*i+2, 2*j , 2*k-1)] + + f2 * src [SRCIND3(2*i-1, 2*j+1, 2*k-1)] + - f3 * src [SRCIND3(2*i , 2*j+1, 2*k-1)] + - f3 * src [SRCIND3(2*i+1, 2*j+1, 2*k-1)] + + f2 * src [SRCIND3(2*i+2, 2*j+1, 2*k-1)] + - f1 * src [SRCIND3(2*i-1, 2*j+2, 2*k-1)] + + f2 * src [SRCIND3(2*i , 2*j+2, 2*k-1)] + + f2 * src [SRCIND3(2*i+1, 2*j+2, 2*k-1)] + - f1 * src [SRCIND3(2*i+2, 2*j+2, 2*k-1)] 
+ + f2 * src [SRCIND3(2*i-1, 2*j-1, 2*k )] + - f3 * src [SRCIND3(2*i , 2*j-1, 2*k )] + - f3 * src [SRCIND3(2*i+1, 2*j-1, 2*k )] + + f2 * src [SRCIND3(2*i+2, 2*j-1, 2*k )] + - f3 * src [SRCIND3(2*i-1, 2*j , 2*k )] + + f4 * src [SRCIND3(2*i , 2*j , 2*k )] + + f4 * src [SRCIND3(2*i+1, 2*j , 2*k )] + - f3 * src [SRCIND3(2*i+2, 2*j , 2*k )] + - f3 * src [SRCIND3(2*i-1, 2*j+1, 2*k )] + + f4 * src [SRCIND3(2*i , 2*j+1, 2*k )] + + f4 * src [SRCIND3(2*i+1, 2*j+1, 2*k )] + - f3 * src [SRCIND3(2*i+2, 2*j+1, 2*k )] + + f2 * src [SRCIND3(2*i-1, 2*j+2, 2*k )] + - f3 * src [SRCIND3(2*i , 2*j+2, 2*k )] + - f3 * src [SRCIND3(2*i+1, 2*j+2, 2*k )] + + f2 * src [SRCIND3(2*i+2, 2*j+2, 2*k )] + + f2 * src [SRCIND3(2*i-1, 2*j-1, 2*k+1)] + - f3 * src [SRCIND3(2*i , 2*j-1, 2*k+1)] + - f3 * src [SRCIND3(2*i+1, 2*j-1, 2*k+1)] + + f2 * src [SRCIND3(2*i+2, 2*j-1, 2*k+1)] + - f3 * src [SRCIND3(2*i-1, 2*j , 2*k+1)] + + f4 * src [SRCIND3(2*i , 2*j , 2*k+1)] + + f4 * src [SRCIND3(2*i+1, 2*j , 2*k+1)] + - f3 * src [SRCIND3(2*i+2, 2*j , 2*k+1)] + - f3 * src [SRCIND3(2*i-1, 2*j+1, 2*k+1)] + + f4 * src [SRCIND3(2*i , 2*j+1, 2*k+1)] + + f4 * src [SRCIND3(2*i+1, 2*j+1, 2*k+1)] + - f3 * src [SRCIND3(2*i+2, 2*j+1, 2*k+1)] + + f2 * src [SRCIND3(2*i-1, 2*j+2, 2*k+1)] + - f3 * src [SRCIND3(2*i , 2*j+2, 2*k+1)] + - f3 * src [SRCIND3(2*i+1, 2*j+2, 2*k+1)] + + f2 * src [SRCIND3(2*i+2, 2*j+2, 2*k+1)] + - f1 * src [SRCIND3(2*i-1, 2*j-1, 2*k+2)] + + f2 * src [SRCIND3(2*i , 2*j-1, 2*k+2)] + + f2 * src [SRCIND3(2*i+1, 2*j-1, 2*k+2)] + - f1 * src [SRCIND3(2*i+2, 2*j-1, 2*k+2)] + + f2 * src [SRCIND3(2*i-1, 2*j , 2*k+2)] + - f3 * src [SRCIND3(2*i , 2*j , 2*k+2)] + - f3 * src [SRCIND3(2*i+1, 2*j , 2*k+2)] + + f2 * src [SRCIND3(2*i+2, 2*j , 2*k+2)] + + f2 * src [SRCIND3(2*i-1, 2*j+1, 2*k+2)] + - f3 * src [SRCIND3(2*i , 2*j+1, 2*k+2)] + - f3 * src [SRCIND3(2*i+1, 2*j+1, 2*k+2)] + + f2 * src [SRCIND3(2*i+2, 2*j+1, 2*k+2)] + - f1 * src [SRCIND3(2*i-1, 2*j+2, 2*k+2)] + + f2 * src [SRCIND3(2*i , 2*j+2, 2*k+2)] + + f2 * src 
[SRCIND3(2*i+1, 2*j+2, 2*k+2)] + - f1 * src [SRCIND3(2*i+2, 2*j+2, 2*k+2)]; + + } + } + } + + } else { + // Loop over coarse region #pragma omp parallel CCTK_LOOP3(restrict_3d_cc_o3_rf2, @@ -237,6 +340,8 @@ namespace CarpetLib { } CCTK_ENDLOOP3(restrict_3d_cc_o3_rf2); + } + } diff --git a/Carpet/CarpetLib/src/restrict_3d_cc_o5_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_cc_o5_rf2.cc index 5c2f88642..f92d260cb 100644 --- a/Carpet/CarpetLib/src/restrict_3d_cc_o5_rf2.cc +++ b/Carpet/CarpetLib/src/restrict_3d_cc_o5_rf2.cc @@ -204,6 +204,47 @@ namespace CarpetLib { + if (not use_loopcontrol_in_operators) { + + // Loop over coarse region +#pragma omp parallel for collapse(3) + for (int k=0; k<regkext; ++k) { + for (int j=0; j<regjext; ++j) { + for (int i=0; i<regiext; ++i) { + +#ifdef CARPET_DEBUG + if(not (2 * k + 3 + srckoff < srckext and + 2 * j + 3 + srcjoff < srcjext and + 2 * i + 3 + srcioff < srciext and + 2 * k - 2 + srckoff >= 0 and + 2 * j - 2 + srcjoff >= 0 and + 2 * i - 2 + srcioff >= 0)) + { // report, then abort: the asserts mirror the guard bounds above + cout << "restrict_3d_cc_o5_rf2.cc\n"; + cout << "regext " << regext << "\n"; + cout << "srcext " << srcext << "\n"; + cout << "srcbbox=" << srcbbox << "\n"; + cout << "dstbbox=" << dstbbox << "\n"; + cout << "regbbox=" << regbbox << "\n"; + cout << "i,j,k=" << i << " " << j << " " << k << "\n"; + assert(2 * k + 3 + srckoff < srckext); + assert(2 * j + 3 + srcjoff < srcjext); + assert(2 * i + 3 + srcioff < srciext); + assert(2 * k - 2 + srckoff >= 0); + assert(2 * j - 2 + srcjoff >= 0); + assert(2 * i - 2 + srcioff >= 0); + } +#endif + dst [DSTIND3(i, j, k)] = + CarpetLib::restrict3 + (& src[SRCIND3(2*i, 2*j, 2*k)], srcdi, srcdj, srcdk); + + } + } + } + + } else { + // Loop over coarse region #pragma omp parallel CCTK_LOOP3(restrict_3d_cc_o5_rf2, @@ -240,6 +281,8 @@ namespace CarpetLib { } CCTK_ENDLOOP3(restrict_3d_cc_o5_rf2); + } + } diff --git a/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc index e186cbced..3f16ed0c2 100644 --- 
a/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc +++ b/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc @@ -128,6 +128,32 @@ namespace CarpetLib { + if (not use_loopcontrol_in_operators) { + + // Loop over coarse region +#pragma omp parallel for collapse(3) + for (int k=0; k<regkext; ++k) { + for (int j=0; j<regjext; ++j) { + for (int i=0; i<regiext; ++i) { + + // TODO: Introduce higher-order restriction operators (but + // don't use these for hydro!) + dst [DSTIND3(i, j, k)] = + + f1*f1*f1 * src [SRCIND3(2*i , 2*j , 2*k )] + + f2*f1*f1 * src [SRCIND3(2*i+1, 2*j , 2*k )] + + f1*f2*f1 * src [SRCIND3(2*i , 2*j+1, 2*k )] + + f2*f2*f1 * src [SRCIND3(2*i+1, 2*j+1, 2*k )] + + f1*f1*f2 * src [SRCIND3(2*i , 2*j , 2*k+1)] + + f2*f1*f2 * src [SRCIND3(2*i+1, 2*j , 2*k+1)] + + f1*f2*f2 * src [SRCIND3(2*i , 2*j+1, 2*k+1)] + + f2*f2*f2 * src [SRCIND3(2*i+1, 2*j+1, 2*k+1)]; + + } + } + } + + } else { + // Loop over coarse region #pragma omp parallel CCTK_LOOP3(restrict_3d_cc_rf2, @@ -149,6 +175,8 @@ namespace CarpetLib { } CCTK_ENDLOOP3(restrict_3d_cc_rf2); + } + } diff --git a/Carpet/CarpetLib/src/restrict_3d_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_rf2.cc index 9f0936e1f..0885479d8 100644 --- a/Carpet/CarpetLib/src/restrict_3d_rf2.cc +++ b/Carpet/CarpetLib/src/restrict_3d_rf2.cc @@ -45,6 +45,8 @@ namespace CarpetLib { ibbox3 const & restrict regbbox, void * extraargs) { + DECLARE_CCTK_PARAMETERS; + assert (not extraargs); if (any (srcbbox.stride() >= regbbox.stride() or @@ -114,16 +116,33 @@ namespace CarpetLib { - // Loop over coarse region -#pragma omp parallel - CCTK_LOOP3(restrict_3d_rf2, - i,j,k, 0,0,0, regiext,regjext,regkext, - dstipadext,dstjpadext,dstkpadext) - { + if (not use_loopcontrol_in_operators) { + + // Loop over coarse region + for (int k=0; k<regkext; ++k) { + for (int j=0; j<regjext; ++j) { + for (int i=0; i<regiext; ++i) { + + dst [DSTIND3(i, j, k)] = src [SRCIND3(2*i, 2*j, 2*k)]; + + } + } + } - dst [DSTIND3(i, j, k)] = src [SRCIND3(2*i, 2*j, 2*k)]; + } else { - } 
CCTK_ENDLOOP3(restrict_3d_rf2); + // Loop over coarse region +#pragma omp parallel + CCTK_LOOP3(restrict_3d_rf2, + i,j,k, 0,0,0, regiext,regjext,regkext, + dstipadext,dstjpadext,dstkpadext) + { + + dst [DSTIND3(i, j, k)] = src [SRCIND3(2*i, 2*j, 2*k)]; + + } CCTK_ENDLOOP3(restrict_3d_rf2); + + } } diff --git a/Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc index 62450333c..f9b3c2247 100644 --- a/Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc +++ b/Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc @@ -249,6 +249,43 @@ namespace CarpetLib { size_t const srcdk = SRCOFF3(0,0,1) - SRCOFF3(0,0,0); + + if (not use_loopcontrol_in_operators) { + + // Loop over coarse region +#pragma omp parallel for collapse(3) + for (int k=0; k<regkext; ++k) { + for (int j=0; j<regjext; ++j) { + for (int i=0; i<regiext; ++i) { +#ifdef CARPET_DEBUG + if(not (2 * k + centk < srckext and + 2 * j + centj < srcjext and + 2 * i + centi < srciext)) + { + cout << "restrict_3d_vc_rf2.cc\n"; + cout << "regext " << regext << "\n"; + cout << "srcext " << srcext << "\n"; + cout << "srcbbox=" << srcbbox << "\n"; + cout << "dstbbox=" << dstbbox << "\n"; + cout << "regbbox=" << regbbox << "\n"; + cout << "srcregbbox=" << srcregbbox << "\n"; + cout << "icent=" << icent << "\n"; + } + assert(2 * k + centk < srckext and + 2 * j + centj < srcjext and + 2 * i + centi < srciext); +#endif + + dst [DSTIND3(i, j, k)] = + restrict3<T,centi,centj,centk>::call + (& src[SRCIND3(2*i, 2*j, 2*k)], srcdi, srcdj, srcdk); + + } + } + } + + } else { + // Loop over coarse region #pragma omp parallel CCTK_LOOP3(restrict_3d_vc_rf2, @@ -280,6 +317,8 @@ namespace CarpetLib { } CCTK_ENDLOOP3(restrict_3d_vc_rf2); + } + } |