aboutsummaryrefslogtreecommitdiff
path: root/Carpet/CarpetLib
diff options
context:
space:
mode:
author Erik Schnetter <schnetter@gmail.com> 2013-07-29 15:49:11 -0700
committer Erik Schnetter <schnetter@gmail.com> 2013-07-29 15:49:11 -0700
commit 88e0d330988697021405cfd1034b99193cb5807c (patch)
tree 6f34c48ca7ca86775d2e653cd4d126514723643a /Carpet/CarpetLib
parent e3e9d9c11e6658fbc57b306d8881579a48d6f69e (diff)
CarpetLib: Do not use LoopControl for restriction operators when parallelizing via gdata.cc
Diffstat (limited to 'Carpet/CarpetLib')
-rw-r--r-- Carpet/CarpetLib/src/restrict_3d_cc_o3_rf2.cc 105
-rw-r--r-- Carpet/CarpetLib/src/restrict_3d_cc_o5_rf2.cc 43
-rw-r--r-- Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc 28
-rw-r--r-- Carpet/CarpetLib/src/restrict_3d_rf2.cc 35
-rw-r--r-- Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc 39
5 files changed, 242 insertions, 8 deletions
diff --git a/Carpet/CarpetLib/src/restrict_3d_cc_o3_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_cc_o3_rf2.cc
index 3d1ce9acf..7f5875630 100644
--- a/Carpet/CarpetLib/src/restrict_3d_cc_o3_rf2.cc
+++ b/Carpet/CarpetLib/src/restrict_3d_cc_o3_rf2.cc
@@ -139,6 +139,109 @@ namespace CarpetLib {
+ if (not use_loopcontrol_in_operators) {
+
+ // Loop over coarse region; each (i,j,k) iteration assigns one dst element from source indices 2*idx-1 .. 2*idx+2 in every direction
+#pragma omp parallel for collapse(3)
+ for (int k=0; k<regkext; ++k) {
+ for (int j=0; j<regjext; ++j) {
+ for (int i=0; i<regiext; ++i) {
+
+#ifdef CARPET_DEBUG
+ if(not (2 * k + 2 + srckoff < srckext and // debug builds only: the outermost stencil indices (2*idx+2 and 2*idx-1, shifted by the src offsets) must lie inside the source extents
+ 2 * j + 2 + srcjoff < srcjext and
+ 2 * i + 2 + srcioff < srciext and
+ 2 * k - 1 + srckoff >= 0 and
+ 2 * j - 1 + srcjoff >= 0 and
+ 2 * i - 1 + srcioff >= 0))
+ {
+ cout << "restrict_3d_cc_o3_rf2.cc\n";
+ cout << "regext " << regext << "\n";
+ cout << "srcext " << srcext << "\n";
+ cout << "srcbbox=" << srcbbox << "\n";
+ cout << "dstbbox=" << dstbbox << "\n";
+ cout << "regbbox=" << regbbox << "\n";
+ cout << "i,j,k=" << i << " " << j << " " << k << "\n";
+ assert(2 * k + 2 + srckoff < srckext); // the same six bounds as the guard, asserted one by one after the diagnostics have been printed
+ assert(2 * j + 2 + srcjoff < srcjext);
+ assert(2 * i + 2 + srcioff < srciext);
+ assert(2 * k - 1 + srckoff >= 0);
+ assert(2 * j - 1 + srcjoff >= 0);
+ assert(2 * i - 1 + srcioff >= 0);
+ }
+#endif
+ dst [DSTIND3(i, j, k)] = // weighted sum over a 4x4x4 block of source values; the weights f1..f4 are defined outside this hunk
+ - f1 * src [SRCIND3(2*i-1, 2*j-1, 2*k-1)] // source plane 2*k-1
+ + f2 * src [SRCIND3(2*i , 2*j-1, 2*k-1)]
+ + f2 * src [SRCIND3(2*i+1, 2*j-1, 2*k-1)]
+ - f1 * src [SRCIND3(2*i+2, 2*j-1, 2*k-1)]
+ + f2 * src [SRCIND3(2*i-1, 2*j , 2*k-1)]
+ - f3 * src [SRCIND3(2*i , 2*j , 2*k-1)]
+ - f3 * src [SRCIND3(2*i+1, 2*j , 2*k-1)]
+ + f2 * src [SRCIND3(2*i+2, 2*j , 2*k-1)]
+ + f2 * src [SRCIND3(2*i-1, 2*j+1, 2*k-1)]
+ - f3 * src [SRCIND3(2*i , 2*j+1, 2*k-1)]
+ - f3 * src [SRCIND3(2*i+1, 2*j+1, 2*k-1)]
+ + f2 * src [SRCIND3(2*i+2, 2*j+1, 2*k-1)]
+ - f1 * src [SRCIND3(2*i-1, 2*j+2, 2*k-1)]
+ + f2 * src [SRCIND3(2*i , 2*j+2, 2*k-1)]
+ + f2 * src [SRCIND3(2*i+1, 2*j+2, 2*k-1)]
+ - f1 * src [SRCIND3(2*i+2, 2*j+2, 2*k-1)]
+ + f2 * src [SRCIND3(2*i-1, 2*j-1, 2*k )] // source plane 2*k
+ - f3 * src [SRCIND3(2*i , 2*j-1, 2*k )]
+ - f3 * src [SRCIND3(2*i+1, 2*j-1, 2*k )]
+ + f2 * src [SRCIND3(2*i+2, 2*j-1, 2*k )]
+ - f3 * src [SRCIND3(2*i-1, 2*j , 2*k )]
+ + f4 * src [SRCIND3(2*i , 2*j , 2*k )]
+ + f4 * src [SRCIND3(2*i+1, 2*j , 2*k )]
+ - f3 * src [SRCIND3(2*i+2, 2*j , 2*k )]
+ - f3 * src [SRCIND3(2*i-1, 2*j+1, 2*k )]
+ + f4 * src [SRCIND3(2*i , 2*j+1, 2*k )]
+ + f4 * src [SRCIND3(2*i+1, 2*j+1, 2*k )]
+ - f3 * src [SRCIND3(2*i+2, 2*j+1, 2*k )]
+ + f2 * src [SRCIND3(2*i-1, 2*j+2, 2*k )]
+ - f3 * src [SRCIND3(2*i , 2*j+2, 2*k )]
+ - f3 * src [SRCIND3(2*i+1, 2*j+2, 2*k )]
+ + f2 * src [SRCIND3(2*i+2, 2*j+2, 2*k )]
+ + f2 * src [SRCIND3(2*i-1, 2*j-1, 2*k+1)] // source plane 2*k+1
+ - f3 * src [SRCIND3(2*i , 2*j-1, 2*k+1)]
+ - f3 * src [SRCIND3(2*i+1, 2*j-1, 2*k+1)]
+ + f2 * src [SRCIND3(2*i+2, 2*j-1, 2*k+1)]
+ - f3 * src [SRCIND3(2*i-1, 2*j , 2*k+1)]
+ + f4 * src [SRCIND3(2*i , 2*j , 2*k+1)]
+ + f4 * src [SRCIND3(2*i+1, 2*j , 2*k+1)]
+ - f3 * src [SRCIND3(2*i+2, 2*j , 2*k+1)]
+ - f3 * src [SRCIND3(2*i-1, 2*j+1, 2*k+1)]
+ + f4 * src [SRCIND3(2*i , 2*j+1, 2*k+1)]
+ + f4 * src [SRCIND3(2*i+1, 2*j+1, 2*k+1)]
+ - f3 * src [SRCIND3(2*i+2, 2*j+1, 2*k+1)]
+ + f2 * src [SRCIND3(2*i-1, 2*j+2, 2*k+1)]
+ - f3 * src [SRCIND3(2*i , 2*j+2, 2*k+1)]
+ - f3 * src [SRCIND3(2*i+1, 2*j+2, 2*k+1)]
+ + f2 * src [SRCIND3(2*i+2, 2*j+2, 2*k+1)]
+ - f1 * src [SRCIND3(2*i-1, 2*j-1, 2*k+2)] // source plane 2*k+2
+ + f2 * src [SRCIND3(2*i , 2*j-1, 2*k+2)]
+ + f2 * src [SRCIND3(2*i+1, 2*j-1, 2*k+2)]
+ - f1 * src [SRCIND3(2*i+2, 2*j-1, 2*k+2)]
+ + f2 * src [SRCIND3(2*i-1, 2*j , 2*k+2)]
+ - f3 * src [SRCIND3(2*i , 2*j , 2*k+2)]
+ - f3 * src [SRCIND3(2*i+1, 2*j , 2*k+2)]
+ + f2 * src [SRCIND3(2*i+2, 2*j , 2*k+2)]
+ + f2 * src [SRCIND3(2*i-1, 2*j+1, 2*k+2)]
+ - f3 * src [SRCIND3(2*i , 2*j+1, 2*k+2)]
+ - f3 * src [SRCIND3(2*i+1, 2*j+1, 2*k+2)]
+ + f2 * src [SRCIND3(2*i+2, 2*j+1, 2*k+2)]
+ - f1 * src [SRCIND3(2*i-1, 2*j+2, 2*k+2)]
+ + f2 * src [SRCIND3(2*i , 2*j+2, 2*k+2)]
+ + f2 * src [SRCIND3(2*i+1, 2*j+2, 2*k+2)]
+ - f1 * src [SRCIND3(2*i+2, 2*j+2, 2*k+2)];
+
+ }
+ }
+ }
+
+ } else {
+
// Loop over coarse region
#pragma omp parallel
CCTK_LOOP3(restrict_3d_cc_o3_rf2,
@@ -237,6 +340,8 @@ namespace CarpetLib {
} CCTK_ENDLOOP3(restrict_3d_cc_o3_rf2);
+ }
+
}
diff --git a/Carpet/CarpetLib/src/restrict_3d_cc_o5_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_cc_o5_rf2.cc
index 5c2f88642..f92d260cb 100644
--- a/Carpet/CarpetLib/src/restrict_3d_cc_o5_rf2.cc
+++ b/Carpet/CarpetLib/src/restrict_3d_cc_o5_rf2.cc
@@ -204,6 +204,47 @@ namespace CarpetLib {
+ if (not use_loopcontrol_in_operators) {
+
+ // Loop over coarse region.  Each (i,j,k) iteration assigns one dst
+ // element, computed by CarpetLib::restrict3 from the source data
+ // around (2*i, 2*j, 2*k).
+#pragma omp parallel for collapse(3)
+ for (int k=0; k<regkext; ++k) {
+ for (int j=0; j<regjext; ++j) {
+ for (int i=0; i<regiext; ++i) {
+
+#ifdef CARPET_DEBUG
+ // Debug builds only: the source indices 2*idx-2 .. 2*idx+3 (shifted by
+ // the src offsets) must lie inside the source extents.
+ // NOTE(review): presumably this is the footprint of restrict3 --
+ // confirm against its definition.
+ if(not (2 * k + 3 + srckoff < srckext and
+ 2 * j + 3 + srcjoff < srcjext and
+ 2 * i + 3 + srcioff < srciext and
+ 2 * k - 2 + srckoff >= 0 and
+ 2 * j - 2 + srcjoff >= 0 and
+ 2 * i - 2 + srcioff >= 0))
+ {
+ cout << "restrict_3d_cc_o5_rf2.cc\n";
+ cout << "regext " << regext << "\n";
+ cout << "srcext " << srcext << "\n";
+ cout << "srcbbox=" << srcbbox << "\n";
+ cout << "dstbbox=" << dstbbox << "\n";
+ cout << "regbbox=" << regbbox << "\n";
+ cout << "i,j,k=" << i << " " << j << " " << k << "\n";
+ // Assert exactly the bounds tested by the guard above, so that every
+ // violation that reaches this point aborts instead of falling through
+ // to the out-of-range read below.
+ assert(2 * k + 3 + srckoff < srckext);
+ assert(2 * j + 3 + srcjoff < srcjext);
+ assert(2 * i + 3 + srcioff < srciext);
+ assert(2 * k - 2 + srckoff >= 0);
+ assert(2 * j - 2 + srcjoff >= 0);
+ assert(2 * i - 2 + srcioff >= 0);
+ }
+#endif
+ dst [DSTIND3(i, j, k)] =
+ CarpetLib::restrict3
+ (& src[SRCIND3(2*i, 2*j, 2*k)], srcdi, srcdj, srcdk);
+
+ }
+ }
+ }
+
+ } else {
+
// Loop over coarse region
#pragma omp parallel
CCTK_LOOP3(restrict_3d_cc_o5_rf2,
@@ -240,6 +281,8 @@ namespace CarpetLib {
} CCTK_ENDLOOP3(restrict_3d_cc_o5_rf2);
+ }
+
}
diff --git a/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc
index e186cbced..3f16ed0c2 100644
--- a/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc
+++ b/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc
@@ -128,6 +128,32 @@ namespace CarpetLib {
+ if (not use_loopcontrol_in_operators) {
+
+ // Loop over coarse region; each (i,j,k) iteration assigns one dst element from the 2x2x2 block of source values starting at (2*i, 2*j, 2*k)
+#pragma omp parallel for collapse(3)
+ for (int k=0; k<regkext; ++k) {
+ for (int j=0; j<regjext; ++j) {
+ for (int i=0; i<regiext; ++i) {
+
+ // TODO: Introduce higher-order restriction operators (but
+ // don't use these for hydro!)
+ dst [DSTIND3(i, j, k)] = // each weight is a product of three factors, f1 for offset 0 and f2 for offset +1 in the i, j and k directions; f1 and f2 are defined outside this hunk
+ + f1*f1*f1 * src [SRCIND3(2*i , 2*j , 2*k )]
+ + f2*f1*f1 * src [SRCIND3(2*i+1, 2*j , 2*k )]
+ + f1*f2*f1 * src [SRCIND3(2*i , 2*j+1, 2*k )]
+ + f2*f2*f1 * src [SRCIND3(2*i+1, 2*j+1, 2*k )]
+ + f1*f1*f2 * src [SRCIND3(2*i , 2*j , 2*k+1)]
+ + f2*f1*f2 * src [SRCIND3(2*i+1, 2*j , 2*k+1)]
+ + f1*f2*f2 * src [SRCIND3(2*i , 2*j+1, 2*k+1)]
+ + f2*f2*f2 * src [SRCIND3(2*i+1, 2*j+1, 2*k+1)];
+
+ }
+ }
+ }
+
+ } else {
+
// Loop over coarse region
#pragma omp parallel
CCTK_LOOP3(restrict_3d_cc_rf2,
@@ -149,6 +175,8 @@ namespace CarpetLib {
} CCTK_ENDLOOP3(restrict_3d_cc_rf2);
+ }
+
}
diff --git a/Carpet/CarpetLib/src/restrict_3d_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_rf2.cc
index 9f0936e1f..0885479d8 100644
--- a/Carpet/CarpetLib/src/restrict_3d_rf2.cc
+++ b/Carpet/CarpetLib/src/restrict_3d_rf2.cc
@@ -45,6 +45,8 @@ namespace CarpetLib {
ibbox3 const & restrict regbbox,
void * extraargs)
{
+ DECLARE_CCTK_PARAMETERS;
+
assert (not extraargs);
if (any (srcbbox.stride() >= regbbox.stride() or
@@ -114,16 +116,33 @@ namespace CarpetLib {
- // Loop over coarse region
-#pragma omp parallel
- CCTK_LOOP3(restrict_3d_rf2,
- i,j,k, 0,0,0, regiext,regjext,regkext,
- dstipadext,dstjpadext,dstkpadext)
- {
+ if (not use_loopcontrol_in_operators) {
+
+ // Loop over coarse region.  Each (i,j,k) iteration copies the source
+ // value at (2*i, 2*j, 2*k) into one dst element and does not depend on
+ // any other iteration, so the nest is shared among OpenMP threads in
+ // the same way as the plain-loop branches of the other restriction
+ // operators (assumes DSTIND3 maps distinct (i,j,k) to distinct
+ // elements -- confirm against its definition).
+#pragma omp parallel for collapse(3)
+ for (int k=0; k<regkext; ++k) {
+ for (int j=0; j<regjext; ++j) {
+ for (int i=0; i<regiext; ++i) {
+
+ dst [DSTIND3(i, j, k)] = src [SRCIND3(2*i, 2*j, 2*k)];
+
+ }
+ }
+ }
- dst [DSTIND3(i, j, k)] = src [SRCIND3(2*i, 2*j, 2*k)];
+ } else {
- } CCTK_ENDLOOP3(restrict_3d_rf2);
+ // Loop over coarse region (LoopControl branch: the CCTK_LOOP3 macro is expanded inside an OpenMP parallel region)
+#pragma omp parallel
+ CCTK_LOOP3(restrict_3d_rf2,
+ i,j,k, 0,0,0, regiext,regjext,regkext,
+ dstipadext,dstjpadext,dstkpadext)
+ {
+
+ dst [DSTIND3(i, j, k)] = src [SRCIND3(2*i, 2*j, 2*k)]; // same assignment as in the plain-loop branch
+
+ } CCTK_ENDLOOP3(restrict_3d_rf2);
+
+ }
}
diff --git a/Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc
index 62450333c..f9b3c2247 100644
--- a/Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc
+++ b/Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc
@@ -249,6 +249,43 @@ namespace CarpetLib {
size_t const srcdk = SRCOFF3(0,0,1) - SRCOFF3(0,0,0);
+
+ if (not use_loopcontrol_in_operators) {
+
+ // Loop over coarse region; each (i,j,k) iteration assigns one dst element from restrict3<T,centi,centj,centk>::call applied at source index (2*i, 2*j, 2*k)
+#pragma omp parallel for collapse(3)
+ for (int k=0; k<regkext; ++k) {
+ for (int j=0; j<regjext; ++j) {
+ for (int i=0; i<regiext; ++i) {
+#ifdef CARPET_DEBUG
+ if(not (2 * k + centk < srckext and // debug builds only: print diagnostics when 2*idx + cent* reaches the source extent in any direction
+ 2 * j + centj < srcjext and
+ 2 * i + centi < srciext))
+ {
+ cout << "restrict_3d_vc_rf2.cc\n";
+ cout << "regext " << regext << "\n";
+ cout << "srcext " << srcext << "\n";
+ cout << "srcbbox=" << srcbbox << "\n";
+ cout << "dstbbox=" << dstbbox << "\n";
+ cout << "regbbox=" << regbbox << "\n";
+ cout << "srcregbbox=" << srcregbbox << "\n";
+ cout << "icent=" << icent << "\n";
+ }
+ assert(2 * k + centk < srckext and // same condition as the guard above, asserted outside the if so it is evaluated on every iteration
+ 2 * j + centj < srcjext and
+ 2 * i + centi < srciext);
+#endif
+
+ dst [DSTIND3(i, j, k)] =
+ restrict3<T,centi,centj,centk>::call
+ (& src[SRCIND3(2*i, 2*j, 2*k)], srcdi, srcdj, srcdk); // passes the address of the source element and the strides srcdi/srcdj/srcdk
+
+ }
+ }
+ }
+
+ } else {
+
// Loop over coarse region
#pragma omp parallel
CCTK_LOOP3(restrict_3d_vc_rf2,
@@ -280,6 +317,8 @@ namespace CarpetLib {
} CCTK_ENDLOOP3(restrict_3d_vc_rf2);
+ }
+
}