about | summary | refs | log | tree | commit | diff
path: root/Carpet
diff options
context:
space:
mode:
author Erik Schnetter <schnetter@cct.lsu.edu> 2012-02-24 15:47:08 -0500
committer Barry Wardell <barry.wardell@gmail.com> 2012-09-11 18:23:03 +0100
commit 98e2f3315e6210e1c4c4c392eaa855d8ece56641 (patch)
tree 21a139dfab764ccd47205bd65044225f2e584852 /Carpet
parent a50b94db8d36ffd6c9aca449299e8148c2b8fd33 (diff)
CarpetLib: Move some OpenMP parallelisation into the operators
Diffstat (limited to 'Carpet')
-rw-r--r-- Carpet/CarpetLib/src/copy_3d.cc | 7
-rw-r--r-- Carpet/CarpetLib/src/copy_4d.cc | 1
-rw-r--r-- Carpet/CarpetLib/src/data.cc | 134
-rw-r--r-- Carpet/CarpetLib/src/prolongate_3d_dgfe_rf2.cc | 8
-rw-r--r-- Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc | 1
-rw-r--r-- Carpet/CarpetLib/src/restrict_3d_dgfe_rf2.cc | 1
-rw-r--r-- Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc | 1
-rw-r--r-- Carpet/CarpetLib/src/restrict_4d_rf2.cc | 1
8 files changed, 79 insertions, 75 deletions
diff --git a/Carpet/CarpetLib/src/copy_3d.cc b/Carpet/CarpetLib/src/copy_3d.cc
index 81544a265..5e2eba236 100644
--- a/Carpet/CarpetLib/src/copy_3d.cc
+++ b/Carpet/CarpetLib/src/copy_3d.cc
@@ -133,19 +133,14 @@ namespace CarpetLib {
// Loop over region
+#pragma omp parallel for collapse(3)
for (int k=0; k<regkext; ++k) {
for (int j=0; j<regjext; ++j) {
-#if 1
for (int i=0; i<regiext; ++i) {
dst [DSTIND3(i, j, k)] = src [SRCIND3(i, j, k)];
}
-#else
- memcpy (& dst [DSTIND3(0, j, k)],
- & src [SRCIND3(0, j, k)],
- regiext * sizeof(T));
-#endif
}
}
diff --git a/Carpet/CarpetLib/src/copy_4d.cc b/Carpet/CarpetLib/src/copy_4d.cc
index 232ab0914..b0012e196 100644
--- a/Carpet/CarpetLib/src/copy_4d.cc
+++ b/Carpet/CarpetLib/src/copy_4d.cc
@@ -112,6 +112,7 @@ namespace CarpetLib {
// Loop over region
+#pragma omp parallel for collapse(4)
for (int l=0; l<reglext; ++l) {
for (int k=0; k<regkext; ++k) {
for (int j=0; j<regjext; ++j) {
diff --git a/Carpet/CarpetLib/src/data.cc b/Carpet/CarpetLib/src/data.cc
index 1ee6c94e7..633b8bf5d 100644
--- a/Carpet/CarpetLib/src/data.cc
+++ b/Carpet/CarpetLib/src/data.cc
@@ -543,21 +543,21 @@ copy_from_innerloop (gdata const * const gsrc,
ibbox const& dstbox = this->extent();
#if CARPET_DIM == 3
- call_operator<T> (& copy_3d,
- static_cast <T const *> (src->storage()),
- src->shape(),
- static_cast <T *> (this->storage()),
- this->shape(),
- srcbox, dstbox,
- srcregbox, dstregbox, (void*)slabinfo);
+ // Don't use call_operator, because we parallelise ourselves
+ copy_3d(static_cast <T const *> (src->storage()),
+ src->shape(),
+ static_cast <T *> (this->storage()),
+ this->shape(),
+ srcbox, dstbox,
+ srcregbox, dstregbox, (void*)slabinfo);
#elif CARPET_DIM == 4
- call_operator<T> (& copy_4d,
- static_cast <T const *> (src->storage()),
- src->shape(),
- static_cast <T *> (this->storage()),
- this->shape(),
- srcbox, dstbox,
- srcregbox, dstregbox, (void*)slabinfo);
+ // Don't use call_operator, because we parallelise ourselves
+ copy_4d(static_cast <T const *> (src->storage()),
+ src->shape(),
+ static_cast <T *> (this->storage()),
+ this->shape(),
+ srcbox, dstbox,
+ srcregbox, dstregbox, (void*)slabinfo);
#else
# error "Value for CARPET_DIM not supported"
#endif
@@ -776,14 +776,14 @@ transfer_prolongate (data const * const src,
}
case cell_centered: {
if (use_dgfe) {
- call_operator<T>(prolongate_3d_dgfe_rf2<T,5>,
- static_cast<T const *>(src->storage()),
- src->shape(),
- static_cast<T *>(this->storage()),
- this->shape(),
- src->extent(),
- this->extent(),
- srcbox, dstbox, NULL);
+ // Don't use call_operator, because we parallelise ourselves
+ prolongate_3d_dgfe_rf2<T,5>(static_cast<T const *>(src->storage()),
+ src->shape(),
+ static_cast<T *>(this->storage()),
+ this->shape(),
+ src->extent(),
+ this->extent(),
+ srcbox, dstbox, NULL);
break;
}
static
@@ -1135,51 +1135,51 @@ transfer_restrict (data const * const src,
if (all(is_centered == ivect(1,1,1))) {
if (use_dgfe) {
- call_operator<T>(restrict_3d_dgfe_rf2<T,5>,
- static_cast<T const *>(src->storage()),
+ // Don't use call_operator, because we parallelise ourselves
+ restrict_3d_dgfe_rf2<T,5>(static_cast<T const *>(src->storage()),
+ src->shape(),
+ static_cast<T *>(this->storage()),
+ this->shape(),
+ srcbox,
+ dstbox,
+ srcregbox, dstregbox, NULL);
+ break;
+ }
+ // Don't use call_operator, because we parallelise ourselves
+ restrict_3d_cc_rf2(static_cast <T const *> (src->storage()),
src->shape(),
- static_cast<T *>(this->storage()),
+ static_cast <T *> (this->storage()),
this->shape(),
srcbox,
dstbox,
srcregbox, dstregbox, NULL);
- break;
- }
- call_operator<T> (& restrict_3d_cc_rf2,
- static_cast <T const *> (src->storage()),
- src->shape(),
- static_cast <T *> (this->storage()),
- this->shape(),
- srcbox,
- dstbox,
- srcregbox, dstregbox, NULL);
} else if (all(is_centered == ivect(0,1,1))) {
- call_operator<T> (& restrict_3d_vc_rf2<T,0,1,1>,
- static_cast <T const *> (src->storage()),
- src->shape(),
- static_cast <T *> (this->storage()),
- this->shape(),
- srcbox,
- dstbox,
- srcregbox, dstregbox, NULL);
+ // Don't use call_operator, because we parallelise ourselves
+ restrict_3d_vc_rf2<T,0,1,1>(static_cast <T const *> (src->storage()),
+ src->shape(),
+ static_cast <T *> (this->storage()),
+ this->shape(),
+ srcbox,
+ dstbox,
+ srcregbox, dstregbox, NULL);
} else if (all(is_centered == ivect(1,0,1))) {
- call_operator<T> (& restrict_3d_vc_rf2<T,1,0,1>,
- static_cast <T const *> (src->storage()),
- src->shape(),
- static_cast <T *> (this->storage()),
- this->shape(),
- srcbox,
- dstbox,
- srcregbox, dstregbox, NULL);
+ // Don't use call_operator, because we parallelise ourselves
+ restrict_3d_vc_rf2<T,1,0,1>(static_cast <T const *> (src->storage()),
+ src->shape(),
+ static_cast <T *> (this->storage()),
+ this->shape(),
+ srcbox,
+ dstbox,
+ srcregbox, dstregbox, NULL);
} else if (all(is_centered == ivect(1,1,0))) {
- call_operator<T> (& restrict_3d_vc_rf2<T,1,1,0>,
- static_cast <T const *> (src->storage()),
- src->shape(),
- static_cast <T *> (this->storage()),
- this->shape(),
- srcbox,
- dstbox,
- srcregbox, dstregbox, NULL);
+ // Don't use call_operator, because we parallelise ourselves
+ restrict_3d_vc_rf2<T,1,1,0>(static_cast <T const *> (src->storage()),
+ src->shape(),
+ static_cast <T *> (this->storage()),
+ this->shape(),
+ srcbox,
+ dstbox,
+ srcregbox, dstregbox, NULL);
} else {
assert (0);
}
@@ -1203,14 +1203,14 @@ transfer_restrict (data const * const src,
// enum centering { vertex_centered, cell_centered };
switch (cent) {
case vertex_centered:
- call_operator<T> (& restrict_4d_rf2,
- static_cast <T const *> (src->storage()),
- src->shape(),
- static_cast <T *> (this->storage()),
- this->shape(),
- src->extent(),
- this->extent(),
- srcregbox, dstregbox, NULL);
+ // Don't use call_operator, because we parallelise ourselves
+ restrict_4d_rf2(static_cast <T const *> (src->storage()),
+ src->shape(),
+ static_cast <T *> (this->storage()),
+ this->shape(),
+ src->extent(),
+ this->extent(),
+ srcregbox, dstregbox, NULL);
break;
default:
assert (0);
diff --git a/Carpet/CarpetLib/src/prolongate_3d_dgfe_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_dgfe_rf2.cc
index 87a219007..b6bd0c5d2 100644
--- a/Carpet/CarpetLib/src/prolongate_3d_dgfe_rf2.cc
+++ b/Carpet/CarpetLib/src/prolongate_3d_dgfe_rf2.cc
@@ -138,9 +138,10 @@ namespace CarpetLib {
int const dststr2d[2] = {dstdj, dstdk};
// Loop over fine region
+ ptrdiff_t const i=0;
+#pragma omp parallel for collapse(2)
for (ptrdiff_t k=0; k<regkext; k+=2*(ORDER+1)) {
for (ptrdiff_t j=0; j<regjext; j+=2*(ORDER+1)) {
- ptrdiff_t const i=0;
#ifdef HRSCC_HH
GLLElement<ORDER>::prolongate_2D
(&src[SRCIND3(srcioff+i, srcjoff+j, srckoff+k)], srcstr2d,
@@ -176,8 +177,9 @@ namespace CarpetLib {
int const dststr2d[2]= {dstdi, dstdk};
// Loop over fine region
+ ptrdiff_t const j=0;
+#pragma omp parallel for collapse(2)
for (ptrdiff_t k=0; k<regkext; k+=2*(ORDER+1)) {
- ptrdiff_t const j=0;
for (ptrdiff_t i=0; i<regiext; i+=2*(ORDER+1)) {
#ifdef HRSCC_HH
GLLElement<ORDER>::prolongate_2D
@@ -215,6 +217,7 @@ namespace CarpetLib {
// Loop over fine region
ptrdiff_t const k=0;
+#pragma omp parallel for collapse(2)
for (ptrdiff_t j=0; j<regjext; j+=2*(ORDER+1)) {
for (ptrdiff_t i=0; i<regiext; i+=2*(ORDER+1)) {
#ifdef HRSCC_HH
@@ -250,6 +253,7 @@ namespace CarpetLib {
int const dststr[3] = {dstdi, dstdj, dstdk};
// Loop over fine region
+#pragma omp parallel for collapse(3)
for (ptrdiff_t k=0; k<regkext; k+=2*(ORDER+1)) {
for (ptrdiff_t j=0; j<regjext; j+=2*(ORDER+1)) {
for (ptrdiff_t i=0; i<regiext; i+=2*(ORDER+1)) {
diff --git a/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc
index 767476db3..c9c4bfc2d 100644
--- a/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc
+++ b/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc
@@ -119,6 +119,7 @@ namespace CarpetLib {
// Loop over coarse region
+#pragma omp parallel for collapse(3)
for (int k=0; k<regkext; ++k) {
for (int j=0; j<regjext; ++j) {
for (int i=0; i<regiext; ++i) {
diff --git a/Carpet/CarpetLib/src/restrict_3d_dgfe_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_dgfe_rf2.cc
index 42a087692..cab088347 100644
--- a/Carpet/CarpetLib/src/restrict_3d_dgfe_rf2.cc
+++ b/Carpet/CarpetLib/src/restrict_3d_dgfe_rf2.cc
@@ -123,6 +123,7 @@ namespace CarpetLib {
assert(all(regext % (ORDER+1) == 0));
// Loop over coarse region
+#pragma omp parallel for collapse(3)
for (ptrdiff_t k=0; k<regkext; k+=ORDER+1) {
for (ptrdiff_t j=0; j<regjext; j+=ORDER+1) {
for (ptrdiff_t i=0; i<regiext; i+=ORDER+1) {
diff --git a/Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc
index acf27fb95..f750ca149 100644
--- a/Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc
+++ b/Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc
@@ -229,6 +229,7 @@ namespace CarpetLib {
// Loop over coarse region
+#pragma omp parallel for collapse(3)
for (int k=0; k<regkext; ++k) {
for (int j=0; j<regjext; ++j) {
for (int i=0; i<regiext; ++i) {
diff --git a/Carpet/CarpetLib/src/restrict_4d_rf2.cc b/Carpet/CarpetLib/src/restrict_4d_rf2.cc
index 10d84b890..1a89171d7 100644
--- a/Carpet/CarpetLib/src/restrict_4d_rf2.cc
+++ b/Carpet/CarpetLib/src/restrict_4d_rf2.cc
@@ -111,6 +111,7 @@ namespace CarpetLib {
// Loop over coarse region
+#pragma omp parallel for collapse(4)
for (int l=0; l<reglext; ++l) {
for (int k=0; k<regkext; ++k) {
for (int j=0; j<regjext; ++j) {