diff options
author | Erik Schnetter <schnetter@cct.lsu.edu> | 2012-02-24 15:47:08 -0500 |
---|---|---|
committer | Erik Schnetter <schnetter@cct.lsu.edu> | 2012-02-24 15:47:08 -0500 |
commit | 34181a5c6c82ece159f7466fea9e61e083b87f8c (patch) | |
tree | 88370f24f0ecd94a9506853c0cd504dc50d079ba | |
parent | 3bd376a48fe9f635d336f2fc669da3fcb6288906 (diff) |
CarpetLib: Move some OpenMP parallelisation into the operators
-rw-r--r-- | Carpet/CarpetLib/src/copy_3d.cc | 7 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/copy_4d.cc | 1 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/data.cc | 134 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/prolongate_3d_dgfe_rf2.cc | 8 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc | 1 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/restrict_3d_dgfe_rf2.cc | 1 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc | 1 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/restrict_4d_rf2.cc | 1 |
8 files changed, 79 insertions, 75 deletions
diff --git a/Carpet/CarpetLib/src/copy_3d.cc b/Carpet/CarpetLib/src/copy_3d.cc index 81544a265..5e2eba236 100644 --- a/Carpet/CarpetLib/src/copy_3d.cc +++ b/Carpet/CarpetLib/src/copy_3d.cc @@ -133,19 +133,14 @@ namespace CarpetLib { // Loop over region +#pragma omp parallel for collapse(3) for (int k=0; k<regkext; ++k) { for (int j=0; j<regjext; ++j) { -#if 1 for (int i=0; i<regiext; ++i) { dst [DSTIND3(i, j, k)] = src [SRCIND3(i, j, k)]; } -#else - memcpy (& dst [DSTIND3(0, j, k)], - & src [SRCIND3(0, j, k)], - regiext * sizeof(T)); -#endif } } diff --git a/Carpet/CarpetLib/src/copy_4d.cc b/Carpet/CarpetLib/src/copy_4d.cc index 232ab0914..b0012e196 100644 --- a/Carpet/CarpetLib/src/copy_4d.cc +++ b/Carpet/CarpetLib/src/copy_4d.cc @@ -112,6 +112,7 @@ namespace CarpetLib { // Loop over region +#pragma omp parallel for collapse(4) for (int l=0; l<reglext; ++l) { for (int k=0; k<regkext; ++k) { for (int j=0; j<regjext; ++j) { diff --git a/Carpet/CarpetLib/src/data.cc b/Carpet/CarpetLib/src/data.cc index 1ee6c94e7..633b8bf5d 100644 --- a/Carpet/CarpetLib/src/data.cc +++ b/Carpet/CarpetLib/src/data.cc @@ -543,21 +543,21 @@ copy_from_innerloop (gdata const * const gsrc, ibbox const& dstbox = this->extent(); #if CARPET_DIM == 3 - call_operator<T> (& copy_3d, - static_cast <T const *> (src->storage()), - src->shape(), - static_cast <T *> (this->storage()), - this->shape(), - srcbox, dstbox, - srcregbox, dstregbox, (void*)slabinfo); + // Don't use call_operator, because we parallelise ourselves + copy_3d(static_cast <T const *> (src->storage()), + src->shape(), + static_cast <T *> (this->storage()), + this->shape(), + srcbox, dstbox, + srcregbox, dstregbox, (void*)slabinfo); #elif CARPET_DIM == 4 - call_operator<T> (& copy_4d, - static_cast <T const *> (src->storage()), - src->shape(), - static_cast <T *> (this->storage()), - this->shape(), - srcbox, dstbox, - srcregbox, dstregbox, (void*)slabinfo); + // Don't use call_operator, because we parallelise ourselves + copy_4d(static_cast <T const *> (src->storage()), + src->shape(), + static_cast <T *> (this->storage()), + this->shape(), + srcbox, dstbox, + srcregbox, dstregbox, (void*)slabinfo); #else # error "Value for CARPET_DIM not supported" #endif @@ -776,14 +776,14 @@ transfer_prolongate (data const * const src, } case cell_centered: { if (use_dgfe) { - call_operator<T>(prolongate_3d_dgfe_rf2<T,5>, - static_cast<T const *>(src->storage()), - src->shape(), - static_cast<T *>(this->storage()), - this->shape(), - src->extent(), - this->extent(), - srcbox, dstbox, NULL); + // Don't use call_operator, because we parallelise ourselves + prolongate_3d_dgfe_rf2<T,5>(static_cast<T const *>(src->storage()), + src->shape(), + static_cast<T *>(this->storage()), + this->shape(), + src->extent(), + this->extent(), + srcbox, dstbox, NULL); break; } static @@ -1135,51 +1135,51 @@ transfer_restrict (data const * const src, if (all(is_centered == ivect(1,1,1))) { if (use_dgfe) { - call_operator<T>(restrict_3d_dgfe_rf2<T,5>, - static_cast<T const *>(src->storage()), + // Don't use call_operator, because we parallelise ourselves + restrict_3d_dgfe_rf2<T,5>(static_cast<T const *>(src->storage()), + src->shape(), + static_cast<T *>(this->storage()), + this->shape(), + srcbox, + dstbox, + srcregbox, dstregbox, NULL); + break; + } + // Don't use call_operator, because we parallelise ourselves + restrict_3d_cc_rf2(static_cast <T const *> (src->storage()), src->shape(), - static_cast<T *>(this->storage()), + static_cast <T *> (this->storage()), this->shape(), srcbox, dstbox, srcregbox, dstregbox, NULL); - break; - } - call_operator<T> (& restrict_3d_cc_rf2, - static_cast <T const *> (src->storage()), - src->shape(), - static_cast <T *> (this->storage()), - this->shape(), - srcbox, - dstbox, - srcregbox, dstregbox, NULL); } else if (all(is_centered == ivect(0,1,1))) { - call_operator<T> (& restrict_3d_vc_rf2<T,0,1,1>, - static_cast <T const *> (src->storage()), - src->shape(), - static_cast <T *> (this->storage()), - this->shape(), - srcbox, - dstbox, - srcregbox, dstregbox, NULL); + // Don't use call_operator, because we parallelise ourselves + restrict_3d_vc_rf2<T,0,1,1>(static_cast <T const *> (src->storage()), + src->shape(), + static_cast <T *> (this->storage()), + this->shape(), + srcbox, + dstbox, + srcregbox, dstregbox, NULL); } else if (all(is_centered == ivect(1,0,1))) { - call_operator<T> (& restrict_3d_vc_rf2<T,1,0,1>, - static_cast <T const *> (src->storage()), - src->shape(), - static_cast <T *> (this->storage()), - this->shape(), - srcbox, - dstbox, - srcregbox, dstregbox, NULL); + // Don't use call_operator, because we parallelise ourselves + restrict_3d_vc_rf2<T,1,0,1>(static_cast <T const *> (src->storage()), + src->shape(), + static_cast <T *> (this->storage()), + this->shape(), + srcbox, + dstbox, + srcregbox, dstregbox, NULL); } else if (all(is_centered == ivect(1,1,0))) { - call_operator<T> (& restrict_3d_vc_rf2<T,1,1,0>, - static_cast <T const *> (src->storage()), - src->shape(), - static_cast <T *> (this->storage()), - this->shape(), - srcbox, - dstbox, - srcregbox, dstregbox, NULL); + // Don't use call_operator, because we parallelise ourselves + restrict_3d_vc_rf2<T,1,1,0>(static_cast <T const *> (src->storage()), + src->shape(), + static_cast <T *> (this->storage()), + this->shape(), + srcbox, + dstbox, + srcregbox, dstregbox, NULL); } else { assert (0); } @@ -1203,14 +1203,14 @@ transfer_restrict (data const * const src, // enum centering { vertex_centered, cell_centered }; switch (cent) { case vertex_centered: - call_operator<T> (& restrict_4d_rf2, - static_cast <T const *> (src->storage()), - src->shape(), - static_cast <T *> (this->storage()), - this->shape(), - src->extent(), - this->extent(), - srcregbox, dstregbox, NULL); + // Don't use call_operator, because we parallelise ourselves + restrict_4d_rf2(static_cast <T const *> (src->storage()), + src->shape(), + static_cast <T *> (this->storage()), + this->shape(), + src->extent(), + this->extent(), + srcregbox, dstregbox, NULL); break; default: assert (0); diff --git a/Carpet/CarpetLib/src/prolongate_3d_dgfe_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_dgfe_rf2.cc index 87a219007..b6bd0c5d2 100644 --- a/Carpet/CarpetLib/src/prolongate_3d_dgfe_rf2.cc +++ b/Carpet/CarpetLib/src/prolongate_3d_dgfe_rf2.cc @@ -138,9 +138,10 @@ namespace CarpetLib { int const dststr2d[2] = {dstdj, dstdk}; // Loop over fine region + ptrdiff_t const i=0; +#pragma omp parallel for collapse(2) for (ptrdiff_t k=0; k<regkext; k+=2*(ORDER+1)) { for (ptrdiff_t j=0; j<regjext; j+=2*(ORDER+1)) { - ptrdiff_t const i=0; #ifdef HRSCC_HH GLLElement<ORDER>::prolongate_2D (&src[SRCIND3(srcioff+i, srcjoff+j, srckoff+k)], srcstr2d, @@ -176,8 +177,9 @@ namespace CarpetLib { int const dststr2d[2]= {dstdi, dstdk}; // Loop over fine region + ptrdiff_t const j=0; +#pragma omp parallel for collapse(2) for (ptrdiff_t k=0; k<regkext; k+=2*(ORDER+1)) { - ptrdiff_t const j=0; for (ptrdiff_t i=0; i<regiext; i+=2*(ORDER+1)) { #ifdef HRSCC_HH GLLElement<ORDER>::prolongate_2D @@ -215,6 +217,7 @@ namespace CarpetLib { // Loop over fine region ptrdiff_t const k=0; +#pragma omp parallel for collapse(2) for (ptrdiff_t j=0; j<regjext; j+=2*(ORDER+1)) { for (ptrdiff_t i=0; i<regiext; i+=2*(ORDER+1)) { #ifdef HRSCC_HH @@ -250,6 +253,7 @@ namespace CarpetLib { int const dststr[3] = {dstdi, dstdj, dstdk}; // Loop over fine region +#pragma omp parallel for collapse(3) for (ptrdiff_t k=0; k<regkext; k+=2*(ORDER+1)) { for (ptrdiff_t j=0; j<regjext; j+=2*(ORDER+1)) { for (ptrdiff_t i=0; i<regiext; i+=2*(ORDER+1)) { diff --git a/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc index 767476db3..c9c4bfc2d 100644 --- a/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc +++ b/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc @@ -119,6 +119,7 @@ namespace CarpetLib { // Loop over coarse region +#pragma omp parallel for collapse(3) for (int k=0; k<regkext; ++k) { for (int j=0; j<regjext; ++j) { for (int i=0; i<regiext; ++i) { diff --git a/Carpet/CarpetLib/src/restrict_3d_dgfe_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_dgfe_rf2.cc index 42a087692..cab088347 100644 --- a/Carpet/CarpetLib/src/restrict_3d_dgfe_rf2.cc +++ b/Carpet/CarpetLib/src/restrict_3d_dgfe_rf2.cc @@ -123,6 +123,7 @@ namespace CarpetLib { assert(all(regext % (ORDER+1) == 0)); // Loop over coarse region +#pragma omp parallel for collapse(3) for (ptrdiff_t k=0; k<regkext; k+=ORDER+1) { for (ptrdiff_t j=0; j<regjext; j+=ORDER+1) { for (ptrdiff_t i=0; i<regiext; i+=ORDER+1) { diff --git a/Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc index acf27fb95..f750ca149 100644 --- a/Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc +++ b/Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc @@ -229,6 +229,7 @@ namespace CarpetLib { // Loop over coarse region +#pragma omp parallel for collapse(3) for (int k=0; k<regkext; ++k) { for (int j=0; j<regjext; ++j) { for (int i=0; i<regiext; ++i) { diff --git a/Carpet/CarpetLib/src/restrict_4d_rf2.cc b/Carpet/CarpetLib/src/restrict_4d_rf2.cc index 10d84b890..1a89171d7 100644 --- a/Carpet/CarpetLib/src/restrict_4d_rf2.cc +++ b/Carpet/CarpetLib/src/restrict_4d_rf2.cc @@ -111,6 +111,7 @@ namespace CarpetLib { // Loop over coarse region +#pragma omp parallel for collapse(4) for (int l=0; l<reglext; ++l) { for (int k=0; k<regkext; ++k) { for (int j=0; j<regjext; ++j) { |