diff options
Diffstat (limited to 'Carpet/CarpetLib/src')
-rw-r--r-- | Carpet/CarpetLib/src/copy_3d.cc | 37 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/dist.cc | 44 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/dist.hh | 22 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/interpolate_3d_2tl.cc | 37 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/interpolate_3d_3tl.cc | 37 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/interpolate_3d_4tl.cc | 37 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/interpolate_3d_5tl.cc | 37 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/interpolate_eno_3d_3tl.cc | 37 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/prolongate_3d_cc_rf2.cc | 2 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/prolongate_3d_real8_eno.F90 | 1 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/prolongate_3d_real8_weno.F90 | 1 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc | 1 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/restrict_3d_rf2.cc | 37 |
13 files changed, 204 insertions, 126 deletions
diff --git a/Carpet/CarpetLib/src/copy_3d.cc b/Carpet/CarpetLib/src/copy_3d.cc index 644a67978..96269e7ff 100644 --- a/Carpet/CarpetLib/src/copy_3d.cc +++ b/Carpet/CarpetLib/src/copy_3d.cc @@ -74,32 +74,33 @@ namespace CarpetLib { - size_t const srciext = srcext[0]; - size_t const srcjext = srcext[1]; - size_t const srckext = srcext[2]; + ptrdiff_t const srciext = srcext[0]; + ptrdiff_t const srcjext = srcext[1]; + ptrdiff_t const srckext = srcext[2]; - size_t const dstiext = dstext[0]; - size_t const dstjext = dstext[1]; - size_t const dstkext = dstext[2]; + ptrdiff_t const dstiext = dstext[0]; + ptrdiff_t const dstjext = dstext[1]; + ptrdiff_t const dstkext = dstext[2]; - size_t const regiext = regext[0]; - size_t const regjext = regext[1]; - size_t const regkext = regext[2]; + ptrdiff_t const regiext = regext[0]; + ptrdiff_t const regjext = regext[1]; + ptrdiff_t const regkext = regext[2]; - size_t const srcioff = srcoff[0]; - size_t const srcjoff = srcoff[1]; - size_t const srckoff = srcoff[2]; + ptrdiff_t const srcioff = srcoff[0]; + ptrdiff_t const srcjoff = srcoff[1]; + ptrdiff_t const srckoff = srcoff[2]; - size_t const dstioff = dstoff[0]; - size_t const dstjoff = dstoff[1]; - size_t const dstkoff = dstoff[2]; + ptrdiff_t const dstioff = dstoff[0]; + ptrdiff_t const dstjoff = dstoff[1]; + ptrdiff_t const dstkoff = dstoff[2]; // Loop over region - for (size_t k=0; k<regkext; ++k) { - for (size_t j=0; j<regjext; ++j) { - for (size_t i=0; i<regiext; ++i) { +#pragma omp parallel for + for (ptrdiff_t k=0; k<regkext; ++k) { + for (ptrdiff_t j=0; j<regjext; ++j) { + for (ptrdiff_t i=0; i<regiext; ++i) { dst [DSTIND3(i, j, k)] = src [SRCIND3(i, j, k)]; diff --git a/Carpet/CarpetLib/src/dist.cc b/Carpet/CarpetLib/src/dist.cc index 79871f3f0..1af4bcf38 100644 --- a/Carpet/CarpetLib/src/dist.cc +++ b/Carpet/CarpetLib/src/dist.cc @@ -1,5 +1,8 @@ #include <cassert> +#ifdef _OPENMP +# include <omp.h> +#endif #include <mpi.h> #include "cctk.h" @@ -63,4 +66,45 @@ namespace dist { } } + // Local number of threads + int num_threads_worker () + { + DECLARE_CCTK_PARAMETERS; + int num_threads_; +#ifdef _OPENMP + if (num_threads > 0) { + // Set number of threads which should be used + // TODO: do this at startup, not in this routine + omp_set_num_threads (num_threads); + } +#pragma omp parallel + { +#pragma omp single nowait + { + num_threads_ = omp_get_num_threads(); + } + } +#else + if (num_threads > 0 and num_threads != 1) { + CCTK_WARN (CCTK_WARN_ABORT, + "OpenMP is not enabled. Cannot set the number of threads."); + } + num_threads_ = 1; +#endif + assert (num_threads_ >= 1); + return num_threads_; + } + + // Global number of threads + int total_num_threads_worker () + { + int total_num_threads_; + int const mynthreads = num_threads(); + MPI_Allreduce + (const_cast <int *> (& mynthreads), & total_num_threads_, 1, MPI_INT, + MPI_SUM, comm()); + assert (total_num_threads_ >= size()); + return total_num_threads_; + } + } // namespace dist diff --git a/Carpet/CarpetLib/src/dist.hh b/Carpet/CarpetLib/src/dist.hh index 38d7219d1..1206901e3 100644 --- a/Carpet/CarpetLib/src/dist.hh +++ b/Carpet/CarpetLib/src/dist.hh @@ -63,6 +63,28 @@ namespace dist { return size_; } + // Local number of threads + int num_threads_worker (); + inline int num_threads () + { + static int num_threads_ = -1; + if (num_threads_ == -1) { + num_threads_ = num_threads_worker(); + } + return num_threads_; + } + + // Global number of threads + int total_num_threads_worker (); + inline int total_num_threads () + { + static int total_num_threads_ = -1; + if (total_num_threads_ == -1) { + total_num_threads_ = total_num_threads_worker(); + } + return total_num_threads_; + } + ///////////////////////////////////////////////////////////////////////// diff --git a/Carpet/CarpetLib/src/interpolate_3d_2tl.cc b/Carpet/CarpetLib/src/interpolate_3d_2tl.cc index 9a2cb99f0..78170b1ee 100644 --- a/Carpet/CarpetLib/src/interpolate_3d_2tl.cc +++ b/Carpet/CarpetLib/src/interpolate_3d_2tl.cc @@ -82,25 +82,25 @@ namespace CarpetLib { - size_t const srciext = srcext[0]; - size_t const srcjext = srcext[1]; - size_t const srckext = srcext[2]; + ptrdiff_t const srciext = srcext[0]; + ptrdiff_t const srcjext = srcext[1]; + ptrdiff_t const srckext = srcext[2]; - size_t const dstiext = dstext[0]; - size_t const dstjext = dstext[1]; - size_t const dstkext = dstext[2]; + ptrdiff_t const dstiext = dstext[0]; + ptrdiff_t const dstjext = dstext[1]; + ptrdiff_t const dstkext = dstext[2]; - size_t const regiext = regext[0]; - size_t const regjext = regext[1]; - size_t const regkext = regext[2]; + ptrdiff_t const regiext = regext[0]; + ptrdiff_t const regjext = regext[1]; + ptrdiff_t const regkext = regext[2]; - size_t const srcioff = srcoff[0]; - size_t const srcjoff = srcoff[1]; - size_t const srckoff = srcoff[2]; + ptrdiff_t const srcioff = srcoff[0]; + ptrdiff_t const srcjoff = srcoff[1]; + ptrdiff_t const srckoff = srcoff[2]; - size_t const dstioff = dstoff[0]; - size_t const dstjoff = dstoff[1]; - size_t const dstkoff = dstoff[2]; + ptrdiff_t const dstioff = dstoff[0]; + ptrdiff_t const dstjoff = dstoff[1]; + ptrdiff_t const dstkoff = dstoff[2]; @@ -120,9 +120,10 @@ namespace CarpetLib { // Loop over region - for (size_t k=0; k<regkext; ++k) { - for (size_t j=0; j<regjext; ++j) { - for (size_t i=0; i<regiext; ++i) { +#pragma omp parallel for + for (ptrdiff_t k=0; k<regkext; ++k) { + for (ptrdiff_t j=0; j<regjext; ++j) { + for (ptrdiff_t i=0; i<regiext; ++i) { dst [DSTIND3(i, j, k)] = + s1fac * src1 [SRCIND3(i, j, k)] diff --git a/Carpet/CarpetLib/src/interpolate_3d_3tl.cc b/Carpet/CarpetLib/src/interpolate_3d_3tl.cc index afcff458b..9a1d0e5d7 100644 --- a/Carpet/CarpetLib/src/interpolate_3d_3tl.cc +++ b/Carpet/CarpetLib/src/interpolate_3d_3tl.cc @@ -84,25 +84,25 @@ namespace CarpetLib { - size_t const srciext = srcext[0]; - size_t const srcjext = srcext[1]; - size_t const srckext = srcext[2]; + ptrdiff_t const srciext = srcext[0]; + ptrdiff_t const srcjext = srcext[1]; + ptrdiff_t const srckext = srcext[2]; - size_t const dstiext = dstext[0]; - size_t const dstjext = dstext[1]; - size_t const dstkext = dstext[2]; + ptrdiff_t const dstiext = dstext[0]; + ptrdiff_t const dstjext = dstext[1]; + ptrdiff_t const dstkext = dstext[2]; - size_t const regiext = regext[0]; - size_t const regjext = regext[1]; - size_t const regkext = regext[2]; + ptrdiff_t const regiext = regext[0]; + ptrdiff_t const regjext = regext[1]; + ptrdiff_t const regkext = regext[2]; - size_t const srcioff = srcoff[0]; - size_t const srcjoff = srcoff[1]; - size_t const srckoff = srcoff[2]; + ptrdiff_t const srcioff = srcoff[0]; + ptrdiff_t const srcjoff = srcoff[1]; + ptrdiff_t const srckoff = srcoff[2]; - size_t const dstioff = dstoff[0]; - size_t const dstjoff = dstoff[1]; - size_t const dstkoff = dstoff[2]; + ptrdiff_t const dstioff = dstoff[0]; + ptrdiff_t const dstjoff = dstoff[1]; + ptrdiff_t const dstkoff = dstoff[2]; @@ -124,9 +124,10 @@ namespace CarpetLib { // Loop over region - for (size_t k=0; k<regkext; ++k) { - for (size_t j=0; j<regjext; ++j) { - for (size_t i=0; i<regiext; ++i) { +#pragma omp parallel for + for (ptrdiff_t k=0; k<regkext; ++k) { + for (ptrdiff_t j=0; j<regjext; ++j) { + for (ptrdiff_t i=0; i<regiext; ++i) { dst [DSTIND3(i, j, k)] = + s1fac * src1 [SRCIND3(i, j, k)] diff --git a/Carpet/CarpetLib/src/interpolate_3d_4tl.cc b/Carpet/CarpetLib/src/interpolate_3d_4tl.cc index 8c08cd3ab..9892d0bbf 100644 --- a/Carpet/CarpetLib/src/interpolate_3d_4tl.cc +++ b/Carpet/CarpetLib/src/interpolate_3d_4tl.cc @@ -86,25 +86,25 @@ namespace CarpetLib { - size_t const srciext = srcext[0]; - size_t const srcjext = srcext[1]; - size_t const srckext = srcext[2]; + ptrdiff_t const srciext = srcext[0]; + ptrdiff_t const srcjext = srcext[1]; + ptrdiff_t const srckext = srcext[2]; - size_t const dstiext = dstext[0]; - size_t const dstjext = dstext[1]; - size_t const dstkext = dstext[2]; + ptrdiff_t const dstiext = dstext[0]; + ptrdiff_t const dstjext = dstext[1]; + ptrdiff_t const dstkext = dstext[2]; - size_t const regiext = regext[0]; - size_t const regjext = regext[1]; - size_t const regkext = regext[2]; + ptrdiff_t const regiext = regext[0]; + ptrdiff_t const regjext = regext[1]; + ptrdiff_t const regkext = regext[2]; - size_t const srcioff = srcoff[0]; - size_t const srcjoff = srcoff[1]; - size_t const srckoff = srcoff[2]; + ptrdiff_t const srcioff = srcoff[0]; + ptrdiff_t const srcjoff = srcoff[1]; + ptrdiff_t const srckoff = srcoff[2]; - size_t const dstioff = dstoff[0]; - size_t const dstjoff = dstoff[1]; - size_t const dstkoff = dstoff[2]; + ptrdiff_t const dstioff = dstoff[0]; + ptrdiff_t const dstjoff = dstoff[1]; + ptrdiff_t const dstkoff = dstoff[2]; @@ -131,9 +131,10 @@ namespace CarpetLib { // Loop over region - for (size_t k=0; k<regkext; ++k) { - for (size_t j=0; j<regjext; ++j) { - for (size_t i=0; i<regiext; ++i) { +#pragma omp parallel for + for (ptrdiff_t k=0; k<regkext; ++k) { + for (ptrdiff_t j=0; j<regjext; ++j) { + for (ptrdiff_t i=0; i<regiext; ++i) { dst [DSTIND3(i, j, k)] = + s1fac * src1 [SRCIND3(i, j, k)] diff --git a/Carpet/CarpetLib/src/interpolate_3d_5tl.cc b/Carpet/CarpetLib/src/interpolate_3d_5tl.cc index 7b380dafe..abad807b1 100644 --- a/Carpet/CarpetLib/src/interpolate_3d_5tl.cc +++ b/Carpet/CarpetLib/src/interpolate_3d_5tl.cc @@ -88,25 +88,25 @@ namespace CarpetLib { - size_t const srciext = srcext[0]; - size_t const srcjext = srcext[1]; - size_t const srckext = srcext[2]; + ptrdiff_t const srciext = srcext[0]; + ptrdiff_t const srcjext = srcext[1]; + ptrdiff_t const srckext = srcext[2]; - size_t const dstiext = dstext[0]; - size_t const dstjext = dstext[1]; - size_t const dstkext = dstext[2]; + ptrdiff_t const dstiext = dstext[0]; + ptrdiff_t const dstjext = dstext[1]; + ptrdiff_t const dstkext = dstext[2]; - size_t const regiext = regext[0]; - size_t const regjext = regext[1]; - size_t const regkext = regext[2]; + ptrdiff_t const regiext = regext[0]; + ptrdiff_t const regjext = regext[1]; + ptrdiff_t const regkext = regext[2]; - size_t const srcioff = srcoff[0]; - size_t const srcjoff = srcoff[1]; - size_t const srckoff = srcoff[2]; + ptrdiff_t const srcioff = srcoff[0]; + ptrdiff_t const srcjoff = srcoff[1]; + ptrdiff_t const srckoff = srcoff[2]; - size_t const dstioff = dstoff[0]; - size_t const dstjoff = dstoff[1]; - size_t const dstkoff = dstoff[2]; + ptrdiff_t const dstioff = dstoff[0]; + ptrdiff_t const dstjoff = dstoff[1]; + ptrdiff_t const dstkoff = dstoff[2]; @@ -136,9 +136,10 @@ namespace CarpetLib { // Loop over region - for (size_t k=0; k<regkext; ++k) { - for (size_t j=0; j<regjext; ++j) { - for (size_t i=0; i<regiext; ++i) { +#pragma omp parallel for + for (ptrdiff_t k=0; k<regkext; ++k) { + for (ptrdiff_t j=0; j<regjext; ++j) { + for (ptrdiff_t i=0; i<regiext; ++i) { dst [DSTIND3(i, j, k)] = + s1fac * src1 [SRCIND3(i, j, k)] diff --git a/Carpet/CarpetLib/src/interpolate_eno_3d_3tl.cc b/Carpet/CarpetLib/src/interpolate_eno_3d_3tl.cc index 0af632f88..ef3a69053 100644 --- a/Carpet/CarpetLib/src/interpolate_eno_3d_3tl.cc +++ b/Carpet/CarpetLib/src/interpolate_eno_3d_3tl.cc @@ -102,25 +102,25 @@ namespace CarpetLib { - size_t const srciext = srcext[0]; - size_t const srcjext = srcext[1]; - size_t const srckext = srcext[2]; + ptrdiff_t const srciext = srcext[0]; + ptrdiff_t const srcjext = srcext[1]; + ptrdiff_t const srckext = srcext[2]; - size_t const dstiext = dstext[0]; - size_t const dstjext = dstext[1]; - size_t const dstkext = dstext[2]; + ptrdiff_t const dstiext = dstext[0]; + ptrdiff_t const dstjext = dstext[1]; + ptrdiff_t const dstkext = dstext[2]; - size_t const regiext = regext[0]; - size_t const regjext = regext[1]; - size_t const regkext = regext[2]; + ptrdiff_t const regiext = regext[0]; + ptrdiff_t const regjext = regext[1]; + ptrdiff_t const regkext = regext[2]; - size_t const srcioff = srcoff[0]; - size_t const srcjoff = srcoff[1]; - size_t const srckoff = srcoff[2]; + ptrdiff_t const srcioff = srcoff[0]; + ptrdiff_t const srcjoff = srcoff[1]; + ptrdiff_t const srckoff = srcoff[2]; - size_t const dstioff = dstoff[0]; - size_t const dstjoff = dstoff[1]; - size_t const dstkoff = dstoff[2]; + ptrdiff_t const dstioff = dstoff[0]; + ptrdiff_t const dstjoff = dstoff[1]; + ptrdiff_t const dstkoff = dstoff[2]; @@ -154,9 +154,10 @@ namespace CarpetLib { // Loop over region - for (size_t k=0; k<regkext; ++k) { - for (size_t j=0; j<regjext; ++j) { - for (size_t i=0; i<regiext; ++i) { +#pragma omp parallel for + for (ptrdiff_t k=0; k<regkext; ++k) { + for (ptrdiff_t j=0; j<regjext; ++j) { + for (ptrdiff_t i=0; i<regiext; ++i) { T const s1 = src1 [SRCIND3(i, j, k)]; T const s2 = src2 [SRCIND3(i, j, k)]; diff --git a/Carpet/CarpetLib/src/prolongate_3d_cc_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_cc_rf2.cc index 751380e53..f58295560 100644 --- a/Carpet/CarpetLib/src/prolongate_3d_cc_rf2.cc +++ b/Carpet/CarpetLib/src/prolongate_3d_cc_rf2.cc @@ -120,6 +120,7 @@ namespace CarpetLib { +#pragma omp parallel for for (int k=0; k<regkext; ++k) { for (int j=0; j<regjext; ++j) { for (int i=0; i<regiext; ++i) { @@ -252,6 +253,7 @@ namespace CarpetLib { +#pragma omp parallel for for (int k=0; k<regkext; ++k) { for (int j=0; j<regjext; ++j) { for (int i=0; i<regiext; ++i) { diff --git a/Carpet/CarpetLib/src/prolongate_3d_real8_eno.F90 b/Carpet/CarpetLib/src/prolongate_3d_real8_eno.F90 index 52bca8fa5..c86f773fc 100644 --- a/Carpet/CarpetLib/src/prolongate_3d_real8_eno.F90 +++ b/Carpet/CarpetLib/src/prolongate_3d_real8_eno.F90 @@ -190,6 +190,7 @@ subroutine prolongate_3d_real8_eno (src, srciext, srcjext, & !!$ Loop over fine region +!$omp parallel do private (k0, fk, j0, fj, i0, fi) do k = 0, regkext-1 k0 = (srckoff + k) / dstkfac fk = mod(srckoff + k, dstkfac) diff --git a/Carpet/CarpetLib/src/prolongate_3d_real8_weno.F90 b/Carpet/CarpetLib/src/prolongate_3d_real8_weno.F90 index 5c5d4cb87..2911e1e41 100644 --- a/Carpet/CarpetLib/src/prolongate_3d_real8_weno.F90 +++ b/Carpet/CarpetLib/src/prolongate_3d_real8_weno.F90 @@ -275,6 +275,7 @@ subroutine prolongate_3d_real8_weno (src, srciext, srcjext, & !!$ Loop over fine region +!$omp parallel do private (k0, fk, j0, fj, i0, fi) do k = 0, regkext-1 k0 = (srckoff + k) / dstkfac fk = mod(srckoff + k, dstkfac) diff --git a/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc index 5fa543604..7eebd36b0 100644 --- a/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc +++ b/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc @@ -114,6 +114,7 @@ namespace CarpetLib { // Loop over coarse region +#pragma omp parallel for for (int k=0; k<regkext; ++k) { for (int j=0; j<regjext; ++j) { for (int i=0; i<regiext; ++i) { diff --git a/Carpet/CarpetLib/src/restrict_3d_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_rf2.cc index fc8490dae..87fb69c07 100644 --- a/Carpet/CarpetLib/src/restrict_3d_rf2.cc +++ b/Carpet/CarpetLib/src/restrict_3d_rf2.cc @@ -74,32 +74,33 @@ namespace CarpetLib { - size_t const srciext = srcext[0]; - size_t const srcjext = srcext[1]; - size_t const srckext = srcext[2]; + ptrdiff_t const srciext = srcext[0]; + ptrdiff_t const srcjext = srcext[1]; + ptrdiff_t const srckext = srcext[2]; - size_t const dstiext = dstext[0]; - size_t const dstjext = dstext[1]; - size_t const dstkext = dstext[2]; + ptrdiff_t const dstiext = dstext[0]; + ptrdiff_t const dstjext = dstext[1]; + ptrdiff_t const dstkext = dstext[2]; - size_t const regiext = regext[0]; - size_t const regjext = regext[1]; - size_t const regkext = regext[2]; + ptrdiff_t const regiext = regext[0]; + ptrdiff_t const regjext = regext[1]; + ptrdiff_t const regkext = regext[2]; - size_t const srcioff = srcoff[0]; - size_t const srcjoff = srcoff[1]; - size_t const srckoff = srcoff[2]; + ptrdiff_t const srcioff = srcoff[0]; + ptrdiff_t const srcjoff = srcoff[1]; + ptrdiff_t const srckoff = srcoff[2]; - size_t const dstioff = dstoff[0]; - size_t const dstjoff = dstoff[1]; - size_t const dstkoff = dstoff[2]; + ptrdiff_t const dstioff = dstoff[0]; + ptrdiff_t const dstjoff = dstoff[1]; + ptrdiff_t const dstkoff = dstoff[2]; // Loop over coarse region - for (size_t k=0; k<regkext; ++k) { - for (size_t j=0; j<regjext; ++j) { - for (size_t i=0; i<regiext; ++i) { +#pragma omp parallel for + for (ptrdiff_t k=0; k<regkext; ++k) { + for (ptrdiff_t j=0; j<regjext; ++j) { + for (ptrdiff_t i=0; i<regiext; ++i) { dst [DSTIND3(i, j, k)] = src [SRCIND3(2*i, 2*j, 2*k)]; |