From 241381efbb3e69a1a3a4d9ea26c17dfeecc81e8e Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Mon, 14 Jan 2008 07:54:00 +0000 Subject: CarpetLib: Correct parallelisation of prolongation operators darcs-hash:20080114075419-dae7b-b59ccf120d369c4ea8715f856a68e63a0c396f63.gz --- Carpet/CarpetLib/src/data.cc | 142 ++++++++++++++++++++++++++++++++----------- 1 file changed, 107 insertions(+), 35 deletions(-) (limited to 'Carpet/CarpetLib/src') diff --git a/Carpet/CarpetLib/src/data.cc b/Carpet/CarpetLib/src/data.cc index 300ddc0bb..f499b4c87 100644 --- a/Carpet/CarpetLib/src/data.cc +++ b/Carpet/CarpetLib/src/data.cc @@ -10,11 +10,15 @@ #include #include +#ifdef _OPENMP +# include +#endif #include "cctk.h" #include "cctk_Parameters.h" #include "bbox.hh" +#include "bboxset.hh" #include "defs.hh" #include "dist.hh" #include "timestat.hh" @@ -29,6 +33,69 @@ using namespace CarpetLib; +template +static +void +call_operator (void + the_operator (T const * restrict const src, + ivect3 const & restrict srcext, + T * restrict const dst, + ivect3 const & restrict dstext, + ibbox3 const & restrict srcbbox, + ibbox3 const & restrict dstbbox, + ibbox3 const & restrict regbbox), + T const * restrict const src, + ivect3 const & restrict srcext, + T * restrict const dst, + ivect3 const & restrict dstext, + ibbox3 const & restrict srcbbox, + ibbox3 const & restrict dstbbox, + ibbox3 const & restrict regbbox) +{ +#ifndef _OPENMP + the_operator (src, srcext, dst, dstext, srcbbox, dstbbox, regbbox); +#else +# if ! defined (CARPET_OPTIMISE) + ibset allregbboxes; +# endif + _Pragma ("omp parallel") { + int const num_threads = omp_get_num_threads(); + int const thread_num = omp_get_thread_num(); + // Parallelise in z direction + int const dir = 2; + int const stride = regbbox.stride()[dir]; + int const first_point = regbbox.lower()[dir]; + int const last_point = regbbox.upper()[dir] + stride; + int const num_points = last_point - first_point; + assert (num_points >= 0); + assert (num_points % stride == 0); + int const my_num_points = + (num_points / stride + num_threads - 1) / num_threads * stride; + int const my_first_point = + min (last_point, first_point + thread_num * my_num_points); + int const my_last_point = + max (my_first_point, my_first_point + my_num_points); + ibbox3 const myregbbox + (regbbox.lower().replace (dir, my_first_point), + regbbox.upper().replace (dir, my_last_point - stride), + regbbox.stride()); + if (not myregbbox.empty()) { + the_operator (src, srcext, dst, dstext, srcbbox, dstbbox, myregbbox); + } +# if ! defined (NDEBUG) && ! defined (CARPET_OPTIMISE) + _Pragma ("omp critical") { + allregbboxes += myregbbox; + } +# endif + } +# if ! defined (CARPET_OPTIMISE) + assert (allregbboxes == ibset (regbbox)); +# endif +#endif +} + + + // Fortran wrappers template @@ -502,31 +569,34 @@ transfer_prolongate (data const * const src, timer.start (); switch (order_space) { case 1: - prolongate_3d_o1_rf2 (static_cast (src->storage()), - src->shape(), - static_cast (this->storage()), - this->shape(), - src->extent(), - this->extent(), - box); + call_operator (prolongate_3d_o1_rf2, + static_cast (src->storage()), + src->shape(), + static_cast (this->storage()), + this->shape(), + src->extent(), + this->extent(), + box); break; case 3: - prolongate_3d_o3_rf2 (static_cast (src->storage()), - src->shape(), - static_cast (this->storage()), - this->shape(), - src->extent(), - this->extent(), - box); + call_operator (prolongate_3d_o3_rf2, + static_cast (src->storage()), + src->shape(), + static_cast (this->storage()), + this->shape(), + src->extent(), + this->extent(), + box); break; case 5: - prolongate_3d_o5_rf2 (static_cast (src->storage()), - src->shape(), - static_cast (this->storage()), - this->shape(), - src->extent(), - this->extent(), - box); + call_operator (prolongate_3d_o5_rf2, + static_cast (src->storage()), + src->shape(), + static_cast (this->storage()), + this->shape(), + src->extent(), + this->extent(), + box); break; default: CCTK_WARN (CCTK_WARN_ABORT, @@ -546,13 +616,14 @@ transfer_prolongate (data const * const src, "There is no stencil for op=\"ENO\" with order_space==1"); break; case 3: - prolongate_3d_eno (static_cast (src->storage()), - src->shape(), - static_cast (this->storage()), - this->shape(), - src->extent(), - this->extent(), - box); + call_operator (prolongate_3d_eno, + static_cast (src->storage()), + src->shape(), + static_cast (this->storage()), + this->shape(), + src->extent(), + this->extent(), + box); break; default: CCTK_WARN (CCTK_WARN_ABORT, @@ -576,13 +647,14 @@ transfer_prolongate (data const * const src, "There is no stencil for op=\"WENO\" with order_space=3"); break; case 5: - prolongate_3d_eno (static_cast (src->storage()), - src->shape(), - static_cast (this->storage()), - this->shape(), - src->extent(), - this->extent(), - box); + call_operator (prolongate_3d_eno, + static_cast (src->storage()), + src->shape(), + static_cast (this->storage()), + this->shape(), + src->extent(), + this->extent(), + box); break; default: CCTK_WARN (CCTK_WARN_ABORT, -- cgit v1.2.3