From cb34e2dc04c82084d0e5fd1965153beb2f0c784a Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Thu, 8 Aug 2013 13:11:19 -0400 Subject: LoopControl: Turn fine thread communicators into global (static) variables --- Carpet/LoopControl/src/loopcontrol.cc | 63 ++++++++++++---------------- Carpet/LoopControl/src/loopcontrol.h | 2 - Carpet/LoopControl/src/loopcontrol_types.F90 | 13 +++--- 3 files changed, 33 insertions(+), 45 deletions(-) diff --git a/Carpet/LoopControl/src/loopcontrol.cc b/Carpet/LoopControl/src/loopcontrol.cc index 68df03b27..f13441499 100644 --- a/Carpet/LoopControl/src/loopcontrol.cc +++ b/Carpet/LoopControl/src/loopcontrol.cc @@ -75,17 +75,20 @@ static bool lc_do_settle = false; struct lc_thread_info_t { - char padding1[128]; // pad to ensure cache lines are not shared volatile int idx; // linear index of next coarse thread block - char padding2[128]; -}; +} CCTK_ATTRIBUTE_ALIGNED(128); // align to prevent sharing cache lines struct lc_fine_thread_comm_t { - char padding1[128]; // pad to ensure cache lines are not shared volatile int state; // waiting threads volatile int value; // broadcast value - char padding2[128]; -}; +} CCTK_ATTRIBUTE_ALIGNED(128); // align to prevent sharing cache lines + +// One object per coarse thread: shared between fine threads +// Note: Since we use a vector, the individual elements may not +// actually be aligned, but they will still be spaced apart and thus +// be placed into different cache lines. 
+static vector<lc_fine_thread_comm_t> lc_fine_thread_comm; + @@ -753,6 +756,18 @@ void lc_control_init(lc_control_t *restrict const control, assert(get_num_coarse_threads() * get_num_fine_threads() == omp_get_num_threads()); + // Allocate fine thread communicators + if (int(lc_fine_thread_comm.size()) < get_num_coarse_threads()) { +#pragma omp barrier + assert(int(lc_fine_thread_comm.size()) < get_num_coarse_threads()); +#pragma omp master + { + lc_fine_thread_comm.resize(get_num_coarse_threads()); + } +#pragma omp barrier + assert(int(lc_fine_thread_comm.size()) == get_num_coarse_threads()); + } + // Initialize everything with a large, bogus value @@ -795,34 +810,13 @@ void lc_control_init(lc_control_t *restrict const control, } // Set up multithreading state - lc_thread_info_t *thread_info_ptr; -#pragma omp single copyprivate(thread_info_ptr) - { - thread_info_ptr = new lc_thread_info_t; - } - control->coarse_thread_info_ptr = thread_info_ptr; - { - lc_fine_thread_comm_t **fine_thread_comm_ptrs; -#pragma omp single copyprivate(fine_thread_comm_ptrs) - { - fine_thread_comm_ptrs = - new lc_fine_thread_comm_t*[get_num_coarse_threads()]; - } - if (get_fine_thread_num() == 0) { - lc_fine_thread_comm_t *const - fine_thread_comm_ptr = new lc_fine_thread_comm_t; - fine_thread_comm_ptr->state = 0; - fine_thread_comm_ptrs[get_coarse_thread_num()] = fine_thread_comm_ptr; - } -#pragma omp barrier - control->fine_thread_comm_ptr = - fine_thread_comm_ptrs[get_coarse_thread_num()]; -#pragma omp barrier -#pragma omp single nowait + lc_thread_info_t *thread_info_ptr; +#pragma omp single copyprivate(thread_info_ptr) { - delete[] fine_thread_comm_ptrs; + thread_info_ptr = new lc_thread_info_t; } + control->coarse_thread_info_ptr = thread_info_ptr; } // Set loop sizes @@ -1021,10 +1015,6 @@ void lc_control_finish(lc_control_t *restrict const control, // Tear down multithreading state delete control->coarse_thread_info_ptr; control->coarse_thread_info_ptr = NULL; - if (get_fine_thread_num() == 0) { - 
delete control->fine_thread_comm_ptr; - } - control->fine_thread_comm_ptr = NULL; } #pragma omp barrier } @@ -1059,7 +1049,8 @@ void lc_thread_step(lc_control_t *restrict const control) } } new_global_idx = - fine_thread_broadcast(control->fine_thread_comm_ptr, new_global_idx); + fine_thread_broadcast(&lc_fine_thread_comm[get_coarse_thread_num()], + new_global_idx); control->coarse_thread_done = space_global2local(control->coarse_thread, new_global_idx); space_idx2pos(control->coarse_thread); diff --git a/Carpet/LoopControl/src/loopcontrol.h b/Carpet/LoopControl/src/loopcontrol.h index 93dfda4dd..c1855570d 100644 --- a/Carpet/LoopControl/src/loopcontrol.h +++ b/Carpet/LoopControl/src/loopcontrol.h @@ -81,8 +81,6 @@ extern "C" { /* fine threads; min, max, pos are undefined */ lc_space_t fine_thread; - /* shared between fine threads */ - struct lc_fine_thread_comm_t* fine_thread_comm_ptr; /* selftest: shared between all threads */ unsigned char* selftest_array; diff --git a/Carpet/LoopControl/src/loopcontrol_types.F90 b/Carpet/LoopControl/src/loopcontrol_types.F90 index f403aecca..393e27787 100644 --- a/Carpet/LoopControl/src/loopcontrol_types.F90 +++ b/Carpet/LoopControl/src/loopcontrol_types.F90 @@ -26,14 +26,13 @@ module loopcontrol_types type, bind(C) :: lc_control_t type(lc_vec_t) :: ash - type(lc_space_t) :: loop - type(lc_space_t) :: thread - CCTK_POINTER :: thread_idx_ptr - integer :: thread_done - type(lc_space_t) :: coarse - type(lc_space_t) :: fine + type(lc_space_t) :: overall + type(lc_space_t) :: coarse_thread + CCTK_POINTER :: coarse_thread_info_ptr + integer :: coarse_thread_done + type(lc_space_t) :: coarse_loop + type(lc_space_t) :: fine_loop type(lc_space_t) :: fine_thread - CCTK_POINTER :: fine_thread_info_ptr CCTK_POINTER :: selftest_array end type lc_control_t -- cgit v1.2.3