diff options
-rw-r--r-- | Carpet/LoopControl/README | 4 | ||||
-rw-r--r-- | Carpet/LoopControl/param.ccl | 16 | ||||
-rw-r--r-- | Carpet/LoopControl/schedule.ccl | 10 | ||||
-rw-r--r-- | Carpet/LoopControl/src/lc_auto.c | 4 | ||||
-rw-r--r-- | Carpet/LoopControl/src/lc_siman.c | 4 | ||||
-rw-r--r-- | Carpet/LoopControl/src/loopcontrol.c | 60 | ||||
-rw-r--r-- | Carpet/LoopControl/src/loopcontrol_fortran.h | 2 |
7 files changed, 80 insertions, 20 deletions
diff --git a/Carpet/LoopControl/README b/Carpet/LoopControl/README index 231398bb0..50e2eacb5 100644 --- a/Carpet/LoopControl/README +++ b/Carpet/LoopControl/README @@ -79,3 +79,7 @@ Combine several strategies: - Note that a full sampling is too expensive. - Use a hierarchical strategy for this? - Use random sampling for this? + +5. Offer the parallelisation mechanism, or the tiling mechanism + separately, without actually looping over the array or over the + tiles. diff --git a/Carpet/LoopControl/param.ccl b/Carpet/LoopControl/param.ccl index f49e73b37..5875c8dcc 100644 --- a/Carpet/LoopControl/param.ccl +++ b/Carpet/LoopControl/param.ccl @@ -2,6 +2,10 @@ # General options +BOOLEAN printstats "Output timing statistics at shutdown" STEERABLE=recover +{ +} "no" + BOOLEAN verbose "Verbosity" STEERABLE=always { } "no" @@ -56,6 +60,14 @@ CCTK_INT lc_knpoints "Number of grid points in the k-direction" STEERABLE=recove +# Use legacy configuration + +BOOLEAN legacy_init "Initialise with legacy configuration (usually slower)" STEERABLE=recover +{ +} "no" + + + # Automatic: simple cycling BOOLEAN cycle_j_tilings "Cycle through all available tilings in the j-direction" STEERABLE=recover @@ -118,6 +130,10 @@ CCTK_REAL maximum_setup_overhead "Maximum allowable administrative overhead" STE 0.0:* :: "" } 0.01 +BOOLEAN ignore_initial_overhead "Ignore the overhead from the initial setup" STEERABLE=recover +{ +} "yes" + CCTK_REAL probability_small_jump "Probability for a small jump once a local minimum has been reached" STEERABLE=always { 0.0:1.0 :: "" diff --git a/Carpet/LoopControl/schedule.ccl b/Carpet/LoopControl/schedule.ccl index 56204414d..fc14c7b0d 100644 --- a/Carpet/LoopControl/schedule.ccl +++ b/Carpet/LoopControl/schedule.ccl @@ -1,6 +1,8 @@ # Schedule definitions for thorn LoopControl -SCHEDULE lc_printstats AT terminate -{ - LANG: C -} "Output loop control statistics" +if (printstats) { + SCHEDULE lc_printstats AT terminate + { + LANG: C + } "Output loop control statistics" +} diff --git a/Carpet/LoopControl/src/lc_auto.c b/Carpet/LoopControl/src/lc_auto.c index 96c6b39d7..fcfecc47b 100644 --- a/Carpet/LoopControl/src/lc_auto.c +++ b/Carpet/LoopControl/src/lc_auto.c @@ -6,6 +6,10 @@ #include <cctk.h> #include <cctk_Parameters.h> +#ifdef HAVE_TGMATH_H +# include <tgmath.h> +#endif + #include "lc_siman.h" #include "lc_auto.h" diff --git a/Carpet/LoopControl/src/lc_siman.c b/Carpet/LoopControl/src/lc_siman.c index d34d3da0a..d9e11839e 100644 --- a/Carpet/LoopControl/src/lc_siman.c +++ b/Carpet/LoopControl/src/lc_siman.c @@ -30,6 +30,10 @@ #include <cctk.h> +#ifdef HAVE_TGMATH_H +# include <tgmath.h> +#endif + #include "lc_siman.h" static inline diff --git a/Carpet/LoopControl/src/loopcontrol.c b/Carpet/LoopControl/src/loopcontrol.c index 72fd3e40e..6f420e0c1 100644 --- a/Carpet/LoopControl/src/loopcontrol.c +++ b/Carpet/LoopControl/src/loopcontrol.c @@ -15,6 +15,10 @@ #include <cctk.h> #include <cctk_Parameters.h> +#ifdef HAVE_TGMATH_H +# include <tgmath.h> +#endif + #include "loopcontrol.h" #include "lc_auto.h" @@ -383,6 +387,7 @@ lc_statset_init (lc_statset_t * restrict const ls, ls->topologies[n].nthreads[2]); } } + assert (ls->ntopologies > 0); /*** Tilings ****************************************************************/ @@ -398,28 +403,24 @@ lc_statset_init (lc_statset_t * restrict const ls, printf ("Dimension %d: %d points\n", d, ls->npoints[d]); } ls->tilings[d] = malloc (maxntilings * sizeof * ls->tilings[d]); - int ntilings; find_tiling_specifications (ls->tilings[d], maxntilings, & ls->ntilings[d], ls->npoints[d]); -#if 0 - ls->tilings[d] = - realloc (ls->tilings[d], ls->ntilings[d] * sizeof * ls->tilings[d]); -#endif ls->topology_ntilings[d] = malloc (ls->ntopologies * sizeof * ls->topology_ntilings[d]); for (int n = 0; n < ls->ntopologies; ++n) { int tiling; - for (tiling = 0; tiling < ls->ntilings[d]; ++tiling) { + for (tiling = 1; tiling < ls->ntilings[d]; ++tiling) { if (ls->tilings[d][tiling].npoints * ls->topologies[n].nthreads[d] > ls->npoints[d]) { break; } } - ls->topology_ntilings[d][n] = tiling; + /* Always allow at least one tiling */ + ls->topology_ntilings[d][n] = tiling == 0 ? 1 : tiling; } if (debug) { - printf (" Found %d possible tilings\n", ntilings); + printf (" Found %d possible tilings\n", ls->ntilings[d]); printf (" "); for (int n = 0; n < ls->ntilings[d]; ++n) { printf (" %d", ls->tilings[d][n].npoints); @@ -605,8 +606,22 @@ lc_control_init (lc_control_t * restrict const lc, } else { /* Split in the k direction */ - state.topology = ls->ntopologies - 1; + for (state.topology = ls->ntopologies - 1; + state.topology >= 0; + -- state.topology) + { + int have_tilings = 1; + for (int d=0; d<3; ++d) { + have_tilings = have_tilings && + ls->topology_ntilings[d][state.topology] > 0; + } + if (have_tilings) break; + } + if (state.topology < 0) { + assert (0); + CCTK_WARN (CCTK_WARN_ABORT, "grid too small"); + } } /* Select tiling */ @@ -616,6 +631,7 @@ lc_control_init (lc_control_t * restrict const lc, state.tiling[0] = -1; } else { /* as many points as possible */ + assert (state.topology >= 0); state.tiling[0] = ls->topology_ntilings[0][state.topology] - 1; } @@ -626,7 +642,12 @@ lc_control_init (lc_control_t * restrict const lc, if (cycle_j_tilings) { /* cycle through all tilings */ static int count = 0; + assert (state.topology >= 0); state.tiling[1] = (count ++) % ls->topology_ntilings[1][state.topology]; + } else if (legacy_init) { + /* as many points as possible */ + assert (state.topology >= 0); + state.tiling[1] = ls->topology_ntilings[1][state.topology] - 1; } else { /* as few points as possible */ state.tiling[1] = 0; @@ -638,6 +659,7 @@ lc_control_init (lc_control_t * restrict const lc, state.tiling[2] = -1; } else { /* as many points as possible */ + assert (state.topology >= 0); state.tiling[2] = ls->topology_ntilings[2][state.topology] - 1; } @@ -752,6 +774,15 @@ lc_control_init (lc_control_t * restrict const lc, void lc_control_finish (lc_control_t * restrict const lc) { + lc_stattime_t * restrict const lt = lc->stattime; + lc_statset_t * restrict const ls = lc->statset; + + int ignore_iteration; + _Pragma ("omp single copyprivate (ignore_iteration)") { + DECLARE_CCTK_PARAMETERS; + ignore_iteration = ignore_initial_overhead && lt->time_count == 0.0; + } + /* Timer */ double const time_calc_end = omp_get_wtime(); double const time_calc_begin = lc->time_calc_begin; @@ -759,15 +790,14 @@ lc_control_finish (lc_control_t * restrict const lc) double const time_setup_end = time_calc_begin; double const time_setup_begin = lc->time_setup_begin; - double const time_setup_sum = time_setup_end - time_setup_begin; + double const time_setup_sum = + ignore_iteration ? 0.0 : time_setup_end - time_setup_begin; double const time_setup_sum2 = pow (time_setup_sum, 2); double const time_calc_sum = time_calc_end - time_calc_begin; double const time_calc_sum2 = pow (time_calc_sum, 2); /* Update statistics */ - lc_stattime_t * restrict const lt = lc->stattime; - lc_statset_t * restrict const ls = lc->statset; _Pragma ("omp critical") { lt->time_count += 1.0; @@ -831,7 +861,6 @@ void lc_printstats (CCTK_ARGUMENTS) { DECLARE_CCTK_PARAMETERS; - if (! verbose) return; int nmaps = 0; for (lc_statmap_t * lm = lc_statmap_list; lm; lm = lm->next) { @@ -868,10 +897,11 @@ lc_printstats (CCTK_ARGUMENTS) } ++ ntimes; } + double const avg_calc = sum_calc / sum_count; printf (" total count: %g total setup: %g total calc: %g\n", sum_count, sum_setup, sum_calc); - printf (" min calc: %g (#%d)\n", - min_calc, imin_calc); + printf (" avg calc: %g min calc: %g (#%d)\n", + avg_calc, min_calc, imin_calc); ++ nsets; } ++ nmaps; diff --git a/Carpet/LoopControl/src/loopcontrol_fortran.h b/Carpet/LoopControl/src/loopcontrol_fortran.h index e80bf358f..7edf22d84 100644 --- a/Carpet/LoopControl/src/loopcontrol_fortran.h +++ b/Carpet/LoopControl/src/loopcontrol_fortran.h @@ -1,4 +1,4 @@ -/* -*-f90-mode-*- */ +/* -*-f90-*- */ #ifndef LOOPCONTROL_FORTRAN_H #define LOOPCONTROL_FORTRAN_H |