aboutsummaryrefslogtreecommitdiff
path: root/Carpet/LoopControl
diff options
context:
space:
mode:
author Erik Schnetter <schnetter@cct.lsu.edu> 2011-01-03 23:11:50 -0500
committer Barry Wardell <barry.wardell@gmail.com> 2011-12-14 18:25:54 +0000
commit f90266b83dbfedd7bf66e351770ae33586519de0 (patch)
tree ca9229ab54ee3ed63655f8203e6426f729be7525 /Carpet/LoopControl
parent 62f4164e45f4eff99bd213f469f2d3057e854a48 (diff)
LoopControl: Update statistics; activate statistics by default
Handle very small grids correctly. Output statistics all the time. Introduce multiple verbosity levels for statistics. Update statistics to include saved time and comparison between best and initial time. Activate hill climbing algorithm by default.
Diffstat (limited to 'Carpet/LoopControl')
-rw-r--r-- Carpet/LoopControl/param.ccl 26
-rw-r--r-- Carpet/LoopControl/schedule.ccl 9
-rw-r--r-- Carpet/LoopControl/src/lc_hill.c 60
-rw-r--r-- Carpet/LoopControl/src/lc_siman.c 13
-rw-r--r-- Carpet/LoopControl/src/lc_siman.h 13
-rw-r--r-- Carpet/LoopControl/src/loopcontrol.c 246
-rw-r--r-- Carpet/LoopControl/src/loopcontrol.h 27
7 files changed, 294 insertions, 100 deletions
diff --git a/Carpet/LoopControl/param.ccl b/Carpet/LoopControl/param.ccl
index f529229e2..0f3676bac 100644
--- a/Carpet/LoopControl/param.ccl
+++ b/Carpet/LoopControl/param.ccl
@@ -3,9 +3,27 @@
#################
# General options
-BOOLEAN printstats "Output timing statistics at shutdown" STEERABLE=recover
+BOOLEAN printstats "Output timing statistics" STEERABLE=recover
{
-} "no"
+} "yes"
+
+CCTK_REAL printstats_every_minutes "Output timing statistics every so many minutes" STEERABLE=always
+{
+ 0.0:* :: ""
+} 60.0
+
+CCTK_REAL printstats_threshold "Output timing statistics for loops costing at least this many percent" STEERABLE=always
+{
+ 0.0:100.0 :: ""
+} 2.0
+
+CCTK_INT printstats_verbosity "Level of detail for statistics" STEERABLE=always
+{
+ 0 :: "Output only a global summary"
+ 1 :: "Output summary for every loop"
+ 2 :: "Output summary for every loop parameter set"
+ 3 :: "Output everything"
+} 1
BOOLEAN verbose "Verbosity" STEERABLE=always
{
@@ -68,7 +86,7 @@ CCTK_INT lc_knpoints "Number of grid points in the k-direction" STEERABLE=recove
BOOLEAN legacy_init "Initialise with legacy configuration (usually slower)" STEERABLE=recover
{
-} "no"
+} "yes"
@@ -130,7 +148,7 @@ CCTK_REAL siman_T_min "stopping criterion" STEERABLE=recover
BOOLEAN use_random_restart_hill_climbing "http://en.wikipedia.org/wiki/Hill_climbing http://en.wikipedia.org/wiki/Tabu_search" STEERABLE=always
{
-} "no"
+} "yes"
CCTK_REAL maximum_setup_overhead "Maximum allowable administrative overhead" STEERABLE=always
{
diff --git a/Carpet/LoopControl/schedule.ccl b/Carpet/LoopControl/schedule.ccl
index fc14c7b0d..5c33eecc2 100644
--- a/Carpet/LoopControl/schedule.ccl
+++ b/Carpet/LoopControl/schedule.ccl
@@ -1,8 +1,15 @@
# Schedule definitions for thorn LoopControl
if (printstats) {
- SCHEDULE lc_printstats AT terminate
+ SCHEDULE lc_printstats_analysis AT analysis
{
LANG: C
+ OPTIONS: meta
+ } "Output loop control statistics"
+
+ SCHEDULE lc_printstats_terminate AT terminate
+ {
+ LANG: C
+ OPTIONS: meta
} "Output loop control statistics"
}
diff --git a/Carpet/LoopControl/src/lc_hill.c b/Carpet/LoopControl/src/lc_hill.c
index 6ab44dba8..362cf8e62 100644
--- a/Carpet/LoopControl/src/lc_hill.c
+++ b/Carpet/LoopControl/src/lc_hill.c
@@ -84,9 +84,9 @@ lc_hill_init (lc_statset_t * restrict const ls,
return;
}
- /* If the overhead has become too large, do nothing. */
+ /* If the overhead has become too large, do nothing. */
if (ls->time_setup_sum > maximum_setup_overhead * ls->time_calc_sum) {
- /* Stay at the old state. */
+ /* Stay at the old state. */
* new_state = lh->state;
return;
}
@@ -120,10 +120,18 @@ lc_hill_init (lc_statset_t * restrict const ls,
CCTK_INFO ("Hill climbing: Updating best time");
}
lh->best_time = lh->time;
+ /* Is there now a better state? */
+ for (lc_stattime_t * lt = ls->stattime_list; lt; lt = lt->next) {
+ double const time = lt->time_calc_sum / lt->time_count;
+ if (time < lh->best_time) {
+ lh->best = lt->state;
+ lh->best_time = time;
+ }
+ }
}
/* Compare the time for the current state with the time for the
- previous state. If the previous state was better, backtrack. */
+ previous state. If the previous state was better, backtrack. */
if (lh->have_previous && lh->previous_time < lh->time) {
if (verbose) {
CCTK_INFO ("Hill climbing: Backtracking");
@@ -157,7 +165,7 @@ lc_hill_init (lc_statset_t * restrict const ls,
search:;
- /* Look which neighbours exist. */
+ /* Look which neighbours exist. */
typedef enum { nb_boundary, nb_missing, nb_exists } neighbour_t;
neighbour_t neighbours[3][2];
lc_state_t nb_state[3][2];
@@ -194,7 +202,7 @@ lc_hill_init (lc_statset_t * restrict const ls,
}
/* If not all neighbours exist, then choose a random neighbour and
- move there. */
+ move there. */
if (num_nonexist_states > 0) {
if (verbose) {
CCTK_INFO ("Hill climbing: Examining a new state");
@@ -205,32 +213,40 @@ lc_hill_init (lc_statset_t * restrict const ls,
return;
}
- /* All neighbours exist. Look whether we are in a local
- minimum. */
+ if (! nb_minimum_time) {
+ /* There are no neighbours. Stay where we are. */
+ if (verbose) {
+ CCTK_INFO ("Hill climbing: No neighbours, staying put");
+ }
+ * new_state = lh->state;
+ return;
+ }
+
+ /* All neighbours exist. Look whether we are in a local minimum. */
assert (nb_minimum_time);
if (minimum_time >= lh->time) {
- /* We are in a local minimum. */
+ /* We are in a local minimum. */
if (verbose) {
CCTK_INFO ("Hill climbing: Local minimum reached");
}
- /* Every so often take a small jump. */
+ /* Every so often take a small jump. */
if (drand() < probability_small_jump) {
- /* Be curious, go somewhere nearby. */
+ /* Be curious, go somewhere nearby. */
if (verbose) {
CCTK_INFO ("Hill climbing: Making a small jump");
}
for (int ntries = 0; ntries < max_jump_attempts; ++ ntries) {
lc_state_t try_state = lh->state;
if (drand() < 0.25) {
- /* Change the topology, but not the tiling. */
+ /* Change the topology, but not the tiling. */
try_state.topology = irand (ls->ntopologies);
for (int d=0; d<3; ++d) {
if (try_state.tiling[d] >=
ls->topology_ntilings[d][try_state.topology])
{
/* The tiling doesn't fit for this new topology; don't
- choose this topology. */
+ choose this topology. */
goto next_try;
}
}
@@ -252,12 +268,12 @@ lc_hill_init (lc_statset_t * restrict const ls,
}
next_try:;
}
- /* Don't jump after all. */
+ /* Don't jump after all. */
}
- /* Every so often take a random jump. */
+ /* Every so often take a random jump. */
if (drand() < probability_random_jump) {
- /* Be adventurous, go somewhere unknown. */
+ /* Be adventurous, go somewhere unknown. */
if (verbose) {
CCTK_INFO ("Hill climbing: Jumping randomly");
}
@@ -269,7 +285,7 @@ lc_hill_init (lc_statset_t * restrict const ls,
irand (ls->topology_ntilings[d][try_state.topology]);
}
if (! lc_stattime_find (ls, & try_state)) {
- /* The new state is hitherto unknown, use it. */
+ /* The new state is hitherto unknown, use it. */
lh->state = try_state;
lh->excursion_start = lh->iteration;
lh->have_previous = 0; /* disable backtracking */
@@ -277,13 +293,13 @@ lc_hill_init (lc_statset_t * restrict const ls,
return;
}
}
- /* Don't jump after all. */
+ /* Don't jump after all. */
}
/* If the current state is not the best state, give up and go
- back. */
+ back. */
if (! lc_state_equal (& lh->state, & lh->best)) {
- /* Revert to the best known state. */
+ /* Revert to the best known state. */
if (verbose) {
CCTK_INFO ("Hill climbing: Reverting to best known state");
}
@@ -294,7 +310,7 @@ lc_hill_init (lc_statset_t * restrict const ls,
return;
}
- /* Be content, do nothing. */
+ /* Be content, do nothing. */
if (verbose) {
CCTK_INFO ("Hill climbing: Resting");
}
@@ -302,8 +318,8 @@ lc_hill_init (lc_statset_t * restrict const ls,
return;
}
- /* One of the neighbours is better. Move to this neighbour, and
- continue the search there. */
+ /* One of the neighbours is better. Move to this neighbour, and
+ continue the search there. */
if (verbose) {
CCTK_INFO ("Hill climbing: Found a better neighbour, going there");
}
diff --git a/Carpet/LoopControl/src/lc_siman.c b/Carpet/LoopControl/src/lc_siman.c
index b69e03d5c..133111898 100644
--- a/Carpet/LoopControl/src/lc_siman.c
+++ b/Carpet/LoopControl/src/lc_siman.c
@@ -4,19 +4,20 @@
/* Copyright (C) 1996, 1997, 1998, 1999, 2000 Mark Galassi
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
*/
#include <assert.h>
diff --git a/Carpet/LoopControl/src/lc_siman.h b/Carpet/LoopControl/src/lc_siman.h
index c848ba378..d5517ed64 100644
--- a/Carpet/LoopControl/src/lc_siman.h
+++ b/Carpet/LoopControl/src/lc_siman.h
@@ -4,19 +4,20 @@
/* Copyright (C) 1996, 1997, 1998, 1999, 2000 Mark Galassi
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
*/
#ifndef SIMAN_H
diff --git a/Carpet/LoopControl/src/loopcontrol.c b/Carpet/LoopControl/src/loopcontrol.c
index fa2761717..9094c12a2 100644
--- a/Carpet/LoopControl/src/loopcontrol.c
+++ b/Carpet/LoopControl/src/loopcontrol.c
@@ -71,12 +71,12 @@ lc_statmap_t * lc_statmap_list = NULL;
/* Find all possible thread topologies */
/* This finds all possible thread topologies which can be expressed as
- NIxNJxNK x NIIxNJJxNKK. More complex topologies, e.g. based on a
+ NIxNJxNK x NIIxNJJxNKK. More complex topologies, e.g. based on a
recursive subdivision, are not considered (and cannot be expressed
- with the data structures currently used in LoopControl). I expect
+ with the data structures currently used in LoopControl). I expect
that more complex topologies are not necessary, since the number of
threads is usually quite small and contains many small factors in
- its prime decomposition. */
+ its prime decomposition. */
static
void
find_thread_topologies (lc_topology_t * restrict const topologies,
@@ -123,19 +123,19 @@ find_thread_topologies (lc_topology_t * restrict const topologies,
}
-#if 0
+#if 1
/* Find "good" tiling specifications */
-/* This calculates a subset of all possible thread specifications.
- One aim is to reduce the search space by disregarding some
- specifications. The other aim is to distribute the specifications
+/* This calculates a subset of all possible thread specifications. One
+ aim is to reduce the search space by disregarding some
+ specifications. The other aim is to distribute the specifications
"equally", so that one does not have to spend much effort
investigating tiling specifications with very similar properties.
For example, if there are 200 grid points, then half of the
possible tiling specifications consist of splitting the domain into
- two subdomains with [100+N, 100-N] points. This is avoided by
+ two subdomains with [100+N, 100-N] points. This is avoided by
covering all possible tiling specifications in exponentially
- growing step sizes. */
+ growing step sizes. */
static
int tiling_compare (const void * const a, const void * const b)
{
@@ -152,15 +152,15 @@ find_tiling_specifications (lc_tiling_t * restrict const tilings,
int const npoints)
{
/* In order to reduce the number of possible tilings, require that
- the step sizes differ by more than 10%. */
+ the step sizes differ by more than 10%. */
double const distance_factor = 1.1;
/* Determine the "good" step sizes in two passes: first small step
sizes from 1 up to snpoints, then large step sizes from npoints
- down to snpoints+1. */
+ down to snpoints+1. */
int const snpoints = floor (sqrt (npoints));
/* For N grid points and a minimum spacing factor F, there are at
- most log(N) / log(F) possible tilings. There will be fewer,
- since the actual spacings will be rounded up to integers. */
+ most log(N) / log(F) possible tilings. There will be fewer, since
+ the actual spacings will be rounded up to integers. */
* ntilings = 0;
@@ -188,8 +188,11 @@ find_tiling_specifications (lc_tiling_t * restrict const tilings,
/* Sort */
qsort (tilings, * ntilings, sizeof * tilings, tiling_compare);
+
+ assert (* ntilings >= 1);
}
-#endif
+
+#else
static
void
@@ -199,11 +202,11 @@ find_tiling_specifications (lc_tiling_t * restrict const tilings,
int const npoints)
{
/* In order to reduce the number of possible tilings, require that
- the step sizes differ by more than 10%. */
+ the step sizes differ by more than 10%. */
double const distance_factor = 1.1;
/* For N grid points and a minimum spacing factor F, there are at
- most log(N) / log(F) possible tilings. There will be fewer,
- since the actual spacings will be rounded up to integers. */
+ most log(N) / log(F) possible tilings. There will be fewer, since
+ the actual spacings will be rounded up to integers. */
* ntilings = 0;
@@ -224,6 +227,8 @@ find_tiling_specifications (lc_tiling_t * restrict const tilings,
++ * ntilings;
}
+#endif
+
/* Initialise control parameter set statistics */
@@ -326,10 +331,12 @@ lc_stattime_init (lc_stattime_t * restrict const lt,
/* Initialise statistics */
lt->time_count = 0.0;
+ lt->time_count_init = 0.0;
lt->time_setup_sum = 0.0;
lt->time_setup_sum2 = 0.0;
lt->time_calc_sum = 0.0;
lt->time_calc_sum2 = 0.0;
+ lt->time_calc_init = 0.0;
lt->last_updated = 0.0; /* never updated */
@@ -510,6 +517,13 @@ lc_statset_init (lc_statset_t * restrict const ls,
break;
}
}
+ for (int t=tiling; t < ls->ntilings[d]; ++t) {
+ assert (ls->tilings[d][t].npoints *
+ ls->topologies[n].nthreads[0][d] *
+ ls->topologies[n].nthreads[1][d] >
+ ls->npoints[d]);
+ }
+ assert (tiling != 0); /* this can't be? */
if (tiling == 0) {
/* Always allow at least one tiling */
tiling = 1;
@@ -541,10 +555,12 @@ lc_statset_init (lc_statset_t * restrict const ls,
/* Initialise statistics */
ls->time_count = 0.0;
+ ls->time_count_init = 0.0;
ls->time_setup_sum = 0.0;
ls->time_setup_sum2 = 0.0;
ls->time_calc_sum = 0.0;
ls->time_calc_sum2 = 0.0;
+ ls->time_calc_init = 0.0;
/* Append to loop statistics list */
ls->next = lm->statset_list;
@@ -921,10 +937,12 @@ lc_control_finish (lc_control_t * restrict const lc)
lc_statset_t * restrict const ls = lc->statset;
int ignore_iteration;
+ int first_iteration;
#pragma omp single copyprivate (ignore_iteration)
{
DECLARE_CCTK_PARAMETERS;
ignore_iteration = ignore_initial_overhead && lt->time_count == 0.0;
+ first_iteration = lt->time_count_init == 0.0;
}
/* Add a barrier to catch load imbalances */
@@ -938,8 +956,7 @@ lc_control_finish (lc_control_t * restrict const lc)
double const time_setup_end = time_calc_begin;
double const time_setup_begin = lc->time_setup_begin;
- double const time_setup_sum =
- ignore_iteration ? 0.0 : time_setup_end - time_setup_begin;
+ double const time_setup_sum = time_setup_end - time_setup_begin;
double const time_setup_sum2 = pow (time_setup_sum, 2);
double const time_calc_sum = time_calc_end - time_calc_begin;
@@ -949,20 +966,24 @@ lc_control_finish (lc_control_t * restrict const lc)
#pragma omp critical
{
lt->time_count += 1.0;
+ if (first_iteration) lt->time_count_init += 1.0;
- lt->time_setup_sum += time_setup_sum;
- lt->time_setup_sum2 += time_setup_sum2;
+ if (! ignore_iteration) lt->time_setup_sum += time_setup_sum;
+ if (! ignore_iteration) lt->time_setup_sum2 += time_setup_sum2;
lt->time_calc_sum += time_calc_sum;
lt->time_calc_sum2 += time_calc_sum2;
+ if (first_iteration) lt->time_calc_init += time_calc_sum;
ls->time_count += 1.0;
+ if (first_iteration) ls->time_count_init += 1.0;
- ls->time_setup_sum += time_setup_sum;
- ls->time_setup_sum2 += time_setup_sum2;
+ if (! ignore_iteration) ls->time_setup_sum += time_setup_sum;
+ if (! ignore_iteration) ls->time_setup_sum2 += time_setup_sum2;
ls->time_calc_sum += time_calc_sum;
ls->time_calc_sum2 += time_calc_sum2;
+ if (first_iteration) ls->time_calc_init += time_calc_sum;
}
#pragma omp master
@@ -1010,57 +1031,178 @@ stddev (double const c, double const s, double const s2)
+/* Output statistics */
+void
+lc_printstats (void);
void
-lc_printstats (CCTK_ARGUMENTS)
+lc_printstats (void)
{
DECLARE_CCTK_PARAMETERS;
+ printf ("LoopControl timing statistics:\n");
+
+ double total_calc_time = 0.0;
+ for (lc_statmap_t * lm = lc_statmap_list; lm; lm = lm->next) {
+ for (lc_statset_t * ls = lm->statset_list; ls; ls = ls->next) {
+ for (lc_stattime_t * lt = ls->stattime_list; lt; lt = lt->next) {
+ total_calc_time += lt->time_calc_sum;
+ }
+ }
+ }
+
+ double total_saved = 0.0;
int nmaps = 0;
for (lc_statmap_t * lm = lc_statmap_list; lm; lm = lm->next) {
- printf ("statmap #%d \"%s\":\n",
- nmaps,
- lm->name);
+
+ double calc_time = 0.0;
+ for (lc_statset_t * ls = lm->statset_list; ls; ls = ls->next) {
+ for (lc_stattime_t * lt = ls->stattime_list; lt; lt = lt->next) {
+ calc_time += lt->time_calc_sum;
+ }
+ }
+ if (calc_time < printstats_threshold / 100.0 * total_calc_time) continue;
+
+ if (printstats_verbosity >= 1) {
+ printf ("Loop #%d \"%s\":\n",
+ nmaps,
+ lm->name);
+ }
+ double lm_sum_count = 0.0;
+ double lm_sum_setup = 0.0;
+ double lm_sum_calc = 0.0;
+ double lm_sum_count_init = 0.0;
+ double lm_sum_init = 0.0;
+ double lm_sum_improv = 0.0;
int nsets = 0;
for (lc_statset_t * ls = lm->statset_list; ls; ls = ls->next) {
- printf (" statset #%d nthreads=%d npoints=[%d,%d,%d]\n",
- nsets,
- ls->num_threads, ls->npoints[0], ls->npoints[1], ls->npoints[2]);
- double sum_count = 0.0;
- double sum_setup = 0.0;
- double sum_calc = 0.0;
- double min_calc = DBL_MAX;
- int imin_calc = -1;
+ if (printstats_verbosity >= 2) {
+ printf (" Parameter set #%d nthreads=%d npoints=[%d,%d,%d]\n",
+ nsets,
+ ls->num_threads, ls->npoints[0], ls->npoints[1], ls->npoints[2]);
+ }
+ double sum_count = 0.0;
+ double sum_setup = 0.0;
+ double sum_calc = 0.0;
+ double sum_count_init = 0.0;
+ double sum_init = 0.0;
+ double min_calc = DBL_MAX;
+ int imin_calc = -1;
+ double max_calc = 0.0;
+ int imax_calc = -1;
int ntimes = 0;
for (lc_stattime_t * lt = ls->stattime_list; lt; lt = lt->next) {
- printf (" stattime #%d topology=%d [%d,%d,%d]x[%d,%d,%d] tiling=[%d,%d,%d]\n",
- ntimes,
- lt->state.topology,
- lt->inthreads, lt->jnthreads, lt->knthreads,
- lt->inithreads, lt->jnithreads, lt->knithreads,
- lt->inpoints, lt->jnpoints, lt->knpoints);
+ if (printstats_verbosity >= 3) {
+ printf (" Configuration #%d topology=%d [%d,%d,%d]x[%d,%d,%d] tiling=[%d,%d,%d]\n",
+ ntimes,
+ lt->state.topology,
+ lt->inthreads, lt->jnthreads, lt->knthreads,
+ lt->inithreads, lt->jnithreads, lt->knithreads,
+ lt->inpoints, lt->jnpoints, lt->knpoints);
+ }
double const count = lt->time_count;
double const setup = lt->time_setup_sum / count;
double const calc = lt->time_calc_sum / count;
- printf (" count: %g setup: %g calc: %g\n",
- count, setup, calc);
- sum_count += lt->time_count;
- sum_setup += lt->time_setup_sum;
- sum_calc += lt->time_calc_sum;
+ double const init = lt->time_calc_init / lt->time_count_init;
+ if (printstats_verbosity >= 3) {
+ printf (" count: %g setup: %g first: %g calc: %g\n",
+ count, setup, init, calc);
+ }
+ sum_count += lt->time_count;
+ sum_setup += lt->time_setup_sum;
+ sum_calc += lt->time_calc_sum;
+ sum_count_init += lt->time_count_init;
+ sum_init += lt->time_calc_init;
if (calc < min_calc) {
- min_calc = calc;
+ min_calc = calc;
imin_calc = ntimes;
}
+ if (calc > max_calc) {
+ max_calc = calc;
+ imax_calc = ntimes;
+ }
++ ntimes;
}
- double const avg_calc = sum_calc / sum_count;
- printf (" total count: %g total setup: %g total calc: %g\n",
- sum_count, sum_setup, sum_calc);
- printf (" avg calc: %g min calc: %g (#%d)\n",
- avg_calc, min_calc, imin_calc);
+ double const init_calc = sum_init / sum_count_init;
+ double const avg_calc = sum_calc / sum_count;
+ double const saved = (init_calc - avg_calc) * sum_count;
+ double const improv = (init_calc - min_calc) / init_calc;
+ if (printstats_verbosity >= 2) {
+ printf (" total count: %g total setup: %g total calc: %g\n",
+ sum_count, sum_setup, sum_calc);
+ printf (" avg calc: %g min calc: %g (#%d) max calc: %g (#%d)\n",
+ avg_calc, min_calc, imin_calc, max_calc, imax_calc);
+ if (printstats_verbosity < 3) {
+ int ntimes = 0;
+ for (lc_stattime_t * lt = ls->stattime_list; lt; lt = lt->next) {
+ if (ntimes == imin_calc || ntimes == imax_calc) {
+ printf (" #%d: topology=%d [%d,%d,%d]x[%d,%d,%d] tiling=[%d,%d,%d]\n",
+ ntimes,
+ lt->state.topology,
+ lt->inthreads, lt->jnthreads, lt->knthreads,
+ lt->inithreads, lt->jnithreads, lt->knithreads,
+ lt->inpoints, lt->jnpoints, lt->knpoints);
+ }
+ ++ ntimes;
+ }
+ }
+ printf (" first calc: %g improvement: %.0f%% saved: %g\n",
+ init_calc, 100.0*improv, saved);
+ }
+ lm_sum_count += sum_count;
+ lm_sum_setup += sum_setup;
+ lm_sum_calc += sum_calc;
+ lm_sum_count_init += sum_count_init;
+ lm_sum_init += sum_init;
+ lm_sum_improv += improv * sum_count;
++ nsets;
}
+ double const init_calc = lm_sum_init / lm_sum_count_init;
+ double const avg_calc = lm_sum_calc / lm_sum_count;
+ double const saved = (init_calc - avg_calc) * lm_sum_count;
+ double const avg_improv = lm_sum_improv / lm_sum_count;
+ if (printstats_verbosity >= 1) {
+ printf (" total count: %g total setup: %g total calc: %g\n",
+ lm_sum_count, lm_sum_setup, lm_sum_calc);
+ printf (" avg calc: %g avg first calc: %g\n",
+ avg_calc, init_calc);
+ printf (" avg improvement: %.0f%% saved: %g seconds\n",
+ 100.0*avg_improv, saved);
+ }
+ total_saved += saved;
++ nmaps;
}
+
+ printf ("Total calculation time: %g seconds; total saved time: %g seconds\n",
+ total_calc_time, total_saved);
+}
+
+
+
+void
+lc_printstats_analysis (CCTK_ARGUMENTS);
+void
+lc_printstats_analysis (CCTK_ARGUMENTS)
+{
+ DECLARE_CCTK_ARGUMENTS;
+ DECLARE_CCTK_PARAMETERS;
+
+ static int last_output = 0;
+
+ int const current_time = CCTK_RunTime();
+ if (current_time >= last_output + 60.0 * printstats_every_minutes) {
+ last_output = current_time;
+ lc_printstats ();
+ }
+}
+
+void
+lc_printstats_terminate (CCTK_ARGUMENTS);
+void
+lc_printstats_terminate (CCTK_ARGUMENTS)
+{
+ DECLARE_CCTK_ARGUMENTS;
+
+ lc_printstats ();
}
diff --git a/Carpet/LoopControl/src/loopcontrol.h b/Carpet/LoopControl/src/loopcontrol.h
index 70d90ebb5..f405f6878 100644
--- a/Carpet/LoopControl/src/loopcontrol.h
+++ b/Carpet/LoopControl/src/loopcontrol.h
@@ -2,7 +2,7 @@
#define LC_LOOPCONTROL_H
/* This file uses the namespace LC_* for macros and lc_* for C
- identifiers. */
+ identifiers. */
#include <cctk.h>
@@ -64,11 +64,16 @@ typedef struct lc_stattime_t {
/* Data */
/* Statistics */
- double time_count; /* number of calls and threads */
- double time_setup_sum, time_setup_sum2; /* time spent setting up loops */
- double time_calc_sum, time_calc_sum2; /* time spent iterating */
+ /* number of calls and threads */
+ double time_count, time_count_init;
+ /* time spent setting up loops */
+ double time_setup_sum, time_setup_sum2;
+ /* time spent iterating */
+ double time_calc_sum, time_calc_sum2;
+ double time_calc_init; /* time for first calculation */
- double last_updated; /* wall time tag */
+ /* wall time tag */
+ double last_updated;
} lc_stattime_t;
@@ -103,9 +108,13 @@ typedef struct lc_statset_t {
lc_stattime_t * stattime_list;
/* Statistics */
- double time_count; /* number of calls and threads */
- double time_setup_sum, time_setup_sum2; /* time spent setting up loops */
- double time_calc_sum, time_calc_sum2; /* time spent iterating */
+ /* number of calls and threads */
+ double time_count, time_count_init;
+ /* time spent setting up loops */
+ double time_setup_sum, time_setup_sum2;
+ /* time spent iterating */
+ double time_calc_sum, time_calc_sum2;
+ double time_calc_init; /* time for first calculation */
} lc_statset_t;
@@ -347,7 +356,7 @@ lc_control_finish (lc_control_t * restrict lc);
} while (0)
/* Pre- and post loop statements are inserted around the innermost
- loop, which is executed serially. By default these are empty. */
+ loop, which is executed serially. By default these are empty. */
#define LC_PRELOOP_STATEMENTS
#define LC_POSTLOOP_STATEMENTS