about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- Carpet/LoopControl/README 4
-rw-r--r-- Carpet/LoopControl/param.ccl 16
-rw-r--r-- Carpet/LoopControl/schedule.ccl 10
-rw-r--r-- Carpet/LoopControl/src/lc_auto.c 4
-rw-r--r-- Carpet/LoopControl/src/lc_siman.c 4
-rw-r--r-- Carpet/LoopControl/src/loopcontrol.c 60
-rw-r--r-- Carpet/LoopControl/src/loopcontrol_fortran.h 2
7 files changed, 80 insertions, 20 deletions
diff --git a/Carpet/LoopControl/README b/Carpet/LoopControl/README
index 231398bb0..50e2eacb5 100644
--- a/Carpet/LoopControl/README
+++ b/Carpet/LoopControl/README
@@ -79,3 +79,7 @@ Combine several strategies:
- Note that a full sampling is too expensive.
- Use a hierarchical strategy for this?
- Use random sampling for this?
+
+5. Offer the parallelisation mechanism, or the tiling mechanism
+ separately, without actually looping over the array or over the
+ tiles.
diff --git a/Carpet/LoopControl/param.ccl b/Carpet/LoopControl/param.ccl
index f49e73b37..5875c8dcc 100644
--- a/Carpet/LoopControl/param.ccl
+++ b/Carpet/LoopControl/param.ccl
@@ -2,6 +2,10 @@
# General options
+BOOLEAN printstats "Output timing statistics at shutdown" STEERABLE=recover
+{
+} "no"
+
BOOLEAN verbose "Verbosity" STEERABLE=always
{
} "no"
@@ -56,6 +60,14 @@ CCTK_INT lc_knpoints "Number of grid points in the k-direction" STEERABLE=recove
+# Use legacy configuration
+
+BOOLEAN legacy_init "Initialise with legacy configuration (usually slower)" STEERABLE=recover
+{
+} "no"
+
+
+
# Automatic: simple cycling
BOOLEAN cycle_j_tilings "Cycle through all available tilings in the j-direction" STEERABLE=recover
@@ -118,6 +130,10 @@ CCTK_REAL maximum_setup_overhead "Maximum allowable administrative overhead" STE
0.0:* :: ""
} 0.01
+BOOLEAN ignore_initial_overhead "Ignore the overhead from the initial setup" STEERABLE=recover
+{
+} "yes"
+
CCTK_REAL probability_small_jump "Probability for a small jump once a local minimum has been reached" STEERABLE=always
{
0.0:1.0 :: ""
diff --git a/Carpet/LoopControl/schedule.ccl b/Carpet/LoopControl/schedule.ccl
index 56204414d..fc14c7b0d 100644
--- a/Carpet/LoopControl/schedule.ccl
+++ b/Carpet/LoopControl/schedule.ccl
@@ -1,6 +1,8 @@
# Schedule definitions for thorn LoopControl
-SCHEDULE lc_printstats AT terminate
-{
- LANG: C
-} "Output loop control statistics"
+if (printstats) {
+ SCHEDULE lc_printstats AT terminate
+ {
+ LANG: C
+ } "Output loop control statistics"
+}
diff --git a/Carpet/LoopControl/src/lc_auto.c b/Carpet/LoopControl/src/lc_auto.c
index 96c6b39d7..fcfecc47b 100644
--- a/Carpet/LoopControl/src/lc_auto.c
+++ b/Carpet/LoopControl/src/lc_auto.c
@@ -6,6 +6,10 @@
#include <cctk.h>
#include <cctk_Parameters.h>
+#ifdef HAVE_TGMATH_H
+# include <tgmath.h>
+#endif
+
#include "lc_siman.h"
#include "lc_auto.h"
diff --git a/Carpet/LoopControl/src/lc_siman.c b/Carpet/LoopControl/src/lc_siman.c
index d34d3da0a..d9e11839e 100644
--- a/Carpet/LoopControl/src/lc_siman.c
+++ b/Carpet/LoopControl/src/lc_siman.c
@@ -30,6 +30,10 @@
#include <cctk.h>
+#ifdef HAVE_TGMATH_H
+# include <tgmath.h>
+#endif
+
#include "lc_siman.h"
static inline
diff --git a/Carpet/LoopControl/src/loopcontrol.c b/Carpet/LoopControl/src/loopcontrol.c
index 72fd3e40e..6f420e0c1 100644
--- a/Carpet/LoopControl/src/loopcontrol.c
+++ b/Carpet/LoopControl/src/loopcontrol.c
@@ -15,6 +15,10 @@
#include <cctk.h>
#include <cctk_Parameters.h>
+#ifdef HAVE_TGMATH_H
+# include <tgmath.h>
+#endif
+
#include "loopcontrol.h"
#include "lc_auto.h"
@@ -383,6 +387,7 @@ lc_statset_init (lc_statset_t * restrict const ls,
ls->topologies[n].nthreads[2]);
}
}
+ assert (ls->ntopologies > 0);
/*** Tilings ****************************************************************/
@@ -398,28 +403,24 @@ lc_statset_init (lc_statset_t * restrict const ls,
printf ("Dimension %d: %d points\n", d, ls->npoints[d]);
}
ls->tilings[d] = malloc (maxntilings * sizeof * ls->tilings[d]);
- int ntilings;
find_tiling_specifications
(ls->tilings[d], maxntilings, & ls->ntilings[d], ls->npoints[d]);
-#if 0
- ls->tilings[d] =
- realloc (ls->tilings[d], ls->ntilings[d] * sizeof * ls->tilings[d]);
-#endif
ls->topology_ntilings[d] =
malloc (ls->ntopologies * sizeof * ls->topology_ntilings[d]);
for (int n = 0; n < ls->ntopologies; ++n) {
int tiling;
- for (tiling = 0; tiling < ls->ntilings[d]; ++tiling) {
+ for (tiling = 1; tiling < ls->ntilings[d]; ++tiling) {
if (ls->tilings[d][tiling].npoints * ls->topologies[n].nthreads[d] >
ls->npoints[d])
{
break;
}
}
- ls->topology_ntilings[d][n] = tiling;
+ /* Always allow at least one tiling */
+ ls->topology_ntilings[d][n] = tiling == 0 ? 1 : tiling;
}
if (debug) {
- printf (" Found %d possible tilings\n", ntilings);
+ printf (" Found %d possible tilings\n", ls->ntilings[d]);
printf (" ");
for (int n = 0; n < ls->ntilings[d]; ++n) {
printf (" %d", ls->tilings[d][n].npoints);
@@ -605,8 +606,22 @@ lc_control_init (lc_control_t * restrict const lc,
} else {
/* Split in the k direction */
- state.topology = ls->ntopologies - 1;
+ for (state.topology = ls->ntopologies - 1;
+ state.topology >= 0;
+ -- state.topology)
+ {
+ int have_tilings = 1;
+ for (int d=0; d<3; ++d) {
+ have_tilings = have_tilings &&
+ ls->topology_ntilings[d][state.topology] > 0;
+ }
+ if (have_tilings) break;
+ }
+ if (state.topology < 0) {
+ assert (0);
+ CCTK_WARN (CCTK_WARN_ABORT, "grid too small");
+ }
}
/* Select tiling */
@@ -616,6 +631,7 @@ lc_control_init (lc_control_t * restrict const lc,
state.tiling[0] = -1;
} else {
/* as many points as possible */
+ assert (state.topology >= 0);
state.tiling[0] = ls->topology_ntilings[0][state.topology] - 1;
}
@@ -626,7 +642,12 @@ lc_control_init (lc_control_t * restrict const lc,
if (cycle_j_tilings) {
/* cycle through all tilings */
static int count = 0;
+ assert (state.topology >= 0);
state.tiling[1] = (count ++) % ls->topology_ntilings[1][state.topology];
+ } else if (legacy_init) {
+ /* as many points as possible */
+ assert (state.topology >= 0);
+ state.tiling[1] = ls->topology_ntilings[1][state.topology] - 1;
} else {
/* as few points as possible */
state.tiling[1] = 0;
@@ -638,6 +659,7 @@ lc_control_init (lc_control_t * restrict const lc,
state.tiling[2] = -1;
} else {
/* as many points as possible */
+ assert (state.topology >= 0);
state.tiling[2] = ls->topology_ntilings[2][state.topology] - 1;
}
@@ -752,6 +774,15 @@ lc_control_init (lc_control_t * restrict const lc,
void
lc_control_finish (lc_control_t * restrict const lc)
{
+ lc_stattime_t * restrict const lt = lc->stattime;
+ lc_statset_t * restrict const ls = lc->statset;
+
+ int ignore_iteration;
+ _Pragma ("omp single copyprivate (ignore_iteration)") {
+ DECLARE_CCTK_PARAMETERS;
+ ignore_iteration = ignore_initial_overhead && lt->time_count == 0.0;
+ }
+
/* Timer */
double const time_calc_end = omp_get_wtime();
double const time_calc_begin = lc->time_calc_begin;
@@ -759,15 +790,14 @@ lc_control_finish (lc_control_t * restrict const lc)
double const time_setup_end = time_calc_begin;
double const time_setup_begin = lc->time_setup_begin;
- double const time_setup_sum = time_setup_end - time_setup_begin;
+ double const time_setup_sum =
+ ignore_iteration ? 0.0 : time_setup_end - time_setup_begin;
double const time_setup_sum2 = pow (time_setup_sum, 2);
double const time_calc_sum = time_calc_end - time_calc_begin;
double const time_calc_sum2 = pow (time_calc_sum, 2);
/* Update statistics */
- lc_stattime_t * restrict const lt = lc->stattime;
- lc_statset_t * restrict const ls = lc->statset;
_Pragma ("omp critical") {
lt->time_count += 1.0;
@@ -831,7 +861,6 @@ void
lc_printstats (CCTK_ARGUMENTS)
{
DECLARE_CCTK_PARAMETERS;
- if (! verbose) return;
int nmaps = 0;
for (lc_statmap_t * lm = lc_statmap_list; lm; lm = lm->next) {
@@ -868,10 +897,11 @@ lc_printstats (CCTK_ARGUMENTS)
}
++ ntimes;
}
+ double const avg_calc = sum_calc / sum_count;
printf (" total count: %g total setup: %g total calc: %g\n",
sum_count, sum_setup, sum_calc);
- printf (" min calc: %g (#%d)\n",
- min_calc, imin_calc);
+ printf (" avg calc: %g min calc: %g (#%d)\n",
+ avg_calc, min_calc, imin_calc);
++ nsets;
}
++ nmaps;
diff --git a/Carpet/LoopControl/src/loopcontrol_fortran.h b/Carpet/LoopControl/src/loopcontrol_fortran.h
index e80bf358f..7edf22d84 100644
--- a/Carpet/LoopControl/src/loopcontrol_fortran.h
+++ b/Carpet/LoopControl/src/loopcontrol_fortran.h
@@ -1,4 +1,4 @@
-/* -*-f90-mode-*- */
+/* -*-f90-*- */
#ifndef LOOPCONTROL_FORTRAN_H
#define LOOPCONTROL_FORTRAN_H