aboutsummaryrefslogtreecommitdiff
path: root/Carpet
diff options
context:
space:
mode:
authorErik Schnetter <schnetter@cct.lsu.edu>2007-10-10 13:39:00 +0000
committerErik Schnetter <schnetter@cct.lsu.edu>2007-10-10 13:39:00 +0000
commit3215a191496ef0f9807d3b574151e91ebc27ca8a (patch)
tree8536bd7c3c3f0ff0a44d2a7b030b5881b0179b8e /Carpet
parente15f981b761acb114010667880ef59fd508ee41a (diff)
LoopControl: Big update, introducing a hill climbing algorithm
darcs-hash:20071010133923-dae7b-a3406485d61ab795191655c89a16e2ae2c487978.gz
Diffstat (limited to 'Carpet')
-rw-r--r--Carpet/LoopControl/README72
-rw-r--r--Carpet/LoopControl/param.ccl68
-rw-r--r--Carpet/LoopControl/src/lc_auto.c35
-rw-r--r--Carpet/LoopControl/src/lc_auto.h10
-rw-r--r--Carpet/LoopControl/src/lc_hill.c324
-rw-r--r--Carpet/LoopControl/src/lc_hill.h38
-rw-r--r--Carpet/LoopControl/src/lc_siman.c6
-rw-r--r--Carpet/LoopControl/src/loopcontrol.c272
-rw-r--r--Carpet/LoopControl/src/loopcontrol.h94
-rw-r--r--Carpet/LoopControl/src/make.code.defn2
10 files changed, 805 insertions, 116 deletions
diff --git a/Carpet/LoopControl/README b/Carpet/LoopControl/README
index 00a9f04d1..231398bb0 100644
--- a/Carpet/LoopControl/README
+++ b/Carpet/LoopControl/README
@@ -7,3 +7,75 @@ Purpose of the thorn:
Iterate over multi-dimensional arrays in an efficient manner, using
OpenMP (if available) and cach-aware loop tiling.
+
+
+
+Comments and Ideas:
+
+Simulated annealing: Does not seem to work reliably. Needs many
+iterations, gets stuck in local minima. Does not handle topology
+changes well. Doesn't take past performance into account.
+
+
+
+Rewrite in C++? This would simplify the data types.
+
+
+
+Try loop skewing (see John Shalf's paper).
+
+
+
+Assumption: Each iteration is cheap, so that gradual tuning works.
+
+Design choice: No explicit tuning, works as black box.
+
+Assumpton: There are so many configurations (which change at run time)
+and so many parameter choices (which would need to be tested for each
+configuration) that it is not feasible to test all possibilities.
+
+
+
+
+Idea for a better algorithm:
+
+Combine several strategies:
+
+1.a. Robust default setting:
+ - Topology: split in z direction
+ - Tiling: [N,1,] (test this more)
+
+2.a. Local 1D tiling optimisation (based on line searches):
+ - Globally define some maximum step size
+ - Choose a direction (i, j, or k)
+ - Determine a search interval in that direction
+ - Use exponential search to find interval boundaries
+ - Use arithmetic search to find minimum
+
+2.b. Local full tiling optimisation:
+ - Perform a series of 1D optimisations, until a local minimum in
+ for all directions has been found
+
+2.c. Change topology:
+ - Choose a new, "nearby" tiling
+ - Start with a good tiling from the previous topology
+ - Then perform a full tiling optimisation
+
+2.d. Choose a random new tiling
+ - Then perform a full tiling optimisation
+
+2.e. Choose a random new topology
+ - Then choose a random new tiling
+
+3.a. Whenever a test is done, repeat it a few times to avoid random
+ events
+ - Maybe don't repeat it right away, but wait some time
+
+3.b. If a value is old, forget it with a certain probability
+ - Need to remember a "last updated" field with statistics
+
+4.a. Offer an option to sample a wide range of configurations to give
+ the user an overview
+ - Note that a full sampling is too expensive.
+ - Use a hierarchical strategy for this?
+ - Use random sampling for this?
diff --git a/Carpet/LoopControl/param.ccl b/Carpet/LoopControl/param.ccl
index 009ee76e5..f49e73b37 100644
--- a/Carpet/LoopControl/param.ccl
+++ b/Carpet/LoopControl/param.ccl
@@ -56,7 +56,7 @@ CCTK_INT lc_knpoints "Number of grid points in the k-direction" STEERABLE=recove
-# Automatic
+# Automatic: simple cycling
BOOLEAN cycle_j_tilings "Cycle through all available tilings in the j-direction" STEERABLE=recover
{
@@ -64,41 +64,91 @@ BOOLEAN cycle_j_tilings "Cycle through all available tilings in the j-direction"
-BOOLEAN use_simulated_annealing "Find a good loop configuration through simulated annealing"
+# Automatic: simulated annealing
+
+BOOLEAN use_simulated_annealing "Find a good loop configuration through simulated annealing" STEERABLE=recover
{
} "no"
-CCTK_INT siman_iters_fixed_T ""
+CCTK_INT siman_iters_fixed_T "" STEERABLE=recover
{
1:* :: ""
} 1
-CCTK_REAL siman_probability_change_topology ""
+CCTK_REAL siman_probability_change_topology "" STEERABLE=recover
{
0:1 :: ""
} 0.1
-CCTK_REAL siman_step_size ""
+CCTK_REAL siman_step_size "" STEERABLE=recover
{
(1.0:* :: ""
} 3.0
-CCTK_REAL siman_k "energy scale"
+CCTK_REAL siman_k "energy scale" STEERABLE=recover
{
(0:* :: ""
} 1.0e-9
-CCTK_REAL siman_T_initial "initial variability"
+CCTK_REAL siman_T_initial "initial variability" STEERABLE=recover
{
(0:* :: ""
} 1.0
-CCTK_REAL siman_mu_T "speed"
+CCTK_REAL siman_mu_T "speed" STEERABLE=recover
{
(0:* :: ""
} 1.005
-CCTK_REAL siman_T_min "stopping criterion"
+CCTK_REAL siman_T_min "stopping criterion" STEERABLE=recover
{
(0:* :: ""
} 0.01
+
+
+
+# Automatic: random restart hill climbing
+
+BOOLEAN use_random_restart_hill_climbing "http://en.wikipedia.org/wiki/Hill_climbing http://en.wikipedia.org/wiki/Tabu_search" STEERABLE=always
+{
+} "no"
+
+CCTK_REAL maximum_setup_overhead "Maximum allowable administrative overhead" STEERABLE=always
+{
+ 0.0:* :: ""
+} 0.01
+
+CCTK_REAL probability_small_jump "Probability for a small jump once a local minimum has been reached" STEERABLE=always
+{
+ 0.0:1.0 :: ""
+} 0.1
+
+CCTK_INT small_jump_distance "Maximum distance for a small jump" STEERABLE=always
+{
+ 0:* :: ""
+} 3
+
+CCTK_REAL probability_random_jump "Probability for a random jump once a local minimum has been reached" STEERABLE=always
+{
+ 0.0:1.0 :: ""
+} 0.01
+
+CCTK_INT max_jump_attempts "Maximum number of attempts to find a random unknown location" STEERABLE=always
+{
+ 0:* :: ""
+} 10
+
+CCTK_REAL immediate_overhead_threshold "The maximum overhead (ratio of current to best known time) allowed during an excursion" STEERABLE=always
+{
+ 0.0:* :: ""
+} 1.0
+
+CCTK_REAL delayed_overhead_threshold "The maximum overhead (ratio of current to best known time) allowed during an excursion" STEERABLE=always
+{
+ 0.0:* :: ""
+} 0.1
+
+CCTK_INT overhead_threshold_delay "Number of steps in an excursion before the delayed overhead criterion is applied" STEERABLE=always
+{
+ 0:* :: ""
+} 20
diff --git a/Carpet/LoopControl/src/lc_auto.c b/Carpet/LoopControl/src/lc_auto.c
index 73d9a43fc..96c6b39d7 100644
--- a/Carpet/LoopControl/src/lc_auto.c
+++ b/Carpet/LoopControl/src/lc_auto.c
@@ -32,7 +32,7 @@ void
take_step (const gsl_rng *rng, void *xp_, double step_size)
{
DECLARE_CCTK_PARAMETERS;
- lc_auto_position_t * restrict const xp = xp_;
+ lc_state_t * restrict const xp = xp_;
if (gsl_rng_uniform (rng) < siman_probability_change_topology) {
xp->topology = gsl_rng_uniform_int (rng, statset->ntopologies);
}
@@ -45,8 +45,8 @@ static
double
distance (void *xp_, void *yp_)
{
- lc_auto_position_t const * restrict const xp = xp_;
- lc_auto_position_t const * restrict const yp = yp_;
+ lc_state_t const * restrict const xp = xp_;
+ lc_state_t const * restrict const yp = yp_;
double dist = 10 * (xp->topology != yp->topology);
for (int d=0; d<3; ++d) {
dist += fabs (xp->tiling[d] - yp->tiling[d]);
@@ -58,8 +58,8 @@ static
void
print_position (void *xp_)
{
- lc_auto_position_t const * restrict const xp = xp_;
- printf (" #%2d/[%3d,%3d,%3d]",
+ lc_state_t const * restrict const xp = xp_;
+ printf (" %2d/{%3d,%3d,%3d}",
xp->topology,
xp->tiling[0], xp->tiling[1], xp->tiling[2]);
}
@@ -68,8 +68,7 @@ print_position (void *xp_)
void
lc_auto_init (lc_statset_t * restrict const ls,
- int * restrict const topology,
- int tiling[3])
+ lc_state_t * restrict const state)
{
DECLARE_CCTK_PARAMETERS;
assert (! cycle_j_tilings); /* don't mix strategies */
@@ -91,11 +90,8 @@ lc_auto_init (lc_statset_t * restrict const ls,
gsl_rng_env_setup();
ls->auto_state->rng = gsl_rng_alloc (gsl_rng_default);
- /* Set initial position */
- ls->auto_state->position.topology = * topology;
- for (int d=0; d<3; ++d) {
- ls->auto_state->position.tiling[d] = tiling[d];
- }
+ /* Set initial state */
+ ls->auto_state->state = * state;
/* Initialise simulated annealing state */
statset = ls;
@@ -103,8 +99,8 @@ lc_auto_init (lc_statset_t * restrict const ls,
lc_siman_solve (NULL,
ls->auto_state->rng,
NULL, 0.0,
- take_step, distance, print_position,
- sizeof (lc_auto_position_t),
+ take_step, distance, verbose ? print_position : NULL,
+ sizeof (lc_state_t),
ls->auto_state->siman_params);
} else {
@@ -116,17 +112,14 @@ lc_auto_init (lc_statset_t * restrict const ls,
ls->auto_state->siman_state =
lc_siman_solve (ls->auto_state->siman_state,
ls->auto_state->rng,
- & ls->auto_state->position, ls->auto_state->time,
- take_step, distance, print_position,
- sizeof (lc_auto_position_t),
+ & ls->auto_state->state, ls->auto_state->time,
+ take_step, distance, verbose ? print_position : NULL,
+ sizeof (lc_state_t),
ls->auto_state->siman_params);
}
/* Set thread topology and tiling specification */
- * topology = ls->auto_state->position.topology;
- for (int d=0; d<3; ++d) {
- tiling[d] = ls->auto_state->position.tiling[d];
- }
+ * state = ls->auto_state->state;
} /* if not the first call */
}
diff --git a/Carpet/LoopControl/src/lc_auto.h b/Carpet/LoopControl/src/lc_auto.h
index a0c7b1798..a89db6aa8 100644
--- a/Carpet/LoopControl/src/lc_auto.h
+++ b/Carpet/LoopControl/src/lc_auto.h
@@ -8,18 +8,13 @@
-typedef struct lc_auto_position_t {
- int topology;
- int tiling[3];
-} lc_auto_position_t;
-
/* no typedef here; forward declcared in loopcontrol.h */
struct lc_auto_state_t {
lc_siman_params_t siman_params;
gsl_rng * rng;
lc_siman_state_t * siman_state;
- lc_auto_position_t position;
+ lc_state_t state;
double time;
};
@@ -27,8 +22,7 @@ struct lc_auto_state_t {
void
lc_auto_init (lc_statset_t * restrict const ls,
- int * restrict const topology,
- int tiling[3]);
+ lc_state_t * restrict const state);
void
lc_auto_finish (lc_statset_t * restrict const ls,
diff --git a/Carpet/LoopControl/src/lc_hill.c b/Carpet/LoopControl/src/lc_hill.c
new file mode 100644
index 000000000..f24b0efae
--- /dev/null
+++ b/Carpet/LoopControl/src/lc_hill.c
@@ -0,0 +1,324 @@
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <cctk.h>
+#include <cctk_Parameters.h>
+
+#include "loopcontrol.h"
+
+#include "lc_hill.h"
+
+
+
+static inline
+int
+imin (int const a, int const b)
+{
+ return a < b ? a : b;
+}
+
+static inline
+int
+imax (int const a, int const b)
+{
+ return a > b ? a : b;
+}
+
+static
+double
+drand (void)
+{
+ return rand() / (RAND_MAX + 1.0);
+}
+
+static
+int
+irand (int const imax)
+{
+ return rand() / (RAND_MAX + 1.0) * imax;
+}
+
+
+
+static
+double
+time_for_stattime (lc_stattime_t const * restrict const lt)
+{
+ assert (lt);
+ return lt->time_calc_sum / lt->time_count;
+}
+
+static
+double
+time_for_state (lc_statset_t const * restrict const ls,
+ lc_state_t const * restrict const state)
+{
+ lc_stattime_t const * restrict const lt = lc_stattime_find (ls, state);
+ assert (lt);
+ return time_for_stattime (lt);
+}
+
+
+
+void
+lc_hill_init (lc_statset_t * restrict const ls,
+ lc_state_t * const new_state)
+{
+ DECLARE_CCTK_PARAMETERS;
+
+ lc_hill_state_t * restrict lh = ls->hill_state;
+
+ /* Initialise state */
+ if (! lh) {
+ if (verbose) {
+ CCTK_INFO ("Hill climbing: Initialising");
+ }
+ ls->hill_state = malloc (sizeof * ls->hill_state);
+ lh = ls->hill_state;
+ lh->iteration = 0;
+ lh->have_best = 0;
+ lh->excursion_start = 0;
+ lh->have_previous = 0;
+ lh->state = * new_state;
+ return;
+ }
+
+ /* If the overhead has become too large, do nothing. */
+ if (ls->time_setup_sum > maximum_setup_overhead * ls->time_calc_sum) {
+ /* Stay at the old state. */
+ * new_state = lh->state;
+ return;
+ }
+
+ ++ lh->iteration;
+
+ if (verbose) {
+ CCTK_VInfo (CCTK_THORNSTRING,
+ "Hill climbing: iter %d, state %2d/{%2d,%2d,%2d}, time %g",
+ lh->iteration,
+ lh->state.topology,
+ lh->state.tiling[0],
+ lh->state.tiling[1],
+ lh->state.tiling[2],
+ lh->time);
+ }
+
+ /* Test whether we have a new best time */
+ if (! lh->have_best || lh->time < lh->best_time) {
+ /* Remember this state */
+ if (verbose) {
+ CCTK_INFO ("Hill climbing: This is a new best time");
+ }
+ lh->best = lh->state;
+ lh->best_time = lh->time;
+ lh->have_best = 1;
+ lh->excursion_start = lh->iteration;
+ } else if (lh->have_best && lc_state_equal (& lh->state, & lh->best)) {
+ /* Update time for best state */
+ if (verbose) {
+ CCTK_INFO ("Hill climbing: Updating best time");
+ }
+ lh->best_time = lh->time;
+ }
+
+ /* Compare the time for the current state with the time for the
+ previous state. If the previous state was better, backtrack. */
+ if (lh->have_previous && lh->previous_time < lh->time) {
+ if (verbose) {
+ CCTK_INFO ("Hill climbing: Backtracking");
+ }
+ lh->state = lh->previous;
+ lh->time = lh->previous_time;
+ lh->have_previous = 0;
+ }
+
+ /* Give up if the current time is too bad */
+ if (lh->have_best) {
+ int const immediate_overhead =
+ lh->time > lh->best_time * (1.0 + immediate_overhead_threshold);
+ int const delayed_overhead =
+ lh->iteration > lh->excursion_start + overhead_threshold_delay &&
+ lh->time > lh->best_time * (1.0 + delayed_overhead_threshold);
+ if (immediate_overhead || delayed_overhead) {
+ if (verbose) {
+ CCTK_INFO ("Hill climbing: Reverting to best known state");
+ }
+ lh->excursion_start = lh->iteration;
+ lh->state = lh->best;
+ lh->time = lh->best_time;
+ }
+ }
+
+ /* Age the state */
+ lh->previous = lh->state;
+ lh->previous_time = lh->time;
+ lh->have_previous = 1;
+
+ search:;
+
+ /* Look which neighbours exist. */
+ typedef enum { nb_boundary, nb_missing, nb_exists } neighbour_t;
+ neighbour_t neighbours[3][2];
+ lc_state_t nb_state[3][2];
+ double nb_time[3][2];
+ lc_state_t * nb_nonexist_state[6];
+ int num_nonexist_states = 0;
+ lc_state_t * nb_minimum_time = NULL;
+ double minimum_time;
+ for (int d=0; d<3; ++d) {
+ for (int f=0; f<2; ++f) {
+ nb_state[d][f] = lh->state;
+ nb_state[d][f].tiling[d] += f ? + 1: -1;
+ int const ntilings = ls->topology_ntilings[d][nb_state[d][f].topology];
+ if (nb_state[d][f].tiling[d] < 0 ||
+ nb_state[d][f].tiling[d] >= ntilings)
+ {
+ neighbours[d][f] = nb_boundary;
+ } else {
+ lc_stattime_t const * restrict const nb_lt =
+ lc_stattime_find (ls, & nb_state[d][f]);
+ if (! nb_lt) {
+ neighbours[d][f] = nb_missing;
+ nb_nonexist_state[num_nonexist_states++] = & nb_state[d][f];
+ } else {
+ neighbours[d][f] = nb_exists;
+ nb_time[d][f] = time_for_stattime (nb_lt);
+ if (! nb_minimum_time || nb_time[d][f] < minimum_time) {
+ nb_minimum_time = & nb_state[d][f];
+ minimum_time = nb_time[d][f];
+ }
+ }
+ }
+ }
+ }
+
+ /* If not all neighbours exist, then choose a random neighbour and
+ move there. */
+ if (num_nonexist_states > 0) {
+ if (verbose) {
+ CCTK_INFO ("Hill climbing: Examining a new state");
+ }
+ int const choice = irand (num_nonexist_states);
+ lh->state = * nb_nonexist_state[choice];
+ * new_state = lh->state;
+ return;
+ }
+
+ /* All neighbours exist. Look whether we are in a local
+ minimum. */
+ assert (nb_minimum_time);
+ if (minimum_time >= lh->time) {
+ /* We are in a local minimum. */
+ if (verbose) {
+ CCTK_INFO ("Hill climbing: Local minimum reached");
+ }
+
+ /* Every so often take a small jump. */
+ if (drand() < probability_small_jump) {
+ /* Be curious, go somewhere nearby. */
+ if (verbose) {
+ CCTK_INFO ("Hill climbing: Making a small jump");
+ }
+ for (int ntries = 0; ntries < max_jump_attempts; ++ ntries) {
+ lc_state_t try_state = lh->state;
+ if (drand() < 0.25) {
+ /* Change the topology, but not the tiling. */
+ try_state.topology = irand (ls->ntopologies);
+ for (int d=0; d<3; ++d) {
+ if (try_state.tiling[d] >=
+ ls->topology_ntilings[d][try_state.topology])
+ {
+ /* The tiling doesn't fit for this new topology; don't
+ choose this topology. */
+ goto next_try;
+ }
+ }
+ } else {
+ /* Change the tiling a bit, but keep the topology */
+ for (int d=0; d<3; ++d) {
+ int const i0 =
+ imax (try_state.tiling[d] - small_jump_distance, 0);
+ int const i1 =
+ imin (try_state.tiling[d] + small_jump_distance + 1,
+ ls->topology_ntilings[d][try_state.topology]);
+ try_state.tiling[d] = i0 + irand (i1 - i0);
+ }
+ }
+ if (! lc_stattime_find (ls, & try_state)) {
+ lh->state = try_state;
+ * new_state = lh->state;
+ return;
+ }
+ next_try:;
+ }
+ /* Don't jump after all. */
+ }
+
+ /* Every so often take a random jump. */
+ if (drand() < probability_random_jump) {
+ /* Be adventurous, go somewhere unknown. */
+ if (verbose) {
+ CCTK_INFO ("Hill climbing: Jumping randomly");
+ }
+ for (int ntries = 0; ntries < max_jump_attempts; ++ ntries) {
+ lc_state_t try_state;
+ try_state.topology = irand (ls->ntopologies);
+ for (int d=0; d<3; ++d) {
+ try_state.tiling[d] =
+ irand (ls->topology_ntilings[d][try_state.topology]);
+ }
+ if (! lc_stattime_find (ls, & try_state)) {
+ /* The new state is hitherto unknown, use it. */
+ lh->state = try_state;
+ lh->excursion_start = lh->iteration;
+ lh->have_previous = 0; /* disable backtracking */
+ * new_state = lh->state;
+ return;
+ }
+ }
+ /* Don't jump after all. */
+ }
+
+ /* If the current state is not the best state, give up and go
+ back. */
+ if (! lc_state_equal (& lh->state, & lh->best)) {
+ /* Revert to the best known state. */
+ if (verbose) {
+ CCTK_INFO ("Hill climbing: Reverting to best known state");
+ }
+ lh->state = lh->best;
+ lh->excursion_start = lh->iteration;
+ lh->have_previous = 0;
+ * new_state = lh->best;
+ return;
+ }
+
+ /* Be content, do nothing. */
+ if (verbose) {
+ CCTK_INFO ("Hill climbing: Resting");
+ }
+ * new_state = lh->state;
+ return;
+ }
+
+ /* One of the neighbours is better. Move to this neighbour, and
+ continue the search there. */
+ if (verbose) {
+ CCTK_INFO ("Hill climbing: Found a better neighbour, going there");
+ }
+ lh->state = * nb_minimum_time;
+ lh->time = minimum_time;
+ goto search;
+}
+
+
+
+void
+lc_hill_finish (lc_statset_t * restrict const ls,
+ lc_stattime_t const * restrict const lt)
+{
+ lc_hill_state_t * restrict const lh = ls->hill_state;
+
+ lh->time = time_for_stattime (lt);
+}
diff --git a/Carpet/LoopControl/src/lc_hill.h b/Carpet/LoopControl/src/lc_hill.h
new file mode 100644
index 000000000..7d418314b
--- /dev/null
+++ b/Carpet/LoopControl/src/lc_hill.h
@@ -0,0 +1,38 @@
+#ifndef LC_HILL_H
+#define LC_HILL_H
+
+#include <cctk.h>
+
+#include "loopcontrol.h"
+
+
+
+/* no typedef here; forward declcared in loopcontrol.h */
+struct lc_hill_state_t {
+ int iteration;
+
+ lc_state_t best;
+ double best_time;
+ int have_best;
+
+ int excursion_start; /* -1 if not on an excursion */
+
+ lc_state_t previous;
+ double previous_time;
+ int have_previous;
+
+ lc_state_t state;
+ double time;
+};
+
+
+
+void
+lc_hill_init (lc_statset_t * restrict const ls,
+ lc_state_t * restrict const state);
+
+void
+lc_hill_finish (lc_statset_t * restrict const ls,
+ lc_stattime_t const * restrict const lt);
+
+#endif /* #ifndef LC_HILL_H */
diff --git a/Carpet/LoopControl/src/lc_siman.c b/Carpet/LoopControl/src/lc_siman.c
index a6fa22bc7..d34d3da0a 100644
--- a/Carpet/LoopControl/src/lc_siman.c
+++ b/Carpet/LoopControl/src/lc_siman.c
@@ -28,6 +28,8 @@
#include <gsl/gsl_machine.h>
#include <gsl/gsl_rng.h>
+#include <cctk.h>
+
#include "lc_siman.h"
static inline
@@ -39,11 +41,11 @@ double safe_exp (double x) /* avoid underflow errors for large uphill steps */
/* this structure contains internal state information for
lc_siman_solve */
-typedef enum { state_initial, state_first, state_looping } lc_state_t;
+typedef enum { state_initial, state_first, state_looping } lc_siman_location_t;
/* no typedef here; forward declcared in lc_siman.h */
struct lc_siman_state_t {
- lc_state_t state;
+ lc_siman_location_t state;
const gsl_rng *r;
lc_siman_step_t take_step;
lc_siman_metric_t distance;
diff --git a/Carpet/LoopControl/src/loopcontrol.c b/Carpet/LoopControl/src/loopcontrol.c
index 32661064b..72fd3e40e 100644
--- a/Carpet/LoopControl/src/loopcontrol.c
+++ b/Carpet/LoopControl/src/loopcontrol.c
@@ -12,10 +12,13 @@
# include <omp.h>
#endif
+#include <cctk.h>
#include <cctk_Parameters.h>
#include "loopcontrol.h"
+
#include "lc_auto.h"
+#include "lc_hill.h"
@@ -77,9 +80,9 @@ find_thread_topologies (lc_topology_t * restrict const topologies,
int const ni = nthreads/(nj*nk);
if (nthreads == ni*nj*nk) {
assert (* ntopologies < maxntopologies);
- topologies[* ntopologies].ni = ni;
- topologies[* ntopologies].nj = nj;
- topologies[* ntopologies].nk = nk;
+ topologies[* ntopologies].nthreads[0] = ni;
+ topologies[* ntopologies].nthreads[1] = nj;
+ topologies[* ntopologies].nthreads[2] = nk;
++ * ntopologies;
}
}
@@ -101,6 +104,7 @@ find_thread_topologies (lc_topology_t * restrict const topologies,
into two subdomains with [100+N, 100-N] points. This is avoided by
covering all possible tiling specifications in exponentially
growing step sizes. */
+#if 0
static
int tiling_compare (const void * const a, const void * const b)
{
@@ -154,28 +158,60 @@ find_tiling_specifications (lc_tiling_t * restrict const tilings,
/* Sort */
qsort (tilings, * ntilings, sizeof * tilings, tiling_compare);
}
+#endif
+
+static
+void
+find_tiling_specifications (lc_tiling_t * restrict const tilings,
+ const int maxntilings,
+ int * restrict const ntilings,
+ int const npoints)
+{
+ /* In order to reduce the number of possible tilings, require that
+ the step sizes differ by more than 10% */
+ double const distance_factor = 1.1;
+ /* For N grid points and a minimum spacing factor F, there are at
+ most log(N) / log(F) possible tilings. There will be fewer,
+ since the actual spacings will be rounded up to integers. */
+
+ * ntilings = 0;
+
+ int minnpoints = 0;
+ for (int n=1; n<npoints; ++n) {
+ if ((double) n > minnpoints * distance_factor) {
+ assert (* ntilings < maxntilings);
+ tilings[* ntilings].npoints = n;
+ minnpoints = n;
+ ++ * ntilings;
+ }
+ }
+
+ assert (* ntilings < maxntilings);
+ tilings[* ntilings].npoints = npoints;
+ minnpoints = npoints;
+ ++ * ntilings;
+}
/* Initialise control parameter set statistics */
-static
void
lc_stattime_init (lc_stattime_t * restrict const lt,
lc_statset_t * restrict const ls,
- int const topology,
- int const tiling[3])
+ lc_state_t const * restrict const state)
{
DECLARE_CCTK_PARAMETERS;
/* Check arguments */
assert (lt);
assert (ls);
+ assert (state);
/*** Topology ***************************************************************/
- lt->topology = topology;
+ lt->state.topology = state->topology;
- if (topology == -1) {
+ if (state->topology == -1) {
/* User-specified topology */
lt->inthreads = -1;
@@ -184,21 +220,21 @@ lc_stattime_init (lc_stattime_t * restrict const lt,
} else {
- assert (topology >= 0 && topology < ls->ntopologies);
+ assert (state->topology >= 0 && state->topology < ls->ntopologies);
- lt->inthreads = ls->topologies[lt->topology].ni;
- lt->jnthreads = ls->topologies[lt->topology].nj;
- lt->knthreads = ls->topologies[lt->topology].nk;
+ lt->inthreads = ls->topologies[lt->state.topology].nthreads[0];
+ lt->jnthreads = ls->topologies[lt->state.topology].nthreads[1];
+ lt->knthreads = ls->topologies[lt->state.topology].nthreads[2];
}
if (debug) {
printf ("Thread topology #%d [%d,%d,%d]\n",
- lt->topology, lt->inthreads, lt->jnthreads, lt->knthreads);
+ lt->state.topology, lt->inthreads, lt->jnthreads, lt->knthreads);
}
/* Assert thread topology consistency */
- if (lt->topology != -1) {
+ if (lt->state.topology != -1) {
assert (lt->inthreads >= 1);
assert (lt->jnthreads >= 1);
assert (lt->knthreads >= 1);
@@ -208,20 +244,21 @@ lc_stattime_init (lc_stattime_t * restrict const lt,
/*** Tilings ****************************************************************/
for (int d=0; d<3; ++d) {
- lt->tiling[d] = tiling[d];
- if (tiling[d] != -1) {
- assert (tiling[d] >= 0 && tiling[d] < ls->ntilings[d]);
+ lt->state.tiling[d] = state->tiling[d];
+ if (state->tiling[d] != -1) {
+ assert (state->tiling[d] >= 0 &&
+ state->tiling[d] < ls->topology_ntilings[d][lt->state.topology]);
}
}
- if (tiling[0] != -1) {
- lt->inpoints = ls->tilings[0][lt->tiling[0]].npoints;
+ if (state->tiling[0] != -1) {
+ lt->inpoints = ls->tilings[0][lt->state.tiling[0]].npoints;
}
- if (tiling[1] != -1) {
- lt->jnpoints = ls->tilings[1][lt->tiling[1]].npoints;
+ if (state->tiling[1] != -1) {
+ lt->jnpoints = ls->tilings[1][lt->state.tiling[1]].npoints;
}
- if (tiling[2] != -1) {
- lt->knpoints = ls->tilings[2][lt->tiling[2]].npoints;
+ if (state->tiling[2] != -1) {
+ lt->knpoints = ls->tilings[2][lt->state.tiling[2]].npoints;
}
if (debug) {
@@ -230,13 +267,13 @@ lc_stattime_init (lc_stattime_t * restrict const lt,
}
/* Assert tiling specification consistency */
- if (tiling[0] != -1) {
+ if (state->tiling[0] != -1) {
assert (lt->inpoints > 0);
}
- if (tiling[1] != -1) {
+ if (state->tiling[1] != -1) {
assert (lt->jnpoints > 0);
}
- if (tiling[2] != -1) {
+ if (state->tiling[2] != -1) {
assert (lt->knpoints > 0);
}
@@ -249,6 +286,10 @@ lc_stattime_init (lc_stattime_t * restrict const lt,
lt->time_calc_sum = 0.0;
lt->time_calc_sum2 = 0.0;
+ lt->last_updated = 0.0; /* never updated */
+
+
+
/* Append to loop statistics list */
/* _Pragma ("omp critical") { */
lt->next = ls->stattime_list;
@@ -256,38 +297,49 @@ lc_stattime_init (lc_stattime_t * restrict const lt,
/* } */
}
-static
lc_stattime_t *
-lc_stattime_find (lc_statset_t * restrict const ls,
- int const topology,
- int const tiling[3])
+lc_stattime_find (lc_statset_t const * restrict const ls,
+ lc_state_t const * restrict const state)
{
assert (ls);
lc_stattime_t * lt;
for (lt = ls->stattime_list; lt; lt = lt->next) {
- if (lt->topology == topology &&
- lt->tiling[0] == tiling[0] &&
- lt->tiling[1] == tiling[1] &&
- lt->tiling[2] == tiling[2])
- {
+ if (lc_state_equal (& lt->state, state)) {
+ break;
+ }
+ }
+
+ return lt;
+}
+
+lc_stattime_t *
+lc_stattime_find_create (lc_statset_t * restrict const ls,
+ lc_state_t const * restrict const state)
+{
+ assert (ls);
+
+ lc_stattime_t * lt;
+
+ for (lt = ls->stattime_list; lt; lt = lt->next) {
+ if (lc_state_equal (& lt->state, state)) {
break;
}
}
if (! lt) {
lt = malloc (sizeof * lt);
- lc_stattime_init (lt, ls, topology, tiling);
+ lc_stattime_init (lt, ls, state);
}
+ assert (lt);
return lt;
}
/* Initialise user parameter set statistics */
-static
void
lc_statset_init (lc_statset_t * restrict const ls,
lc_statmap_t * restrict const lm,
@@ -326,7 +378,9 @@ lc_statset_init (lc_statset_t * restrict const ls,
for (int n = 0; n < ls->ntopologies; ++n) {
printf (" %2d: %2d %2d %2d\n",
n,
- ls->topologies[n].ni, ls->topologies[n].nj, ls->topologies[n].nk);
+ ls->topologies[n].nthreads[0],
+ ls->topologies[n].nthreads[1],
+ ls->topologies[n].nthreads[2]);
}
}
@@ -344,14 +398,28 @@ lc_statset_init (lc_statset_t * restrict const ls,
printf ("Dimension %d: %d points\n", d, ls->npoints[d]);
}
ls->tilings[d] = malloc (maxntilings * sizeof * ls->tilings[d]);
+ int ntilings;
find_tiling_specifications
(ls->tilings[d], maxntilings, & ls->ntilings[d], ls->npoints[d]);
#if 0
ls->tilings[d] =
realloc (ls->tilings[d], ls->ntilings[d] * sizeof * ls->tilings[d]);
#endif
+ ls->topology_ntilings[d] =
+ malloc (ls->ntopologies * sizeof * ls->topology_ntilings[d]);
+ for (int n = 0; n < ls->ntopologies; ++n) {
+ int tiling;
+ for (tiling = 0; tiling < ls->ntilings[d]; ++tiling) {
+ if (ls->tilings[d][tiling].npoints * ls->topologies[n].nthreads[d] >
+ ls->npoints[d])
+ {
+ break;
+ }
+ }
+ ls->topology_ntilings[d][n] = tiling;
+ }
if (debug) {
- printf (" Found %d possible tilings\n", ls->ntilings[d]);
+ printf (" Found %d possible tilings\n", ntilings);
printf (" ");
for (int n = 0; n < ls->ntilings[d]; ++n) {
printf (" %d", ls->tilings[d][n].npoints);
@@ -365,11 +433,21 @@ lc_statset_init (lc_statset_t * restrict const ls,
/* Simulated annealing state */
ls->auto_state = NULL;
+ /* Hill climbing state */
+ ls->hill_state = NULL;
+
/* Initialise list */
ls->stattime_list = NULL;
+ /* Initialise statistics */
+ ls->time_count = 0.0;
+ ls->time_setup_sum = 0.0;
+ ls->time_setup_sum2 = 0.0;
+ ls->time_calc_sum = 0.0;
+ ls->time_calc_sum2 = 0.0;
+
/* Append to loop statistics list */
/* _Pragma ("omp critical") { */
ls->next = lm->statset_list;
@@ -377,9 +455,8 @@ lc_statset_init (lc_statset_t * restrict const ls,
/* } */
}
-static
lc_statset_t *
-lc_statset_find (lc_statmap_t * restrict const lm,
+lc_statset_find (lc_statmap_t const * restrict const lm,
int const num_threads,
int const npoints[3])
{
@@ -397,11 +474,34 @@ lc_statset_find (lc_statmap_t * restrict const lm,
}
}
+ return ls;
+}
+
+lc_statset_t *
+lc_statset_find_create (lc_statmap_t * restrict const lm,
+ int const num_threads,
+ int const npoints[3])
+{
+ assert (lm);
+
+ lc_statset_t * ls;
+
+ for (ls = lm->statset_list; ls; ls = ls->next) {
+ if (ls->num_threads == num_threads &&
+ ls->npoints[0] == npoints[0] &&
+ ls->npoints[1] == npoints[1] &&
+ ls->npoints[2] == npoints[2])
+ {
+ break;
+ }
+ }
+
if (! ls) {
ls = malloc (sizeof * ls);
lc_statset_init (ls, lm, num_threads, npoints);
}
+ assert (ls);
return ls;
}
@@ -472,7 +572,7 @@ lc_control_init (lc_control_t * restrict const lc,
npoints[1] = lc_max (jmax - jmin, 0);
npoints[2] = lc_max (kmax - kmin, 0);
- ls = lc_statset_find (lm, num_threads, npoints);
+ ls = lc_statset_find_create (lm, num_threads, npoints);
}
@@ -480,9 +580,9 @@ lc_control_init (lc_control_t * restrict const lc,
lc_stattime_t * restrict lt;
_Pragma ("omp single copyprivate (lt)") {
- /* Select topology */
+ lc_state_t state;
- int topology;
+ /* Select topology */
if (lc_inthreads != -1 || lc_jnthreads != -1 || lc_knthreads != -1)
{
@@ -490,69 +590,73 @@ lc_control_init (lc_control_t * restrict const lc,
if (lc_inthreads == -1 || lc_jnthreads == -1 || lc_knthreads == -1) {
CCTK_VWarn (CCTK_WARN_ABORT, __LINE__, __FILE__, CCTK_THORNSTRING,
- "Illegal thread topology [%d,%d,%d] specified\n",
+ "Illegal thread topology [%d,%d,%d] specified",
(int)lc_inthreads, (int)lc_jnthreads, (int)lc_knthreads);
}
if (lc_inthreads * lc_jnthreads * lc_knthreads != ls->num_threads) {
CCTK_VWarn (CCTK_WARN_ABORT, __LINE__, __FILE__, CCTK_THORNSTRING,
- "Specified thread topology [%d,%d,%d] is not compatible with the number of threads %d\n",
+ "Specified thread topology [%d,%d,%d] is not compatible with the number of threads %d",
(int)lc_inthreads, (int)lc_jnthreads, (int)lc_knthreads,
ls->num_threads);
}
- topology = -1;
+ state.topology = -1;
} else {
/* Split in the k direction */
- topology = ls->ntopologies - 1;
+ state.topology = ls->ntopologies - 1;
}
/* Select tiling */
- int tiling[3];
-
if (lc_inpoints != -1) {
/* User-specified tiling */
- tiling[0] = -1;
+ state.tiling[0] = -1;
} else {
- tiling[0] = ls->ntilings[0] - 1; /* as many points as possible */
+ /* as many points as possible */
+ state.tiling[0] = ls->topology_ntilings[0][state.topology] - 1;
}
if (lc_jnpoints != -1) {
/* User-specified tiling */
- tiling[1] = -1;
+ state.tiling[1] = -1;
} else {
if (cycle_j_tilings) {
/* cycle through all tilings */
static int count = 0;
- tiling[1] = (count ++) % ls->ntilings[1];
+ state.tiling[1] = (count ++) % ls->topology_ntilings[1][state.topology];
} else {
/* as few points as possible */
- tiling[1] = 0;
+ state.tiling[1] = 0;
}
}
if (lc_knpoints != -1) {
/* User-specified tiling */
- tiling[2] = -1;
+ state.tiling[2] = -1;
} else {
- tiling[2] = ls->ntilings[2] - 1; /* as many points as possible */
+ /* as many points as possible */
+ state.tiling[2] = ls->topology_ntilings[2][state.topology] - 1;
}
/* Use simulated annealing to find the best loop configuration */
if (use_simulated_annealing) {
- lc_auto_init (ls, & topology, tiling);
+ lc_auto_init (ls, & state);
+ }
+ /* Use hill climbing to find the best loop configuration */
+ if (use_random_restart_hill_climbing) {
+ lc_hill_init (ls, & state);
}
/* Find or create database entry */
- lt = lc_stattime_find (ls, topology, tiling);
+ lt = lc_stattime_find_create (ls, & state);
/* Topology */
- if (topology == -1) {
+ if (state.topology == -1) {
/* User-specified topology */
lt->inthreads = lc_inthreads;
lt->jnthreads = lc_jnthreads;
@@ -567,15 +671,15 @@ lc_control_init (lc_control_t * restrict const lc,
/* Tilings */
- if (tiling[0] == -1) {
+ if (state.tiling[0] == -1) {
/* User-specified tiling */
lt->inpoints = lc_inpoints;
}
- if (tiling[1] == -1) {
+ if (state.tiling[1] == -1) {
/* User-specified tiling */
lt->jnpoints = lc_jnpoints;
}
- if (tiling[2] == -1) {
+ if (state.tiling[2] == -1) {
/* User-specified tiling */
lt->knpoints = lc_knpoints;
}
@@ -663,6 +767,7 @@ lc_control_finish (lc_control_t * restrict const lc)
/* Update statistics */
lc_stattime_t * restrict const lt = lc->stattime;
+ lc_statset_t * restrict const ls = lc->statset;
_Pragma ("omp critical") {
lt->time_count += 1.0;
@@ -671,19 +776,35 @@ lc_control_finish (lc_control_t * restrict const lc)
lt->time_calc_sum += time_calc_sum;
lt->time_calc_sum2 += time_calc_sum2;
+
+ ls->time_count += 1.0;
+
+ ls->time_setup_sum += time_setup_sum;
+ ls->time_setup_sum2 += time_setup_sum2;
+
+ ls->time_calc_sum += time_calc_sum;
+ ls->time_calc_sum2 += time_calc_sum2;
}
+ _Pragma ("omp master") {
+ lt->last_updated = time_calc_end;
+ }
+
+ _Pragma ("omp barrier");
+
{
DECLARE_CCTK_PARAMETERS;
if (use_simulated_annealing) {
_Pragma ("omp single") {
- lc_statset_t * restrict const ls = lc->statset;
lc_auto_finish (ls, lt);
}
}
+ if (use_random_restart_hill_climbing) {
+ _Pragma ("omp single") {
+ lc_hill_finish (ls, lt);
+ }
+ }
}
-
- _Pragma ("omp barrier");
}
@@ -722,6 +843,7 @@ lc_printstats (CCTK_ARGUMENTS)
printf (" statset #%d nthreads=%d npoints=[%d,%d,%d]\n",
nsets,
ls->num_threads, ls->npoints[0], ls->npoints[1], ls->npoints[2]);
+ double sum_count = 0.0;
double sum_setup = 0.0;
double sum_calc = 0.0;
double min_calc = DBL_MAX;
@@ -730,20 +852,26 @@ lc_printstats (CCTK_ARGUMENTS)
for (lc_stattime_t * lt = ls->stattime_list; lt; lt = lt->next) {
printf (" stattime #%d topology=%d [%d,%d,%d] tiling=[%d,%d,%d]\n",
ntimes,
- lt->topology, lt->inthreads, lt->jnthreads, lt->knthreads,
+ lt->state.topology, lt->inthreads, lt->jnthreads, lt->knthreads,
lt->inpoints, lt->jnpoints, lt->knpoints);
+ double const count = lt->time_count;
+ double const setup = lt->time_setup_sum / count;
+ double const calc = lt->time_calc_sum / count;
printf (" count: %g setup: %g calc: %g\n",
- lt->time_count, lt->time_setup_sum, lt->time_calc_sum);
+ count, setup, calc);
+ sum_count += lt->time_count;
sum_setup += lt->time_setup_sum;
sum_calc += lt->time_calc_sum;
- if (lt->time_calc_sum < min_calc) {
- min_calc = lt->time_calc_sum;
+ if (calc < min_calc) {
+ min_calc = calc;
imin_calc = ntimes;
}
++ ntimes;
}
- printf (" total setup: %g total calc: %g min calc: %g (#%d)\n",
- sum_setup, sum_calc, min_calc, imin_calc);
+ printf (" total count: %g total setup: %g total calc: %g\n",
+ sum_count, sum_setup, sum_calc);
+ printf (" min calc: %g (#%d)\n",
+ min_calc, imin_calc);
++ nsets;
}
++ nmaps;
diff --git a/Carpet/LoopControl/src/loopcontrol.h b/Carpet/LoopControl/src/loopcontrol.h
index 43ac5784b..5d640c3b6 100644
--- a/Carpet/LoopControl/src/loopcontrol.h
+++ b/Carpet/LoopControl/src/loopcontrol.h
@@ -36,7 +36,7 @@
/* A topology */
typedef struct lc_topology_t {
- int ni, nj, nk;
+ int nthreads[3];
} lc_topology_t;
/* A tiling specification */
@@ -46,9 +46,19 @@ typedef struct lc_tiling_t {
+typedef struct lc_state_t {
+ int topology;
+ int tiling[3];
+} lc_state_t;
+
+
+
/* For simulated annealing */
typedef struct lc_auto_state_t lc_auto_state_t;
+/* For hill climbing */
+typedef struct lc_hill_state_t lc_hill_state_t;
+
/* Statistics for one control parameter set (thread topology and
@@ -58,9 +68,8 @@ typedef struct lc_stattime_t {
/* Keys */
- int topology;
+ lc_state_t state;
int inthreads, jnthreads, knthreads;
- int tiling[3];
int inpoints, jnpoints, knpoints;
/* Data */
@@ -69,8 +78,12 @@ typedef struct lc_stattime_t {
double time_count; /* number of calls and threads */
double time_setup_sum, time_setup_sum2; /* time spent setting up loops */
double time_calc_sum, time_calc_sum2; /* time spent iterating */
+
+ double last_updated; /* wall time tag */
} lc_stattime_t;
+
+
/* Statistics for one user parameter set (number of threads and number
of iterations) of one loop */
typedef struct lc_statset_t {
@@ -90,13 +103,24 @@ typedef struct lc_statset_t {
/* Tiling specifications */
lc_tiling_t * restrict tilings[3];
int ntilings[3];
+ int * restrict topology_ntilings[3]; /* [dim][topology] */
/* Simulated annealing state */
lc_auto_state_t * auto_state;
+ /* Hill climbing state */
+ lc_hill_state_t * hill_state;
+
lc_stattime_t * stattime_list;
+
+ /* Statistics */
+ double time_count; /* number of calls and threads */
+ double time_setup_sum, time_setup_sum2; /* time spent setting up loops */
+ double time_calc_sum, time_calc_sum2; /* time spent iterating */
} lc_statset_t;
+
+
/* Statistics for one loop (one source code location) */
typedef struct lc_statmap_t {
struct lc_statmap_t * next; /* for linked list */
@@ -107,11 +131,75 @@ typedef struct lc_statmap_t {
lc_statset_t * statset_list;
} lc_statmap_t;
+
+
/* Linked list of all loop statistics structures */
extern lc_statmap_t * lc_statmap_list;
+static inline
+int
+lc_state_valid (lc_statset_t const * restrict const ls,
+ lc_state_t const * restrict const state)
+{
+ if (state->topology >= 0 && state->topology < ls->ntopologies) {
+ int const * restrict const ntilings =
+ ls->topology_ntilings[state->topology];
+ return (state->tiling[0] >= 0 && state->tiling[0] < ntilings[0] &&
+ state->tiling[1] >= 0 && state->tiling[1] < ntilings[1] &&
+ state->tiling[2] >= 0 && state->tiling[2] < ntilings[2]);
+ }
+ return 0;
+}
+
+static inline
+int
+lc_state_equal (lc_state_t const * restrict const state1,
+ lc_state_t const * restrict const state2)
+{
+ return (state1->topology == state2->topology &&
+ state1->tiling[0] == state2->tiling[0] &&
+ state1->tiling[1] == state2->tiling[1] &&
+ state1->tiling[2] == state2->tiling[2]);
+}
+
+
+
+void
+lc_stattime_init (lc_stattime_t * restrict const lt,
+ lc_statset_t * restrict const ls,
+ lc_state_t const * restrict const state);
+
+lc_stattime_t *
+lc_stattime_find (lc_statset_t const * restrict const ls,
+ lc_state_t const * restrict const state);
+
+lc_stattime_t *
+lc_stattime_find_create (lc_statset_t * restrict const ls,
+ lc_state_t const * restrict const state);
+
+
+
+/* TODO: introduce type for num_threads and npoints[3] */
+void
+lc_statset_init (lc_statset_t * restrict const ls,
+ lc_statmap_t * restrict const lm,
+ int const num_threads,
+ int const npoints[3]);
+
+lc_statset_t *
+lc_statset_find (lc_statmap_t const * restrict const lm,
+ int const num_threads,
+ int const npoints[3]);
+
+lc_statset_t *
+lc_statset_find_create (lc_statmap_t * restrict const lm,
+ int const num_threads,
+ int const npoints[3]);
+
+
+
typedef struct lc_control_t {
lc_statmap_t * restrict statmap;
lc_statset_t * restrict statset;
diff --git a/Carpet/LoopControl/src/make.code.defn b/Carpet/LoopControl/src/make.code.defn
index 808a7baae..426f7ee52 100644
--- a/Carpet/LoopControl/src/make.code.defn
+++ b/Carpet/LoopControl/src/make.code.defn
@@ -1,7 +1,7 @@
# Main make.code.defn file for thorn LoopControl
# Source files in this directory
-SRCS = lc_auto.c lc_siman.c loopcontrol.c loopcontrol.F90 loopcontrol_types.F90
+SRCS = loopcontrol.c loopcontrol.F90 loopcontrol_types.F90 lc_auto.c lc_siman.c lc_hill.c
# Subdirectories containing source files
SUBDIRS =