aboutsummaryrefslogtreecommitdiff
path: root/Carpet
diff options
context:
space:
mode:
authorErik Schnetter <schnetter@cct.lsu.edu>2007-08-26 02:55:00 +0000
committerErik Schnetter <schnetter@cct.lsu.edu>2007-08-26 02:55:00 +0000
commite15f981b761acb114010667880ef59fd508ee41a (patch)
treebe57f518626e309b6a9b4219adfd1fcb4e89a7fe /Carpet
parentf6ff5079044289e1f748e800ce85e171fe766f5f (diff)
LoopControl: Add automatic configuration based on simulated annealing
(Nice idea, but doesn't seem to work right. Maybe only the parameters need to be chosen differently? But I rather think that a more intelligent method is necessary.) darcs-hash:20070826025505-dae7b-ed81bc28a4204d84776d28443be65a995c52699b.gz
Diffstat (limited to 'Carpet')
-rw-r--r--Carpet/LoopControl/configuration.ccl2
-rw-r--r--Carpet/LoopControl/param.ccl41
-rw-r--r--Carpet/LoopControl/src/lc_auto.c143
-rw-r--r--Carpet/LoopControl/src/lc_auto.h37
-rw-r--r--Carpet/LoopControl/src/lc_siman.c191
-rw-r--r--Carpet/LoopControl/src/lc_siman.h59
-rw-r--r--Carpet/LoopControl/src/loopcontrol.c100
-rw-r--r--Carpet/LoopControl/src/loopcontrol.h110
-rw-r--r--Carpet/LoopControl/src/loopcontrol_fortran.h64
-rw-r--r--Carpet/LoopControl/src/make.code.defn2
10 files changed, 643 insertions, 106 deletions
diff --git a/Carpet/LoopControl/configuration.ccl b/Carpet/LoopControl/configuration.ccl
index bd4cc4738..13410268c 100644
--- a/Carpet/LoopControl/configuration.ccl
+++ b/Carpet/LoopControl/configuration.ccl
@@ -1,5 +1,7 @@
# Configuration definition for thorn LoopControl
+REQUIRES GSL
+
PROVIDES LoopControl
{
}
diff --git a/Carpet/LoopControl/param.ccl b/Carpet/LoopControl/param.ccl
index cdd922888..009ee76e5 100644
--- a/Carpet/LoopControl/param.ccl
+++ b/Carpet/LoopControl/param.ccl
@@ -61,3 +61,44 @@ CCTK_INT lc_knpoints "Number of grid points in the k-direction" STEERABLE=recove
BOOLEAN cycle_j_tilings "Cycle through all available tilings in the j-direction" STEERABLE=recover
{
} "no"
+
+
+
+BOOLEAN use_simulated_annealing "Find a good loop configuration through simulated annealing"
+{
+} "no"
+
+CCTK_INT siman_iters_fixed_T ""
+{
+ 1:* :: ""
+} 1
+
+CCTK_REAL siman_probability_change_topology ""
+{
+ 0:1 :: ""
+} 0.1
+
+CCTK_REAL siman_step_size ""
+{
+ (1.0:* :: ""
+} 3.0
+
+CCTK_REAL siman_k "energy scale"
+{
+ (0:* :: ""
+} 1.0e-9
+
+CCTK_REAL siman_T_initial "initial variability"
+{
+ (0:* :: ""
+} 1.0
+
+CCTK_REAL siman_mu_T "speed"
+{
+ (0:* :: ""
+} 1.005
+
+CCTK_REAL siman_T_min "stopping criterion"
+{
+ (0:* :: ""
+} 0.01
diff --git a/Carpet/LoopControl/src/lc_auto.c b/Carpet/LoopControl/src/lc_auto.c
new file mode 100644
index 000000000..73d9a43fc
--- /dev/null
+++ b/Carpet/LoopControl/src/lc_auto.c
@@ -0,0 +1,143 @@
+#include <assert.h>
+#include <math.h>
+
+#include <gsl/gsl_rng.h>
+
+#include <cctk.h>
+#include <cctk_Parameters.h>
+
+#include "lc_siman.h"
+
+#include "lc_auto.h"
+
+
+
+static lc_statset_t const * restrict statset;
+
+
+
+static inline
+int
+step (int const oldval, int const maxval,
+ gsl_rng const * restrict const rng, double const step_size)
+{
+ int const offset = (int) ((2 * gsl_rng_uniform_pos (rng) - 1) * step_size);
+ return (oldval + offset + maxval) % maxval;
+}
+
+
+
+static
+void
+take_step (const gsl_rng *rng, void *xp_, double step_size)
+{
+ DECLARE_CCTK_PARAMETERS;
+ lc_auto_position_t * restrict const xp = xp_;
+ if (gsl_rng_uniform (rng) < siman_probability_change_topology) {
+ xp->topology = gsl_rng_uniform_int (rng, statset->ntopologies);
+ }
+ for (int d=0; d<3; ++d) {
+ xp->tiling[d] = step (xp->tiling[d], statset->ntilings[d], rng, step_size);
+ }
+}
+
+static
+double
+distance (void *xp_, void *yp_)
+{
+ lc_auto_position_t const * restrict const xp = xp_;
+ lc_auto_position_t const * restrict const yp = yp_;
+ double dist = 10 * (xp->topology != yp->topology);
+ for (int d=0; d<3; ++d) {
+ dist += fabs (xp->tiling[d] - yp->tiling[d]);
+ }
+ return dist / 2; /* 2 = sqrt(4) */
+}
+
+static
+void
+print_position (void *xp_)
+{
+ lc_auto_position_t const * restrict const xp = xp_;
+ printf (" #%2d/[%3d,%3d,%3d]",
+ xp->topology,
+ xp->tiling[0], xp->tiling[1], xp->tiling[2]);
+}
+
+
+
+void
+lc_auto_init (lc_statset_t * restrict const ls,
+ int * restrict const topology,
+ int tiling[3])
+{
+ DECLARE_CCTK_PARAMETERS;
+ assert (! cycle_j_tilings); /* don't mix strategies */
+
+ if (! ls->auto_state) {
+ /* First call for this user parameter set of this loop */
+
+ ls->auto_state = malloc (sizeof * ls->auto_state);
+
+ /* Initialise simulated annealing parameters */
+ ls->auto_state->siman_params.iters_fixed_T = siman_iters_fixed_T;
+ ls->auto_state->siman_params.step_size = siman_step_size;
+ ls->auto_state->siman_params.k = siman_k;
+ ls->auto_state->siman_params.t_initial = siman_T_initial;
+ ls->auto_state->siman_params.mu_t = siman_mu_T;
+ ls->auto_state->siman_params.t_min = siman_T_min;
+
+ /* Create random number generator */
+ gsl_rng_env_setup();
+ ls->auto_state->rng = gsl_rng_alloc (gsl_rng_default);
+
+ /* Set initial position */
+ ls->auto_state->position.topology = * topology;
+ for (int d=0; d<3; ++d) {
+ ls->auto_state->position.tiling[d] = tiling[d];
+ }
+
+ /* Initialise simulated annealing state */
+ statset = ls;
+ ls->auto_state->siman_state =
+ lc_siman_solve (NULL,
+ ls->auto_state->rng,
+ NULL, 0.0,
+ take_step, distance, print_position,
+ sizeof (lc_auto_position_t),
+ ls->auto_state->siman_params);
+
+ } else {
+ /* Not the first call */
+
+ if (ls->auto_state->siman_state) {
+ /* The solver is still active: ask for the next state */
+ statset = ls;
+ ls->auto_state->siman_state =
+ lc_siman_solve (ls->auto_state->siman_state,
+ ls->auto_state->rng,
+ & ls->auto_state->position, ls->auto_state->time,
+ take_step, distance, print_position,
+ sizeof (lc_auto_position_t),
+ ls->auto_state->siman_params);
+ }
+
+ /* Set thread topology and tiling specification */
+ * topology = ls->auto_state->position.topology;
+ for (int d=0; d<3; ++d) {
+ tiling[d] = ls->auto_state->position.tiling[d];
+ }
+
+ } /* if not the first call */
+}
+
+
+
+void
+lc_auto_finish (lc_statset_t * restrict const ls,
+ lc_stattime_t const * restrict const lt)
+{
+ ls->auto_state->time =
+ lt->time_calc_sum /
+ (lt->time_count * ls->npoints[0] * ls->npoints[1] * ls->npoints[2]);
+}
diff --git a/Carpet/LoopControl/src/lc_auto.h b/Carpet/LoopControl/src/lc_auto.h
new file mode 100644
index 000000000..a0c7b1798
--- /dev/null
+++ b/Carpet/LoopControl/src/lc_auto.h
@@ -0,0 +1,37 @@
+#ifndef LC_AUTO_H
+#define LC_AUTO_H
+
+#include <cctk.h>
+
+#include "lc_siman.h"
+#include "loopcontrol.h"
+
+
+
+typedef struct lc_auto_position_t {
+ int topology;
+ int tiling[3];
+} lc_auto_position_t;
+
+/* no typedef here; forward declcared in loopcontrol.h */
+struct lc_auto_state_t {
+ lc_siman_params_t siman_params;
+ gsl_rng * rng;
+ lc_siman_state_t * siman_state;
+
+ lc_auto_position_t position;
+ double time;
+};
+
+
+
+void
+lc_auto_init (lc_statset_t * restrict const ls,
+ int * restrict const topology,
+ int tiling[3]);
+
+void
+lc_auto_finish (lc_statset_t * restrict const ls,
+ lc_stattime_t const * restrict const lt);
+
+#endif /* #ifndef LC_AUTO_H */
diff --git a/Carpet/LoopControl/src/lc_siman.c b/Carpet/LoopControl/src/lc_siman.c
new file mode 100644
index 000000000..a6fa22bc7
--- /dev/null
+++ b/Carpet/LoopControl/src/lc_siman.c
@@ -0,0 +1,191 @@
+/* Simulated annealing */
+
+/* Adapted from GSL, the GNU Scientific Library, version 1.9 */
+
+/* Copyright (C) 1996, 1997, 1998, 1999, 2000 Mark Galassi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <assert.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <gsl/gsl_machine.h>
+#include <gsl/gsl_rng.h>
+
+#include "lc_siman.h"
+
+static inline
+double safe_exp (double x) /* avoid underflow errors for large uphill steps */
+{
+ return (x < GSL_LOG_DBL_MIN) ? 0.0 : exp(x);
+}
+
+/* this structure contains internal state information for
+ lc_siman_solve */
+
+typedef enum { state_initial, state_first, state_looping } lc_state_t;
+
+/* no typedef here; forward declcared in lc_siman.h */
+struct lc_siman_state_t {
+ lc_state_t state;
+ const gsl_rng *r;
+ lc_siman_step_t take_step;
+ lc_siman_metric_t distance;
+ lc_siman_print_t print_position;
+ size_t element_size;
+ lc_siman_params_t params;
+ void *x, *new_x, *best_x;
+ double E, new_E, best_E;
+ int i, done;
+ double T;
+ int n_evals, n_iter, n_accepts, n_rejects, n_eless;
+};
+
+/* implementation of a basic simulated annealing algorithm */
+
+lc_siman_state_t *
+lc_siman_solve (lc_siman_state_t *restrict state,
+ const gsl_rng *restrict r,
+ void *restrict x0_p, double E0,
+ lc_siman_step_t take_step,
+ lc_siman_metric_t distance,
+ lc_siman_print_t print_position,
+ size_t element_size,
+ lc_siman_params_t params)
+{
+ if (!state) {
+ state = malloc(sizeof *state);
+ state->state = state_initial;
+ }
+ switch (state->state) {
+ case state_initial: goto label_initial;
+ case state_first : goto label_first;
+ case state_looping: goto label_looping;
+ }
+ abort();
+
+ label_initial:;
+ state->n_evals = 1;
+ state->n_iter = 0;
+
+ state->state = state_first;
+ return state;
+
+ label_first:;
+ state->E = E0;
+
+ state->x = malloc (element_size);
+ memcpy (state->x, x0_p, element_size);
+ state->new_x = malloc (element_size);
+ state->best_x = malloc (element_size);
+ memcpy (state->best_x, x0_p, element_size);
+
+ state->best_E = state->E;
+
+ state->T = params.t_initial;
+ state->done = 0;
+
+ if (print_position) {
+ printf ("#-iter #-evals temperature position energy\n");
+ }
+
+ /* while (!done) */
+ begin_while_notdone:;
+ if (state->done) goto end_while_notdone;
+
+ state->n_accepts = 0;
+ state->n_rejects = 0;
+ state->n_eless = 0;
+
+ /* for (i = 0; i < params.iters_fixed_T; ++i) */
+ state->i = 0;
+ begin_for_i:;
+ if (state->i >= params.iters_fixed_T) goto end_for_i;
+
+ memcpy (state->new_x, state->x, element_size);
+
+ take_step (r, state->new_x, params.step_size);
+ memcpy (x0_p, state->new_x, element_size);
+
+ state->state = state_looping;
+ return state;
+
+ label_looping:;
+ state->new_E = E0;
+
+ if (state->new_E <= state->best_E) {
+ memcpy (state->best_x, state->new_x, element_size);
+ state->best_E = state->new_E;
+ }
+
+ ++state->n_evals; /* keep track of evaluations */
+ /* now take the crucial step: see if the new point is accepted or
+ not, as determined by the boltzman probability */
+ if (state->new_E < state->E) {
+ /* yay! take a step */
+ memcpy (state->x, state->new_x, element_size);
+ state->E = state->new_E;
+ ++state->n_eless;
+ } else if (gsl_rng_uniform(r) <
+ safe_exp (-(state->new_E - state->E)/(params.k * state->T)))
+ {
+ /* yay! take a step */
+ memcpy(state->x, state->new_x, element_size);
+ state->E = state->new_E;
+ ++state->n_accepts;
+ } else {
+ ++state->n_rejects;
+ }
+
+ ++state->i;
+ goto begin_for_i;
+ end_for_i:;
+
+ if (print_position) {
+ /* see if we need to print stuff as we go */
+ /* printf("%5d %12g %5d %3d %3d %3d", n_iter, T, n_evals, */
+ /* 100*n_eless/n_steps, 100*n_accepts/n_steps, */
+ /* 100*n_rejects/n_steps); */
+ printf ("%5d %7d %12g", state->n_iter, state->n_evals, state->T);
+ print_position (state->x);
+ printf (" %12g\n", state->E);
+ }
+
+ /* apply the cooling schedule to the temperature */
+ /* FIXME: I should also introduce a cooling schedule for the iters */
+ state->T /= params.mu_t;
+ ++state->n_iter;
+ if (state->T < params.t_min) {
+ state->done = 1;
+ }
+
+ goto begin_while_notdone;
+ end_while_notdone:;
+
+ /* at the end, copy the result onto the initial point, so we pass it
+ back to the caller */
+ memcpy (x0_p, state->best_x, element_size);
+
+ free (state->x);
+ free (state->new_x);
+ free (state->best_x);
+
+ free (state);
+ return NULL;
+}
diff --git a/Carpet/LoopControl/src/lc_siman.h b/Carpet/LoopControl/src/lc_siman.h
new file mode 100644
index 000000000..c848ba378
--- /dev/null
+++ b/Carpet/LoopControl/src/lc_siman.h
@@ -0,0 +1,59 @@
+/* Simulated annealing */
+
+/* Adapted from GSL, the GNU Scientific Library, version 1.9 */
+
+/* Copyright (C) 1996, 1997, 1998, 1999, 2000 Mark Galassi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef SIMAN_H
+#define SIMAN_H
+
+#include <stdlib.h>
+#include <gsl/gsl_rng.h>
+
+/* types for the function pointers passed to lc_siman_solve */
+
+typedef void (*lc_siman_step_t) (const gsl_rng *r, void *xp, double step_size);
+typedef double (*lc_siman_metric_t) (void *xp, void *yp);
+typedef void (*lc_siman_print_t) (void *xp);
+
+/* this structure contains all the information needed to structure the
+ search, beyond the energy function, the step function and the
+ initial guess. */
+
+typedef struct {
+ int iters_fixed_T; /* how many iterations at each temperature? */
+ double step_size; /* max step size in the random walk */
+ /* the following parameters are for the Boltzmann distribution */
+ double k, t_initial, mu_t, t_min;
+} lc_siman_params_t;
+
+/* prototype for the workhorse function */
+
+typedef struct lc_siman_state_t lc_siman_state_t;
+
+lc_siman_state_t *
+lc_siman_solve (lc_siman_state_t *restrict state,
+ const gsl_rng *restrict r,
+ void *restrict x0_p, double E,
+ lc_siman_step_t take_step,
+ lc_siman_metric_t distance,
+ lc_siman_print_t print_position,
+ size_t element_size,
+ lc_siman_params_t params);
+
+#endif /* #ifndef SIMAN_H */
diff --git a/Carpet/LoopControl/src/loopcontrol.c b/Carpet/LoopControl/src/loopcontrol.c
index dd7d3f57f..32661064b 100644
--- a/Carpet/LoopControl/src/loopcontrol.c
+++ b/Carpet/LoopControl/src/loopcontrol.c
@@ -15,6 +15,7 @@
#include <cctk_Parameters.h>
#include "loopcontrol.h"
+#include "lc_auto.h"
@@ -49,14 +50,21 @@ omp_get_wtime (void)
/* Linked list of all loop statistics structures */
-struct lc_statmap_t * lc_statmap_list = NULL;
+lc_statmap_t * lc_statmap_list = NULL;
/* Find all possible thread topologies */
+/* This finds all possible thread topologies which can be expressed as
+ NIxNJxNK. More complex topologies, e.g. based on a recursive
+ subdiviston, are not considered (and cannot be expressed in the
+ data structures used in LoopControl). I think more complex
+ topologies are not necessary, since the number of treads is usually
+ quite small and contains many small factors in its prime
+ decomposition. */
static
void
-find_thread_topologies (struct lc_topology_t * restrict const topologies,
+find_thread_topologies (lc_topology_t * restrict const topologies,
const int maxntopologies,
int * restrict const ntopologies,
int const nthreads)
@@ -83,17 +91,27 @@ find_thread_topologies (struct lc_topology_t * restrict const topologies,
/* Find "good" tiling specifications */
+/* This calculates a subset of all possible thread specifications.
+ One aim is to reduce the search space by disregarding some
+ specifications. The other aim is to distribute the specifications
+ "equally", so that one does not have to spend much effort
+ investigating tilign specifications with very similar properties.
+ For example, if there are 200 grid points, then half of the
+ possible tiling specifications consists of splitting the domain
+ into two subdomains with [100+N, 100-N] points. This is avoided by
+ covering all possible tiling specifications in exponentially
+ growing step sizes. */
static
int tiling_compare (const void * const a, const void * const b)
{
- struct lc_tiling_t const * const aa = a;
- struct lc_tiling_t const * const bb = b;
+ lc_tiling_t const * const aa = a;
+ lc_tiling_t const * const bb = b;
return aa->npoints - bb->npoints;
}
static
void
-find_tiling_specifications (struct lc_tiling_t * restrict const tilings,
+find_tiling_specifications (lc_tiling_t * restrict const tilings,
const int maxntilings,
int * restrict const ntilings,
int const npoints)
@@ -142,8 +160,8 @@ find_tiling_specifications (struct lc_tiling_t * restrict const tilings,
/* Initialise control parameter set statistics */
static
void
-lc_stattime_init (struct lc_stattime_t * restrict const lt,
- struct lc_statset_t * restrict const ls,
+lc_stattime_init (lc_stattime_t * restrict const lt,
+ lc_statset_t * restrict const ls,
int const topology,
int const tiling[3])
{
@@ -239,14 +257,14 @@ lc_stattime_init (struct lc_stattime_t * restrict const lt,
}
static
-struct lc_stattime_t *
-lc_stattime_find (struct lc_statset_t * restrict const ls,
+lc_stattime_t *
+lc_stattime_find (lc_statset_t * restrict const ls,
int const topology,
int const tiling[3])
{
assert (ls);
- struct lc_stattime_t * lt;
+ lc_stattime_t * lt;
for (lt = ls->stattime_list; lt; lt = lt->next) {
if (lt->topology == topology &&
@@ -271,8 +289,8 @@ lc_stattime_find (struct lc_statset_t * restrict const ls,
/* Initialise user parameter set statistics */
static
void
-lc_statset_init (struct lc_statset_t * restrict const ls,
- struct lc_statmap_t * restrict const lm,
+lc_statset_init (lc_statset_t * restrict const ls,
+ lc_statmap_t * restrict const lm,
int const num_threads,
int const npoints[3])
{
@@ -344,6 +362,11 @@ lc_statset_init (struct lc_statset_t * restrict const ls,
+ /* Simulated annealing state */
+ ls->auto_state = NULL;
+
+
+
/* Initialise list */
ls->stattime_list = NULL;
@@ -355,14 +378,14 @@ lc_statset_init (struct lc_statset_t * restrict const ls,
}
static
-struct lc_statset_t *
-lc_statset_find (struct lc_statmap_t * restrict const lm,
+lc_statset_t *
+lc_statset_find (lc_statmap_t * restrict const lm,
int const num_threads,
int const npoints[3])
{
assert (lm);
- struct lc_statset_t * ls;
+ lc_statset_t * ls;
for (ls = lm->statset_list; ls; ls = ls->next) {
if (ls->num_threads == num_threads &&
@@ -386,7 +409,7 @@ lc_statset_find (struct lc_statmap_t * restrict const lm,
/* Initialise loop statistics */
void
-lc_statmap_init (struct lc_statmap_t * restrict const lm,
+lc_statmap_init (lc_statmap_t * restrict const lm,
char const * restrict const name)
{
/* Check arguments */
@@ -406,8 +429,8 @@ lc_statmap_init (struct lc_statmap_t * restrict const lm,
void
-lc_control_init (struct lc_control_t * restrict const lc,
- struct lc_statmap_t * restrict const lm,
+lc_control_init (lc_control_t * restrict const lc,
+ lc_statmap_t * restrict const lm,
int const imin, int const jmin, int const kmin,
int const imax, int const jmax, int const kmax,
int const ilsh, int const jlsh, int const klsh)
@@ -438,7 +461,7 @@ lc_control_init (struct lc_control_t * restrict const lc,
- struct lc_statset_t * restrict ls;
+ lc_statset_t * restrict ls;
_Pragma ("omp single copyprivate (ls)") {
/* Get number of threads */
int const num_threads = omp_get_num_threads();
@@ -454,7 +477,7 @@ lc_control_init (struct lc_control_t * restrict const lc,
- struct lc_stattime_t * restrict lt;
+ lc_stattime_t * restrict lt;
_Pragma ("omp single copyprivate (lt)") {
/* Select topology */
@@ -518,6 +541,11 @@ lc_control_init (struct lc_control_t * restrict const lc,
tiling[2] = ls->ntilings[2] - 1; /* as many points as possible */
}
+ /* Use simulated annealing to find the best loop configuration */
+ if (use_simulated_annealing) {
+ lc_auto_init (ls, & topology, tiling);
+ }
+
/* Find or create database entry */
lt = lc_stattime_find (ls, topology, tiling);
@@ -618,7 +646,7 @@ lc_control_init (struct lc_control_t * restrict const lc,
void
-lc_control_finish (struct lc_control_t * restrict const lc)
+lc_control_finish (lc_control_t * restrict const lc)
{
/* Timer */
double const time_calc_end = omp_get_wtime();
@@ -632,9 +660,9 @@ lc_control_finish (struct lc_control_t * restrict const lc)
double const time_calc_sum = time_calc_end - time_calc_begin;
double const time_calc_sum2 = pow (time_calc_sum, 2);
-
+
/* Update statistics */
- struct lc_stattime_t * restrict const lt = lc->stattime;
+ lc_stattime_t * restrict const lt = lc->stattime;
_Pragma ("omp critical") {
lt->time_count += 1.0;
@@ -644,6 +672,18 @@ lc_control_finish (struct lc_control_t * restrict const lc)
lt->time_calc_sum += time_calc_sum;
lt->time_calc_sum2 += time_calc_sum2;
}
+
+ {
+ DECLARE_CCTK_PARAMETERS;
+ if (use_simulated_annealing) {
+ _Pragma ("omp single") {
+ lc_statset_t * restrict const ls = lc->statset;
+ lc_auto_finish (ls, lt);
+ }
+ }
+ }
+
+ _Pragma ("omp barrier");
}
@@ -673,12 +713,12 @@ lc_printstats (CCTK_ARGUMENTS)
if (! verbose) return;
int nmaps = 0;
- for (struct lc_statmap_t * lm = lc_statmap_list; lm; lm = lm->next) {
+ for (lc_statmap_t * lm = lc_statmap_list; lm; lm = lm->next) {
printf ("statmap #%d \"%s\":\n",
nmaps,
lm->name);
int nsets = 0;
- for (struct lc_statset_t * ls = lm->statset_list; ls; ls = ls->next) {
+ for (lc_statset_t * ls = lm->statset_list; ls; ls = ls->next) {
printf (" statset #%d nthreads=%d npoints=[%d,%d,%d]\n",
nsets,
ls->num_threads, ls->npoints[0], ls->npoints[1], ls->npoints[2]);
@@ -687,7 +727,7 @@ lc_printstats (CCTK_ARGUMENTS)
double min_calc = DBL_MAX;
int imin_calc = -1;
int ntimes = 0;
- for (struct lc_stattime_t * lt = ls->stattime_list; lt; lt = lt->next) {
+ for (lc_stattime_t * lt = ls->stattime_list; lt; lt = lt->next) {
printf (" stattime #%d topology=%d [%d,%d,%d] tiling=[%d,%d,%d]\n",
ntimes,
lt->topology, lt->inthreads, lt->jnthreads, lt->knthreads,
@@ -714,7 +754,7 @@ lc_printstats (CCTK_ARGUMENTS)
CCTK_FCALL
void
-CCTK_FNAME (lc_statmap_init) (struct lc_statmap_t * restrict const lm,
+CCTK_FNAME (lc_statmap_init) (lc_statmap_t * restrict const lm,
ONE_FORTSTRING_ARG)
{
ONE_FORTSTRING_CREATE (name);
@@ -724,8 +764,8 @@ CCTK_FNAME (lc_statmap_init) (struct lc_statmap_t * restrict const lm,
CCTK_FCALL
void
-CCTK_FNAME (lc_control_init) (struct lc_control_t * restrict const lc,
- struct lc_statmap_t * restrict const lm,
+CCTK_FNAME (lc_control_init) (lc_control_t * restrict const lc,
+ lc_statmap_t * restrict const lm,
int const * restrict const imin,
int const * restrict const jmin,
int const * restrict const kmin,
@@ -744,7 +784,7 @@ CCTK_FNAME (lc_control_init) (struct lc_control_t * restrict const lc,
CCTK_FCALL
void
-CCTK_FNAME (lc_control_finish) (struct lc_control_t * restrict const lc)
+CCTK_FNAME (lc_control_finish) (lc_control_t * restrict const lc)
{
lc_control_finish (lc);
}
diff --git a/Carpet/LoopControl/src/loopcontrol.h b/Carpet/LoopControl/src/loopcontrol.h
index 440e5ab26..43ac5784b 100644
--- a/Carpet/LoopControl/src/loopcontrol.h
+++ b/Carpet/LoopControl/src/loopcontrol.h
@@ -35,20 +35,25 @@
/* A topology */
-struct lc_topology_t {
+typedef struct lc_topology_t {
int ni, nj, nk;
-};
+} lc_topology_t;
/* A tiling specification */
-struct lc_tiling_t {
+typedef struct lc_tiling_t {
int npoints;
-};
+} lc_tiling_t;
+
+
+
+/* For simulated annealing */
+typedef struct lc_auto_state_t lc_auto_state_t;
/* Statistics for one control parameter set (thread topology and
tiling specification) of one user parameter set of one loop */
-struct lc_stattime_t {
+typedef struct lc_stattime_t {
struct lc_stattime_t * next;
/* Keys */
@@ -64,11 +69,11 @@ struct lc_stattime_t {
double time_count; /* number of calls and threads */
double time_setup_sum, time_setup_sum2; /* time spent setting up loops */
double time_calc_sum, time_calc_sum2; /* time spent iterating */
-};
+} lc_stattime_t;
/* Statistics for one user parameter set (number of threads and number
of iterations) of one loop */
-struct lc_statset_t {
+typedef struct lc_statset_t {
struct lc_statset_t * next;
/* Keys */
@@ -79,35 +84,38 @@ struct lc_statset_t {
/* Data */
/* Thread topologies */
- struct lc_topology_t * restrict topologies;
+ lc_topology_t * restrict topologies;
int ntopologies;
/* Tiling specifications */
- struct lc_tiling_t * restrict tilings[3];
+ lc_tiling_t * restrict tilings[3];
int ntilings[3];
- struct lc_stattime_t * stattime_list;
-};
+ /* Simulated annealing state */
+ lc_auto_state_t * auto_state;
+
+ lc_stattime_t * stattime_list;
+} lc_statset_t;
/* Statistics for one loop (one source code location) */
-struct lc_statmap_t {
+typedef struct lc_statmap_t {
struct lc_statmap_t * next; /* for linked list */
/* Name */
char const * restrict name;
- struct lc_statset_t * statset_list;
-};
+ lc_statset_t * statset_list;
+} lc_statmap_t;
/* Linked list of all loop statistics structures */
-extern struct lc_statmap_t * lc_statmap_list;
+extern lc_statmap_t * lc_statmap_list;
-struct lc_control_t {
- struct lc_statmap_t * restrict statmap;
- struct lc_statset_t * restrict statset;
- struct lc_stattime_t * restrict stattime;
+typedef struct lc_control_t {
+ lc_statmap_t * restrict statmap;
+ lc_statset_t * restrict statset;
+ lc_stattime_t * restrict stattime;
/* Copy of arguments (useful for debugging) */
int imin, jmin, kmin;
@@ -130,7 +138,7 @@ struct lc_control_t {
/* Timing statistics */
double time_setup_begin, time_calc_begin;
-};
+} lc_control_t;
@@ -151,24 +159,24 @@ lc_max (int const i, int const j)
void
-lc_statmap_init (struct lc_statmap_t * restrict ls,
+lc_statmap_init (lc_statmap_t * restrict ls,
char const * restrict name);
void
-lc_control_init (struct lc_control_t * restrict lc,
- struct lc_statmap_t * restrict lm,
+lc_control_init (lc_control_t * restrict lc,
+ lc_statmap_t * restrict lm,
int imin, int jmin, int kmin,
int imax, int jmax, int kmax,
int ilsh, int jlsh, int klsh);
void
-lc_control_finish (struct lc_control_t * restrict lc);
+lc_control_finish (lc_control_t * restrict lc);
#define LC_LOOP3(name, i,j,k, imin,jmin,kmin, imax,jmax,kmax, ilsh,jlsh,klsh) \
do { \
- static struct lc_statmap_t lc_lm; \
+ static lc_statmap_t lc_lm; \
static int lc_initialised = 0; \
if (! lc_initialised) { \
_Pragma ("omp single") { \
@@ -179,7 +187,7 @@ lc_control_finish (struct lc_control_t * restrict lc);
lc_initialised = 1; \
} \
} \
- struct lc_control_t lc_lc; \
+ lc_control_t lc_lc; \
lc_control_init (& lc_lc, & lc_lm, \
imin,jmin,kmin, imax,jmax,kmax, ilsh,jlsh,klsh); \
\
@@ -224,55 +232,7 @@ lc_printstats (CCTK_ARGUMENTS);
#ifdef FCODE
-
-#define LC_DECLARE3(name, i,j,k) &&\
-type (lc_statmap_t), save :: name/**/_lm &&\
-logical, save :: name/**/_initialised = .false. &&\
-type (lc_control_t) :: name/**/_lc &&\
-integer :: name/**/_ii, name/**/_jj, name/**/_kk &&\
-integer :: name/**/_imax, name/**/_jmax, name/**/_kmax &&\
-integer :: i, j, k
-
-#define LC_PRIVATE3(name) \
-name/**/_lc, \
-name/**/_imax, name/**/_jmax, name/**/_kmax
-
-#define LC_LOOP3(name, i,j,k, imin,jmin,kmin, imax,jmax,kmax, ilsh,jlsh,klsh) &&\
-if (.not. name/**/_initialised) then &&\
-!$omp single &&\
- call lc_statmap_init (name/**/_lm, "name") &&\
-!$omp end single &&\
-!$omp single &&\
- /* Set this flag only after initialising */ &&\
- name/**/_initialised = .true. &&\
-!$omp end single &&\
-end if &&\
-call lc_control_init (name/**/_lc, name/**/_lm, imin,jmin,kmin, imax,jmax,kmax, ilsh,jlsh,klsh) &&\
- &&\
-/* Coarse loop */ &&\
-do name/**/_kk = name/**/_lc%kkmin + 1, name/**/_lc%kkmax, name/**/_lc%kkstep &&\
- name/**/_kmax = min (name/**/_kk - 1 + name/**/_lc%kkstep, name/**/_lc%kkmax) &&\
- do name/**/_jj = name/**/_lc%jjmin + 1, name/**/_lc%jjmax, name/**/_lc%jjstep &&\
- name/**/_jmax = min (name/**/_jj - 1 + name/**/_lc%jjstep, name/**/_lc%jjmax) &&\
- do name/**/_ii = name/**/_lc%iimin + 1, name/**/_lc%iimax, name/**/_lc%iistep &&\
- name/**/_imax = min (name/**/_ii - 1 + name/**/_lc%iistep, name/**/_lc%iimax) &&\
- &&\
- /* Fine loop */ &&\
- do k = name/**/_kk, name/**/_kmax &&\
- do j = name/**/_jj, name/**/_jmax &&\
- do i = name/**/_ii, name/**/_imax
-
-#define LC_ENDLOOP3(name) &&\
- end do &&\
- end do &&\
- end do &&\
- &&\
- end do &&\
- end do &&\
-end do &&\
- &&\
-call lc_control_finish (name/**/_lc)
-
+# include "loopcontrol_fortran.h"
#endif
#endif /* ifndef LC_LOOPCONTROL_H */
diff --git a/Carpet/LoopControl/src/loopcontrol_fortran.h b/Carpet/LoopControl/src/loopcontrol_fortran.h
new file mode 100644
index 000000000..e80bf358f
--- /dev/null
+++ b/Carpet/LoopControl/src/loopcontrol_fortran.h
@@ -0,0 +1,64 @@
+/* -*-f90-mode-*- */
+
+#ifndef LOOPCONTROL_FORTRAN_H
+#define LOOPCONTROL_FORTRAN_H
+
+
+
+#define LC_DECLARE3(name, i,j,k) &&\
+type (lc_statmap_t), save :: name/**/_lm &&\
+logical, save :: name/**/_initialised = .false. &&\
+type (lc_control_t) :: name/**/_lc &&\
+integer :: name/**/_ii, name/**/_jj, name/**/_kk &&\
+integer :: name/**/_imax, name/**/_jmax, name/**/_kmax &&\
+integer :: i, j, k
+
+
+
+#define LC_PRIVATE3(name) \
+name/**/_lc, \
+name/**/_imax, name/**/_jmax, name/**/_kmax
+
+
+
+#define LC_LOOP3(name, i,j,k, imin,jmin,kmin, imax,jmax,kmax, ilsh,jlsh,klsh) &&\
+if (.not. name/**/_initialised) then &&\
+!$omp single &&\
+ call lc_statmap_init (name/**/_lm, "name") &&\
+!$omp end single &&\
+!$omp single &&\
+ /* Set this flag only after initialising */ &&\
+ name/**/_initialised = .true. &&\
+!$omp end single &&\
+end if &&\
+call lc_control_init (name/**/_lc, name/**/_lm, imin,jmin,kmin, imax,jmax,kmax, ilsh,jlsh,klsh) &&\
+ &&\
+/* Coarse loop */ &&\
+do name/**/_kk = name/**/_lc%kkmin + 1, name/**/_lc%kkmax, name/**/_lc%kkstep &&\
+ name/**/_kmax = min (name/**/_kk - 1 + name/**/_lc%kkstep, name/**/_lc%kkmax) &&\
+ do name/**/_jj = name/**/_lc%jjmin + 1, name/**/_lc%jjmax, name/**/_lc%jjstep &&\
+ name/**/_jmax = min (name/**/_jj - 1 + name/**/_lc%jjstep, name/**/_lc%jjmax) &&\
+ do name/**/_ii = name/**/_lc%iimin + 1, name/**/_lc%iimax, name/**/_lc%iistep &&\
+ name/**/_imax = min (name/**/_ii - 1 + name/**/_lc%iistep, name/**/_lc%iimax) &&\
+ &&\
+ /* Fine loop */ &&\
+ do k = name/**/_kk, name/**/_kmax &&\
+ do j = name/**/_jj, name/**/_jmax &&\
+ do i = name/**/_ii, name/**/_imax
+
+
+
+#define LC_ENDLOOP3(name) &&\
+ end do &&\
+ end do &&\
+ end do &&\
+ &&\
+ end do &&\
+ end do &&\
+end do &&\
+ &&\
+call lc_control_finish (name/**/_lc)
+
+
+
+#endif /* #ifndef LOOPCONTROL_FORTRAN_H */
diff --git a/Carpet/LoopControl/src/make.code.defn b/Carpet/LoopControl/src/make.code.defn
index c37055419..808a7baae 100644
--- a/Carpet/LoopControl/src/make.code.defn
+++ b/Carpet/LoopControl/src/make.code.defn
@@ -1,7 +1,7 @@
# Main make.code.defn file for thorn LoopControl
# Source files in this directory
-SRCS = loopcontrol.c loopcontrol.F90 loopcontrol_types.F90
+SRCS = lc_auto.c lc_siman.c loopcontrol.c loopcontrol.F90 loopcontrol_types.F90
# Subdirectories containing source files
SUBDIRS =