From e15f981b761acb114010667880ef59fd508ee41a Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Sun, 26 Aug 2007 02:55:00 +0000 Subject: LoopControl: Add automatic configuration based on simulated annealing (Nice idea, but doesn't seem to work right. Maybe only the parameters need to be chosen differently? But I rather think that a more intelligent method is necessary.) darcs-hash:20070826025505-dae7b-ed81bc28a4204d84776d28443be65a995c52699b.gz --- Carpet/LoopControl/configuration.ccl | 2 + Carpet/LoopControl/param.ccl | 41 ++++++ Carpet/LoopControl/src/lc_auto.c | 143 ++++++++++++++++++++ Carpet/LoopControl/src/lc_auto.h | 37 ++++++ Carpet/LoopControl/src/lc_siman.c | 191 +++++++++++++++++++++++++++ Carpet/LoopControl/src/lc_siman.h | 59 +++++++++ Carpet/LoopControl/src/loopcontrol.c | 100 +++++++++----- Carpet/LoopControl/src/loopcontrol.h | 110 +++++---------- Carpet/LoopControl/src/loopcontrol_fortran.h | 64 +++++++++ Carpet/LoopControl/src/make.code.defn | 2 +- 10 files changed, 643 insertions(+), 106 deletions(-) create mode 100644 Carpet/LoopControl/src/lc_auto.c create mode 100644 Carpet/LoopControl/src/lc_auto.h create mode 100644 Carpet/LoopControl/src/lc_siman.c create mode 100644 Carpet/LoopControl/src/lc_siman.h create mode 100644 Carpet/LoopControl/src/loopcontrol_fortran.h (limited to 'Carpet/LoopControl') diff --git a/Carpet/LoopControl/configuration.ccl b/Carpet/LoopControl/configuration.ccl index bd4cc4738..13410268c 100644 --- a/Carpet/LoopControl/configuration.ccl +++ b/Carpet/LoopControl/configuration.ccl @@ -1,5 +1,7 @@ # Configuration definition for thorn LoopControl +REQUIRES GSL + PROVIDES LoopControl { } diff --git a/Carpet/LoopControl/param.ccl b/Carpet/LoopControl/param.ccl index cdd922888..009ee76e5 100644 --- a/Carpet/LoopControl/param.ccl +++ b/Carpet/LoopControl/param.ccl @@ -61,3 +61,44 @@ CCTK_INT lc_knpoints "Number of grid points in the k-direction" STEERABLE=recove BOOLEAN cycle_j_tilings "Cycle through all available tilings in the j-direction" STEERABLE=recover { } "no" + + + +BOOLEAN use_simulated_annealing "Find a good loop configuration through simulated annealing" +{ +} "no" + +CCTK_INT siman_iters_fixed_T "" +{ + 1:* :: "" +} 1 + +CCTK_REAL siman_probability_change_topology "" +{ + 0:1 :: "" +} 0.1 + +CCTK_REAL siman_step_size "" +{ + (1.0:* :: "" +} 3.0 + +CCTK_REAL siman_k "energy scale" +{ + (0:* :: "" +} 1.0e-9 + +CCTK_REAL siman_T_initial "initial variability" +{ + (0:* :: "" +} 1.0 + +CCTK_REAL siman_mu_T "speed" +{ + (0:* :: "" +} 1.005 + +CCTK_REAL siman_T_min "stopping criterion" +{ + (0:* :: "" +} 0.01 diff --git a/Carpet/LoopControl/src/lc_auto.c b/Carpet/LoopControl/src/lc_auto.c new file mode 100644 index 000000000..73d9a43fc --- /dev/null +++ b/Carpet/LoopControl/src/lc_auto.c @@ -0,0 +1,143 @@ +#include +#include + +#include + +#include +#include + +#include "lc_siman.h" + +#include "lc_auto.h" + + + +static lc_statset_t const * restrict statset; + + + +static inline +int +step (int const oldval, int const maxval, + gsl_rng const * restrict const rng, double const step_size) +{ + int const offset = (int) ((2 * gsl_rng_uniform_pos (rng) - 1) * step_size); + return (oldval + offset + maxval) % maxval; +} + + + +static +void +take_step (const gsl_rng *rng, void *xp_, double step_size) +{ + DECLARE_CCTK_PARAMETERS; + lc_auto_position_t * restrict const xp = xp_; + if (gsl_rng_uniform (rng) < siman_probability_change_topology) { + xp->topology = gsl_rng_uniform_int (rng, statset->ntopologies); + } + for (int d=0; d<3; ++d) { + xp->tiling[d] = step (xp->tiling[d], statset->ntilings[d], rng, step_size); + } +} + +static +double +distance (void *xp_, void *yp_) +{ + lc_auto_position_t const * restrict const xp = xp_; + lc_auto_position_t const * restrict const yp = yp_; + double dist = 10 * (xp->topology != yp->topology); + for (int d=0; d<3; ++d) { + dist += fabs (xp->tiling[d] - yp->tiling[d]); + } + return dist / 2; /* 2 = sqrt(4) */ +} + +static +void +print_position (void *xp_) +{ + lc_auto_position_t const * restrict const xp = xp_; + printf (" #%2d/[%3d,%3d,%3d]", + xp->topology, + xp->tiling[0], xp->tiling[1], xp->tiling[2]); +} + + + +void +lc_auto_init (lc_statset_t * restrict const ls, + int * restrict const topology, + int tiling[3]) +{ + DECLARE_CCTK_PARAMETERS; + assert (! cycle_j_tilings); /* don't mix strategies */ + + if (! ls->auto_state) { + /* First call for this user parameter set of this loop */ + + ls->auto_state = malloc (sizeof * ls->auto_state); + + /* Initialise simulated annealing parameters */ + ls->auto_state->siman_params.iters_fixed_T = siman_iters_fixed_T; + ls->auto_state->siman_params.step_size = siman_step_size; + ls->auto_state->siman_params.k = siman_k; + ls->auto_state->siman_params.t_initial = siman_T_initial; + ls->auto_state->siman_params.mu_t = siman_mu_T; + ls->auto_state->siman_params.t_min = siman_T_min; + + /* Create random number generator */ + gsl_rng_env_setup(); + ls->auto_state->rng = gsl_rng_alloc (gsl_rng_default); + + /* Set initial position */ + ls->auto_state->position.topology = * topology; + for (int d=0; d<3; ++d) { + ls->auto_state->position.tiling[d] = tiling[d]; + } + + /* Initialise simulated annealing state */ + statset = ls; + ls->auto_state->siman_state = + lc_siman_solve (NULL, + ls->auto_state->rng, + NULL, 0.0, + take_step, distance, print_position, + sizeof (lc_auto_position_t), + ls->auto_state->siman_params); + + } else { + /* Not the first call */ + + if (ls->auto_state->siman_state) { + /* The solver is still active: ask for the next state */ + statset = ls; + ls->auto_state->siman_state = + lc_siman_solve (ls->auto_state->siman_state, + ls->auto_state->rng, + & ls->auto_state->position, ls->auto_state->time, + take_step, distance, print_position, + sizeof (lc_auto_position_t), + ls->auto_state->siman_params); + } + + /* Set thread topology and tiling specification */ + * topology = ls->auto_state->position.topology; + for (int d=0; d<3; ++d) { + tiling[d] = ls->auto_state->position.tiling[d]; + } + + } /* if not the first call */ +} + + + +void +lc_auto_finish (lc_statset_t * restrict const ls, + lc_stattime_t const * restrict const lt) +{ + ls->auto_state->time = + lt->time_calc_sum / + (lt->time_count * ls->npoints[0] * ls->npoints[1] * ls->npoints[2]); +} diff --git a/Carpet/LoopControl/src/lc_auto.h b/Carpet/LoopControl/src/lc_auto.h new file mode 100644 index 000000000..a0c7b1798 --- /dev/null +++ b/Carpet/LoopControl/src/lc_auto.h @@ -0,0 +1,37 @@ +#ifndef LC_AUTO_H +#define LC_AUTO_H + +#include + +#include "lc_siman.h" +#include "loopcontrol.h" + + + +typedef struct lc_auto_position_t { + int topology; + int tiling[3]; +} lc_auto_position_t; + +/* no typedef here; forward declcared in loopcontrol.h */ +struct lc_auto_state_t { + lc_siman_params_t siman_params; + gsl_rng * rng; + lc_siman_state_t * siman_state; + + lc_auto_position_t position; + double time; +}; + + + +void +lc_auto_init (lc_statset_t * restrict const ls, + int * restrict const topology, + int tiling[3]); + +void +lc_auto_finish (lc_statset_t * restrict const ls, + lc_stattime_t const * restrict const lt); + +#endif /* #ifndef LC_AUTO_H */ diff --git a/Carpet/LoopControl/src/lc_siman.c b/Carpet/LoopControl/src/lc_siman.c new file mode 100644 index 000000000..a6fa22bc7 --- /dev/null +++ b/Carpet/LoopControl/src/lc_siman.c @@ -0,0 +1,191 @@ +/* Simulated annealing */ + +/* Adapted from GSL, the GNU Scientific Library, version 1.9 */ + +/* Copyright (C) 1996, 1997, 1998, 1999, 2000 Mark Galassi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include +#include +#include +#include +#include + +#include +#include + +#include "lc_siman.h" + +static inline +double safe_exp (double x) /* avoid underflow errors for large uphill steps */ +{ + return (x < GSL_LOG_DBL_MIN) ? 0.0 : exp(x); +} + +/* this structure contains internal state information for + lc_siman_solve */ + +typedef enum { state_initial, state_first, state_looping } lc_state_t; + +/* no typedef here; forward declcared in lc_siman.h */ +struct lc_siman_state_t { + lc_state_t state; + const gsl_rng *r; + lc_siman_step_t take_step; + lc_siman_metric_t distance; + lc_siman_print_t print_position; + size_t element_size; + lc_siman_params_t params; + void *x, *new_x, *best_x; + double E, new_E, best_E; + int i, done; + double T; + int n_evals, n_iter, n_accepts, n_rejects, n_eless; +}; + +/* implementation of a basic simulated annealing algorithm */ + +lc_siman_state_t * +lc_siman_solve (lc_siman_state_t *restrict state, + const gsl_rng *restrict r, + void *restrict x0_p, double E0, + lc_siman_step_t take_step, + lc_siman_metric_t distance, + lc_siman_print_t print_position, + size_t element_size, + lc_siman_params_t params) +{ + if (!state) { + state = malloc(sizeof *state); + state->state = state_initial; + } + switch (state->state) { + case state_initial: goto label_initial; + case state_first : goto label_first; + case state_looping: goto label_looping; + } + abort(); + + label_initial:; + state->n_evals = 1; + state->n_iter = 0; + + state->state = state_first; + return state; + + label_first:; + state->E = E0; + + state->x = malloc (element_size); + memcpy (state->x, x0_p, element_size); + state->new_x = malloc (element_size); + state->best_x = malloc (element_size); + memcpy (state->best_x, x0_p, element_size); + + state->best_E = state->E; + + state->T = params.t_initial; + state->done = 0; + + if (print_position) { + printf ("#-iter #-evals temperature position energy\n"); + } + + /* while (!done) */ + begin_while_notdone:; + if (state->done) goto end_while_notdone; + + state->n_accepts = 0; + state->n_rejects = 0; + state->n_eless = 0; + + /* for (i = 0; i < params.iters_fixed_T; ++i) */ + state->i = 0; + begin_for_i:; + if (state->i >= params.iters_fixed_T) goto end_for_i; + + memcpy (state->new_x, state->x, element_size); + + take_step (r, state->new_x, params.step_size); + memcpy (x0_p, state->new_x, element_size); + + state->state = state_looping; + return state; + + label_looping:; + state->new_E = E0; + + if (state->new_E <= state->best_E) { + memcpy (state->best_x, state->new_x, element_size); + state->best_E = state->new_E; + } + + ++state->n_evals; /* keep track of evaluations */ + /* now take the crucial step: see if the new point is accepted or + not, as determined by the boltzman probability */ + if (state->new_E < state->E) { + /* yay! take a step */ + memcpy (state->x, state->new_x, element_size); + state->E = state->new_E; + ++state->n_eless; + } else if (gsl_rng_uniform(r) < + safe_exp (-(state->new_E - state->E)/(params.k * state->T))) + { + /* yay! take a step */ + memcpy(state->x, state->new_x, element_size); + state->E = state->new_E; + ++state->n_accepts; + } else { + ++state->n_rejects; + } + + ++state->i; + goto begin_for_i; + end_for_i:; + + if (print_position) { + /* see if we need to print stuff as we go */ + /* printf("%5d %12g %5d %3d %3d %3d", n_iter, T, n_evals, */ + /* 100*n_eless/n_steps, 100*n_accepts/n_steps, */ + /* 100*n_rejects/n_steps); */ + printf ("%5d %7d %12g", state->n_iter, state->n_evals, state->T); + print_position (state->x); + printf (" %12g\n", state->E); + } + + /* apply the cooling schedule to the temperature */ + /* FIXME: I should also introduce a cooling schedule for the iters */ + state->T /= params.mu_t; + ++state->n_iter; + if (state->T < params.t_min) { + state->done = 1; + } + + goto begin_while_notdone; + end_while_notdone:; + + /* at the end, copy the result onto the initial point, so we pass it + back to the caller */ + memcpy (x0_p, state->best_x, element_size); + + free (state->x); + free (state->new_x); + free (state->best_x); + + free (state); + return NULL; +} diff --git a/Carpet/LoopControl/src/lc_siman.h b/Carpet/LoopControl/src/lc_siman.h new file mode 100644 index 000000000..c848ba378 --- /dev/null +++ b/Carpet/LoopControl/src/lc_siman.h @@ -0,0 +1,59 @@ +/* Simulated annealing */ + +/* Adapted from GSL, the GNU Scientific Library, version 1.9 */ + +/* Copyright (C) 1996, 1997, 1998, 1999, 2000 Mark Galassi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef SIMAN_H +#define SIMAN_H + +#include +#include + +/* types for the function pointers passed to lc_siman_solve */ + +typedef void (*lc_siman_step_t) (const gsl_rng *r, void *xp, double step_size); +typedef double (*lc_siman_metric_t) (void *xp, void *yp); +typedef void (*lc_siman_print_t) (void *xp); + +/* this structure contains all the information needed to structure the + search, beyond the energy function, the step function and the + initial guess. */ + +typedef struct { + int iters_fixed_T; /* how many iterations at each temperature? */ + double step_size; /* max step size in the random walk */ + /* the following parameters are for the Boltzmann distribution */ + double k, t_initial, mu_t, t_min; +} lc_siman_params_t; + +/* prototype for the workhorse function */ + +typedef struct lc_siman_state_t lc_siman_state_t; + +lc_siman_state_t * +lc_siman_solve (lc_siman_state_t *restrict state, + const gsl_rng *restrict r, + void *restrict x0_p, double E, + lc_siman_step_t take_step, + lc_siman_metric_t distance, + lc_siman_print_t print_position, + size_t element_size, + lc_siman_params_t params); + +#endif /* #ifndef SIMAN_H */ diff --git a/Carpet/LoopControl/src/loopcontrol.c b/Carpet/LoopControl/src/loopcontrol.c index dd7d3f57f..32661064b 100644 --- a/Carpet/LoopControl/src/loopcontrol.c +++ b/Carpet/LoopControl/src/loopcontrol.c @@ -15,6 +15,7 @@ #include #include "loopcontrol.h" +#include "lc_auto.h" @@ -49,14 +50,21 @@ omp_get_wtime (void) /* Linked list of all loop statistics structures */ -struct lc_statmap_t * lc_statmap_list = NULL; +lc_statmap_t * lc_statmap_list = NULL; /* Find all possible thread topologies */ +/* This finds all possible thread topologies which can be expressed as + NIxNJxNK. More complex topologies, e.g. based on a recursive + subdiviston, are not considered (and cannot be expressed in the + data structures used in LoopControl). I think more complex + topologies are not necessary, since the number of treads is usually + quite small and contains many small factors in its prime + decomposition. */ static void -find_thread_topologies (struct lc_topology_t * restrict const topologies, +find_thread_topologies (lc_topology_t * restrict const topologies, const int maxntopologies, int * restrict const ntopologies, int const nthreads) @@ -83,17 +91,27 @@ find_thread_topologies (struct lc_topology_t * restrict const topologies, /* Find "good" tiling specifications */ +/* This calculates a subset of all possible thread specifications. + One aim is to reduce the search space by disregarding some + specifications. The other aim is to distribute the specifications + "equally", so that one does not have to spend much effort + investigating tilign specifications with very similar properties. + For example, if there are 200 grid points, then half of the + possible tiling specifications consists of splitting the domain + into two subdomains with [100+N, 100-N] points. This is avoided by + covering all possible tiling specifications in exponentially + growing step sizes. */ static int tiling_compare (const void * const a, const void * const b) { - struct lc_tiling_t const * const aa = a; - struct lc_tiling_t const * const bb = b; + lc_tiling_t const * const aa = a; + lc_tiling_t const * const bb = b; return aa->npoints - bb->npoints; } static void -find_tiling_specifications (struct lc_tiling_t * restrict const tilings, +find_tiling_specifications (lc_tiling_t * restrict const tilings, const int maxntilings, int * restrict const ntilings, int const npoints) @@ -142,8 +160,8 @@ find_tiling_specifications (struct lc_tiling_t * restrict const tilings, /* Initialise control parameter set statistics */ static void -lc_stattime_init (struct lc_stattime_t * restrict const lt, - struct lc_statset_t * restrict const ls, +lc_stattime_init (lc_stattime_t * restrict const lt, + lc_statset_t * restrict const ls, int const topology, int const tiling[3]) { @@ -239,14 +257,14 @@ lc_stattime_init (struct lc_stattime_t * restrict const lt, } static -struct lc_stattime_t * -lc_stattime_find (struct lc_statset_t * restrict const ls, +lc_stattime_t * +lc_stattime_find (lc_statset_t * restrict const ls, int const topology, int const tiling[3]) { assert (ls); - struct lc_stattime_t * lt; + lc_stattime_t * lt; for (lt = ls->stattime_list; lt; lt = lt->next) { if (lt->topology == topology && @@ -271,8 +289,8 @@ lc_stattime_find (struct lc_statset_t * restrict const ls, /* Initialise user parameter set statistics */ static void -lc_statset_init (struct lc_statset_t * restrict const ls, - struct lc_statmap_t * restrict const lm, +lc_statset_init (lc_statset_t * restrict const ls, + lc_statmap_t * restrict const lm, int const num_threads, int const npoints[3]) { @@ -344,6 +362,11 @@ lc_statset_init (struct lc_statset_t * restrict const ls, + /* Simulated annealing state */ + ls->auto_state = NULL; + + + /* Initialise list */ ls->stattime_list = NULL; @@ -355,14 +378,14 @@ lc_statset_init (struct lc_statset_t * restrict const ls, } static -struct lc_statset_t * -lc_statset_find (struct lc_statmap_t * restrict const lm, +lc_statset_t * +lc_statset_find (lc_statmap_t * restrict const lm, int const num_threads, int const npoints[3]) { assert (lm); - struct lc_statset_t * ls; + lc_statset_t * ls; for (ls = lm->statset_list; ls; ls = ls->next) { if (ls->num_threads == num_threads && @@ -386,7 +409,7 @@ lc_statset_find (struct lc_statmap_t * restrict const lm, /* Initialise loop statistics */ void -lc_statmap_init (struct lc_statmap_t * restrict const lm, +lc_statmap_init (lc_statmap_t * restrict const lm, char const * restrict const name) { /* Check arguments */ @@ -406,8 +429,8 @@ lc_statmap_init (struct lc_statmap_t * restrict const lm, void -lc_control_init (struct lc_control_t * restrict const lc, - struct lc_statmap_t * restrict const lm, +lc_control_init (lc_control_t * restrict const lc, + lc_statmap_t * restrict const lm, int const imin, int const jmin, int const kmin, int const imax, int const jmax, int const kmax, int const ilsh, int const jlsh, int const klsh) @@ -438,7 +461,7 @@ lc_control_init (struct lc_control_t * restrict const lc, - struct lc_statset_t * restrict ls; + lc_statset_t * restrict ls; _Pragma ("omp single copyprivate (ls)") { /* Get number of threads */ int const num_threads = omp_get_num_threads(); @@ -454,7 +477,7 @@ lc_control_init (struct lc_control_t * restrict const lc, - struct lc_stattime_t * restrict lt; + lc_stattime_t * restrict lt; _Pragma ("omp single copyprivate (lt)") { /* Select topology */ @@ -518,6 +541,11 @@ lc_control_init (struct lc_control_t * restrict const lc, tiling[2] = ls->ntilings[2] - 1; /* as many points as possible */ } + /* Use simulated annealing to find the best loop configuration */ + if (use_simulated_annealing) { + lc_auto_init (ls, & topology, tiling); + } + /* Find or create database entry */ lt = lc_stattime_find (ls, topology, tiling); @@ -618,7 +646,7 @@ lc_control_init (struct lc_control_t * restrict const lc, void -lc_control_finish (struct lc_control_t * restrict const lc) +lc_control_finish (lc_control_t * restrict const lc) { /* Timer */ double const time_calc_end = omp_get_wtime(); @@ -632,9 +660,9 @@ lc_control_finish (struct lc_control_t * restrict const lc) double const time_calc_sum = time_calc_end - time_calc_begin; double const time_calc_sum2 = pow (time_calc_sum, 2); - + /* Update statistics */ - struct lc_stattime_t * restrict const lt = lc->stattime; + lc_stattime_t * restrict const lt = lc->stattime; _Pragma ("omp critical") { lt->time_count += 1.0; @@ -644,6 +672,18 @@ lc_control_finish (struct lc_control_t * restrict const lc) lt->time_calc_sum += time_calc_sum; lt->time_calc_sum2 += time_calc_sum2; } + + { + DECLARE_CCTK_PARAMETERS; + if (use_simulated_annealing) { + _Pragma ("omp single") { + lc_statset_t * restrict const ls = lc->statset; + lc_auto_finish (ls, lt); + } + } + } + + _Pragma ("omp barrier"); } @@ -673,12 +713,12 @@ lc_printstats (CCTK_ARGUMENTS) if (! verbose) return; int nmaps = 0; - for (struct lc_statmap_t * lm = lc_statmap_list; lm; lm = lm->next) { + for (lc_statmap_t * lm = lc_statmap_list; lm; lm = lm->next) { printf ("statmap #%d \"%s\":\n", nmaps, lm->name); int nsets = 0; - for (struct lc_statset_t * ls = lm->statset_list; ls; ls = ls->next) { + for (lc_statset_t * ls = lm->statset_list; ls; ls = ls->next) { printf (" statset #%d nthreads=%d npoints=[%d,%d,%d]\n", nsets, ls->num_threads, ls->npoints[0], ls->npoints[1], ls->npoints[2]); @@ -687,7 +727,7 @@ lc_printstats (CCTK_ARGUMENTS) double min_calc = DBL_MAX; int imin_calc = -1; int ntimes = 0; - for (struct lc_stattime_t * lt = ls->stattime_list; lt; lt = lt->next) { + for (lc_stattime_t * lt = ls->stattime_list; lt; lt = lt->next) { printf (" stattime #%d topology=%d [%d,%d,%d] tiling=[%d,%d,%d]\n", ntimes, lt->topology, lt->inthreads, lt->jnthreads, lt->knthreads, @@ -714,7 +754,7 @@ lc_printstats (CCTK_ARGUMENTS) CCTK_FCALL void -CCTK_FNAME (lc_statmap_init) (struct lc_statmap_t * restrict const lm, +CCTK_FNAME (lc_statmap_init) (lc_statmap_t * restrict const lm, ONE_FORTSTRING_ARG) { ONE_FORTSTRING_CREATE (name); @@ -724,8 +764,8 @@ CCTK_FNAME (lc_statmap_init) (struct lc_statmap_t * restrict const lm, CCTK_FCALL void -CCTK_FNAME (lc_control_init) (struct lc_control_t * restrict const lc, - struct lc_statmap_t * restrict const lm, +CCTK_FNAME (lc_control_init) (lc_control_t * restrict const lc, + lc_statmap_t * restrict const lm, int const * restrict const imin, int const * restrict const jmin, int const * restrict const kmin, @@ -744,7 +784,7 @@ CCTK_FNAME (lc_control_init) (struct lc_control_t * restrict const lc, CCTK_FCALL void -CCTK_FNAME (lc_control_finish) (struct lc_control_t * restrict const lc) +CCTK_FNAME (lc_control_finish) (lc_control_t * restrict const lc) { lc_control_finish (lc); } diff --git a/Carpet/LoopControl/src/loopcontrol.h b/Carpet/LoopControl/src/loopcontrol.h index 440e5ab26..43ac5784b 100644 --- a/Carpet/LoopControl/src/loopcontrol.h +++ b/Carpet/LoopControl/src/loopcontrol.h @@ -35,20 +35,25 @@ /* A topology */ -struct lc_topology_t { +typedef struct lc_topology_t { int ni, nj, nk; -}; +} lc_topology_t; /* A tiling specification */ -struct lc_tiling_t { +typedef struct lc_tiling_t { int npoints; -}; +} lc_tiling_t; + + + +/* For simulated annealing */ +typedef struct lc_auto_state_t lc_auto_state_t; /* Statistics for one control parameter set (thread topology and tiling specification) of one user parameter set of one loop */ -struct lc_stattime_t { +typedef struct lc_stattime_t { struct lc_stattime_t * next; /* Keys */ @@ -64,11 +69,11 @@ struct lc_stattime_t { double time_count; /* number of calls and threads */ double time_setup_sum, time_setup_sum2; /* time spent setting up loops */ double time_calc_sum, time_calc_sum2; /* time spent iterating */ -}; +} lc_stattime_t; /* Statistics for one user parameter set (number of threads and number of iterations) of one loop */ -struct lc_statset_t { +typedef struct lc_statset_t { struct lc_statset_t * next; /* Keys */ @@ -79,35 +84,38 @@ struct lc_statset_t { /* Data */ /* Thread topologies */ - struct lc_topology_t * restrict topologies; + lc_topology_t * restrict topologies; int ntopologies; /* Tiling specifications */ - struct lc_tiling_t * restrict tilings[3]; + lc_tiling_t * restrict tilings[3]; int ntilings[3]; - struct lc_stattime_t * stattime_list; -}; + /* Simulated annealing state */ + lc_auto_state_t * auto_state; + + lc_stattime_t * stattime_list; +} lc_statset_t; /* Statistics for one loop (one source code location) */ -struct lc_statmap_t { +typedef struct lc_statmap_t { struct lc_statmap_t * next; /* for linked list */ /* Name */ char const * restrict name; - struct lc_statset_t * statset_list; -}; + lc_statset_t * statset_list; +} lc_statmap_t; /* Linked list of all loop statistics structures */ -extern struct lc_statmap_t * lc_statmap_list; +extern lc_statmap_t * lc_statmap_list; -struct lc_control_t { - struct lc_statmap_t * restrict statmap; - struct lc_statset_t * restrict statset; - struct lc_stattime_t * restrict stattime; +typedef struct lc_control_t { + lc_statmap_t * restrict statmap; + lc_statset_t * restrict statset; + lc_stattime_t * restrict stattime; /* Copy of arguments (useful for debugging) */ int imin, jmin, kmin; @@ -130,7 +138,7 @@ struct lc_control_t { /* Timing statistics */ double time_setup_begin, time_calc_begin; -}; +} lc_control_t; @@ -151,24 +159,24 @@ lc_max (int const i, int const j) void -lc_statmap_init (struct lc_statmap_t * restrict ls, +lc_statmap_init (lc_statmap_t * restrict ls, char const * restrict name); void -lc_control_init (struct lc_control_t * restrict lc, - struct lc_statmap_t * restrict lm, +lc_control_init (lc_control_t * restrict lc, + lc_statmap_t * restrict lm, int imin, int jmin, int kmin, int imax, int jmax, int kmax, int ilsh, int jlsh, int klsh); void -lc_control_finish (struct lc_control_t * restrict lc); +lc_control_finish (lc_control_t * restrict lc); #define LC_LOOP3(name, i,j,k, imin,jmin,kmin, imax,jmax,kmax, ilsh,jlsh,klsh) \ do { \ - static struct lc_statmap_t lc_lm; \ + static lc_statmap_t lc_lm; \ static int lc_initialised = 0; \ if (! lc_initialised) { \ _Pragma ("omp single") { \ @@ -179,7 +187,7 @@ lc_control_finish (struct lc_control_t * restrict lc); lc_initialised = 1; \ } \ } \ - struct lc_control_t lc_lc; \ + lc_control_t lc_lc; \ lc_control_init (& lc_lc, & lc_lm, \ imin,jmin,kmin, imax,jmax,kmax, ilsh,jlsh,klsh); \ \ @@ -224,55 +232,7 @@ lc_printstats (CCTK_ARGUMENTS); #ifdef FCODE - -#define LC_DECLARE3(name, i,j,k) &&\ -type (lc_statmap_t), save :: name/**/_lm &&\ -logical, save :: name/**/_initialised = .false. &&\ -type (lc_control_t) :: name/**/_lc &&\ -integer :: name/**/_ii, name/**/_jj, name/**/_kk &&\ -integer :: name/**/_imax, name/**/_jmax, name/**/_kmax &&\ -integer :: i, j, k - -#define LC_PRIVATE3(name) \ -name/**/_lc, \ -name/**/_imax, name/**/_jmax, name/**/_kmax - -#define LC_LOOP3(name, i,j,k, imin,jmin,kmin, imax,jmax,kmax, ilsh,jlsh,klsh) &&\ -if (.not. name/**/_initialised) then &&\ -!$omp single &&\ - call lc_statmap_init (name/**/_lm, "name") &&\ -!$omp end single &&\ -!$omp single &&\ - /* Set this flag only after initialising */ &&\ - name/**/_initialised = .true. &&\ -!$omp end single &&\ -end if &&\ -call lc_control_init (name/**/_lc, name/**/_lm, imin,jmin,kmin, imax,jmax,kmax, ilsh,jlsh,klsh) &&\ - &&\ -/* Coarse loop */ &&\ -do name/**/_kk = name/**/_lc%kkmin + 1, name/**/_lc%kkmax, name/**/_lc%kkstep &&\ - name/**/_kmax = min (name/**/_kk - 1 + name/**/_lc%kkstep, name/**/_lc%kkmax) &&\ - do name/**/_jj = name/**/_lc%jjmin + 1, name/**/_lc%jjmax, name/**/_lc%jjstep &&\ - name/**/_jmax = min (name/**/_jj - 1 + name/**/_lc%jjstep, name/**/_lc%jjmax) &&\ - do name/**/_ii = name/**/_lc%iimin + 1, name/**/_lc%iimax, name/**/_lc%iistep &&\ - name/**/_imax = min (name/**/_ii - 1 + name/**/_lc%iistep, name/**/_lc%iimax) &&\ - &&\ - /* Fine loop */ &&\ - do k = name/**/_kk, name/**/_kmax &&\ - do j = name/**/_jj, name/**/_jmax &&\ - do i = name/**/_ii, name/**/_imax - -#define LC_ENDLOOP3(name) &&\ - end do &&\ - end do &&\ - end do &&\ - &&\ - end do &&\ - end do &&\ -end do &&\ - &&\ -call lc_control_finish (name/**/_lc) - +# include "loopcontrol_fortran.h" #endif #endif /* ifndef LC_LOOPCONTROL_H */ diff --git a/Carpet/LoopControl/src/loopcontrol_fortran.h b/Carpet/LoopControl/src/loopcontrol_fortran.h new file mode 100644 index 000000000..e80bf358f --- /dev/null +++ b/Carpet/LoopControl/src/loopcontrol_fortran.h @@ -0,0 +1,64 @@ +/* -*-f90-mode-*- */ + +#ifndef LOOPCONTROL_FORTRAN_H +#define LOOPCONTROL_FORTRAN_H + + + +#define LC_DECLARE3(name, i,j,k) &&\ +type (lc_statmap_t), save :: name/**/_lm &&\ +logical, save :: name/**/_initialised = .false. &&\ +type (lc_control_t) :: name/**/_lc &&\ +integer :: name/**/_ii, name/**/_jj, name/**/_kk &&\ +integer :: name/**/_imax, name/**/_jmax, name/**/_kmax &&\ +integer :: i, j, k + + + +#define LC_PRIVATE3(name) \ +name/**/_lc, \ +name/**/_imax, name/**/_jmax, name/**/_kmax + + + +#define LC_LOOP3(name, i,j,k, imin,jmin,kmin, imax,jmax,kmax, ilsh,jlsh,klsh) &&\ +if (.not. name/**/_initialised) then &&\ +!$omp single &&\ + call lc_statmap_init (name/**/_lm, "name") &&\ +!$omp end single &&\ +!$omp single &&\ + /* Set this flag only after initialising */ &&\ + name/**/_initialised = .true. &&\ +!$omp end single &&\ +end if &&\ +call lc_control_init (name/**/_lc, name/**/_lm, imin,jmin,kmin, imax,jmax,kmax, ilsh,jlsh,klsh) &&\ + &&\ +/* Coarse loop */ &&\ +do name/**/_kk = name/**/_lc%kkmin + 1, name/**/_lc%kkmax, name/**/_lc%kkstep &&\ + name/**/_kmax = min (name/**/_kk - 1 + name/**/_lc%kkstep, name/**/_lc%kkmax) &&\ + do name/**/_jj = name/**/_lc%jjmin + 1, name/**/_lc%jjmax, name/**/_lc%jjstep &&\ + name/**/_jmax = min (name/**/_jj - 1 + name/**/_lc%jjstep, name/**/_lc%jjmax) &&\ + do name/**/_ii = name/**/_lc%iimin + 1, name/**/_lc%iimax, name/**/_lc%iistep &&\ + name/**/_imax = min (name/**/_ii - 1 + name/**/_lc%iistep, name/**/_lc%iimax) &&\ + &&\ + /* Fine loop */ &&\ + do k = name/**/_kk, name/**/_kmax &&\ + do j = name/**/_jj, name/**/_jmax &&\ + do i = name/**/_ii, name/**/_imax + + + +#define LC_ENDLOOP3(name) &&\ + end do &&\ + end do &&\ + end do &&\ + &&\ + end do &&\ + end do &&\ +end do &&\ + &&\ +call lc_control_finish (name/**/_lc) + + + +#endif /* #ifndef LOOPCONTROL_FORTRAN_H */ diff --git a/Carpet/LoopControl/src/make.code.defn b/Carpet/LoopControl/src/make.code.defn index c37055419..808a7baae 100644 --- a/Carpet/LoopControl/src/make.code.defn +++ b/Carpet/LoopControl/src/make.code.defn @@ -1,7 +1,7 @@ # Main make.code.defn file for thorn LoopControl # Source files in this directory -SRCS = loopcontrol.c loopcontrol.F90 loopcontrol_types.F90 +SRCS = lc_auto.c lc_siman.c loopcontrol.c loopcontrol.F90 loopcontrol_types.F90 # Subdirectories containing source files SUBDIRS = -- cgit v1.2.3