From 09affe060cdf3a5b33f16ed9a95cf55ae42218da Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Thu, 28 Aug 2008 11:55:27 -0500 Subject: LoopControl: Allow preloop and postloop statements Allow user-defined statements surrounding the innermost loop, e.g. for vectorisation. --- Carpet/LoopControl/src/loopcontrol.c | 11 +++++++++++ Carpet/LoopControl/src/loopcontrol.h | 35 ++++++++++++++++++++++++++++------- 2 files changed, 39 insertions(+), 7 deletions(-) (limited to 'Carpet/LoopControl/src') diff --git a/Carpet/LoopControl/src/loopcontrol.c b/Carpet/LoopControl/src/loopcontrol.c index cef4fbba7..dd726695a 100644 --- a/Carpet/LoopControl/src/loopcontrol.c +++ b/Carpet/LoopControl/src/loopcontrol.c @@ -749,6 +749,12 @@ lc_control_init (lc_control_t * restrict const lc, lc->jjjstep = (lc->jjjmax - lc->jjjmin + lt->jnthreads-1) / lt->jnthreads; lc->kkkstep = (lc->kkkmax - lc->kkkmin + lt->knthreads-1) / lt->knthreads; +#if 0 + /* Correct threading for vectorisation (cache line size) */ + lc->iiistep = + (lc->iiistep + LC_VECTORSIZE - 1) / LC_VECTORSIZE * LC_VECTORSIZE; +#endif + /* Find location of current thread */ lc->thread_num = omp_get_thread_num(); int c = lc->thread_num; @@ -775,6 +781,11 @@ lc_control_init (lc_control_t * restrict const lc, lc->jjstep = lt->jnpoints; lc->kkstep = lt->knpoints; +#if 0 + /* Correct tiling for vectorisation (cache line size) */ + lc->iistep = (lc->iistep + LC_VECTORSIZE - 1) / LC_VECTORSIZE * LC_VECTORSIZE; +#endif + /****************************************************************************/ diff --git a/Carpet/LoopControl/src/loopcontrol.h b/Carpet/LoopControl/src/loopcontrol.h index 3399abb14..5c71f0fee 100644 --- a/Carpet/LoopControl/src/loopcontrol.h +++ b/Carpet/LoopControl/src/loopcontrol.h @@ -46,6 +46,14 @@ extern "C" { +#if 0 +/* Vector size */ +#define LC_VECTORSIZE 2 /* Correct for double precision on + Intel */ +#endif + + + /* A topology */ typedef struct lc_topology_t { int nthreads[3]; @@ -306,16 +314,29 @@ 
lc_control_finish (lc_control_t * restrict lc); int const lc_imax = lc_min (lc_ii + lc_lc.iistep, lc_lc.iimax); \ \ /* Fine loop */ \ - for (int k = lc_kk; k < lc_kmax; ++k) \ - for (int j = lc_jj; j < lc_jmax; ++j) \ - for (int i = lc_ii; i < lc_imax; ++i) + for (int k = lc_kk; k < lc_kmax; ++k) { \ + for (int j = lc_jj; j < lc_jmax; ++j) { \ + int const lc_imin = lc_ii; \ + LC_PRELOOP_STATEMENTS \ + { \ + for (int i = lc_imin; i < lc_imax; ++i) { #define LC_ENDLOOP3(name) \ } \ - } \ - } \ - lc_control_finish (& lc_lc); \ - } while (0) + } \ + LC_POSTLOOP_STATEMENTS \ + } \ + } \ + } \ + } \ + } \ + lc_control_finish (& lc_lc); \ + } while (0) + +/* Pre- and post-loop statements are inserted around the innermost + loop, which is executed serially. By default these are empty. */ +#define LC_PRELOOP_STATEMENTS +#define LC_POSTLOOP_STATEMENTS -- cgit v1.2.3