diff options
author | Erik Schnetter <schnetter@cct.lsu.edu> | 2008-08-28 11:55:27 -0500 |
---|---|---|
committer | Erik Schnetter <schnetter@cct.lsu.edu> | 2008-08-29 15:12:04 -0500 |
commit | 09affe060cdf3a5b33f16ed9a95cf55ae42218da (patch) | |
tree | 830a93c23fd46169fc4298072649a1761123c25d /Carpet/LoopControl/src | |
parent | a586f0874c1b1ca03f87c69b76169962ce5fe669 (diff) |
LoopControl: Allow preloop and postloop statements
Allow user-defined statements surrounding the innermost loop, e.g. for
vectorisation.
Diffstat (limited to 'Carpet/LoopControl/src')
-rw-r--r-- | Carpet/LoopControl/src/loopcontrol.c | 11 | ||||
-rw-r--r-- | Carpet/LoopControl/src/loopcontrol.h | 35 |
2 files changed, 39 insertions, 7 deletions
```diff
diff --git a/Carpet/LoopControl/src/loopcontrol.c b/Carpet/LoopControl/src/loopcontrol.c
index cef4fbba7..dd726695a 100644
--- a/Carpet/LoopControl/src/loopcontrol.c
+++ b/Carpet/LoopControl/src/loopcontrol.c
@@ -749,6 +749,12 @@ lc_control_init (lc_control_t * restrict const lc,
   lc->jjjstep = (lc->jjjmax - lc->jjjmin + lt->jnthreads-1) / lt->jnthreads;
   lc->kkkstep = (lc->kkkmax - lc->kkkmin + lt->knthreads-1) / lt->knthreads;
 
+#if 0
+  /* Correct threading for vectorisation (cache line size) */
+  lc->iiistep =
+    (lc->iiistep + LC_VECTORSIZE - 1) / LC_VECTORSIZE * LC_VECTORSIZE;
+#endif
+
   /* Find location of current thread */
   lc->thread_num = omp_get_thread_num();
   int c = lc->thread_num;
@@ -775,6 +781,11 @@ lc_control_init (lc_control_t * restrict const lc,
   lc->jjstep = lt->jnpoints;
   lc->kkstep = lt->knpoints;
 
+#if 0
+  /* Correct tiling for vectorisation (cache line size) */
+  lc->iistep = (lc->iistep + LC_VECTORSIZE - 1) / LC_VECTORSIZE * LC_VECTORSIZE;
+#endif
+
 
 
   /****************************************************************************/
diff --git a/Carpet/LoopControl/src/loopcontrol.h b/Carpet/LoopControl/src/loopcontrol.h
index 3399abb14..5c71f0fee 100644
--- a/Carpet/LoopControl/src/loopcontrol.h
+++ b/Carpet/LoopControl/src/loopcontrol.h
@@ -46,6 +46,14 @@ extern "C" {
 
 
 
+#if 0
+/* Vector size */
+#define LC_VECTORSIZE 2 /* Correct for double precision on
+                           Intel */
+#endif
+
+
+
 /* A topology */
 typedef struct lc_topology_t {
   int nthreads[3];
@@ -306,16 +314,29 @@ lc_control_finish (lc_control_t * restrict lc);
           int const lc_imax = lc_min (lc_ii + lc_lc.iistep, lc_lc.iimax); \
           \
           /* Fine loop */ \
-          for (int k = lc_kk; k < lc_kmax; ++k) \
-            for (int j = lc_jj; j < lc_jmax; ++j) \
-              for (int i = lc_ii; i < lc_imax; ++i)
+          for (int k = lc_kk; k < lc_kmax; ++k) { \
+            for (int j = lc_jj; j < lc_jmax; ++j) { \
+              int const lc_imin = lc_ii; \
+              LC_PRELOOP_STATEMENTS \
+              { \
+                for (int i = lc_imin; i < lc_imax; ++i) {
 
 #define LC_ENDLOOP3(name) \
           } \
-        } \
-      } \
-      lc_control_finish (& lc_lc); \
-    } while (0)
+              } \
+              LC_POSTLOOP_STATEMENTS \
+            } \
+          } \
+        } \
+        } \
+      } \
+      lc_control_finish (& lc_lc); \
+    } while (0)
+
+/* Pre- and post loop statements are inserted around the innermost
+   loop, which is executed serially. By default these are empty. */
+#define LC_PRELOOP_STATEMENTS
+#define LC_POSTLOOP_STATEMENTS
 
 
 
```