diff options
author | Erik Schnetter <schnetter@cct.lsu.edu> | 2010-08-25 00:08:50 -0400 |
---|---|---|
committer | Barry Wardell <barry.wardell@gmail.com> | 2011-12-14 18:21:16 +0000 |
commit | 17ff1a4043632ef415b0f908f1dde0f936c99177 (patch) | |
tree | 4747a5e875229bfab0c93da0d07b820d35ddb931 /Carpet/LoopControl | |
parent | e0717dd1f8fc393f79c51d7910981ac962d5964e (diff) |
LoopControl: Add loop macros for vectorised code
Diffstat (limited to 'Carpet/LoopControl')
-rw-r--r-- | Carpet/LoopControl/src/loopcontrol.c | 9 | ||||
-rw-r--r-- | Carpet/LoopControl/src/loopcontrol.h | 31 |
2 files changed, 28 insertions, 12 deletions
diff --git a/Carpet/LoopControl/src/loopcontrol.c b/Carpet/LoopControl/src/loopcontrol.c index 15f0b3235..875c6fc60 100644 --- a/Carpet/LoopControl/src/loopcontrol.c +++ b/Carpet/LoopControl/src/loopcontrol.c @@ -632,7 +632,8 @@ lc_control_init (lc_control_t * restrict const lc, lc_statmap_t * restrict const lm, int const imin, int const jmin, int const kmin, int const imax, int const jmax, int const kmax, - int const ilsh, int const jlsh, int const klsh) + int const ilsh, int const jlsh, int const klsh, + int const di) { DECLARE_CCTK_PARAMETERS; @@ -646,6 +647,7 @@ lc_control_init (lc_control_t * restrict const lc, assert (imin >= 0 && imax <= ilsh && ilsh >= 0); assert (jmin >= 0 && jmax <= jlsh && jlsh >= 0); assert (kmin >= 0 && kmax <= klsh && klsh >= 0); + assert (di > 0); /* Copy arguments */ lc->imin = imin; @@ -657,6 +659,7 @@ lc_control_init (lc_control_t * restrict const lc, lc->ilsh = ilsh; lc->jlsh = jlsh; lc->klsh = klsh; + lc->di = di; /* vector size */ @@ -665,6 +668,7 @@ lc_control_init (lc_control_t * restrict const lc, #pragma omp single copyprivate (ls) { /* Calculate number of points */ + /* TODO: Take vector size into account */ int npoints[3]; npoints[0] = lc_max (imax - imin, 0); npoints[1] = lc_max (jmax - jmin, 0); @@ -1107,7 +1111,8 @@ CCTK_FNAME (lc_control_init) (lc_control_t * restrict const lc, lc_control_init (lc, lm, * imin - 1, * jmin - 1, * kmin - 1, * imax, * jmax, * kmax, - * ilsh, * jlsh, * klsh); + * ilsh, * jlsh, * klsh, + 1); } CCTK_FCALL diff --git a/Carpet/LoopControl/src/loopcontrol.h b/Carpet/LoopControl/src/loopcontrol.h index 0f3b6d80d..e23806fcb 100644 --- a/Carpet/LoopControl/src/loopcontrol.h +++ b/Carpet/LoopControl/src/loopcontrol.h @@ -242,6 +242,7 @@ typedef struct lc_control_t { int imin, jmin, kmin; int imax, jmax, kmax; int ilsh, jlsh, klsh; + int di; /* Control settings for thread parallelism (useful for debugging) */ int iiimin, jjjmin, kkkmin; @@ -303,7 +304,8 @@ lc_control_init (lc_control_t * restrict lc, lc_statmap_t * restrict lm, int imin, int jmin, int kmin, int imax, int jmax, int kmax, - int ilsh, int jlsh, int klsh); + int ilsh, int jlsh, int klsh, + int di); void lc_control_finish (lc_control_t * restrict lc); @@ -311,15 +313,24 @@ lc_control_finish (lc_control_t * restrict lc); #define LC_LOOP3(name, i,j,k, imin,jmin,kmin, imax,jmax,kmax, ilsh,jlsh,klsh) \ + LC_LOOP3VEC(name, i,j,k, imin,jmin,kmin, imax,jmax,kmax, ilsh,jlsh,klsh, 1) +#define LC_ENDLOOP3(name) \ + LC_ENDLOOP3VEC(name) + +#define LC_LOOP3VEC(name, i,j,k, imin_,jmin_,kmin_, imax_,jmax_,kmax_, ilsh_,jlsh_,klsh_, di_) \ do { \ static int lc_initialised = 0; \ static lc_statmap_t lc_lm; \ if (! lc_initialised) { \ lc_statmap_init (& lc_initialised, & lc_lm, #name); \ } \ + int const lc_di = (di_); \ lc_control_t lc_lc; \ lc_control_init (& lc_lc, & lc_lm, \ - imin,jmin,kmin, imax,jmax,kmax, ilsh,jlsh,klsh); \ + (imin_), (jmin_), (kmin_), \ + (imax_), (jmax_), (kmax_), \ + (ilsh_), (jlsh_), (klsh_), \ + lc_di); \ \ /* Coarse loop */ \ for (int lc_kk = lc_lc.kkmin; \ @@ -327,33 +338,33 @@ lc_control_finish (lc_control_t * restrict lc); lc_kk += lc_lc.kkstep) \ { \ int const lc_kmin = lc_kk + lc_lc.kkkkmin; \ - int const lc_kmax = \ - lc_min (lc_kk + lc_lc.kkkkmax, lc_lc.kkmax); \ + int const lc_kmax = lc_min (lc_kk + lc_lc.kkkkmax, lc_lc.kkmax); \ \ for (int lc_jj = lc_lc.jjmin; \ lc_jj < lc_lc.jjmax; \ lc_jj += lc_lc.jjstep) \ { \ int const lc_jmin = lc_jj + lc_lc.jjjjmin; \ - int const lc_jmax = \ - lc_min (lc_jj + lc_lc.jjjjmax, lc_lc.jjmax); \ + int const lc_jmax = lc_min (lc_jj + lc_lc.jjjjmax, lc_lc.jjmax); \ \ for (int lc_ii = lc_lc.iimin; \ lc_ii < lc_lc.iimax; \ lc_ii += lc_lc.iistep) \ { \ int const lc_imin = lc_ii + lc_lc.iiiimin; \ - int const lc_imax = \ - lc_min (lc_ii + lc_lc.iiiimax, lc_lc.iimax); \ + int const lc_imax = lc_min (lc_ii + lc_lc.iiiimax, lc_lc.iimax); \ \ /* Fine loop */ \ for (int k = lc_kmin; k < lc_kmax; ++k) { \ for (int j = lc_jmin; j < lc_jmax; ++j) { \ LC_PRELOOP_STATEMENTS \ { \ - for (int i = lc_imin; i < lc_imax; ++i) { + int const lc_ipos = \ + lc_imin + lc_lc.ilsh * (j + lc_lc.jlsh * k); \ + int const lc_ioffset = (lc_ipos & - lc_di) - lc_ipos; \ + for (int i = lc_imin + lc_ioffset; i < lc_imax; i += lc_di) { -#define LC_ENDLOOP3(name) \ +#define LC_ENDLOOP3VEC(name) \ } \ } \ LC_POSTLOOP_STATEMENTS \ |