diff options
Diffstat (limited to 'Carpet/LoopControl/src/loopcontrol.h')
-rw-r--r-- | Carpet/LoopControl/src/loopcontrol.h | 31 |
1 files changed, 21 insertions, 10 deletions
diff --git a/Carpet/LoopControl/src/loopcontrol.h b/Carpet/LoopControl/src/loopcontrol.h index 0f3b6d80d..e23806fcb 100644 --- a/Carpet/LoopControl/src/loopcontrol.h +++ b/Carpet/LoopControl/src/loopcontrol.h @@ -242,6 +242,7 @@ typedef struct lc_control_t { int imin, jmin, kmin; int imax, jmax, kmax; int ilsh, jlsh, klsh; + int di; /* Control settings for thread parallelism (useful for debugging) */ int iiimin, jjjmin, kkkmin; @@ -303,7 +304,8 @@ lc_control_init (lc_control_t * restrict lc, lc_statmap_t * restrict lm, int imin, int jmin, int kmin, int imax, int jmax, int kmax, - int ilsh, int jlsh, int klsh); + int ilsh, int jlsh, int klsh, + int di); void lc_control_finish (lc_control_t * restrict lc); @@ -311,15 +313,24 @@ lc_control_finish (lc_control_t * restrict lc); #define LC_LOOP3(name, i,j,k, imin,jmin,kmin, imax,jmax,kmax, ilsh,jlsh,klsh) \ + LC_LOOP3VEC(name, i,j,k, imin,jmin,kmin, imax,jmax,kmax, ilsh,jlsh,klsh, 1) +#define LC_ENDLOOP3(name) \ + LC_ENDLOOP3VEC(name) + +#define LC_LOOP3VEC(name, i,j,k, imin_,jmin_,kmin_, imax_,jmax_,kmax_, ilsh_,jlsh_,klsh_, di_) \ do { \ static int lc_initialised = 0; \ static lc_statmap_t lc_lm; \ if (! lc_initialised) { \ lc_statmap_init (& lc_initialised, & lc_lm, #name); \ } \ + int const lc_di = (di_); \ lc_control_t lc_lc; \ lc_control_init (& lc_lc, & lc_lm, \ - imin,jmin,kmin, imax,jmax,kmax, ilsh,jlsh,klsh); \ + (imin_), (jmin_), (kmin_), \ + (imax_), (jmax_), (kmax_), \ + (ilsh_), (jlsh_), (klsh_), \ + lc_di); \ \ /* Coarse loop */ \ for (int lc_kk = lc_lc.kkmin; \ @@ -327,33 +338,33 @@ lc_control_finish (lc_control_t * restrict lc); lc_kk += lc_lc.kkstep) \ { \ int const lc_kmin = lc_kk + lc_lc.kkkkmin; \ - int const lc_kmax = \ - lc_min (lc_kk + lc_lc.kkkkmax, lc_lc.kkmax); \ + int const lc_kmax = lc_min (lc_kk + lc_lc.kkkkmax, lc_lc.kkmax); \ \ for (int lc_jj = lc_lc.jjmin; \ lc_jj < lc_lc.jjmax; \ lc_jj += lc_lc.jjstep) \ { \ int const lc_jmin = lc_jj + lc_lc.jjjjmin; \ - int const lc_jmax = \ - lc_min (lc_jj + lc_lc.jjjjmax, lc_lc.jjmax); \ + int const lc_jmax = lc_min (lc_jj + lc_lc.jjjjmax, lc_lc.jjmax); \ \ for (int lc_ii = lc_lc.iimin; \ lc_ii < lc_lc.iimax; \ lc_ii += lc_lc.iistep) \ { \ int const lc_imin = lc_ii + lc_lc.iiiimin; \ - int const lc_imax = \ - lc_min (lc_ii + lc_lc.iiiimax, lc_lc.iimax); \ + int const lc_imax = lc_min (lc_ii + lc_lc.iiiimax, lc_lc.iimax); \ \ /* Fine loop */ \ for (int k = lc_kmin; k < lc_kmax; ++k) { \ for (int j = lc_jmin; j < lc_jmax; ++j) { \ LC_PRELOOP_STATEMENTS \ { \ - for (int i = lc_imin; i < lc_imax; ++i) { + int const lc_ipos = \ + lc_imin + lc_lc.ilsh * (j + lc_lc.jlsh * k); \ + int const lc_ioffset = (lc_ipos & - lc_di) - lc_ipos; \ + for (int i = lc_imin + lc_ioffset; i < lc_imax; i += lc_di) { -#define LC_ENDLOOP3(name) \ +#define LC_ENDLOOP3VEC(name) \ } \ } \ LC_POSTLOOP_STATEMENTS \ |