aboutsummaryrefslogtreecommitdiff
path: root/Carpet/LoopControl/src/loopcontrol.h
blob: 3d5571810ffea6b4336d357f9965d63750f8688e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
#ifndef LOOPCONTROL_H
#define LOOPCONTROL_H

/* This file uses the namespace LC_* for macros and lc_* for C
   identifiers. */

#define LC_DIM 3
  


#ifdef CCODE

#include <assert.h>
#include <stddef.h>
#include <stdlib.h>

#include <cctk.h>

/* #define lc_assert(x) ((void)0) */
#define lc_assert(x) assert(x)



#ifdef __cplusplus
extern "C" {
#endif
  
  
  
  static inline ptrdiff_t lc_min(const ptrdiff_t i, const ptrdiff_t j)
  {
    return i < j ? i : j;
  }
  
  static inline ptrdiff_t lc_max(const ptrdiff_t i, const ptrdiff_t j)
  {
    return i > j ? i : j;
  }
  
  
  
  struct lc_thread_info_t;
  struct lc_fine_thread_comm_t;
  
  struct lc_descr_t;
  
  typedef struct {
    ptrdiff_t v[LC_DIM];
  } lc_vec_t;
  
  typedef struct {
    int v[LC_DIM];
  } lc_ivec_t;
  
  typedef struct {
    /* Traverse pos from min (inclusive) to max (exclusive) with a
       stride of step. Equivalently, traverse idx from 0 (inclusive)
       to count (exclusive). */
    lc_vec_t min, max, step, pos;
    lc_ivec_t count, idx;
  } lc_space_t;
  
  typedef struct {
    /* memory layout */
    lc_vec_t ash;
    
    /* overall loop bounds */
    lc_space_t overall;
    
    /* coarse threads */
    lc_space_t coarse_thread;
    /* shared between all threads */
    struct lc_thread_info_t* coarse_thread_info_ptr;
    int coarse_thread_done;
    
    /* coarse loop; count, idx, pos are undefined */
    lc_space_t coarse_loop;
    
    /* fine loop; count, idx, pos are undefined */
    lc_space_t fine_loop;

    /* fine threads; min, max, pos are undefined */
    lc_space_t fine_thread;
    /* shared between fine threads */
    struct lc_fine_thread_comm_t* fine_thread_comm_ptr;
    
    /* selftest: shared between all threads */
    unsigned char* selftest_array;
  } lc_control_t;
  
  
  
  void lc_descr_init(struct lc_descr_t** descr,
                     const char* name, const char* file, int line);
  void lc_control_init(lc_control_t* restrict control,
                       struct lc_descr_t *descr,
                       ptrdiff_t imin, ptrdiff_t jmin, ptrdiff_t kmin,
                       ptrdiff_t imax, ptrdiff_t jmax, ptrdiff_t kmax,
                       ptrdiff_t iash, ptrdiff_t jash, ptrdiff_t kash,
                       ptrdiff_t istr);
  void lc_control_finish(lc_control_t* restrict control,
                         struct lc_descr_t* descr);
  
  void lc_thread_init(lc_control_t* restrict control);
  int lc_thread_done(const lc_control_t* restrict control);
  void lc_thread_step(lc_control_t* restrict control);
  
  void lc_selftest_set(const lc_control_t* restrict control,
                       ptrdiff_t imin, ptrdiff_t imax, ptrdiff_t istr,
                       ptrdiff_t i, ptrdiff_t j, ptrdiff_t k);
  
  
  
#define LC_COARSE_SETUP(D)                                              \
  lc_control.coarse_loop.min.v[D] = lc_control.coarse_thread.pos.v[D];  \
  lc_control.coarse_loop.max.v[D] =                                     \
    lc_min(lc_control.coarse_thread.max.v[D],                           \
           lc_control.coarse_loop.min.v[D] +                            \
           lc_control.coarse_thread.step.v[D]);                         \
  const ptrdiff_t lc_cmin##D = lc_control.coarse_loop.min.v[D];         \
  const ptrdiff_t lc_cmax##D = lc_control.coarse_loop.max.v[D];         \
  const ptrdiff_t lc_cstep##D = lc_control.coarse_loop.step.v[D];
#define LC_COARSE_LOOP(D)                       \
  for (ptrdiff_t lc_cpos##D = lc_cmin##D;       \
       lc_cpos##D < lc_cmax##D;                 \
       lc_cpos##D += lc_cstep##D)
  
#define LC_FINE_SETUP(D)                                                \
  lc_control.fine_loop.min.v[D] = lc_cpos##D;                           \
  lc_control.fine_loop.max.v[D] =                                       \
    lc_min(lc_control.coarse_loop.max.v[D],                             \
           lc_control.fine_loop.min.v[D] +                              \
           lc_control.coarse_loop.step.v[D]);                           \
  /*const*/ ptrdiff_t lc_fmin##D = lc_control.fine_loop.min.v[D];       \
  /*const*/ ptrdiff_t lc_fmax##D = lc_control.fine_loop.max.v[D];       \
  const ptrdiff_t lc_fstep##D = lc_control.fine_loop.step.v[D];         \
  const ptrdiff_t lc_ftoff##D =                                         \
    lc_control.fine_thread.idx.v[D] * lc_control.fine_thread.step.v[D];
#define LC_FINE_LOOP(I, NI, D)                                  \
  for (ptrdiff_t I = lc_fmin##D + lc_ftoff##D;                  \
       I < lc_fmax##D;                                          \
       I += lc_fstep##D)                                        \
  {                                                             \
    const ptrdiff_t NI CCTK_ATTRIBUTE_UNUSED =                  \
      CCTK_BUILTIN_EXPECT(lc_dir##D==0, 1) ? 0 :                \
       lc_dir##D<0 ? I+1 : lc_control.overall.max.v[D]-I;
  
#if VECTORISE_ALIGNED_ARRAYS
  /* Arrays are aligned: fmin0 is the aligned loop boundary; keep it,
     and set up imin to be the intended loop boundary */
#  define LC_ALIGN(i,j,k, vec_imin,vec_imax)                            \
  const ptrdiff_t vec_imin = lc_max(lc_control.overall.min.v[0], lc_fmin0); \
  const ptrdiff_t vec_imax = lc_fmax0;                                  \
  lc_assert(lc_fmin0 >= 0);                                             \
  lc_assert(lc_fmin0 < lc_fmax0);                                       \
  lc_assert(lc_fmax0 <= lc_control.overall.max.v[0]);                   \
  const ptrdiff_t lc_iminpos = lc_fmin0 + lc_ash0 * (j + lc_ash1 * k);  \
  const ptrdiff_t lc_iminoffset CCTK_ATTRIBUTE_UNUSED = lc_iminpos % lc_str0; \
  const int lc_fmax0_is_outer = lc_fmax0 == lc_control.overall.max.v[0]; \
  const ptrdiff_t lc_imaxpos = lc_fmax0 + lc_ash0 * (j + lc_ash1 * k);  \
  const ptrdiff_t lc_imaxoffset CCTK_ATTRIBUTE_UNUSED = lc_imaxpos % lc_str0; \
  lc_assert(lc_iminoffset == 0);                                        \
  if (!lc_fmax0_is_outer) lc_assert(lc_imaxoffset == 0);                \
  lc_assert(vec_imin >= lc_control.overall.min.v[0]);                   \
  lc_assert(vec_imax <= lc_control.overall.max.v[0]);                   \
  lc_assert(vec_imin >= lc_fmin0);                                      \
  lc_assert(vec_imax <= lc_fmax0);                                      \
  lc_assert(vec_imin < vec_imax);
#else
  /* Arrays are not aligned: fine.min[0] and fine.max[0] are the
     intended loop boundaries; override fmin0 and fmax0 to be aligned,
     and set imin and imax; this may move the fine loop boundaries
     except at outer boundaries to avoid partial stores */
#  define LC_ALIGN(i,j,k, vec_imin,vec_imax)                            \
  lc_fmin0 = lc_control.fine_loop.min.v[0];                             \
  lc_fmax0 = lc_control.fine_loop.max.v[0];                             \
  ptrdiff_t vec_imin = lc_fmin0;                                        \
  ptrdiff_t vec_imax = lc_fmax0;                                        \
  lc_assert(lc_fmin0 >= 0);                                             \
  lc_assert(lc_fmin0 < lc_fmax0);                                       \
  lc_assert(lc_fmin0 >= lc_control.overall.min.v[0]);                   \
  lc_assert(lc_fmax0 <= lc_control.overall.max.v[0]);                   \
  const int lc_fmin0_is_outer = lc_fmin0 == lc_control.overall.min.v[0]; \
  const int lc_fmax0_is_outer = lc_fmax0 == lc_control.overall.max.v[0]; \
  const ptrdiff_t lc_iminpos = lc_fmin0 + lc_ash0 * (j + lc_ash1 * k);  \
  const ptrdiff_t lc_iminoffset = lc_iminpos % lc_str0;                 \
  const ptrdiff_t lc_imaxpos = lc_fmax0 + lc_ash0 * (j + lc_ash1 * k);  \
  const ptrdiff_t lc_imaxoffset = lc_imaxpos % lc_str0;                 \
  lc_fmin0 -= lc_iminoffset;                                            \
  if (!lc_fmax0_is_outer) lc_fmax0 -= lc_imaxoffset;                    \
  lc_assert(lc_fmin0 < lc_fmax0);                                       \
  if (!lc_fmin0_is_outer) vec_imin = lc_fmin0;                          \
  if (!lc_fmax0_is_outer) vec_imax = lc_fmax0;                          \
  lc_assert(vec_imin >= lc_control.overall.min.v[0]);                   \
  lc_assert(vec_imax <= lc_control.overall.max.v[0]);                   \
  lc_assert(vec_imin >= lc_fmin0);                                      \
  lc_assert(vec_imax <= lc_fmax0);                                      \
  lc_assert(vec_imin < vec_imax);
#endif
  
#define LC_SELFTEST(i,j,k, vec_imin,vec_imax)                           \
  if (CCTK_BUILTIN_EXPECT(lc_control.selftest_array != NULL, 0)) {      \
    lc_selftest_set(&lc_control, vec_imin,vec_imax, lc_str0, i,j,k);    \
  }
  
  
  
#define LC_LOOP3STR_NORMAL(name,                                \
                           i,j,k, ni,nj,nk,                     \
                           idir_,jdir_,kdir_,                   \
                           imin_,jmin_,kmin_,                   \
                           imax_,jmax_,kmax_,                   \
                           iash_,jash_,kash_,                   \
                           vec_imin,vec_imax, istr_)            \
  do {                                                          \
    typedef int lc_loop3vec_##name;                             \
                                                                \
    const ptrdiff_t lc_dir0 CCTK_ATTRIBUTE_UNUSED = (idir_);    \
    const ptrdiff_t lc_dir1 CCTK_ATTRIBUTE_UNUSED = (jdir_);    \
    const ptrdiff_t lc_dir2 CCTK_ATTRIBUTE_UNUSED = (kdir_);    \
                                                                \
    const ptrdiff_t lc_ash0 CCTK_ATTRIBUTE_UNUSED = (iash_);    \
    const ptrdiff_t lc_ash1 CCTK_ATTRIBUTE_UNUSED = (jash_);    \
    const ptrdiff_t lc_ash2 CCTK_ATTRIBUTE_UNUSED = (kash_);    \
                                                                \
    const ptrdiff_t lc_str0 CCTK_ATTRIBUTE_UNUSED = (istr_);    \
                                                                \
    static struct lc_descr_t* lc_descr = NULL;                  \
    lc_descr_init(&lc_descr, #name, __FILE__, __LINE__);        \
                                                                \
    lc_control_t lc_control;                                    \
    lc_control_init(&lc_control, lc_descr,                      \
                    (imin_), (jmin_), (kmin_),                  \
                    (imax_), (jmax_), (kmax_),                  \
                    lc_ash0, lc_ash1, lc_ash2,                  \
                    lc_str0);                                   \
                                                                \
    /* Multithreading */                                        \
    for (lc_thread_init(&lc_control);                           \
         !lc_thread_done(&lc_control);                          \
         lc_thread_step(&lc_control))                           \
    {                                                           \
                                                                \
      /* Coarse loops */                                        \
      LC_COARSE_SETUP(2)                                        \
      LC_COARSE_SETUP(1)                                        \
      LC_COARSE_SETUP(0)                                        \
      LC_COARSE_LOOP(2) {                                       \
      LC_COARSE_LOOP(1) {                                       \
      LC_COARSE_LOOP(0) {                                       \
                                                                \
        /* Fine loops */                                        \
        LC_FINE_SETUP(2)                                        \
        LC_FINE_SETUP(1)                                        \
        LC_FINE_SETUP(0)                                        \
        LC_FINE_LOOP(k, nk, 2) {                                \
        LC_FINE_LOOP(j, nj, 1) {                                \
        LC_ALIGN(i,j,k, vec_imin,vec_imax)                      \
        LC_FINE_LOOP(i, ni, 0) {                                \
          LC_SELFTEST(i,j,k, vec_imin,vec_imax)                 \
          {
        
#define LC_ENDLOOP3STR_NORMAL(name)                                     \
          }                     /* body */                              \
        }}}}}}                  /* fine */                              \
      }}}                       /* coarse */                            \
    }                           /* multithreading */                    \
    lc_control_finish(&lc_control, lc_descr);                           \
    typedef lc_loop3vec_##name lc_ensure_proper_nesting CCTK_ATTRIBUTE_UNUSED; \
  } while(0)
    
    
    
/* Definitions to ensure compatibility with earlier versions of
   LoopControl */
#define LC_LOOP3VEC(name,                               \
                    i,j,k,                              \
                    imin,jmin,kmin,                     \
                    imax,jmax,kmax,                     \
                    iash,jash,kash,                     \
                    vec_imin,vec_imax, istr)            \
  LC_LOOP3STR_NORMAL(name,                              \
                     i,j,k,                             \
                     lc_ni,lc_nj,lc_nk,                 \
                     0,0,0,                             \
                     imin,jmin,kmin,                    \
                     imax,jmax,kmax,                    \
                     iash,jash,kash,                    \
                     vec_imin,vec_imax, istr)
#define LC_ENDLOOP3VEC(name)                    \
  LC_ENDLOOP3STR_NORMAL(name)

#define LC_LOOP3(name,                          \
                 i,j,k,                         \
                 imin,jmin,kmin,                \
                 imax,jmax,kmax,                \
                 iash,jash,kash)                \
  LC_LOOP3VEC(name,                             \
              i,j,k,                            \
              imin,jmin,kmin,                   \
              imax,jmax,kmax,                   \
              iash,jash,kash,                   \
              lc_vec_imin,lc_vec_imax, 1)
#define LC_ENDLOOP3(name)                       \
  LC_ENDLOOP3VEC(name)
  
  
  
/* Replace CCTK_LOOP macros */
#if !defined CCTK_LOOP3STR_NORMAL || !defined CCTK_ENDLOOP3STR_NORMAL
#  error "internal error"
#endif
#undef CCTK_LOOP3STR_NORMAL
#undef CCTK_ENDLOOP3STR_NORMAL
#define CCTK_LOOP3STR_NORMAL(name,                      \
                             i,j,k, ni,nj,nk,           \
                             idir,jdir,kdir,            \
                             imin,jmin,kmin,            \
                             imax,jmax,kmax,            \
                             iash,jash,kash,            \
                             vec_imin,vec_imax, istr)   \
  LC_LOOP3STR_NORMAL(name,                              \
                     i,j,k, ni,nj,nk,                   \
                     idir,jdir,kdir,                    \
                     imin,jmin,kmin,                    \
                     imax,jmax,kmax,                    \
                     iash,jash,kash,                    \
                     vec_imin,vec_imax, istr)
#define CCTK_ENDLOOP3STR_NORMAL(name)           \
  LC_ENDLOOP3STR_NORMAL(name)
  
  
  
#ifdef __cplusplus 
    }
#endif

#endif /* #ifdef CCODE */

#ifdef FCODE
#  include "loopcontrol_fortran.h"
#endif

#endif  /* #ifndef LOOPCONTROL_H */