// -*-C++-*-
// Origin: src/vectors.h, blob f64bf829aee1f7c2ccecc563069a61fd39e0b0c2

#ifndef VECTORS_H
#define VECTORS_H

#include <cctk.h>



// Compile-time assertion: declares an array typedef whose size is -1 (a
// compile error) when x is false and 1 otherwise. The expansion opens an
// unnamed namespace, so the macro can only be used at namespace scope.
#define vec_static_assert(x) namespace { typedef int vsa[(x) ? 1 : -1]; }



// Select the architecture-specific implementation headers. This happens only
// when VECTORISE evaluates to non-zero (an undefined VECTORISE counts as 0).
// Within each chain below the first matching branch wins, so the order of
// the tests is significant.
#if VECTORISE

// Implementations included as vectors-4-*.h
#  if defined __AVX__ && !defined DISABLE_AVX // Intel AVX
#    include "vectors-4-AVX.h"
#  elif defined __SSE__         // Intel SSE
#    include "vectors-4-SSE.h"
#  elif defined __ALTIVEC__     // Power Altivec
#    include "vectors-4-Altivec.h"
#  endif

// Implementations included as vectors-8-*.h
#  if defined __AVX512F__
#    include "vectors-8-AVX512.h"
#  elif defined __MIC__           // Intel MIC
#    include "vectors-8-MIC.h"
#  elif defined __AVX__ && !defined DISABLE_AVX // Intel AVX
#    include "vectors-8-AVX.h"
#  elif defined __SSE2__        // Intel SSE2
#    include "vectors-8-SSE2.h"
#  elif defined __bgq__ && defined __VECTOR4DOUBLE__ // Blue Gene/Q QPX
#    include "vectors-8-QPX.h"
#  elif defined __ALTIVEC__ && defined _ARCH_PWR7 // Power VSX
#    include "vectors-8-VSX.h"
#  elif defined _ARCH_450D      // Blue Gene/P Double Hummer
#    include "vectors-8-DoubleHummer.h"
#  endif

#endif

// Default implementation, do not vectorise
// A default header is included for each of CCTK_REAL4_VEC_SIZE and
// CCTK_REAL8_VEC_SIZE that is still undefined at this point.
// NOTE(review): presumably every architecture header defines the size macro
// for its precision, so that it suppresses the default -- confirm.
#ifndef CCTK_REAL4_VEC_SIZE
#  include "vectors-4-default.h"
#endif
#ifndef CCTK_REAL8_VEC_SIZE
#  include "vectors-8-default.h"
#endif



// Operation counters
// Unless VEC_COUNT was defined before this point, VEC_COUNT(x) expands to
// nothing and the counters below are never touched.
#ifndef VEC_COUNT
#  define VEC_COUNT(x)
#endif
// This expects variables declared as
//    ptrdiff_t vec_op_counter, vec_mem_counter;
// Both macros are void expressions. The trailing "+0" keeps them well-formed
// when VEC_COUNT(...) expands to nothing: the expansion is then ((void)(+0)).
// The increment is CCTK_REAL_VEC_SIZE, which is looked up when the macro is
// used, not when it is defined here.
#define vec_op_inc  ((void)(VEC_COUNT(vec_op_counter+=CCTK_REAL_VEC_SIZE)+0))
#define vec_mem_inc ((void)(VEC_COUNT(vec_mem_counter+=CCTK_REAL_VEC_SIZE)+0))



// Define macros for CCTK_REAL
//
// Maps the precision-neutral names (vec_*, k*, CCTK_REAL_VEC, ...) onto the
// vec4_*/k4* or the vec8_*/k8* names, depending on which of
// CCTK_REAL_PRECISION_4 and CCTK_REAL_PRECISION_8 is defined; if neither is
// defined, compilation stops with an error. Macros written as
// (vec_mem_inc, ...) or (vec_op_inc, ...) evaluate the counter macro first
// and then yield the value of the wrapped call through the comma operator.
// Macros that are plain aliases do not touch the counters.

#if defined CCTK_REAL_PRECISION_4 

#  define vec_architecture vec4_architecture

// Types and sizes
#  define CCTK_REAL_VEC      CCTK_REAL4_VEC
#  define CCTK_REAL_VEC_SIZE CCTK_REAL4_VEC_SIZE
#  define CCTK_INTEGER       CCTK_INTEGER4
#  define CCTK_BOOLEAN       CCTK_BOOLEAN4
#  define CCTK_INTEGER_VEC   CCTK_INTEGER4_VEC
#  define CCTK_BOOLEAN_VEC   CCTK_BOOLEAN4_VEC

#  define vec_set1 vec4_set1
#  define vec_set  vec4_set

#  define vec_elt  vec4_elt
#  define vec_elti vec4_elti
#  define vec_eltb vec4_eltb

// Loads and stores; the function-like macros go through vec_mem_inc, the
// plain aliases do not
#  define vec_load(p)            (vec_mem_inc, vec4_load(p))
#  define vec_loadu(p)           (vec_mem_inc, vec4_loadu(p))
#  define vec_loadu_maybe(off,p) (vec_mem_inc, vec4_loadu_maybe(off,p))
#  define vec_loadu_maybe3(off1,off2,off3,p)            \
  (vec_mem_inc, vec4_loadu_maybe3(off1,off2,off3,p))
#  define vec_store(p,x)             (vec_mem_inc, vec4_store(p,x))
#  define vec_store_nta(p,x)         (vec_mem_inc, vec4_store_nta(p,x))
#  define vec_store_partial_prepare  vec4_store_partial_prepare
#  define vec_store_nta_partial(p,x) (vec_mem_inc, vec4_store_nta_partial(p,x))
#  define vec_store_nta_partial_lo   vec4_store_nta_partial_lo
#  define vec_store_nta_partial_hi   vec4_store_nta_partial_hi
#  define vec_store_nta_partial_mid  vec4_store_nta_partial_mid

// Arithmetic; each of these goes through vec_op_inc once
#  define kneg(x) (vec_op_inc, k4neg(x))

#  define kadd(x,y) (vec_op_inc, k4add(x,y))
#  define ksub(x,y) (vec_op_inc, k4sub(x,y))
#  define kmul(x,y) (vec_op_inc, k4mul(x,y))
#  define kdiv(x,y) (vec_op_inc, k4div(x,y))

// The multiply-add family goes through vec_op_inc twice per use
#  define kmadd(x,y,z)  (vec_op_inc, vec_op_inc, k4madd(x,y,z))
#  define kmsub(x,y,z)  (vec_op_inc, vec_op_inc, k4msub(x,y,z))
#  define knmadd(x,y,z) (vec_op_inc, vec_op_inc, k4nmadd(x,y,z))
#  define knmsub(x,y,z) (vec_op_inc, vec_op_inc, k4nmsub(x,y,z))

// Of the following, only kcopysign, kfabs, kfmax, kfmin and kfnabs go
// through vec_op_inc; the rest are plain aliases
#  define kacos     k4acos
#  define kacosh    k4acosh
#  define kasin     k4asin
#  define kasinh    k4asinh
#  define katan     k4atan
#  define katan2    k4atan2
#  define katanh    k4atanh
#  define kcopysign(x,y) (vec_op_inc, k4copysign(x,y))
#  define kcos      k4cos
#  define kcosh     k4cosh
#  define kexp      k4exp
#  define kfabs(x)   (vec_op_inc, k4fabs(x))
#  define kfmax(x,y) (vec_op_inc, k4fmax(x,y))
#  define kfmin(x,y) (vec_op_inc, k4fmin(x,y))
#  define kfnabs(x)  (vec_op_inc, k4fnabs(x))
#  define klog      k4log
#  define kpow      k4pow
#  define ksin      k4sin
#  define ksinh     k4sinh
#  define ksgn      k4sgn
#  define ksqrt     k4sqrt
#  define ktan      k4tan
#  define ktanh     k4tanh

// kl* and kifthen: plain aliases
#  define klfalse k4lfalse
#  define kltrue  k4ltrue
#  define klnot   k4lnot
#  define kland   k4land
#  define klor    k4lor
#  define klxor   k4lxor
#  define kifthen k4ifthen

// kcmp*: plain aliases
#  define kcmpeq k4cmpeq
#  define kcmpne k4cmpne
#  define kcmpgt k4cmpgt
#  define kcmpge k4cmpge
#  define kcmplt k4cmplt
#  define kcmple k4cmple

#elif defined CCTK_REAL_PRECISION_8 

#  define vec_architecture vec8_architecture

// Types and sizes
#  define CCTK_REAL_VEC      CCTK_REAL8_VEC
#  define CCTK_REAL_VEC_SIZE CCTK_REAL8_VEC_SIZE
#  define CCTK_INTEGER       CCTK_INTEGER8
#  define CCTK_BOOLEAN       CCTK_BOOLEAN8
#  define CCTK_INTEGER_VEC   CCTK_INTEGER8_VEC
#  define CCTK_BOOLEAN_VEC   CCTK_BOOLEAN8_VEC

#  define vec_set1 vec8_set1
#  define vec_set  vec8_set

#  define vec_elt  vec8_elt
#  define vec_elti vec8_elti
#  define vec_eltb vec8_eltb

// Loads and stores; the function-like macros go through vec_mem_inc, the
// plain aliases do not
#  define vec_load(p)                (vec_mem_inc, vec8_load(p))
#  define vec_loadu(p)               (vec_mem_inc, vec8_loadu(p))
#  define vec_loadu_maybe(off,p)     (vec_mem_inc, vec8_loadu_maybe(off,p))
#  define vec_loadu_maybe3(off1,off2,off3,p)            \
  (vec_mem_inc, vec8_loadu_maybe3(off1,off2,off3,p))
#  define vec_store(p,x)             (vec_mem_inc, vec8_store(p,x))
#  define vec_store_nta(p,x)         (vec_mem_inc, vec8_store_nta(p,x))
#  define vec_store_partial_prepare  vec8_store_partial_prepare
#  define vec_store_nta_partial(p,x) (vec_mem_inc, vec8_store_nta_partial(p,x))
#  define vec_store_nta_partial_lo   vec8_store_nta_partial_lo
#  define vec_store_nta_partial_hi   vec8_store_nta_partial_hi
#  define vec_store_nta_partial_mid  vec8_store_nta_partial_mid

// Arithmetic; each of these goes through vec_op_inc once
#  define kneg(x) (vec_op_inc, k8neg(x))

#  define kadd(x,y) (vec_op_inc, k8add(x,y))
#  define ksub(x,y) (vec_op_inc, k8sub(x,y))
#  define kmul(x,y) (vec_op_inc, k8mul(x,y))
#  define kdiv(x,y) (vec_op_inc, k8div(x,y))

// The multiply-add family goes through vec_op_inc twice per use
#  define kmadd(x,y,z)  (vec_op_inc, vec_op_inc, k8madd(x,y,z))
#  define kmsub(x,y,z)  (vec_op_inc, vec_op_inc, k8msub(x,y,z))
#  define knmadd(x,y,z) (vec_op_inc, vec_op_inc, k8nmadd(x,y,z))
#  define knmsub(x,y,z) (vec_op_inc, vec_op_inc, k8nmsub(x,y,z))

// Of the following, only kcopysign, kfabs, kfmax, kfmin and kfnabs go
// through vec_op_inc; the rest are plain aliases
#  define kacos     k8acos
#  define kacosh    k8acosh
#  define kasin     k8asin
#  define kasinh    k8asinh
#  define katan     k8atan
#  define katan2    k8atan2
#  define katanh    k8atanh
#  define kcopysign(x,y) (vec_op_inc, k8copysign(x,y))
#  define kcos      k8cos
#  define kcosh     k8cosh
#  define kexp      k8exp
#  define kfabs(x)   (vec_op_inc, k8fabs(x))
#  define kfmax(x,y) (vec_op_inc, k8fmax(x,y))
#  define kfmin(x,y) (vec_op_inc, k8fmin(x,y))
#  define kfnabs(x)  (vec_op_inc, k8fnabs(x))
#  define klog      k8log
#  define kpow      k8pow
#  define ksin      k8sin
#  define ksinh     k8sinh
#  define ksgn      k8sgn
#  define ksqrt     k8sqrt
#  define ktan      k8tan
#  define ktanh     k8tanh

// kl* and kifthen: plain aliases
#  define klfalse k8lfalse
#  define kltrue  k8ltrue
#  define klnot   k8lnot
#  define kland   k8land
#  define klor    k8lor
#  define klxor   k8lxor
#  define kifthen k8ifthen

// kcmp*: plain aliases
#  define kcmpeq k8cmpeq
#  define kcmpne k8cmpne
#  define kcmpgt k8cmpgt
#  define kcmpge k8cmpge
#  define kcmplt k8cmplt
#  define kcmple k8cmple

#else

#  error "Unknown CCTK_REAL_PRECISION"

#endif



// Deprecated
// kifmsb and kifneg forward to kifthen with the argument order unchanged;
// kifpos forwards with the last two arguments swapped.
#define kifmsb(a,b,c) kifthen(a,b,c)
#define kifneg(a,b,c) kifmsb(a,b,c)
#define kifpos(a,b,c) kifmsb(a,c,b)

// kisgn ignores its argument and expands to the constant -42424242.
// NOTE(review): presumably a deliberately conspicuous placeholder for an
// operation that is not provided -- confirm before relying on it.
#define kisgn(a) (-42424242)



// vec_index calls vec_set with the consecutive integers
// 0 ... CCTK_REAL_VEC_SIZE-1 as arguments. Vector sizes other than 1, 2, 4,
// 8 and 16 are rejected at preprocessing time.
#if CCTK_REAL_VEC_SIZE == 1
#  define vec_index vec_set(0)
#elif CCTK_REAL_VEC_SIZE == 2
#  define vec_index vec_set(0,1)
#elif CCTK_REAL_VEC_SIZE == 4
#  define vec_index vec_set(0,1,2,3)
#elif CCTK_REAL_VEC_SIZE == 8
#  define vec_index vec_set(0,1,2,3,4,5,6,7)
#elif CCTK_REAL_VEC_SIZE == 16
#  define vec_index vec_set(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15)
#else
#  error "Unsupported vector size"
#endif

  

// Define a class template for easier access from C++

#ifdef __cplusplus

#include <cstddef>
#include <cstdlib>

// Generic fallback used when no specialisation exists for T: scalar_t and
// vector_t are both T, size() is 1, and every operation is expressed with
// T's own operators.
template<typename T>
struct vecprops {
  typedef T scalar_t;
  typedef T vector_t;

  // Number of scalars held by one vector_t; always 1 in the fallback
  static inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  std::size_t size() { return 1; }

  // load and loadu are identical here: both return a copy of the scalar
  static inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  vector_t load (scalar_t const& src) { return src; }

  static inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  vector_t loadu (scalar_t const& src) { return src; }

  // Element access; the index argument is ignored
  static inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  scalar_t elt (vector_t const& val, std::ptrdiff_t const /*idx*/)
  { return val; }

  // Unary minus
  static inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  vector_t neg (vector_t const& val) { return -val; }

  // Binary arithmetic
  static inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  vector_t add (vector_t const& lhs, vector_t const& rhs)
  { return lhs+rhs; }

  static inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  vector_t sub (vector_t const& lhs, vector_t const& rhs)
  { return lhs-rhs; }

  static inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  vector_t mul (vector_t const& lhs, vector_t const& rhs)
  { return lhs*rhs; }

  static inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  vector_t div (vector_t const& lhs, vector_t const& rhs)
  { return lhs/rhs; }

  // Multiply-add: lhs*rhs+addend, written as a single expression
  static inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  vector_t madd (vector_t const& lhs, vector_t const& rhs,
                 vector_t const& addend)
  { return lhs*rhs+addend; }
};

// Traits for CCTK_REAL4: vector_t is CCTK_REAL4_VEC, and every operation
// forwards to the vec4_* / k4* name of the same kind.
template<>
struct vecprops<CCTK_REAL4> {
  typedef CCTK_REAL4     scalar_t;
  typedef CCTK_REAL4_VEC vector_t;

  // Value of CCTK_REAL4_VEC_SIZE
  static inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  int size() { return CCTK_REAL4_VEC_SIZE; }

  // Loads, forwarded to vec4_load / vec4_loadu
  static inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  vector_t load (scalar_t const& src) { return vec4_load(src); }

  static inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  vector_t loadu (scalar_t const& src) { return vec4_loadu(src); }

  // Element access, forwarded to vec4_elt
  static inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  scalar_t elt (vector_t const& val, int const idx)
  { return vec4_elt(val,idx); }

  // Unary minus, forwarded to k4neg
  static inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  vector_t neg (vector_t const& val) { return k4neg(val); }

  // Binary arithmetic, forwarded to k4add / k4sub / k4mul / k4div
  static inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  vector_t add (vector_t const& lhs, vector_t const& rhs)
  { return k4add(lhs,rhs); }

  static inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  vector_t sub (vector_t const& lhs, vector_t const& rhs)
  { return k4sub(lhs,rhs); }

  static inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  vector_t mul (vector_t const& lhs, vector_t const& rhs)
  { return k4mul(lhs,rhs); }

  static inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  vector_t div (vector_t const& lhs, vector_t const& rhs)
  { return k4div(lhs,rhs); }

  // Multiply-add, forwarded to k4madd
  static inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  vector_t madd (vector_t const& lhs, vector_t const& rhs,
                 vector_t const& addend)
  { return k4madd(lhs,rhs,addend); }
};

// Traits for CCTK_REAL8: vector_t is CCTK_REAL8_VEC, and every operation
// forwards to the vec8_* / k8* name of the same kind.
template<>
struct vecprops<CCTK_REAL8> {
  typedef CCTK_REAL8     scalar_t;
  typedef CCTK_REAL8_VEC vector_t;

  // Value of CCTK_REAL8_VEC_SIZE
  static inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  int size() { return CCTK_REAL8_VEC_SIZE; }

  // Loads, forwarded to vec8_load / vec8_loadu
  static inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  vector_t load (scalar_t const& src) { return vec8_load(src); }

  static inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  vector_t loadu (scalar_t const& src) { return vec8_loadu(src); }

  // Element access, forwarded to vec8_elt
  static inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  scalar_t elt (vector_t const& val, int const idx)
  { return vec8_elt(val,idx); }

  // Unary minus, forwarded to k8neg
  static inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  vector_t neg (vector_t const& val) { return k8neg(val); }

  // Binary arithmetic, forwarded to k8add / k8sub / k8mul / k8div
  static inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  vector_t add (vector_t const& lhs, vector_t const& rhs)
  { return k8add(lhs,rhs); }

  static inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  vector_t sub (vector_t const& lhs, vector_t const& rhs)
  { return k8sub(lhs,rhs); }

  static inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  vector_t mul (vector_t const& lhs, vector_t const& rhs)
  { return k8mul(lhs,rhs); }

  static inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  vector_t div (vector_t const& lhs, vector_t const& rhs)
  { return k8div(lhs,rhs); }

  // Multiply-add, forwarded to k8madd
  static inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  vector_t madd (vector_t const& lhs, vector_t const& rhs,
                 vector_t const& addend)
  { return k8madd(lhs,rhs,addend); }
};

// Value wrapper around vecprops<T>::vector_t that adds arithmetic operators.
// Every operation is forwarded to the matching static function of
// vecprops<T>.
template<typename T>
struct vectype {
private:
  typedef vecprops<T> props;
public:
  typedef typename props::vector_t vector_t;
  typedef typename props::scalar_t scalar_t;

  // The wrapped vector
  vector_t v;

  // The default constructor gives v no explicit initialiser
  vectype() { }
  vectype(vectype const& other): v(other.v) { }
  // Implicit conversions from and to the underlying vector type
  vectype(vector_t const& raw): v(raw) { }
  operator vector_t() const { return v; }
  vectype& operator=(vectype const& other) { v=other.v; return *this; }

  // Result of vecprops<T>::size(), converted to std::size_t
  inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  std::size_t size() const { return props::size(); }

  // Construct from memory via vecprops<T>::load / loadu
  static inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  vectype load(scalar_t const& src) { return vectype(props::load(src)); }

  static inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  vectype loadu(scalar_t const& src) { return vectype(props::loadu(src)); }

  // Element access via vecprops<T>::elt
  inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  scalar_t elt(std::ptrdiff_t const idx) const { return props::elt(v, idx); }

  // Unary operators
  inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  vectype operator+() const { return *this; }

  inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  vectype operator-() const { return vectype(props::neg(v)); }

  // Binary operators
  inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  vectype operator+(vectype const& rhs) const
  { return vectype(props::add(v, rhs.v)); }

  inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  vectype operator-(vectype const& rhs) const
  { return vectype(props::sub(v, rhs.v)); }

  inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  vectype operator*(vectype const& rhs) const
  { return vectype(props::mul(v, rhs.v)); }

  inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  vectype operator/(vectype const& rhs) const
  { return vectype(props::div(v, rhs.v)); }

  // Compound assignment, expressed through the binary operators above
  inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  vectype& operator+=(vectype const& rhs)
  {
    *this = *this+rhs;
    return *this;
  }

  inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  vectype& operator-=(vectype const& rhs)
  {
    *this = *this-rhs;
    return *this;
  }

  inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  vectype& operator*=(vectype const& rhs)
  {
    *this = *this*rhs;
    return *this;
  }

  inline CCTK_ATTRIBUTE_ALWAYS_INLINE
  vectype& operator/=(vectype const& rhs)
  {
    *this = *this/rhs;
    return *this;
  }
};

#endif



// For Kranc

// Everything in this section is only defined when KRANC_C is defined. The
// macros are #undef'ed first, so any earlier definition is replaced.
#ifdef KRANC_C

// KRANC_DIFF_FUNCTIONS ends up defined exactly when VECTORISE_INLINE is zero
// or undefined
#  undef KRANC_DIFF_FUNCTIONS
#  if ! VECTORISE_INLINE
#    define KRANC_DIFF_FUNCTIONS
#  endif

// ToReal converts x to CCTK_REAL and passes the result to vec_set1
#  undef ToReal
#  define ToReal(x) (vec_set1(CCTK_REAL(x)))

// IfThen(x,y,z) yields y if the scalar condition x is true and z otherwise,
// as a CCTK_REAL_VEC. With the PGI compiler, on Blue Gene/P (_ARCH_450D) and
// on Power7 with Altivec, the selection is done by the inline function
// vec_IfThen; everywhere else the conditional operator is used directly.
// NOTE(review): presumably the conditional operator does not work with the
// vector types on those platforms -- confirm.
#  undef IfThen
#  if (defined __PGI ||                                                 \
       defined _ARCH_450D ||                                            \
       (defined __ALTIVEC__ && defined _ARCH_PWR7))
static inline CCTK_ATTRIBUTE_ALWAYS_INLINE
CCTK_REAL_VEC vec_IfThen(CCTK_BOOLEAN x, CCTK_REAL_VEC y, CCTK_REAL_VEC z)
{
  if (x) return y; else return z;
}
#    define IfThen(x,y,z) vec_IfThen(x,y,z)
#  else
#    define IfThen(x,y,z) ((x) ? CCTK_REAL_VEC(y) : CCTK_REAL_VEC(z))
#  endif

// KRANC_GFOFFSET3D loads, via vec_loadu_maybe3, from the location that lies
// cdi*i+cdj*j+cdk*k bytes after var. The names cdi, cdj and cdk are not
// macro parameters; they must be visible wherever the macro is used.
#  undef KRANC_GFOFFSET3D
#  define KRANC_GFOFFSET3D(var,i,j,k)                                   \
  vec_loadu_maybe3((i),(j),(k),                                         \
                   *(CCTK_REAL const*)&                                 \
                   ((char const*)(var))[cdi*(i)+cdj*(j)+cdk*(k)])

#endif  // KRANC_C

#endif  // #ifndef VECTORS_H