aboutsummaryrefslogtreecommitdiff
path: root/src/vectors.h
blob: c87446e26e7ed1745ee30b7c6854e089673168b7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
#ifndef VECTORS_H
#define VECTORS_H

#include <cctk.h>



// Architecture-specific implementations.  This whole section is skipped
// when VECTORISE is zero or undefined.  Two independent selection chains
// follow: the first picks at most one header from the "vectors-4-*"
// family, the second at most one from the "vectors-8-*" family.  Within
// each chain the first matching condition wins, so the order of the
// branches matters.  If no branch matches, nothing is included here.
#if VECTORISE

/* TODO: support AVX (this first chain has no AVX branch yet) */
#  if defined(__SSE__)          // Intel SSE
#    include "vectors-4-SSE.h"
#  elif defined(__ALTIVEC__)    // Power Altivec
#    include "vectors-4-Altivec.h"
#  endif

#  if defined(__AVX__)          // Intel AVX
#    include "vectors-8-AVX.h"
#  elif defined(__SSE2__)       // Intel SSE2
     // Without __AVX__, the AVX header is still used when
     // VECTORISE_EMULATE_AVX is non-zero; otherwise the SSE2 header.
#    if VECTORISE_EMULATE_AVX
#      include "vectors-8-AVX.h"
#    else
#      include "vectors-8-SSE2.h"
#    endif
#  elif defined(__bgq__) && defined(__VECTOR4DOUBLE__) // Blue Gene/Q QPX
#    include "vectors-8-QPX.h"
#  elif defined(__ALTIVEC__) && defined(_ARCH_PWR7) // Power VSX
#    include "vectors-8-VSX.h"
#  elif defined(_ARCH_450D)     // Blue Gene/P Double Hummer
#    include "vectors-8-DoubleHummer.h"
#  endif

#endif

// Default implementation, do not vectorise.
// The default headers are pulled in only when CCTK_REAL4_VEC_SIZE or
// CCTK_REAL8_VEC_SIZE, respectively, is still undefined at this point.
// NOTE(review): presumably each architecture-specific header defines the
// matching *_VEC_SIZE macro, which is what suppresses the fallback --
// confirm against those headers.
#if ! defined(CCTK_REAL4_VEC_SIZE)
#  include "vectors-4-default.h"
#endif
#if ! defined(CCTK_REAL8_VEC_SIZE)
#  include "vectors-8-default.h"
#endif



// Define macros for CCTK_REAL
//
// Every precision-agnostic name below (vec_*, k*, CCTK_*_VEC, ...) is a
// plain alias for its "4" or "8" counterpart.  The "4" set is chosen
// when CCTK_REAL_PRECISION_4 is defined, the "8" set when
// CCTK_REAL_PRECISION_8 is defined; if neither is defined compilation
// stops with an #error.  Both branches define the same set of names.

#if defined(CCTK_REAL_PRECISION_4)

// --- aliases for the "4" variants ---

#  define vec_architecture vec4_architecture

#  define CCTK_REAL_VEC      CCTK_REAL4_VEC
#  define CCTK_REAL_VEC_SIZE CCTK_REAL4_VEC_SIZE
#  define CCTK_INTEGER       CCTK_INTEGER4
#  define CCTK_BOOLEAN       CCTK_BOOLEAN4
#  define CCTK_INTEGER_VEC   CCTK_INTEGER4_VEC
#  define CCTK_BOOLEAN_VEC   CCTK_BOOLEAN4_VEC

#  define vec_set1 vec4_set1
#  define vec_set  vec4_set

#  define vec_elt0 vec4_elt0
#  define vec_elt  vec4_elt

#  define vec_load                  vec4_load
#  define vec_loadu                 vec4_loadu
#  define vec_loadu_maybe           vec4_loadu_maybe
#  define vec_loadu_maybe3          vec4_loadu_maybe3
#  define vec_store                 vec4_store
#  define vec_store_nta             vec4_store_nta
#  define vec_store_partial_prepare vec4_store_partial_prepare
#  define vec_store_nta_partial     vec4_store_nta_partial
#  define vec_store_nta_partial_lo  vec4_store_nta_partial_lo
#  define vec_store_nta_partial_hi  vec4_store_nta_partial_hi
#  define vec_store_nta_partial_mid vec4_store_nta_partial_mid

#  define kneg k4neg

#  define kadd k4add
#  define ksub k4sub
#  define kmul k4mul
#  define kdiv k4div

#  define kmadd  k4madd
#  define kmsub  k4msub
#  define knmadd k4nmadd
#  define knmsub k4nmsub

#  define kacos     k4acos
#  define kacosh    k4acosh
#  define kasin     k4asin
#  define kasinh    k4asinh
#  define katan     k4atan
#  define katan2    k4atan2
#  define katanh    k4atanh
#  define kcopysign k4copysign
#  define kcos      k4cos
#  define kcosh     k4cosh
#  define kexp      k4exp
#  define kfabs     k4fabs
#  define kfmax     k4fmax
#  define kfmin     k4fmin
#  define kfnabs    k4fnabs
#  define klog      k4log
#  define kpow      k4pow
#  define ksin      k4sin
#  define ksinh     k4sinh
#  define ksgn      k4sgn
#  define ksqrt     k4sqrt
#  define ktan      k4tan
#  define ktanh     k4tanh

#  define klfalse k4lfalse
#  define kltrue  k4ltrue 
#  define klnot   k4lnot  
#  define kland   k4land  
#  define klor    k4lor   
#  define klxor   k4lxor  
#  define kifthen k4ifthen

#  define kcmpeq k4cmpeq
#  define kcmpne k4cmpne
#  define kcmpgt k4cmpgt
#  define kcmpge k4cmpge
#  define kcmplt k4cmplt
#  define kcmple k4cmple

#elif defined(CCTK_REAL_PRECISION_8)

// --- aliases for the "8" variants ---

#  define vec_architecture vec8_architecture

#  define CCTK_REAL_VEC      CCTK_REAL8_VEC
#  define CCTK_REAL_VEC_SIZE CCTK_REAL8_VEC_SIZE
#  define CCTK_INTEGER       CCTK_INTEGER8
#  define CCTK_BOOLEAN       CCTK_BOOLEAN8
#  define CCTK_INTEGER_VEC   CCTK_INTEGER8_VEC
#  define CCTK_BOOLEAN_VEC   CCTK_BOOLEAN8_VEC

#  define vec_set1 vec8_set1
#  define vec_set  vec8_set

#  define vec_elt0 vec8_elt0
#  define vec_elt  vec8_elt

#  define vec_load                  vec8_load
#  define vec_loadu                 vec8_loadu
#  define vec_loadu_maybe           vec8_loadu_maybe
#  define vec_loadu_maybe3          vec8_loadu_maybe3
#  define vec_store                 vec8_store
#  define vec_store_partial_prepare vec8_store_partial_prepare
#  define vec_store_nta             vec8_store_nta
#  define vec_store_nta_partial     vec8_store_nta_partial
#  define vec_store_nta_partial_lo  vec8_store_nta_partial_lo
#  define vec_store_nta_partial_hi  vec8_store_nta_partial_hi
#  define vec_store_nta_partial_mid vec8_store_nta_partial_mid

#  define kneg k8neg

#  define kadd k8add
#  define ksub k8sub
#  define kmul k8mul
#  define kdiv k8div

#  define kmadd  k8madd
#  define kmsub  k8msub
#  define knmadd k8nmadd
#  define knmsub k8nmsub

#  define kacos     k8acos
#  define kacosh    k8acosh
#  define kasin     k8asin
#  define kasinh    k8asinh
#  define katan     k8atan
#  define katan2    k8atan2
#  define katanh    k8atanh
#  define kcopysign k8copysign
#  define kcos      k8cos
#  define kcosh     k8cosh
#  define kexp      k8exp
#  define kfabs     k8fabs
#  define kfmax     k8fmax
#  define kfmin     k8fmin
#  define kfnabs    k8fnabs
#  define klog      k8log
#  define kpow      k8pow
#  define ksin      k8sin
#  define ksinh     k8sinh
#  define ksgn      k8sgn
#  define ksqrt     k8sqrt
#  define ktan      k8tan
#  define ktanh     k8tanh

#  define klfalse k8lfalse
#  define kltrue  k8ltrue 
#  define klnot   k8lnot  
#  define kland   k8land  
#  define klor    k8lor   
#  define klxor   k8lxor  
#  define kifthen k8ifthen

#  define kcmpeq k8cmpeq
#  define kcmpne k8cmpne
#  define kcmpgt k8cmpgt
#  define kcmpge k8cmpge
#  define kcmplt k8cmplt
#  define kcmple k8cmple

#else

// Neither precision macro is defined: refuse to compile.
#  error "Unknown CCTK_REAL_PRECISION"

#endif



// Deprecated
// kifmsb forwards its three arguments unchanged to kifthen.
// kifneg is a synonym for kifmsb; kifpos is kifmsb with the second and
// third arguments exchanged.
#define kifmsb(a,b,c) kifthen(a,b,c)
#define kifneg(a,b,c) kifmsb(a,b,c)
#define kifpos(a,b,c) kifmsb(a,c,b)

// kisgn ignores its argument and always expands to the constant
// -42424242.
// NOTE(review): presumably a deliberately bogus placeholder so that any
// remaining use is noticed -- confirm before relying on it.
#define kisgn(a) (-42424242)



// vec_index expands to a vec_set(...) call whose arguments are the
// consecutive integers 0 .. CCTK_REAL_VEC_SIZE-1, one per vector
// element.  Only vector sizes 1, 2, 4, 8 and 16 are handled; any other
// size stops compilation with an #error.
// NOTE(review): presumably this yields a vector in which each element
// holds its own index -- confirm the argument order of vec_set.
#if CCTK_REAL_VEC_SIZE == 1
#  define vec_index vec_set(0)
#elif CCTK_REAL_VEC_SIZE == 2
#  define vec_index vec_set(0,1)
#elif CCTK_REAL_VEC_SIZE == 4
#  define vec_index vec_set(0,1,2,3)
#elif CCTK_REAL_VEC_SIZE == 8
#  define vec_index vec_set(0,1,2,3,4,5,6,7)
#elif CCTK_REAL_VEC_SIZE == 16
#  define vec_index vec_set(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15)
#else
#  error "Unsupported vector size"
#endif

  

// Define a class template for easier access from C++

#ifdef __cplusplus

// Generic vector properties for an arbitrary type T.
//
// In this primary template the "vector" type is T itself, i.e. a vector
// with exactly one element, and every operation is expressed through
// the corresponding built-in operator of T.  Explicit specialisations
// may replace this with real vector types.
template<typename T>
struct vecprops {
  typedef T scalar_t;
  typedef T vector_t;

  // Number of scalar elements held by one vector_t: always 1 here.
  static int size() { return 1; }

  // load and loadu both return a copy of the referenced scalar.
  static vector_t load (scalar_t const& src)  { return src; }
  static vector_t loadu (scalar_t const& src) { return src; }

  // Element access.  The element index (second argument) is ignored
  // because there is only a single element.
  static scalar_t elt (vector_t const& v, int const /*d*/) { return v; }

  // Unary minus.
  static vector_t neg (vector_t const& v) { return -v; }

  // Binary arithmetic, mapped onto operators +, -, * and / of T.
  static vector_t add (vector_t const& lhs, vector_t const& rhs)
  {
    return lhs + rhs;
  }
  static vector_t sub (vector_t const& lhs, vector_t const& rhs)
  {
    return lhs - rhs;
  }
  static vector_t mul (vector_t const& lhs, vector_t const& rhs)
  {
    return lhs * rhs;
  }
  static vector_t div (vector_t const& lhs, vector_t const& rhs)
  {
    return lhs / rhs;
  }
};

// vecprops specialisation for CCTK_REAL4.
//
// Scalars are CCTK_REAL4, vectors are CCTK_REAL4_VEC, and every member
// simply forwards to the matching vec4_* / k4* primitive.  The
// parameter names are kept as-is on purpose: the primitives may be
// macros, and their expansion sees these identifiers textually.
template<>
struct vecprops<CCTK_REAL4> {
  typedef CCTK_REAL4     scalar_t;
  typedef CCTK_REAL4_VEC vector_t;

  // Number of CCTK_REAL4 elements per vector.
  static int size() { return CCTK_REAL4_VEC_SIZE; }

  // Loads, forwarded to vec4_load / vec4_loadu.
  static vector_t load (scalar_t const& a)  { return vec4_load(a); }
  static vector_t loadu (scalar_t const& a) { return vec4_loadu(a); }

  // Extract element d of x via vec4_elt.
  static scalar_t elt (vector_t const& x, int const d)
  {
    return vec4_elt(x,d);
  }

  // Arithmetic, forwarded to the k4* primitives.
  static vector_t neg (vector_t const& x) { return k4neg(x); }
  static vector_t add (vector_t const& x, vector_t const& y)
  {
    return k4add(x,y);
  }
  static vector_t sub (vector_t const& x, vector_t const& y)
  {
    return k4sub(x,y);
  }
  static vector_t mul (vector_t const& x, vector_t const& y)
  {
    return k4mul(x,y);
  }
  static vector_t div (vector_t const& x, vector_t const& y)
  {
    return k4div(x,y);
  }
};

// vecprops specialisation for CCTK_REAL8.
//
// Scalars are CCTK_REAL8, vectors are CCTK_REAL8_VEC, and every member
// simply forwards to the matching vec8_* / k8* primitive.  The
// parameter names are kept as-is on purpose: the primitives may be
// macros, and their expansion sees these identifiers textually.
template<>
struct vecprops<CCTK_REAL8> {
  typedef CCTK_REAL8     scalar_t;
  typedef CCTK_REAL8_VEC vector_t;

  // Number of CCTK_REAL8 elements per vector.
  static int size() { return CCTK_REAL8_VEC_SIZE; }

  // Loads, forwarded to vec8_load / vec8_loadu.
  static vector_t load (scalar_t const& a)  { return vec8_load(a); }
  static vector_t loadu (scalar_t const& a) { return vec8_loadu(a); }

  // Extract element d of x via vec8_elt.
  static scalar_t elt (vector_t const& x, int const d)
  {
    return vec8_elt(x,d);
  }

  // Arithmetic, forwarded to the k8* primitives.
  static vector_t neg (vector_t const& x) { return k8neg(x); }
  static vector_t add (vector_t const& x, vector_t const& y)
  {
    return k8add(x,y);
  }
  static vector_t sub (vector_t const& x, vector_t const& y)
  {
    return k8sub(x,y);
  }
  static vector_t mul (vector_t const& x, vector_t const& y)
  {
    return k8mul(x,y);
  }
  static vector_t div (vector_t const& x, vector_t const& y)
  {
    return k8div(x,y);
  }
};

#endif



// For Kranc

// Overrides that take effect only when KRANC_C is defined by the
// including source file.
#ifdef KRANC_C

// KRANC_DIFF_FUNCTIONS: any earlier definition is discarded; the macro
// is then defined (empty) exactly when VECTORISE_INLINE is zero or
// undefined.  Its consumer lives outside this file.
#  undef KRANC_DIFF_FUNCTIONS
#  if ! VECTORISE_INLINE
#    define KRANC_DIFF_FUNCTIONS
#  endif

// ToReal(x): replaces any earlier ToReal; converts x with CCTK_REAL(x)
// and passes the result to vec_set1.
// NOTE(review): CCTK_REAL(x) reads as a C++ function-style cast --
// confirm that sources defining KRANC_C are compiled as C++.
#  undef ToReal
#  define ToReal(x) (vec_set1(CCTK_REAL(x)))

// KRANC_GFOFFSET3D(var,i,j,k): replaces any earlier definition.  Views
// var as an array of char, indexes it at cdi*i + cdj*j + cdk*k (so the
// offset is counted in chars, not in CCTK_REAL elements), reinterprets
// that location as a CCTK_REAL const, and passes it together with
// (i,j,k) to vec_loadu_maybe3.
// cdi, cdj and cdk are NOT macro parameters: they must be visible in
// the scope where the macro is expanded.
#  undef KRANC_GFOFFSET3D
#  define KRANC_GFOFFSET3D(var,i,j,k)                                   \
  vec_loadu_maybe3((i),(j),(k),                                         \
                   *(CCTK_REAL const*)&                                 \
                   ((char const*)(var))[cdi*(i)+cdj*(j)+cdk*(k)])

#endif  // KRANC_C

#endif  // #ifndef VECTORS_H