aboutsummaryrefslogtreecommitdiff
path: root/src/vectors.h
blob: 444321226e2d2037f1e7322ceaa30d224c91a301 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
#ifndef VECTORS_H
#define VECTORS_H

#include <cctk.h>



// Select architecture-specific vector implementations. Nothing in this
// section is included unless VECTORISE evaluates to non-zero.
#if VECTORISE

// CCTK_REAL4 implementations (vectors-4-*): the first matching
// architecture macro wins; there is no AVX branch for CCTK_REAL4 yet.
/* TODO: support AVX */
#  if defined(__SSE__)          // Intel SSE
#    include "vectors-4-SSE.h"
#  elif defined(__ALTIVEC__)    // Power Altivec
#    include "vectors-4-Altivec.h"
#  endif

// CCTK_REAL8 implementations (vectors-8-*): the first matching
// architecture macro wins.
#  if defined(__AVX__)          // Intel AVX
#    include "vectors-8-AVX.h"
#  elif defined(__SSE2__)       // Intel SSE2
// Without __AVX__ but with __SSE2__, a non-zero VECTORISE_EMULATE_AVX
// selects the AVX header instead of the SSE2 one.
#    if VECTORISE_EMULATE_AVX
#      include "vectors-8-AVX.h"
#    else
#      include "vectors-8-SSE2.h"
#    endif
#  elif defined(_ARCH_450D)     // Blue Gene/P Double Hummer
#    include "vectors-8-DoubleHummer.h"
#  elif defined(__ALTIVEC__) && defined(_ARCH_PWR7) // Power VSX
#    include "vectors-8-VSX.h"
#  endif

#endif

// Default implementation, do not vectorise.
// Each default header is included only when the corresponding
// CCTK_REAL*_VEC_SIZE macro is still undefined at this point (presumably
// the architecture headers define it when they are included -- confirm).
#if ! defined(CCTK_REAL4_VEC_SIZE)
#  include "vectors-4-default.h"
#endif
#if ! defined(CCTK_REAL8_VEC_SIZE)
#  include "vectors-8-default.h"
#endif



// Define macros for CCTK_REAL
//
// Map the precision-agnostic names (vec_*, k*) onto the vec4_*/k4* or
// vec8_*/k8* primitives, depending on which CCTK_REAL_PRECISION_* macro
// is defined. Both branches must provide the same set of names so that
// code written against the generic names builds in either precision.
// Compilation stops with an error if neither precision macro is defined.

#if defined(CCTK_REAL_PRECISION_4)

#  define vec_architecture vec4_architecture

#  define CCTK_REAL_VEC      CCTK_REAL4_VEC
#  define CCTK_REAL_VEC_SIZE CCTK_REAL4_VEC_SIZE

#  define vec_set1 vec4_set1
#  define vec_set  vec4_set

#  define vec_elt0 vec4_elt0
#  define vec_elt  vec4_elt

#  define vec_load                  vec4_load
#  define vec_loadu                 vec4_loadu
#  define vec_loadu_maybe           vec4_loadu_maybe
#  define vec_loadu_maybe3          vec4_loadu_maybe3
#  define vec_store                 vec4_store
#  define vec_store_nta             vec4_store_nta
#  define vec_store_partial_prepare vec4_store_partial_prepare
#  define vec_store_nta_partial     vec4_store_nta_partial
#  define vec_store_nta_partial_lo  vec4_store_nta_partial_lo
#  define vec_store_nta_partial_hi  vec4_store_nta_partial_hi
#  define vec_store_nta_partial_mid vec4_store_nta_partial_mid

#  define kpos k4pos
#  define kneg k4neg

#  define kadd k4add
#  define ksub k4sub
#  define kmul k4mul
#  define kdiv k4div

#  define kmadd  k4madd
#  define kmsub  k4msub
#  define knmadd k4nmadd
#  define knmsub k4nmsub

#  define kacos  k4acos
#  define kacosh k4acosh
#  define kasin  k4asin
#  define kasinh k4asinh
#  define katan  k4atan
#  define katan2 k4atan2
#  define katanh k4atanh
#  define kcos   k4cos
#  define kcosh  k4cosh
#  define kexp   k4exp
#  define kfabs  k4fabs
#  define kfmax  k4fmax
#  define kfmin  k4fmin
#  define kfnabs k4fnabs
#  define klog   k4log
#  define kpow   k4pow
#  define ksin   k4sin
#  define ksinh  k4sinh
#  define ksqrt  k4sqrt
#  define ktan   k4tan
#  define ktanh  k4tanh

#  define kifmsb k4ifmsb

#elif defined(CCTK_REAL_PRECISION_8)

#  define vec_architecture vec8_architecture

#  define CCTK_REAL_VEC      CCTK_REAL8_VEC
#  define CCTK_REAL_VEC_SIZE CCTK_REAL8_VEC_SIZE

#  define vec_set1 vec8_set1
#  define vec_set  vec8_set

#  define vec_elt0 vec8_elt0
#  define vec_elt  vec8_elt

#  define vec_load                  vec8_load
#  define vec_loadu                 vec8_loadu
#  define vec_loadu_maybe           vec8_loadu_maybe
#  define vec_loadu_maybe3          vec8_loadu_maybe3
#  define vec_store                 vec8_store
#  define vec_store_partial_prepare vec8_store_partial_prepare
#  define vec_store_nta             vec8_store_nta
#  define vec_store_nta_partial     vec8_store_nta_partial
#  define vec_store_nta_partial_lo  vec8_store_nta_partial_lo
#  define vec_store_nta_partial_hi  vec8_store_nta_partial_hi
#  define vec_store_nta_partial_mid vec8_store_nta_partial_mid

// Counterpart of the REAL4 kpos alias. k8pos is used when an included
// vectors-8-*.h header supplies it as a macro; otherwise kpos falls back
// to the identity, which is what unary plus computes. (NOTE(review):
// whether the vectors-8-*.h headers define k8pos is not visible from
// this file -- confirm and simplify to a plain alias if they do.)
#  if defined(k8pos)
#    define kpos k8pos
#  else
#    define kpos(x) (x)
#  endif
#  define kneg k8neg

#  define kadd k8add
#  define ksub k8sub
#  define kmul k8mul
#  define kdiv k8div

#  define kmadd  k8madd
#  define kmsub  k8msub
#  define knmadd k8nmadd
#  define knmsub k8nmsub

#  define kacos  k8acos
#  define kacosh k8acosh
#  define kasin  k8asin
#  define kasinh k8asinh
#  define katan  k8atan
#  define katan2 k8atan2
#  define katanh k8atanh
#  define kcos   k8cos
#  define kcosh  k8cosh
#  define kexp   k8exp
#  define kfabs  k8fabs
#  define kfmax  k8fmax
#  define kfmin  k8fmin
#  define kfnabs k8fnabs
#  define klog   k8log
#  define kpow   k8pow
#  define ksin   k8sin
#  define ksinh  k8sinh
#  define ksqrt  k8sqrt
#  define ktan   k8tan
#  define ktanh  k8tanh

#  define kifmsb k8ifmsb

#else

#  error "Unknown CCTK_REAL_PRECISION"

#endif



// Selection helpers layered on kifmsb: kifneg forwards its arguments
// unchanged, kifpos forwards them with the last two exchanged.
// (Presumably kifmsb(x,a,b) yields a where the most significant bit of x
// is set and b otherwise -- confirm against the architecture headers.)
#define kifneg(x,first,second) kifmsb(x,first,second)
#define kifpos(x,first,second) kifmsb(x,second,first)



// vec_index expands to a vec_set call whose arguments are the integers
// 0 .. CCTK_REAL_VEC_SIZE-1 in ascending order. Only the vector sizes
// 1, 2, 4, 8 and 16 are handled; any other value stops compilation.
#if CCTK_REAL_VEC_SIZE == 1
#  define vec_index vec_set(0)
#elif CCTK_REAL_VEC_SIZE == 2
#  define vec_index vec_set(0,1)
#elif CCTK_REAL_VEC_SIZE == 4
#  define vec_index vec_set(0,1,2,3)
#elif CCTK_REAL_VEC_SIZE == 8
#  define vec_index vec_set(0,1,2,3,4,5,6,7)
#elif CCTK_REAL_VEC_SIZE == 16
#  define vec_index vec_set(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15)
#else
#  error "Unsupported vector size"
#endif

  

// Define a class template for easier access from C++

#ifdef __cplusplus

// Generic traits for an arbitrary type T: the "vector" type is T itself
// and holds exactly one element, so every operation maps directly onto
// the corresponding built-in operator.
template<typename T>
struct vecprops {
  typedef T scalar_t;
  typedef T vector_t;

  // Number of scalars per vector; always 1 in the generic case
  static int size() { return 1; }

  // Both load variants simply return the referenced scalar
  static vector_t load (const scalar_t& a) { return a; }
  static vector_t loadu (const scalar_t& a) { return a; }

  // There is only one element, so the index d is ignored
  static scalar_t elt (const vector_t& x, const int d) { return x; }

  // Unary operators
  static vector_t pos (const vector_t& x) { return +x; }
  static vector_t neg (const vector_t& x) { return -x; }

  // Binary arithmetic operators
  static vector_t add (const vector_t& x, const vector_t& y) { return x+y; }
  static vector_t sub (const vector_t& x, const vector_t& y) { return x-y; }
  static vector_t mul (const vector_t& x, const vector_t& y) { return x*y; }
  static vector_t div (const vector_t& x, const vector_t& y) { return x/y; }
};

// Traits for CCTK_REAL4: every member forwards to the matching
// vec4_* / k4* primitive, and size() reports CCTK_REAL4_VEC_SIZE.
template<>
struct vecprops<CCTK_REAL4> {
  typedef CCTK_REAL4     scalar_t;
  typedef CCTK_REAL4_VEC vector_t;

  // Number of CCTK_REAL4 elements per vector
  static int size() { return CCTK_REAL4_VEC_SIZE; }

  // Memory access; a refers to the scalar handed to the load primitive
  static vector_t load (const scalar_t& a) { return vec4_load(a); }
  static vector_t loadu (const scalar_t& a) { return vec4_loadu(a); }

  // Extract element d of x
  static scalar_t elt (const vector_t& x, const int d)
  {
    return vec4_elt(x,d);
  }

  // Unary operators
  static vector_t pos (const vector_t& x) { return k4pos(x); }
  static vector_t neg (const vector_t& x) { return k4neg(x); }

  // Binary arithmetic operators
  static vector_t add (const vector_t& x, const vector_t& y)
  {
    return k4add(x,y);
  }
  static vector_t sub (const vector_t& x, const vector_t& y)
  {
    return k4sub(x,y);
  }
  static vector_t mul (const vector_t& x, const vector_t& y)
  {
    return k4mul(x,y);
  }
  static vector_t div (const vector_t& x, const vector_t& y)
  {
    return k4div(x,y);
  }
};

// Traits for CCTK_REAL8: members forward to the matching vec8_* / k8*
// primitives, and size() reports CCTK_REAL8_VEC_SIZE. The member set
// mirrors the primary template and the CCTK_REAL4 specialization so that
// generic code can use any vecprops<T> member regardless of precision.
template<>
struct vecprops<CCTK_REAL8> {
  typedef CCTK_REAL8     scalar_t;
  typedef CCTK_REAL8_VEC vector_t;
  // Number of CCTK_REAL8 elements per vector
  static inline int size()
  {
    return CCTK_REAL8_VEC_SIZE;
  }
  // Memory access; a refers to the scalar handed to the load primitive
  static inline vector_t load (scalar_t const& a)
  {
    return vec8_load(a);
  }
  static inline vector_t loadu (scalar_t const& a)
  {
    return vec8_loadu(a);
  }
  // Extract element d of x
  static inline scalar_t elt (vector_t const& x, int const d)
  {
    return vec8_elt(x,d);
  }
  // Unary plus. It is the identity, so x is returned directly instead of
  // calling a k8pos primitive whose availability is not visible from
  // this header.
  static inline vector_t pos (vector_t const& x)
  {
    return x;
  }
  static inline vector_t neg (vector_t const& x)
  {
    return k8neg(x);
  }
  // Binary arithmetic operators
  static inline vector_t add (vector_t const& x, vector_t const& y)
  {
    return k8add(x,y);
  }
  static inline vector_t sub (vector_t const& x, vector_t const& y)
  {
    return k8sub(x,y);
  }
  static inline vector_t mul (vector_t const& x, vector_t const& y)
  {
    return k8mul(x,y);
  }
  static inline vector_t div (vector_t const& x, vector_t const& y)
  {
    return k8div(x,y);
  }
};

#endif



// For Kranc
// Everything below is active only when KRANC_C is defined. Each macro is
// #undef'd first, so any earlier definition is replaced by the one here.

#ifdef KRANC_C

// KRANC_DIFF_FUNCTIONS ends up defined only when VECTORISE_INLINE is
// zero or undefined.
#  undef KRANC_DIFF_FUNCTIONS
#  if ! VECTORISE_INLINE
#    define KRANC_DIFF_FUNCTIONS
#  endif

// Sign ignores its argument and expands to a fixed sentinel value
// (presumably so that accidental use of Sign in vectorised code is easy
// to spot -- confirm).
#  undef Sign
#  define Sign(x) -999999999    // poison

// ToReal casts its argument to CCTK_REAL and passes it to vec_set1.
#  undef ToReal
#  define ToReal(x) (vec_set1((CCTK_REAL)(x)))

// KRANC_GFOFFSET3D views var as a byte array, takes the byte at offset
// cdi*i + cdj*j + cdk*k, reinterprets that location as a CCTK_REAL const
// and hands it, together with (i,j,k), to vec_loadu_maybe3.
// cdi, cdj and cdk are not defined here and must be in scope where the
// macro is used (presumably per-direction strides in bytes -- confirm).
#  undef KRANC_GFOFFSET3D
#  define KRANC_GFOFFSET3D(var,i,j,k)                                   \
  vec_loadu_maybe3((i),(j),(k),                                         \
                   *(CCTK_REAL const*)&                                 \
                   ((char const*)(var))[cdi*(i)+cdj*(j)+cdk*(k)])

#endif  // KRANC_C

#endif  // #ifndef VECTORS_H