1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
|
// Fallback vectorisation implementation: Do not vectorise
// We use macros here, so that we are not surprised by compilers which
// don't like to inline functions. This should also make debug builds
// (which may not inline) more efficient.
#include <assert.h>
#include <math.h>
// Human-readable description of this back end
#define vec4_architecture "scalar (no vectorisation, 32-bit precision)"

// The "vector" type is simply the scalar type CCTK_REAL4 (declared
// elsewhere; presumably the project's single-precision real -- confirm)
#define CCTK_REAL4_VEC CCTK_REAL4

// A vector holds exactly one element in this fallback
#define CCTK_REAL4_VEC_SIZE 1
// Build a vector whose (single) element is the scalar s
#define vec4_set1(s) (s)
// Build a vector from CCTK_REAL4_VEC_SIZE scalars; here that is one scalar
#define vec4_set(s) (s)
// Access vector elements. With a single element the vector itself is
// the element; the index argument of vec4_elt is ignored (and never
// evaluated).
#define vec4_elt0(v) (v)
#define vec4_elt(v,idx) (v)
// Loads. Each macro expands to its memory operand unchanged, so the
// operand is the object to read (not a pointer to it).

// Load an aligned vector from memory
#define vec4_load(src) (src)
// Load an unaligned vector from memory
#define vec4_loadu(src) (src)
// Load a vector from memory that may or may not be aligned, as
// decided by the offset(s) and the vector size. These are useful
// e.g. for loading neighbouring grid points while evaluating finite
// differencing stencils. In this scalar fallback the offsets are
// ignored (and never evaluated).
#define vec4_loadu_maybe(off,src) (src)
#define vec4_loadu_maybe3(off1,off2,off3,src) (src)
// Stores. With one element per vector every full store is a plain
// assignment to the lvalue given as first argument.

// Aligned store
#define vec4_store(dst,v) ((dst)=(v))
// Unaligned store (the "u" suffix mirrors vec4_loadu)
#define vec4_storeu(dst,v) ((dst)=(v))
// "nta" store; presumably a non-temporal store on real vector
// back ends (TODO confirm) -- here it is an ordinary assignment
#define vec4_store_nta(dst,v) ((dst)=(v))

// Partial stores. The prepare step has nothing to do and yields 0
// without evaluating its arguments; the partial store itself writes
// the whole (one-element) vector.
#define vec4_store_partial_prepare(i,imin,imax) (0)
#define vec4_store_nta_partial(dst,v) (vec4_store_nta(dst,v))
// Store the n lower elements of a vector to memory.
// Not supported in this fallback: expands to assert(0).
#define vec4_store_nta_partial_lo(dst,v,n) (assert(0))
// Store the n higher elements of a vector into memory. This stores
// the vector elements into memory locations as if element 0 were
// stored at the destination.
// Not supported in this fallback: expands to assert(0).
#define vec4_store_nta_partial_hi(dst,v,n) (assert(0))
// Store a middle range of elements (bounds nlo, nhi).
// Not supported in this fallback: expands to assert(0).
#define vec4_store_nta_partial_mid(dst,v,nlo,nhi) (assert(0))
// Arithmetic operators, mapped directly onto the C operators

// Unary
#define k4pos(a) (+(a))
#define k4neg(a) (-(a))

// Binary
#define k4add(a,b) ((a)+(b))
#define k4sub(a,b) ((a)-(b))
#define k4mul(a,b) ((a)*(b))
#define k4div(a,b) ((a)/(b))

// Fused multiply-add family, defined as [+-]a*b[+-]c:
//   k4madd  = +a*b + c      k4msub  = +a*b - c
//   k4nmadd = -a*b - c      k4nmsub = -a*b + c
// Written here as ordinary multiply and add expressions.
#define k4madd(a,b,c) (+(a)*(b)+(c))
#define k4msub(a,b,c) (+(a)*(b)-(c))
#define k4nmadd(a,b,c) (-(a)*(b)-(c))
#define k4nmsub(a,b,c) (-(a)*(b)+(c))
// Elementary functions: each macro forwards to the single-precision
// ("f"-suffixed) routine from <math.h>

// Inverse trigonometric and inverse hyperbolic
#define k4acos(v) (acosf(v))
#define k4acosh(v) (acoshf(v))
#define k4asin(v) (asinf(v))
#define k4asinh(v) (asinhf(v))
#define k4atan(v) (atanf(v))
// Arguments are passed to atan2f in the order given
#define k4atan2(v,w) (atan2f(v,w))
#define k4atanh(v) (atanhf(v))

// Trigonometric, hyperbolic and exponential
#define k4cos(v) (cosf(v))
#define k4cosh(v) (coshf(v))
#define k4exp(v) (expf(v))

// Absolute value, maximum and minimum; k4fnabs is the negated
// absolute value
#define k4fabs(v) (fabsf(v))
#define k4fmax(v,w) (fmaxf(v,w))
#define k4fmin(v,w) (fminf(v,w))
#define k4fnabs(v) (-fabsf(v))

// Logarithm, power, remaining trigonometric/hyperbolic, square root
#define k4log(v) (logf(v))
// Base first, exponent second
#define k4pow(v,e) (powf(v,e))
#define k4sin(v) (sinf(v))
#define k4sinh(v) (sinhf(v))
#define k4sqrt(v) (sqrtf(v))
#define k4tan(v) (tanf(v))
#define k4tanh(v) (tanhf(v))
// k4sgn(v): sign-bit test via signbit(); the result is nonzero/true
// when the sign bit of v is set (this includes negative zero).
#ifndef __cplusplus
# define k4sgn(v) (signbit(v))
#else
// C++: pull in namespace std inside a statement expression so that
// signbit resolves to the std:: version
# define k4sgn(v) ({ using namespace std; signbit(v); })
#endif
// k4ifmsb(c,t,f): select t if the sign bit of c is set, otherwise f.
// Only the selected branch is evaluated.
#define k4ifmsb(c,t,f) (k4sgn(c)?(t):(f))
|