about summary refs log tree commit diff
path: root/src/vectors-4-SSE.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/vectors-4-SSE.h')
-rw-r--r-- src/vectors-4-SSE.h 112
1 files changed, 56 insertions, 56 deletions
diff --git a/src/vectors-4-SSE.h b/src/vectors-4-SSE.h
index 56ecb22..9152c55 100644
--- a/src/vectors-4-SSE.h
+++ b/src/vectors-4-SSE.h
@@ -58,20 +58,20 @@
// original order is 0123
#define vec4_swap1032(x_) \
({ \
- CCTK_REAL4_VEC const xx=(x_); \
- CCTK_REAL4_VEC const x=xx; \
+ CCTK_REAL4_VEC const x__=(x_); \
+ CCTK_REAL4_VEC const x=x__; \
_mm_shuffle_ps(x,x, _MM_SHUFFLE(2,3,0,1)); \
})
#define vec4_swap2301(x_) \
({ \
- CCTK_REAL4_VEC const xx=(x_); \
- CCTK_REAL4_VEC const x=xx; \
+ CCTK_REAL4_VEC const x__=(x_); \
+ CCTK_REAL4_VEC const x=x__; \
_mm_shuffle_ps(x,x, _MM_SHUFFLE(1,0,3,2)); \
})
#define vec4_swap3210(x_) \
({ \
- CCTK_REAL4_VEC const xx=(x_); \
- CCTK_REAL4_VEC const x=xx; \
+ CCTK_REAL4_VEC const x__=(x_); \
+ CCTK_REAL4_VEC const x=x__; \
_mm_shuffle_ps(x,x, _MM_SHUFFLE(0,1,2,3)); \
})
@@ -92,8 +92,8 @@
#if defined(__PGI)
# define vec4_elt(x_,d) \
({ \
- CCTK_REAL4_VEC const xx=(x_); \
- CCTK_REAL4_VEC const x=xx; \
+ CCTK_REAL4_VEC const x__=(x_); \
+ CCTK_REAL4_VEC const x=x__; \
CCTK_REAL4 a; \
if (d==0) a=vec4_elt0(x); \
else if (d==1) a=vec4_elt1(x); \
@@ -104,8 +104,8 @@
#else
# define vec4_elt(x_,d) \
({ \
- CCTK_REAL4_VEC const xx=(x_); \
- CCTK_REAL4_VEC const x=xx; \
+ CCTK_REAL4_VEC const x__=(x_); \
+ CCTK_REAL4_VEC const x=x__; \
CCTK_REAL4 a; \
switch (d) { \
case 0: a=vec4_elt0(x); break; \
@@ -132,8 +132,8 @@
#else
# define vec4_load_off1(p_) \
({ \
- CCTK_REAL4 const& pp=(p_); \
- CCTK_REAL4 const& p=pp; \
+ CCTK_REAL4 const& p__=(p_); \
+ CCTK_REAL4 const& p=p__; \
CCTK_REAL4_VEC const lo=vec4_load((&p)[-1]); \
CCTK_REAL4_VEC const hi=vec4_load((&p)[+3]); \
assert(0); \
@@ -142,16 +142,16 @@
})
# define vec4_load_off2(p_) \
({ \
- CCTK_REAL4 const& pp=(p_); \
- CCTK_REAL4 const& p=pp; \
+ CCTK_REAL4 const& p__=(p_); \
+ CCTK_REAL4 const& p=p__; \
CCTK_REAL4_VEC const lo=vec4_load((&p)[-2]); \
CCTK_REAL4_VEC const hi=vec4_load((&p)[+2]); \
_mm_shuffle_ps(lo,hi, _MM_SHUFFLE(1,0,3,2)); \
})
# define vec4_load_off1(p_) \
({ \
- CCTK_REAL4 const& pp=(p_); \
- CCTK_REAL4 const& p=pp; \
+ CCTK_REAL4 const& p__=(p_); \
+ CCTK_REAL4 const& p=p__; \
CCTK_REAL4_VEC const lo=vec4_load((&p)[-1]); \
CCTK_REAL4_VEC const hi=vec4_load((&p)[+3]); \
assert(0); \
@@ -169,8 +169,8 @@
#else
# define vec4_loadu_maybe(off,p_) \
({ \
- CCTK_REAL4 const& pp=(p_); \
- CCTK_REAL4 const& p=pp; \
+ CCTK_REAL4 const& p__=(p_); \
+ CCTK_REAL4 const& p=p__; \
(off) % CCTK_REAL4_VEC_SIZE == 0 ? \
vec4_load(p) : \
vec4_loadu(p); \
@@ -190,10 +190,10 @@
#if ! VECTORISE_STREAMING_STORES || ! defined(__SSE4A__)
# define vec4_store_nta_partial_lo(p_,x_,n) \
({ \
- CCTK_REAL4 const& pp=(p_); \
- CCTK_REAL4 const& p=pp; \
- CCTK_REAL4_VEC const xx=(x_); \
- CCTK_REAL4_VEC const x=xx; \
+ CCTK_REAL4 const& p__=(p_); \
+ CCTK_REAL4_VEC const x__=(x_); \
+ CCTK_REAL4 const& p=p__; \
+ CCTK_REAL4_VEC const x=x__; \
switch (n) { \
case 1: (&p)[0]=vec4_elt0(x); break; \
case 2: _mm_storel_ps(&p,x); break; \
@@ -202,10 +202,10 @@
})
# define vec4_store_nta_partial_hi(p_,x_,n) \
({ \
- CCTK_REAL4 const& pp=(p_); \
- CCTK_REAL4 const& p=pp; \
- CCTK_REAL4_VEC const xx=(x_); \
- CCTK_REAL4_VEC const x=xx; \
+ CCTK_REAL4 const& p__=(p_); \
+ CCTK_REAL4_VEC const x__=(x_); \
+ CCTK_REAL4 const& p=p__; \
+ CCTK_REAL4_VEC const x=x__; \
switch (n) { \
case 1: (&p)[3]=vec4_elt3(x); break; \
case 2: _mm_storeh_ps(&p+2,x); break; \
@@ -215,10 +215,10 @@
#else
# define vec4_store_nta_partial_lo(p_,x_,n) \
({ \
- CCTK_REAL4 const& pp=(p_); \
- CCTK_REAL4 const& p=pp; \
- CCTK_REAL4_VEC const xx=(x_); \
- CCTK_REAL4_VEC const x=xx; \
+ CCTK_REAL4 const& p__=(p_); \
+ CCTK_REAL4_VEC const x__=(x_); \
+ CCTK_REAL4 const& p=p__; \
+ CCTK_REAL4_VEC const x=x__; \
switch (n) { \
case 1: \
_mm_stream_ss(&p,x); \
@@ -234,10 +234,10 @@
})
# define vec4_store_nta_partial_hi(p_,x_,n) \
({ \
- CCTK_REAL4 const& pp=(p_); \
- CCTK_REAL4 const& p=pp; \
- CCTK_REAL4_VEC const xx=(x_); \
- CCTK_REAL4_VEC const x=xx; \
+ CCTK_REAL4 const& p__=(p_); \
+ CCTK_REAL4_VEC const x__=(x_); \
+ CCTK_REAL4 const& p=p__; \
+ CCTK_REAL4_VEC const x=x__; \
switch (n) { \
case 1: \
_mm_stream_ss(&p+3, vec4_swap3210(x)); \
@@ -301,8 +301,8 @@ static const union {
// Expensive functions
#define K4REPL(f,x_) \
({ \
- CCTK_REAL4_VEC const xx=(x_); \
- CCTK_REAL4_VEC const x=xx; \
+ CCTK_REAL4_VEC const x__=(x_); \
+ CCTK_REAL4_VEC const x=x__; \
vec4_set(f(vec4_elt0(x)), \
f(vec4_elt1(x)), \
f(vec4_elt2(x)), \
@@ -310,10 +310,10 @@ static const union {
})
#define K4REPL2(f,x_,a_) \
({ \
- CCTK_REAL4_VEC const xx=(x_); \
- CCTK_REAL4_VEC const x=xx; \
- CCTK_REAL4 const aa=(a_); \
- CCTK_REAL4 const a=aa; \
+ CCTK_REAL4_VEC const x__=(x_); \
+ CCTK_REAL4 const a__=(a_); \
+ CCTK_REAL4_VEC const x=x__; \
+ CCTK_REAL4 const a=a__; \
vec4_set(f(vec4_elt0(x),a), \
f(vec4_elt1(x),a), \
f(vec4_elt2(x),a), \
@@ -335,26 +335,26 @@ static const union {
# endif
# define k4ifpos(x,y,z) \
({ \
- CCTK_REAL4_VEC const xx=(x_); \
- CCTK_REAL4_VEC const x=xx; \
- CCTK_REAL4_VEC const yy=(y_); \
- CCTK_REAL4_VEC const y=yy; \
- CCTK_REAL4_VEC const zz=(z_); \
- CCTK_REAL4_VEC const z=zz; \
- vec4_set(k4sgn(vec4_elt0(x)) ? vec4_elt0(z) : vec4_elt0(y), \
- k4sgn(vec4_elt1(x)) ? vec4_elt1(z) : vec4_elt1(y), \
- k4sgn(vec4_elt2(x)) ? vec4_elt2(z) : vec4_elt2(y), \
- k4sgn(vec4_elt3(x)) ? vec4_elt3(z) : vec4_elt3(y)); \
+ CCTK_REAL4_VEC const x__=(x_); \
+ CCTK_REAL4_VEC const y__=(y_); \
+ CCTK_REAL4_VEC const z__=(z_); \
+ CCTK_REAL4_VEC const x=x__; \
+ CCTK_REAL4_VEC const y=y__; \
+ CCTK_REAL4_VEC const z=z__; \
+ vec4_set(k4sgn(vec4_elt0(x)) ? vec4_elt0(z) : vec4_elt0(y), \
+ k4sgn(vec4_elt1(x)) ? vec4_elt1(z) : vec4_elt1(y), \
+ k4sgn(vec4_elt2(x)) ? vec4_elt2(z) : vec4_elt2(y), \
+ k4sgn(vec4_elt3(x)) ? vec4_elt3(z) : vec4_elt3(y)); \
})
#else
# define k4ifpos(x_,y_,z_) \
({ \
- CCTK_REAL4_VEC const xx=(x_); \
- CCTK_REAL4_VEC const x=xx; \
- CCTK_REAL4_VEC const yy=(y_); \
- CCTK_REAL4_VEC const y=yy; \
- CCTK_REAL4_VEC const zz=(z_); \
- CCTK_REAL4_VEC const z=zz; \
+ CCTK_REAL4_VEC const x__=(x_); \
+ CCTK_REAL4_VEC const y__=(y_); \
+ CCTK_REAL4_VEC const z__=(z_); \
+ CCTK_REAL4_VEC const x=x__; \
+ CCTK_REAL4_VEC const y=y__; \
+ CCTK_REAL4_VEC const z=z__; \
CCTK_REAL4_VEC const mask = _mm_srai_epi32(x, 31); \
/* (y & ~mask) | (z & mask) */ \
_mm_or_ps(_mm_andnot_ps(mask, y), _mm_and_ps(mask, z)); \