about summary refs log tree commit diff
diff options
context:
space:
mode:
author eschnett <eschnett@105869f7-3296-0410-a4ea-f4349344b45a> 2011-12-22 15:35:28 +0000
committer eschnett <eschnett@105869f7-3296-0410-a4ea-f4349344b45a> 2011-12-22 15:35:28 +0000
commit 6bbe334ae29817b03d598ad977306fb861956702 (patch)
tree 93a509faa68b7b97d71a4ce604dcccbd39ff7b21
parent 0ef2cb5241112d1ddf2078cea47edf5272d0f846 (diff)
Make vectorisation macros safer
git-svn-id: https://svn.cct.lsu.edu/repos/numrel/LSUThorns/Vectors/trunk@44 105869f7-3296-0410-a4ea-f4349344b45a
-rw-r--r-- src/vectors-4-Altivec.h 44
-rw-r--r-- src/vectors-4-SSE.h 112
-rw-r--r-- src/vectors-8-AVX.h 8
-rw-r--r-- src/vectors-8-DoubleHummer.h 72
-rw-r--r-- src/vectors-8-SSE2.h 88
-rw-r--r-- src/vectors-8-VSX.h 12
6 files changed, 168 insertions, 168 deletions
diff --git a/src/vectors-4-Altivec.h b/src/vectors-4-Altivec.h
index a55c683..679c34e 100644
--- a/src/vectors-4-Altivec.h
+++ b/src/vectors-4-Altivec.h
@@ -67,10 +67,10 @@
// the non-temporal hint is probably ignored
#define vec4_store_nta_partial_lo(p_,x_,n) \
({ \
- CCTK_REAL4 const& pp=(p_); \
- CCTK_REAL4 const& p=pp; \
- CCTK_REAL4_VEC const xx=(x_); \
- CCTK_REAL4_VEC const x=xx; \
+ CCTK_REAL4 const& p__=(p_); \
+ CCTK_REAL4_VEC const x__=(x_); \
+ CCTK_REAL4 const& p=p__; \
+ CCTK_REAL4_VEC const x=x__; \
switch (n) { \
case 3: (&p)[2]=x[2]; \
case 2: (&p)[1]=x[1]; \
@@ -79,10 +79,10 @@
})
#define vec4_store_nta_partial_hi(p_,x_,n) \
({ \
- CCTK_REAL4 const& pp=(p_); \
- CCTK_REAL4 const& p=pp; \
- CCTK_REAL4_VEC const xx=(x_); \
- CCTK_REAL4_VEC const x=xx; \
+ CCTK_REAL4 const& p__=(p_); \
+ CCTK_REAL4_VEC const x__=(x_); \
+ CCTK_REAL4 const& p=p__; \
+ CCTK_REAL4_VEC const x=x__; \
switch (n) { \
case 3: (&p)[1]=x[1]; \
case 2: (&p)[2]=x[2]; \
@@ -91,14 +91,14 @@
})
#define vec4_store_nta_partial_mid(p_,x_,nlo_,nhi_) \
({ \
- CCTK_REAL4 const& pp=(p_); \
- CCTK_REAL4 const& p=pp; \
- CCTK_REAL4_VEC const xx=(x_); \
- CCTK_REAL4_VEC const x=xx; \
- int const nnlo=(nlo_); \
- int const nlo=nnlo; \
- int const nnhi=(nhi_); \
- int const nhi=nnhi; \
+ CCTK_REAL4 const& p__ =(p_); \
+ CCTK_REAL4_VEC const x__ =(x_); \
+ int const nlo__=(nlo_); \
+ int const nhi__=(nhi_); \
+ CCTK_REAL4 const& p =p__; \
+ CCTK_REAL4_VEC const x =x__; \
+ int const nlo=nlo__; \
+ int const nhi=nhi__; \
if (nlo==3 and nhi==3) { \
(&p)[1]=x[1]; \
(&p)[2]=x[2]; \
@@ -137,8 +137,8 @@
// Expensive functions
#define K4REPL(f,x_) \
({ \
- CCTK_REAL4_VEC const xx=(x_); \
- CCTK_REAL4_VEC const x=xx; \
+ CCTK_REAL4_VEC const x__=(x_); \
+ CCTK_REAL4_VEC const x=x__; \
vec4_set(f(vec4_elt0(x)), \
f(vec4_elt1(x)), \
f(vec4_elt2(x)), \
@@ -146,10 +146,10 @@
})
#define K4REPL2(f,x_,a_) \
({ \
- CCTK_REAL4_VEC const xx=(x_); \
- CCTK_REAL4_VEC const x=xx; \
- CCTK_REAL4 const aa=(a_); \
- CCTK_REAL4 const a=aa; \
+ CCTK_REAL4_VEC const x__=(x_); \
+ CCTK_REAL4 const a__=(a_); \
+ CCTK_REAL4_VEC const x=x__; \
+ CCTK_REAL4 const a=a__; \
vec4_set(f(vec4_elt0(x),a), \
f(vec4_elt1(x),a), \
f(vec4_elt2(x),a), \
diff --git a/src/vectors-4-SSE.h b/src/vectors-4-SSE.h
index 56ecb22..9152c55 100644
--- a/src/vectors-4-SSE.h
+++ b/src/vectors-4-SSE.h
@@ -58,20 +58,20 @@
// original order is 0123
#define vec4_swap1032(x_) \
({ \
- CCTK_REAL4_VEC const xx=(x_); \
- CCTK_REAL4_VEC const x=xx; \
+ CCTK_REAL4_VEC const x__=(x_); \
+ CCTK_REAL4_VEC const x=x__; \
_mm_shuffle_ps(x,x, _MM_SHUFFLE(2,3,0,1)); \
})
#define vec4_swap2301(x_) \
({ \
- CCTK_REAL4_VEC const xx=(x_); \
- CCTK_REAL4_VEC const x=xx; \
+ CCTK_REAL4_VEC const x__=(x_); \
+ CCTK_REAL4_VEC const x=x__; \
_mm_shuffle_ps(x,x, _MM_SHUFFLE(1,0,3,2)); \
})
#define vec4_swap3210(x_) \
({ \
- CCTK_REAL4_VEC const xx=(x_); \
- CCTK_REAL4_VEC const x=xx; \
+ CCTK_REAL4_VEC const x__=(x_); \
+ CCTK_REAL4_VEC const x=x__; \
_mm_shuffle_ps(x,x, _MM_SHUFFLE(0,1,2,3)); \
})
@@ -92,8 +92,8 @@
#if defined(__PGI)
# define vec4_elt(x_,d) \
({ \
- CCTK_REAL4_VEC const xx=(x_); \
- CCTK_REAL4_VEC const x=xx; \
+ CCTK_REAL4_VEC const x__=(x_); \
+ CCTK_REAL4_VEC const x=x__; \
CCTK_REAL4 a; \
if (d==0) a=vec4_elt0(x); \
else if (d==1) a=vec4_elt1(x); \
@@ -104,8 +104,8 @@
#else
# define vec4_elt(x_,d) \
({ \
- CCTK_REAL4_VEC const xx=(x_); \
- CCTK_REAL4_VEC const x=xx; \
+ CCTK_REAL4_VEC const x__=(x_); \
+ CCTK_REAL4_VEC const x=x__; \
CCTK_REAL4 a; \
switch (d) { \
case 0: a=vec4_elt0(x); break; \
@@ -132,8 +132,8 @@
#else
# define vec4_load_off1(p_) \
({ \
- CCTK_REAL4 const& pp=(p_); \
- CCTK_REAL4 const& p=pp; \
+ CCTK_REAL4 const& p__=(p_); \
+ CCTK_REAL4 const& p=p__; \
CCTK_REAL4_VEC const lo=vec4_load((&p)[-1]); \
CCTK_REAL4_VEC const hi=vec4_load((&p)[+3]); \
assert(0); \
@@ -142,16 +142,16 @@
})
# define vec4_load_off2(p_) \
({ \
- CCTK_REAL4 const& pp=(p_); \
- CCTK_REAL4 const& p=pp; \
+ CCTK_REAL4 const& p__=(p_); \
+ CCTK_REAL4 const& p=p__; \
CCTK_REAL4_VEC const lo=vec4_load((&p)[-2]); \
CCTK_REAL4_VEC const hi=vec4_load((&p)[+2]); \
_mm_shuffle_ps(lo,hi, _MM_SHUFFLE(1,0,3,2)); \
})
# define vec4_load_off1(p_) \
({ \
- CCTK_REAL4 const& pp=(p_); \
- CCTK_REAL4 const& p=pp; \
+ CCTK_REAL4 const& p__=(p_); \
+ CCTK_REAL4 const& p=p__; \
CCTK_REAL4_VEC const lo=vec4_load((&p)[-1]); \
CCTK_REAL4_VEC const hi=vec4_load((&p)[+3]); \
assert(0); \
@@ -169,8 +169,8 @@
#else
# define vec4_loadu_maybe(off,p_) \
({ \
- CCTK_REAL4 const& pp=(p_); \
- CCTK_REAL4 const& p=pp; \
+ CCTK_REAL4 const& p__=(p_); \
+ CCTK_REAL4 const& p=p__; \
(off) % CCTK_REAL4_VEC_SIZE == 0 ? \
vec4_load(p) : \
vec4_loadu(p); \
@@ -190,10 +190,10 @@
#if ! VECTORISE_STREAMING_STORES || ! defined(__SSE4A__)
# define vec4_store_nta_partial_lo(p_,x_,n) \
({ \
- CCTK_REAL4 const& pp=(p_); \
- CCTK_REAL4 const& p=pp; \
- CCTK_REAL4_VEC const xx=(x_); \
- CCTK_REAL4_VEC const x=xx; \
+ CCTK_REAL4 const& p__=(p_); \
+ CCTK_REAL4_VEC const x__=(x_); \
+ CCTK_REAL4 const& p=p__; \
+ CCTK_REAL4_VEC const x=x__; \
switch (n) { \
case 1: (&p)[0]=vec4_elt0(x); break; \
case 2: _mm_storel_ps(&p,x); break; \
@@ -202,10 +202,10 @@
})
# define vec4_store_nta_partial_hi(p_,x_,n) \
({ \
- CCTK_REAL4 const& pp=(p_); \
- CCTK_REAL4 const& p=pp; \
- CCTK_REAL4_VEC const xx=(x_); \
- CCTK_REAL4_VEC const x=xx; \
+ CCTK_REAL4 const& p__=(p_); \
+ CCTK_REAL4_VEC const x__=(x_); \
+ CCTK_REAL4 const& p=p__; \
+ CCTK_REAL4_VEC const x=x__; \
switch (n) { \
case 1: (&p)[3]=vec4_elt3(x); break; \
case 2: _mm_storeh_ps(&p+2,x); break; \
@@ -215,10 +215,10 @@
#else
# define vec4_store_nta_partial_lo(p_,x_,n) \
({ \
- CCTK_REAL4 const& pp=(p_); \
- CCTK_REAL4 const& p=pp; \
- CCTK_REAL4_VEC const xx=(x_); \
- CCTK_REAL4_VEC const x=xx; \
+ CCTK_REAL4 const& p__=(p_); \
+ CCTK_REAL4_VEC const x__=(x_); \
+ CCTK_REAL4 const& p=p__; \
+ CCTK_REAL4_VEC const x=x__; \
switch (n) { \
case 1: \
_mm_stream_ss(&p,x); \
@@ -234,10 +234,10 @@
})
# define vec4_store_nta_partial_hi(p_,x_,n) \
({ \
- CCTK_REAL4 const& pp=(p_); \
- CCTK_REAL4 const& p=pp; \
- CCTK_REAL4_VEC const xx=(x_); \
- CCTK_REAL4_VEC const x=xx; \
+ CCTK_REAL4 const& p__=(p_); \
+ CCTK_REAL4_VEC const x__=(x_); \
+ CCTK_REAL4 const& p=p__; \
+ CCTK_REAL4_VEC const x=x__; \
switch (n) { \
case 1: \
_mm_stream_ss(&p+3, vec4_swap3210(x)); \
@@ -301,8 +301,8 @@ static const union {
// Expensive functions
#define K4REPL(f,x_) \
({ \
- CCTK_REAL4_VEC const xx=(x_); \
- CCTK_REAL4_VEC const x=xx; \
+ CCTK_REAL4_VEC const x__=(x_); \
+ CCTK_REAL4_VEC const x=x__; \
vec4_set(f(vec4_elt0(x)), \
f(vec4_elt1(x)), \
f(vec4_elt2(x)), \
@@ -310,10 +310,10 @@ static const union {
})
#define K4REPL2(f,x_,a_) \
({ \
- CCTK_REAL4_VEC const xx=(x_); \
- CCTK_REAL4_VEC const x=xx; \
- CCTK_REAL4 const aa=(a_); \
- CCTK_REAL4 const a=aa; \
+ CCTK_REAL4_VEC const x__=(x_); \
+ CCTK_REAL4 const a__=(a_); \
+ CCTK_REAL4_VEC const x=x__; \
+ CCTK_REAL4 const a=a__; \
vec4_set(f(vec4_elt0(x),a), \
f(vec4_elt1(x),a), \
f(vec4_elt2(x),a), \
@@ -335,26 +335,26 @@ static const union {
# endif
# define k4ifpos(x,y,z) \
({ \
- CCTK_REAL4_VEC const xx=(x_); \
- CCTK_REAL4_VEC const x=xx; \
- CCTK_REAL4_VEC const yy=(y_); \
- CCTK_REAL4_VEC const y=yy; \
- CCTK_REAL4_VEC const zz=(z_); \
- CCTK_REAL4_VEC const z=zz; \
- vec4_set(k4sgn(vec4_elt0(x)) ? vec4_elt0(z) : vec4_elt0(y), \
- k4sgn(vec4_elt1(x)) ? vec4_elt1(z) : vec4_elt1(y), \
- k4sgn(vec4_elt2(x)) ? vec4_elt2(z) : vec4_elt2(y), \
- k4sgn(vec4_elt3(x)) ? vec4_elt3(z) : vec4_elt3(y)); \
+ CCTK_REAL4_VEC const x__=(x_); \
+ CCTK_REAL4_VEC const y__=(y_); \
+ CCTK_REAL4_VEC const z__=(z_); \
+ CCTK_REAL4_VEC const x=x__; \
+ CCTK_REAL4_VEC const y=y__; \
+ CCTK_REAL4_VEC const z=z__; \
+ vec4_set(k4sgn(vec4_elt0(x)) ? vec4_elt0(z) : vec4_elt0(y), \
+ k4sgn(vec4_elt1(x)) ? vec4_elt1(z) : vec4_elt1(y), \
+ k4sgn(vec4_elt2(x)) ? vec4_elt2(z) : vec4_elt2(y), \
+ k4sgn(vec4_elt3(x)) ? vec4_elt3(z) : vec4_elt3(y)); \
})
#else
# define k4ifpos(x_,y_,z_) \
({ \
- CCTK_REAL4_VEC const xx=(x_); \
- CCTK_REAL4_VEC const x=xx; \
- CCTK_REAL4_VEC const yy=(y_); \
- CCTK_REAL4_VEC const y=yy; \
- CCTK_REAL4_VEC const zz=(z_); \
- CCTK_REAL4_VEC const z=zz; \
+ CCTK_REAL4_VEC const x__=(x_); \
+ CCTK_REAL4_VEC const y__=(y_); \
+ CCTK_REAL4_VEC const z__=(z_); \
+ CCTK_REAL4_VEC const x=x__; \
+ CCTK_REAL4_VEC const y=y__; \
+ CCTK_REAL4_VEC const z=z__; \
CCTK_REAL4_VEC const mask = _mm_srai_epi32(x, 31); \
/* (y & ~mask) | (z & mask) */ \
_mm_or_ps(_mm_andnot_ps(mask, y), _mm_and_ps(mask, z)); \
diff --git a/src/vectors-8-AVX.h b/src/vectors-8-AVX.h
index eab4055..144d3b5 100644
--- a/src/vectors-8-AVX.h
+++ b/src/vectors-8-AVX.h
@@ -80,8 +80,8 @@ union k8const_t {
#else
# define vec8_loadu_maybe(off,p_) \
({ \
- CCTK_REAL8 const& pp=(p_); \
- CCTK_REAL8 const& p=pp; \
+ CCTK_REAL8 const& p__=(p_); \
+ CCTK_REAL8 const& p=p__; \
(off) % CCTK_REAL8_VEC_SIZE == 0 ? \
vec8_load(p) : \
vec8_load_off1(p); \
@@ -93,8 +93,8 @@ union k8const_t {
# else
# define vec8_loadu_maybe3(off1,off2,off3,p_) \
({ \
- CCTK_REAL8 const& pp=(p_); \
- CCTK_REAL8 const& p=pp; \
+ CCTK_REAL8 const& p__=(p_); \
+ CCTK_REAL8 const& p=p__; \
((off2) % CCTK_REAL8_VEC_SIZE != 0 or \
(off3) % CCTK_REAL8_VEC_SIZE != 0) ? \
vec8_loadu(p) : \
diff --git a/src/vectors-8-DoubleHummer.h b/src/vectors-8-DoubleHummer.h
index e61425c..951ca5d 100644
--- a/src/vectors-8-DoubleHummer.h
+++ b/src/vectors-8-DoubleHummer.h
@@ -32,8 +32,8 @@
#define vec8_elt1(x) (__cimag(x))
#define vec8_elt(x_,d) \
({ \
- CCTK_REAL8_VEC const xx=(x_); \
- CCTK_REAL8_VEC const x=xx; \
+ CCTK_REAL8_VEC const x__=(x_); \
+ CCTK_REAL8_VEC const x=x__; \
CCTK_REAL8 a; \
switch (d) { \
case 0: a=vec8_elt0(x); break; \
@@ -52,16 +52,16 @@
#if ! VECTORISE_ALWAYS_USE_ALIGNED_LOADS
# define vec8_load_off1(p_) \
({ \
- CCTK_REAL8 const& pp=(p_); \
- CCTK_REAL8 const& p=pp; \
+ CCTK_REAL8 const& p__=(p_); \
+ CCTK_REAL8 const& p=p__; \
vec8_set((&p)[0],(&p)[1]); \
})
#else
#if 0
# define vec8_load_off1(p_) \
({ \
- CCTK_REAL8 const& pp=(p_); \
- CCTK_REAL8 const& p=pp; \
+ CCTK_REAL8 const& p__=(p_); \
+ CCTK_REAL8 const& p=p__; \
CCTK_REAL8_VEC const lo = __lfxd((CCTK_REAL8 *)(&p-1)); \
CCTK_REAL8_VEC const hi = __lfxd((CCTK_REAL8 *)(&p+1)); \
__fpsel(vec8_set(-1.0,+1.0),lo,hi); \
@@ -69,8 +69,8 @@
#endif
# define vec8_load_off1(p_) \
({ \
- CCTK_REAL8 const& pp=(p_); \
- CCTK_REAL8 const& p=pp; \
+ CCTK_REAL8 const& p__=(p_); \
+ CCTK_REAL8 const& p=p__; \
CCTK_REAL8_VEC const lo = vec8_load((&p)[-1]); \
CCTK_REAL8_VEC const hi = vec8_load((&p)[+1]); \
__fxmr(__fpsel(vec8_set(+1.0,-1.0),lo,hi)); \
@@ -78,8 +78,8 @@
#endif
#define vec8_loadu(p_) \
({ \
- CCTK_REAL8 const& pp=(p_); \
- CCTK_REAL8 const& p=pp; \
+ CCTK_REAL8 const& p__=(p_); \
+ CCTK_REAL8 const& p=p__; \
int const off = (ptrdiff_t)&p & 0xf; \
off==0 ? vec8_load(p) : vec8_load_off1(p); \
})
@@ -93,8 +93,8 @@
#else
# define vec8_loadu_maybe(off,p_) \
({ \
- CCTK_REAL8 const& pp=(p_); \
- CCTK_REAL8 const& p=pp; \
+ CCTK_REAL8 const& p__=(p_); \
+ CCTK_REAL8 const& p=p__; \
(off) % CCTK_REAL8_VEC_SIZE == 0 ? \
vec8_load(p) : \
vec8_load_off1(p); \
@@ -105,8 +105,8 @@
# else
# define vec8_loadu_maybe3(off1,off2,off3,p_) \
({ \
- CCTK_REAL8 const& pp=(p_); \
- CCTK_REAL8 const& p=pp; \
+ CCTK_REAL8 const& p__=(p_); \
+ CCTK_REAL8 const& p=p__; \
((off2) % CCTK_REAL8_VEC_SIZE != 0 || \
(off3) % CCTK_REAL8_VEC_SIZE != 0) ? \
vec8_loadu(p) : \
@@ -142,18 +142,18 @@
// One Newton iteration for reciprocal
#define k8inv_iter(x_,r_) \
({ \
- CCTK_REAL8_VEC const xx=(x_); \
- CCTK_REAL8_VEC const x=xx; \
- CCTK_REAL8_VEC const rr=(r_); \
- CCTK_REAL8_VEC const r=rr; \
+ CCTK_REAL8_VEC const x__=(x_); \
+ CCTK_REAL8_VEC const r__=(r_); \
+ CCTK_REAL8_VEC const x=x__; \
+ CCTK_REAL8_VEC const r=r__; \
/* r + r * (1 - x*r) */ \
k8madd(r, k8nmsub(x, r, vec8_set1(1.0)), r); \
})
// Reciprocal: First estimate, then apply two Newton iterations
#define k8inv(x_) \
({ \
- CCTK_REAL8_VEC const xx=(x_); \
- CCTK_REAL8_VEC const x=xx; \
+ CCTK_REAL8_VEC const x__=(x_); \
+ CCTK_REAL8_VEC const x=x__; \
CCTK_REAL8_VEC const r0 = k8inv_init(x); \
CCTK_REAL8_VEC const r1 = k8inv_iter(x,r0); \
CCTK_REAL8_VEC const r2 = k8inv_iter(x,r1); \
@@ -171,29 +171,29 @@
#define k8fabs(x) (__fpabs(x))
#define k8fmax(x_,y_) \
({ \
- CCTK_REAL8_VEC const xx=(x_); \
- CCTK_REAL8_VEC const x=xx; \
- CCTK_REAL8_VEC const yy=(y_); \
- CCTK_REAL8_VEC const y=yy; \
+ CCTK_REAL8_VEC const x__=(x_); \
+ CCTK_REAL8_VEC const y__=(y_); \
+ CCTK_REAL8_VEC const x=x__; \
+ CCTK_REAL8_VEC const y=y__; \
__fpsel(k8sub(y,x),x,y); \
})
#define k8fmin(x_,y_) \
({ \
- CCTK_REAL8_VEC const xx=(x_); \
- CCTK_REAL8_VEC const x=xx; \
- CCTK_REAL8_VEC const yy=(y_); \
- CCTK_REAL8_VEC const y=yy; \
+ CCTK_REAL8_VEC const x__=(x_); \
+ CCTK_REAL8_VEC const y__=(y_); \
+ CCTK_REAL8_VEC const x=x__; \
+ CCTK_REAL8_VEC const y=y__; \
__fpsel(k8sub(x,y),x,y); \
})
#define k8fnabs(x) (__fpnabs(x))
// Estimate for reciprocal square root
-#define k8rsqrt_init(x) (__fprsqrt(x))
+#define k8rsqrt_init(x) (__fprsqrte(x))
// One Newton iteration for reciprocal square root
#define k8rsqrt_iter(x_,rs_) \
({ \
CCTK_REAL8_VEC const x__=(x_); \
- CCTK_REAL8_VEC const x=x__; \
CCTK_REAL8_VEC const rs__=(rs_); \
+ CCTK_REAL8_VEC const x=x__; \
CCTK_REAL8_VEC const rs=rs__; \
/* rs (3/2 - x/2 rs^2) */ \
k8mul(rs, k8msub(vec8_set1(1.5), x2, k8mul(rs, rs))); \
@@ -219,17 +219,17 @@
// Expensive functions
#define K8REPL(f,x_) \
({ \
- CCTK_REAL8_VEC const xx=(x_); \
- CCTK_REAL8_VEC const x=xx; \
+ CCTK_REAL8_VEC const x__=(x_); \
+ CCTK_REAL8_VEC const x=x__; \
vec8_set(f(vec8_elt0(x)), \
f(vec8_elt1(x))); \
})
#define K8REPL2(f,x_,a_) \
({ \
- CCTK_REAL8_VEC const xx=(x_); \
- CCTK_REAL8_VEC const x=xx; \
- CCTK_REAL8 const aa=(a_); \
- CCTK_REAL8 const a=aa; \
+ CCTK_REAL8_VEC const x__=(x_); \
+ CCTK_REAL8_VEC const x=x__; \
+ CCTK_REAL8 const a__=(a_); \
+ CCTK_REAL8 const a=a__; \
vec8_set(f(vec8_elt0(x),a), \
f(vec8_elt1(x),a)); \
})
diff --git a/src/vectors-8-SSE2.h b/src/vectors-8-SSE2.h
index b8a7d0b..7dc7d31 100644
--- a/src/vectors-8-SSE2.h
+++ b/src/vectors-8-SSE2.h
@@ -68,8 +68,8 @@
// original order is 01
#define vec8_swap10(x_) \
({ \
- CCTK_REAL8_VEC const xx=(x_); \
- CCTK_REAL8_VEC const x=xx; \
+ CCTK_REAL8_VEC const x__=(x_); \
+ CCTK_REAL8_VEC const x=x__; \
_mm_shuffle_pd(x,x, _MM_SHUFFLE2(0,1)); \
})
@@ -90,8 +90,8 @@
#else
# define vec8_load_off1(p_) \
({ \
- CCTK_REAL8 const& pp=(p_); \
- CCTK_REAL8 const& p=pp; \
+ CCTK_REAL8 const& p__=(p_); \
+ CCTK_REAL8 const& p=p__; \
_mm_shuffle_pd(vec8_load((&p)[-1]), \
vec8_load((&p)[+1]), _MM_SHUFFLE2(0,1)); \
})
@@ -106,8 +106,8 @@
#else
# define vec8_loadu_maybe(off,p_) \
({ \
- CCTK_REAL8 const& pp=(p_); \
- CCTK_REAL8 const& p=pp; \
+ CCTK_REAL8 const& p__=(p_); \
+ CCTK_REAL8 const& p=p__; \
(off) % CCTK_REAL8_VEC_SIZE == 0 ? \
vec8_load(p) : \
vec8_load_off1(p); \
@@ -119,8 +119,8 @@
# else
# define vec8_loadu_maybe3(off1,off2,off3,p_) \
({ \
- CCTK_REAL8 const& pp=(p_); \
- CCTK_REAL8 const& p=pp; \
+ CCTK_REAL8 const& p__=(p_); \
+ CCTK_REAL8 const& p=p__; \
((off2) % CCTK_REAL8_VEC_SIZE != 0 or \
(off3) % CCTK_REAL8_VEC_SIZE != 0) ? \
vec8_loadu(p) : \
@@ -153,15 +153,15 @@
// lines are usually larger than the CPU vector size)
# define vec8_store_nta_partial_lo(p_,x,n) \
({ \
- CCTK_REAL8& pp=(p_); \
- CCTK_REAL8& p=pp; \
+ CCTK_REAL8& p__=(p_); \
+ CCTK_REAL8& p=p__; \
_mm_storel_pd(&p,x); \
/* _mm_clflush(&p); */ \
})
# define vec8_store_nta_partial_hi(p_,x,n) \
({ \
- CCTK_REAL8& pp=(p_); \
- CCTK_REAL8& p=pp; \
+ CCTK_REAL8& p__=(p_); \
+ CCTK_REAL8& p=p__; \
_mm_storeh_pd(&p+1,x); \
/* _mm_clflush(&p+1); */ \
})
@@ -240,17 +240,17 @@ static const union {
// Expensive functions
#define K8REPL(f,x_) \
({ \
- CCTK_REAL8_VEC const xx=(x_); \
- CCTK_REAL8_VEC const x=xx; \
+ CCTK_REAL8_VEC const x__=(x_); \
+ CCTK_REAL8_VEC const x=x__; \
vec8_set(f(vec8_elt0(x)), \
f(vec8_elt1(x))); \
})
#define K8REPL2(f,x_,a_) \
({ \
- CCTK_REAL8_VEC const xx=(x_); \
- CCTK_REAL8_VEC const x=xx; \
- CCTK_REAL8 const aa=(a_); \
- CCTK_REAL8 const a=aa; \
+ CCTK_REAL8_VEC const x__=(x_); \
+ CCTK_REAL8 const a__=(a_); \
+ CCTK_REAL8_VEC const x=x__; \
+ CCTK_REAL8 const a=a__; \
vec8_set(f(vec8_elt0(x),a), \
f(vec8_elt1(x),a)); \
})
@@ -268,12 +268,12 @@ static const union {
#elif 0
# define k8ifpos(x_,y_,z_) \
({ \
- CCTK_REAL8_VEC const xx=(x_); \
- CCTK_REAL8_VEC const x=xx; \
- CCTK_REAL8_VEC const yy=(y_); \
- CCTK_REAL8_VEC const y=yy; \
- CCTK_REAL8_VEC const zz=(z_); \
- CCTK_REAL8_VEC const z=zz; \
+ CCTK_REAL8_VEC const x__=(x_); \
+ CCTK_REAL8_VEC const y__=(y_); \
+ CCTK_REAL8_VEC const z__=(z_); \
+ CCTK_REAL8_VEC const x=x__; \
+ CCTK_REAL8_VEC const y=y__; \
+ CCTK_REAL8_VEC const z=z__; \
int const m = _mm_movemask_pd(x); \
CCTK_REAL8_VEC r; \
switch (m) { \
@@ -292,12 +292,12 @@ static const union {
# endif
# define k8ifpos(x_,y_,z_) \
({ \
- CCTK_REAL8_VEC const xx=(x_); \
- CCTK_REAL8_VEC const x=xx; \
- CCTK_REAL8_VEC const yy=(y_); \
- CCTK_REAL8_VEC const y=yy; \
- CCTK_REAL8_VEC const zz=(z_); \
- CCTK_REAL8_VEC const z=zz; \
+ CCTK_REAL8_VEC const x__=(x_); \
+ CCTK_REAL8_VEC const y__=(y_); \
+ CCTK_REAL8_VEC const z__=(z_); \
+ CCTK_REAL8_VEC const x=x__; \
+ CCTK_REAL8_VEC const y=y__; \
+ CCTK_REAL8_VEC const z=z__; \
vec8_set(k8sgn(vec8_elt0(x)) ? vec8_elt0(z) : vec8_elt0(y), \
k8sgn(vec8_elt1(x)) ? vec8_elt1(z) : vec8_elt1(y)); \
})
@@ -307,19 +307,19 @@ static const union {
double d;
} k8one_union = { 0x1ULL };
# define k8one (k8one_union.d)
-# define k8ifpos(x_,y_,z_) \
- ({ \
- CCTK_REAL8_VEC const xx=(x_); \
- CCTK_REAL8_VEC const x=xx; \
- CCTK_REAL8_VEC const yy=(y_); \
- CCTK_REAL8_VEC const y=yy; \
- CCTK_REAL8_VEC const zz=(z_); \
- CCTK_REAL8_VEC const z=zz; \
- /* there is no _mm_srai_epi64(x, 63) */ \
- CCTK_REAL8_VEC const imask = \
- (__m128d)_mm_sub_epi64(_mm_srli_epi64((__m128i)x, 63), \
- (__m128i)_mm_set1_pd(k8one)); \
- /* (y & ~mask) | (z & mask); imask = ~mask */ \
- _mm_or_pd(_mm_and_pd(imask, y), _mm_andnot_pd(imask, z)); \
+# define k8ifpos(x_,y_,z_) \
+ ({ \
+ CCTK_REAL8_VEC const x__=(x_); \
+ CCTK_REAL8_VEC const y__=(y_); \
+ CCTK_REAL8_VEC const z__=(z_); \
+ CCTK_REAL8_VEC const x=x__; \
+ CCTK_REAL8_VEC const y=y__; \
+ CCTK_REAL8_VEC const z=z__; \
+ /* there is no _mm_srai_epi64(x, 63) */ \
+ CCTK_REAL8_VEC const imask = \
+ (__m128d)_mm_sub_epi64(_mm_srli_epi64((__m128i)x, 63), \
+ (__m128i)_mm_set1_pd(k8one)); \
+ /* (y & ~mask) | (z & mask); imask = ~mask */ \
+ _mm_or_pd(_mm_and_pd(imask, y), _mm_andnot_pd(imask, z)); \
})
#endif
diff --git a/src/vectors-8-VSX.h b/src/vectors-8-VSX.h
index 7785759..5e64ef4 100644
--- a/src/vectors-8-VSX.h
+++ b/src/vectors-8-VSX.h
@@ -88,17 +88,17 @@
// Expensive functions
#define K8REPL(f,x_) \
({ \
- CCTK_REAL8_VEC const xx=(x_); \
- CCTK_REAL8_VEC const x=xx; \
+ CCTK_REAL8_VEC const x__=(x_); \
+ CCTK_REAL8_VEC const x=x__; \
vec8_set(f(vec8_elt0(x)), \
f(vec8_elt1(x))); \
})
#define K8REPL2(f,x_,a_) \
({ \
- CCTK_REAL8_VEC const xx=(x_); \
- CCTK_REAL8_VEC const x=xx; \
- CCTK_REAL8 const aa=(a_); \
- CCTK_REAL8 const a=aa; \
+ CCTK_REAL8_VEC const x__=(x_); \
+ CCTK_REAL8 const a__=(a_); \
+ CCTK_REAL8_VEC const x=x__; \
+ CCTK_REAL8 const a=a__; \
vec8_set(f(vec8_elt0(x),a), \
f(vec8_elt1(x),a)); \
})