From 6bbe334ae29817b03d598ad977306fb861956702 Mon Sep 17 00:00:00 2001 From: eschnett Date: Thu, 22 Dec 2011 15:35:28 +0000 Subject: Make vectorisation macros safer git-svn-id: https://svn.cct.lsu.edu/repos/numrel/LSUThorns/Vectors/trunk@44 105869f7-3296-0410-a4ea-f4349344b45a --- src/vectors-4-Altivec.h | 44 ++++++++--------- src/vectors-4-SSE.h | 112 +++++++++++++++++++++---------------------- src/vectors-8-AVX.h | 8 ++-- src/vectors-8-DoubleHummer.h | 72 ++++++++++++++-------------- src/vectors-8-SSE2.h | 88 +++++++++++++++++----------------- src/vectors-8-VSX.h | 12 ++--- 6 files changed, 168 insertions(+), 168 deletions(-) diff --git a/src/vectors-4-Altivec.h b/src/vectors-4-Altivec.h index a55c683..679c34e 100644 --- a/src/vectors-4-Altivec.h +++ b/src/vectors-4-Altivec.h @@ -67,10 +67,10 @@ // the non-temporal hint is probably ignored #define vec4_store_nta_partial_lo(p_,x_,n) \ ({ \ - CCTK_REAL4 const& pp=(p_); \ - CCTK_REAL4 const& p=pp; \ - CCTK_REAL4_VEC const xx=(x_); \ - CCTK_REAL4_VEC const x=xx; \ + CCTK_REAL4 const& p__=(p_); \ + CCTK_REAL4_VEC const x__=(x_); \ + CCTK_REAL4 const& p=p__; \ + CCTK_REAL4_VEC const x=x__; \ switch (n) { \ case 3: (&p)[2]=x[2]; \ case 2: (&p)[1]=x[1]; \ @@ -79,10 +79,10 @@ }) #define vec4_store_nta_partial_hi(p_,x_,n) \ ({ \ - CCTK_REAL4 const& pp=(p_); \ - CCTK_REAL4 const& p=pp; \ - CCTK_REAL4_VEC const xx=(x_); \ - CCTK_REAL4_VEC const x=xx; \ + CCTK_REAL4 const& p__=(p_); \ + CCTK_REAL4_VEC const x__=(x_); \ + CCTK_REAL4 const& p=p__; \ + CCTK_REAL4_VEC const x=x__; \ switch (n) { \ case 3: (&p)[1]=x[1]; \ case 2: (&p)[2]=x[2]; \ @@ -91,14 +91,14 @@ }) #define vec4_store_nta_partial_mid(p_,x_,nlo_,nhi_) \ ({ \ - CCTK_REAL4 const& pp=(p_); \ - CCTK_REAL4 const& p=pp; \ - CCTK_REAL4_VEC const xx=(x_); \ - CCTK_REAL4_VEC const x=xx; \ - int const nnlo=(nlo_); \ - int const nlo=nnlo; \ - int const nnhi=(nhi_); \ - int const nhi=nnhi; \ + CCTK_REAL4 const& p__ =(p_); \ + CCTK_REAL4_VEC const x__ =(x_); \ + int const nlo__=(nlo_); \ + int const nhi__=(nhi_); \ + CCTK_REAL4 const& p =p__; \ + CCTK_REAL4_VEC const x =x__; \ + int const nlo=nlo__; \ + int const nhi=nhi__; \ if (nlo==3 and nhi==3) { \ (&p)[1]=x[1]; \ (&p)[2]=x[2]; \ @@ -137,8 +137,8 @@ // Expensive functions #define K4REPL(f,x_) \ ({ \ - CCTK_REAL4_VEC const xx=(x_); \ - CCTK_REAL4_VEC const x=xx; \ + CCTK_REAL4_VEC const x__=(x_); \ + CCTK_REAL4_VEC const x=x__; \ vec4_set(f(vec4_elt0(x)), \ f(vec4_elt1(x)), \ f(vec4_elt2(x)), \ @@ -146,10 +146,10 @@ }) #define K4REPL2(f,x_,a_) \ ({ \ - CCTK_REAL4_VEC const xx=(x_); \ - CCTK_REAL4_VEC const x=xx; \ - CCTK_REAL4 const aa=(a_); \ - CCTK_REAL4 const a=aa; \ + CCTK_REAL4_VEC const x__=(x_); \ + CCTK_REAL4 const a__=(a_); \ + CCTK_REAL4_VEC const x=x__; \ + CCTK_REAL4 const a=a__; \ vec4_set(f(vec4_elt0(x),a), \ f(vec4_elt1(x),a), \ f(vec4_elt2(x),a), \ diff --git a/src/vectors-4-SSE.h b/src/vectors-4-SSE.h index 56ecb22..9152c55 100644 --- a/src/vectors-4-SSE.h +++ b/src/vectors-4-SSE.h @@ -58,20 +58,20 @@ // original order is 0123 #define vec4_swap1032(x_) \ ({ \ - CCTK_REAL4_VEC const xx=(x_); \ - CCTK_REAL4_VEC const x=xx; \ + CCTK_REAL4_VEC const x__=(x_); \ + CCTK_REAL4_VEC const x=x__; \ _mm_shuffle_ps(x,x, _MM_SHUFFLE(2,3,0,1)); \ }) #define vec4_swap2301(x_) \ ({ \ - CCTK_REAL4_VEC const xx=(x_); \ - CCTK_REAL4_VEC const x=xx; \ + CCTK_REAL4_VEC const x__=(x_); \ + CCTK_REAL4_VEC const x=x__; \ _mm_shuffle_ps(x,x, _MM_SHUFFLE(1,0,3,2)); \ }) #define vec4_swap3210(x_) \ ({ \ - CCTK_REAL4_VEC const xx=(x_); \ - CCTK_REAL4_VEC const x=xx; \ + CCTK_REAL4_VEC const x__=(x_); \ + CCTK_REAL4_VEC const x=x__; \ _mm_shuffle_ps(x,x, _MM_SHUFFLE(0,1,2,3)); \ }) @@ -92,8 +92,8 @@ #if defined(__PGI) # define vec4_elt(x_,d) \ ({ \ - CCTK_REAL4_VEC const xx=(x_); \ - CCTK_REAL4_VEC const x=xx; \ + CCTK_REAL4_VEC const x__=(x_); \ + CCTK_REAL4_VEC const x=x__; \ CCTK_REAL4 a; \ if (d==0) a=vec4_elt0(x); \ else if (d==1) a=vec4_elt1(x); \ @@ -104,8 +104,8 @@ #else # define vec4_elt(x_,d) \ ({ \ - CCTK_REAL4_VEC const xx=(x_); \ - CCTK_REAL4_VEC const x=xx; \ + CCTK_REAL4_VEC const x__=(x_); \ + CCTK_REAL4_VEC const x=x__; \ CCTK_REAL4 a; \ switch (d) { \ case 0: a=vec4_elt0(x); break; \ @@ -132,8 +132,8 @@ #else # define vec4_load_off1(p_) \ ({ \ - CCTK_REAL4 const& pp=(p_); \ - CCTK_REAL4 const& p=pp; \ + CCTK_REAL4 const& p__=(p_); \ + CCTK_REAL4 const& p=p__; \ CCTK_REAL4_VEC const lo=vec4_load((&p)[-1]); \ CCTK_REAL4_VEC const hi=vec4_load((&p)[+3]); \ assert(0); \ @@ -142,16 +142,16 @@ }) # define vec4_load_off2(p_) \ ({ \ - CCTK_REAL4 const& pp=(p_); \ - CCTK_REAL4 const& p=pp; \ + CCTK_REAL4 const& p__=(p_); \ + CCTK_REAL4 const& p=p__; \ CCTK_REAL4_VEC const lo=vec4_load((&p)[-2]); \ CCTK_REAL4_VEC const hi=vec4_load((&p)[+2]); \ _mm_shuffle_ps(lo,hi, _MM_SHUFFLE(1,0,3,2)); \ }) # define vec4_load_off1(p_) \ ({ \ - CCTK_REAL4 const& pp=(p_); \ - CCTK_REAL4 const& p=pp; \ + CCTK_REAL4 const& p__=(p_); \ + CCTK_REAL4 const& p=p__; \ CCTK_REAL4_VEC const lo=vec4_load((&p)[-1]); \ CCTK_REAL4_VEC const hi=vec4_load((&p)[+3]); \ assert(0); \ @@ -169,8 +169,8 @@ #else # define vec4_loadu_maybe(off,p_) \ ({ \ - CCTK_REAL4 const& pp=(p_); \ - CCTK_REAL4 const& p=pp; \ + CCTK_REAL4 const& p__=(p_); \ + CCTK_REAL4 const& p=p__; \ (off) % CCTK_REAL4_VEC_SIZE == 0 ? \ vec4_load(p) : \ vec4_loadu(p); \ @@ -190,10 +190,10 @@ #if ! VECTORISE_STREAMING_STORES || ! defined(__SSE4A__) # define vec4_store_nta_partial_lo(p_,x_,n) \ ({ \ - CCTK_REAL4 const& pp=(p_); \ - CCTK_REAL4 const& p=pp; \ - CCTK_REAL4_VEC const xx=(x_); \ - CCTK_REAL4_VEC const x=xx; \ + CCTK_REAL4 const& p__=(p_); \ + CCTK_REAL4_VEC const x__=(x_); \ + CCTK_REAL4 const& p=p__; \ + CCTK_REAL4_VEC const x=x__; \ switch (n) { \ case 1: (&p)[0]=vec4_elt0(x); break; \ case 2: _mm_storel_ps(&p,x); break; \ @@ -202,10 +202,10 @@ }) # define vec4_store_nta_partial_hi(p_,x_,n) \ ({ \ - CCTK_REAL4 const& pp=(p_); \ - CCTK_REAL4 const& p=pp; \ - CCTK_REAL4_VEC const xx=(x_); \ - CCTK_REAL4_VEC const x=xx; \ + CCTK_REAL4 const& p__=(p_); \ + CCTK_REAL4_VEC const x__=(x_); \ + CCTK_REAL4 const& p=p__; \ + CCTK_REAL4_VEC const x=x__; \ switch (n) { \ case 1: (&p)[3]=vec4_elt3(x); break; \ case 2: _mm_storeh_ps(&p+2,x); break; \ @@ -215,10 +215,10 @@ #else # define vec4_store_nta_partial_lo(p_,x_,n) \ ({ \ - CCTK_REAL4 const& pp=(p_); \ - CCTK_REAL4 const& p=pp; \ - CCTK_REAL4_VEC const xx=(x_); \ - CCTK_REAL4_VEC const x=xx; \ + CCTK_REAL4 const& p__=(p_); \ + CCTK_REAL4_VEC const x__=(x_); \ + CCTK_REAL4 const& p=p__; \ + CCTK_REAL4_VEC const x=x__; \ switch (n) { \ case 1: \ _mm_stream_ss(&p,x); \ @@ -234,10 +234,10 @@ }) # define vec4_store_nta_partial_hi(p_,x_,n) \ ({ \ - CCTK_REAL4 const& pp=(p_); \ - CCTK_REAL4 const& p=pp; \ - CCTK_REAL4_VEC const xx=(x_); \ - CCTK_REAL4_VEC const x=xx; \ + CCTK_REAL4 const& p__=(p_); \ + CCTK_REAL4_VEC const x__=(x_); \ + CCTK_REAL4 const& p=p__; \ + CCTK_REAL4_VEC const x=x__; \ switch (n) { \ case 1: \ _mm_stream_ss(&p+3, vec4_swap3210(x)); \ @@ -301,8 +301,8 @@ static const union { // Expensive functions #define K4REPL(f,x_) \ ({ \ - CCTK_REAL4_VEC const xx=(x_); \ - CCTK_REAL4_VEC const x=xx; \ + CCTK_REAL4_VEC const x__=(x_); \ + CCTK_REAL4_VEC const x=x__; \ vec4_set(f(vec4_elt0(x)), \ f(vec4_elt1(x)), \ f(vec4_elt2(x)), \ @@ -310,10 +310,10 @@ static const union { }) #define K4REPL2(f,x_,a_) \ ({ \ - CCTK_REAL4_VEC const xx=(x_); \ - CCTK_REAL4_VEC const x=xx; \ - CCTK_REAL4 const aa=(a_); \ - CCTK_REAL4 const a=aa; \ + CCTK_REAL4_VEC const x__=(x_); \ + CCTK_REAL4 const a__=(a_); \ + CCTK_REAL4_VEC const x=x__; \ + CCTK_REAL4 const a=a__; \ vec4_set(f(vec4_elt0(x),a), \ f(vec4_elt1(x),a), \ f(vec4_elt2(x),a), \ @@ -335,26 +335,26 @@ static const union { # endif # define k4ifpos(x,y,z) \ ({ \ - CCTK_REAL4_VEC const xx=(x_); \ - CCTK_REAL4_VEC const x=xx; \ - CCTK_REAL4_VEC const yy=(y_); \ - CCTK_REAL4_VEC const y=yy; \ - CCTK_REAL4_VEC const zz=(z_); \ - CCTK_REAL4_VEC const z=zz; \ - vec4_set(k4sgn(vec4_elt0(x)) ? vec4_elt0(z) : vec4_elt0(y), \ - k4sgn(vec4_elt1(x)) ? vec4_elt1(z) : vec4_elt1(y), \ - k4sgn(vec4_elt2(x)) ? vec4_elt2(z) : vec4_elt2(y), \ - k4sgn(vec4_elt3(x)) ? vec4_elt3(z) : vec4_elt3(y)); \ + CCTK_REAL4_VEC const x__=(x_); \ + CCTK_REAL4_VEC const y__=(y_); \ + CCTK_REAL4_VEC const z__=(z_); \ + CCTK_REAL4_VEC const x=x__; \ + CCTK_REAL4_VEC const y=y__; \ + CCTK_REAL4_VEC const z=z__; \ + vec4_set(k4sgn(vec4_elt0(x)) ? vec4_elt0(z) : vec4_elt0(y), \ + k4sgn(vec4_elt1(x)) ? vec4_elt1(z) : vec4_elt1(y), \ + k4sgn(vec4_elt2(x)) ? vec4_elt2(z) : vec4_elt2(y), \ + k4sgn(vec4_elt3(x)) ? vec4_elt3(z) : vec4_elt3(y)); \ }) #else # define k4ifpos(x_,y_,z_) \ ({ \ - CCTK_REAL4_VEC const xx=(x_); \ - CCTK_REAL4_VEC const x=xx; \ - CCTK_REAL4_VEC const yy=(y_); \ - CCTK_REAL4_VEC const y=yy; \ - CCTK_REAL4_VEC const zz=(z_); \ - CCTK_REAL4_VEC const z=zz; \ + CCTK_REAL4_VEC const x__=(x_); \ + CCTK_REAL4_VEC const y__=(y_); \ + CCTK_REAL4_VEC const z__=(z_); \ + CCTK_REAL4_VEC const x=x__; \ + CCTK_REAL4_VEC const y=y__; \ + CCTK_REAL4_VEC const z=z__; \ CCTK_REAL4_VEC const mask = _mm_srai_epi32(x, 31); \ /* (y & ~mask) | (z & mask) */ \ _mm_or_ps(_mm_andnot_ps(mask, y), _mm_and_ps(mask, z)); \ diff --git a/src/vectors-8-AVX.h b/src/vectors-8-AVX.h index eab4055..144d3b5 100644 --- a/src/vectors-8-AVX.h +++ b/src/vectors-8-AVX.h @@ -80,8 +80,8 @@ union k8const_t { #else # define vec8_loadu_maybe(off,p_) \ ({ \ - CCTK_REAL8 const& pp=(p_); \ - CCTK_REAL8 const& p=pp; \ + CCTK_REAL8 const& p__=(p_); \ + CCTK_REAL8 const& p=p__; \ (off) % CCTK_REAL8_VEC_SIZE == 0 ? \ vec8_load(p) : \ vec8_load_off1(p); \ @@ -93,8 +93,8 @@ union k8const_t { # else # define vec8_loadu_maybe3(off1,off2,off3,p_) \ ({ \ - CCTK_REAL8 const& pp=(p_); \ - CCTK_REAL8 const& p=pp; \ + CCTK_REAL8 const& p__=(p_); \ + CCTK_REAL8 const& p=p__; \ ((off2) % CCTK_REAL8_VEC_SIZE != 0 or \ (off3) % CCTK_REAL8_VEC_SIZE != 0) ? \ vec8_loadu(p) : \ diff --git a/src/vectors-8-DoubleHummer.h b/src/vectors-8-DoubleHummer.h index e61425c..951ca5d 100644 --- a/src/vectors-8-DoubleHummer.h +++ b/src/vectors-8-DoubleHummer.h @@ -32,8 +32,8 @@ #define vec8_elt1(x) (__cimag(x)) #define vec8_elt(x_,d) \ ({ \ - CCTK_REAL8_VEC const xx=(x_); \ - CCTK_REAL8_VEC const x=xx; \ + CCTK_REAL8_VEC const x__=(x_); \ + CCTK_REAL8_VEC const x=x__; \ CCTK_REAL8 a; \ switch (d) { \ case 0: a=vec8_elt0(x); break; \ @@ -52,16 +52,16 @@ #if ! VECTORISE_ALWAYS_USE_ALIGNED_LOADS # define vec8_load_off1(p_) \ ({ \ - CCTK_REAL8 const& pp=(p_); \ - CCTK_REAL8 const& p=pp; \ + CCTK_REAL8 const& p__=(p_); \ + CCTK_REAL8 const& p=p__; \ vec8_set((&p)[0],(&p)[1]); \ }) #else #if 0 # define vec8_load_off1(p_) \ ({ \ - CCTK_REAL8 const& pp=(p_); \ - CCTK_REAL8 const& p=pp; \ + CCTK_REAL8 const& p__=(p_); \ + CCTK_REAL8 const& p=p__; \ CCTK_REAL8_VEC const lo = __lfxd((CCTK_REAL8 *)(&p-1)); \ CCTK_REAL8_VEC const hi = __lfxd((CCTK_REAL8 *)(&p+1)); \ __fpsel(vec8_set(-1.0,+1.0),lo,hi); \ @@ -69,8 +69,8 @@ #endif # define vec8_load_off1(p_) \ ({ \ - CCTK_REAL8 const& pp=(p_); \ - CCTK_REAL8 const& p=pp; \ + CCTK_REAL8 const& p__=(p_); \ + CCTK_REAL8 const& p=p__; \ CCTK_REAL8_VEC const lo = vec8_load((&p)[-1]); \ CCTK_REAL8_VEC const hi = vec8_load((&p)[+1]); \ __fxmr(__fpsel(vec8_set(+1.0,-1.0),lo,hi)); \ @@ -78,8 +78,8 @@ #endif #define vec8_loadu(p_) \ ({ \ - CCTK_REAL8 const& pp=(p_); \ - CCTK_REAL8 const& p=pp; \ + CCTK_REAL8 const& p__=(p_); \ + CCTK_REAL8 const& p=p__; \ int const off = (ptrdiff_t)&p & 0xf; \ off==0 ? vec8_load(p) : vec8_load_off1(p); \ }) @@ -93,8 +93,8 @@ #else # define vec8_loadu_maybe(off,p_) \ ({ \ - CCTK_REAL8 const& pp=(p_); \ - CCTK_REAL8 const& p=pp; \ + CCTK_REAL8 const& p__=(p_); \ + CCTK_REAL8 const& p=p__; \ (off) % CCTK_REAL8_VEC_SIZE == 0 ? \ vec8_load(p) : \ vec8_load_off1(p); \ @@ -105,8 +105,8 @@ # else # define vec8_loadu_maybe3(off1,off2,off3,p_) \ ({ \ - CCTK_REAL8 const& pp=(p_); \ - CCTK_REAL8 const& p=pp; \ + CCTK_REAL8 const& p__=(p_); \ + CCTK_REAL8 const& p=p__; \ ((off2) % CCTK_REAL8_VEC_SIZE != 0 || \ (off3) % CCTK_REAL8_VEC_SIZE != 0) ? \ vec8_loadu(p) : \ @@ -142,18 +142,18 @@ // One Newton iteration for reciprocal #define k8inv_iter(x_,r_) \ ({ \ - CCTK_REAL8_VEC const xx=(x_); \ - CCTK_REAL8_VEC const x=xx; \ - CCTK_REAL8_VEC const rr=(r_); \ - CCTK_REAL8_VEC const r=rr; \ + CCTK_REAL8_VEC const x__=(x_); \ + CCTK_REAL8_VEC const r__=(r_); \ + CCTK_REAL8_VEC const x=x__; \ + CCTK_REAL8_VEC const r=r__; \ /* r + r * (1 - x*r) */ \ k8madd(r, k8nmsub(x, r, vec8_set1(1.0)), r); \ }) // Reciprocal: First estimate, then apply two Newton iterations #define k8inv(x_) \ ({ \ - CCTK_REAL8_VEC const xx=(x_); \ - CCTK_REAL8_VEC const x=xx; \ + CCTK_REAL8_VEC const x__=(x_); \ + CCTK_REAL8_VEC const x=x__; \ CCTK_REAL8_VEC const r0 = k8inv_init(x); \ CCTK_REAL8_VEC const r1 = k8inv_iter(x,r0); \ CCTK_REAL8_VEC const r2 = k8inv_iter(x,r1); \ @@ -171,29 +171,29 @@ #define k8fabs(x) (__fpabs(x)) #define k8fmax(x_,y_) \ ({ \ - CCTK_REAL8_VEC const xx=(x_); \ - CCTK_REAL8_VEC const x=xx; \ - CCTK_REAL8_VEC const yy=(y_); \ - CCTK_REAL8_VEC const y=yy; \ + CCTK_REAL8_VEC const x__=(x_); \ + CCTK_REAL8_VEC const y__=(y_); \ + CCTK_REAL8_VEC const x=x__; \ + CCTK_REAL8_VEC const y=y__; \ __fpsel(k8sub(y,x),x,y); \ }) #define k8fmin(x_,y_) \ ({ \ - CCTK_REAL8_VEC const xx=(x_); \ - CCTK_REAL8_VEC const x=xx; \ - CCTK_REAL8_VEC const yy=(y_); \ - CCTK_REAL8_VEC const y=yy; \ + CCTK_REAL8_VEC const x__=(x_); \ + CCTK_REAL8_VEC const y__=(y_); \ + CCTK_REAL8_VEC const x=x__; \ + CCTK_REAL8_VEC const y=y__; \ __fpsel(k8sub(x,y),x,y); \ }) #define k8fnabs(x) (__fpnabs(x)) // Estimate for reciprocal square root -#define k8rsqrt_init(x) (__fprsqrt(x)) +#define k8rsqrt_init(x) (__fprsqrte(x)) // One Newton iteration for reciprocal square root #define k8rsqrt_iter(x_,rs_) \ ({ \ CCTK_REAL8_VEC const x__=(x_); \ - CCTK_REAL8_VEC const x=x__; \ CCTK_REAL8_VEC const rs__=(rs_); \ + CCTK_REAL8_VEC const x=x__; \ CCTK_REAL8_VEC const rs=rs__; \ /* rs (3/2 - x/2 rs^2) */ \ k8mul(rs, k8msub(vec8_set1(1.5), x2, k8mul(rs, rs))); \ @@ -219,17 +219,17 @@ // Expensive functions #define K8REPL(f,x_) \ ({ \ - CCTK_REAL8_VEC const xx=(x_); \ - CCTK_REAL8_VEC const x=xx; \ + CCTK_REAL8_VEC const x__=(x_); \ + CCTK_REAL8_VEC const x=x__; \ vec8_set(f(vec8_elt0(x)), \ f(vec8_elt1(x))); \ }) #define K8REPL2(f,x_,a_) \ ({ \ - CCTK_REAL8_VEC const xx=(x_); \ - CCTK_REAL8_VEC const x=xx; \ - CCTK_REAL8 const aa=(a_); \ - CCTK_REAL8 const a=aa; \ + CCTK_REAL8_VEC const x__=(x_); \ + CCTK_REAL8_VEC const x=x__; \ + CCTK_REAL8 const a__=(a_); \ + CCTK_REAL8 const a=a__; \ vec8_set(f(vec8_elt0(x),a), \ f(vec8_elt1(x),a)); \ }) diff --git a/src/vectors-8-SSE2.h b/src/vectors-8-SSE2.h index b8a7d0b..7dc7d31 100644 --- a/src/vectors-8-SSE2.h +++ b/src/vectors-8-SSE2.h @@ -68,8 +68,8 @@ // original order is 01 #define vec8_swap10(x_) \ ({ \ - CCTK_REAL8_VEC const xx=(x_); \ - CCTK_REAL8_VEC const x=xx; \ + CCTK_REAL8_VEC const x__=(x_); \ + CCTK_REAL8_VEC const x=x__; \ _mm_shuffle_pd(x,x, _MM_SHUFFLE2(0,1)); \ }) @@ -90,8 +90,8 @@ #else # define vec8_load_off1(p_) \ ({ \ - CCTK_REAL8 const& pp=(p_); \ - CCTK_REAL8 const& p=pp; \ + CCTK_REAL8 const& p__=(p_); \ + CCTK_REAL8 const& p=p__; \ _mm_shuffle_pd(vec8_load((&p)[-1]), \ vec8_load((&p)[+1]), _MM_SHUFFLE2(0,1)); \ }) @@ -106,8 +106,8 @@ #else # define vec8_loadu_maybe(off,p_) \ ({ \ - CCTK_REAL8 const& pp=(p_); \ - CCTK_REAL8 const& p=pp; \ + CCTK_REAL8 const& p__=(p_); \ + CCTK_REAL8 const& p=p__; \ (off) % CCTK_REAL8_VEC_SIZE == 0 ? \ vec8_load(p) : \ vec8_load_off1(p); \ @@ -119,8 +119,8 @@ # else # define vec8_loadu_maybe3(off1,off2,off3,p_) \ ({ \ - CCTK_REAL8 const& pp=(p_); \ - CCTK_REAL8 const& p=pp; \ + CCTK_REAL8 const& p__=(p_); \ + CCTK_REAL8 const& p=p__; \ ((off2) % CCTK_REAL8_VEC_SIZE != 0 or \ (off3) % CCTK_REAL8_VEC_SIZE != 0) ? \ vec8_loadu(p) : \ @@ -153,15 +153,15 @@ // lines are usually larger than the CPU vector size) # define vec8_store_nta_partial_lo(p_,x,n) \ ({ \ - CCTK_REAL8& pp=(p_); \ - CCTK_REAL8& p=pp; \ + CCTK_REAL8& p__=(p_); \ + CCTK_REAL8& p=p__; \ _mm_storel_pd(&p,x); \ /* _mm_clflush(&p); */ \ }) # define vec8_store_nta_partial_hi(p_,x,n) \ ({ \ - CCTK_REAL8& pp=(p_); \ - CCTK_REAL8& p=pp; \ + CCTK_REAL8& p__=(p_); \ + CCTK_REAL8& p=p__; \ _mm_storeh_pd(&p+1,x); \ /* _mm_clflush(&p+1); */ \ }) @@ -240,17 +240,17 @@ static const union { // Expensive functions #define K8REPL(f,x_) \ ({ \ - CCTK_REAL8_VEC const xx=(x_); \ - CCTK_REAL8_VEC const x=xx; \ + CCTK_REAL8_VEC const x__=(x_); \ + CCTK_REAL8_VEC const x=x__; \ vec8_set(f(vec8_elt0(x)), \ f(vec8_elt1(x))); \ }) #define K8REPL2(f,x_,a_) \ ({ \ - CCTK_REAL8_VEC const xx=(x_); \ - CCTK_REAL8_VEC const x=xx; \ - CCTK_REAL8 const aa=(a_); \ - CCTK_REAL8 const a=aa; \ + CCTK_REAL8_VEC const x__=(x_); \ + CCTK_REAL8 const a__=(a_); \ + CCTK_REAL8_VEC const x=x__; \ + CCTK_REAL8 const a=a__; \ vec8_set(f(vec8_elt0(x),a), \ f(vec8_elt1(x),a)); \ }) @@ -268,12 +268,12 @@ static const union { #elif 0 # define k8ifpos(x_,y_,z_) \ ({ \ - CCTK_REAL8_VEC const xx=(x_); \ - CCTK_REAL8_VEC const x=xx; \ - CCTK_REAL8_VEC const yy=(y_); \ - CCTK_REAL8_VEC const y=yy; \ - CCTK_REAL8_VEC const zz=(z_); \ - CCTK_REAL8_VEC const z=zz; \ + CCTK_REAL8_VEC const x__=(x_); \ + CCTK_REAL8_VEC const y__=(y_); \ + CCTK_REAL8_VEC const z__=(z_); \ + CCTK_REAL8_VEC const x=x__; \ + CCTK_REAL8_VEC const y=y__; \ + CCTK_REAL8_VEC const z=z__; \ int const m = _mm_movemask_pd(x); \ CCTK_REAL8_VEC r; \ switch (m) { \ @@ -292,12 +292,12 @@ static const union { # endif # define k8ifpos(x_,y_,z_) \ ({ \ - CCTK_REAL8_VEC const xx=(x_); \ - CCTK_REAL8_VEC const x=xx; \ - CCTK_REAL8_VEC const yy=(y_); \ - CCTK_REAL8_VEC const y=yy; \ - CCTK_REAL8_VEC const zz=(z_); \ - CCTK_REAL8_VEC const z=zz; \ + CCTK_REAL8_VEC const x__=(x_); \ + CCTK_REAL8_VEC const y__=(y_); \ + CCTK_REAL8_VEC const z__=(z_); \ + CCTK_REAL8_VEC const x=x__; \ + CCTK_REAL8_VEC const y=y__; \ + CCTK_REAL8_VEC const z=z__; \ vec8_set(k8sgn(vec8_elt0(x)) ? vec8_elt0(z) : vec8_elt0(y), \ k8sgn(vec8_elt1(x)) ? vec8_elt1(z) : vec8_elt1(y)); \ }) @@ -307,19 +307,19 @@ static const union { double d; } k8one_union = { 0x1ULL }; # define k8one (k8one_union.d) -# define k8ifpos(x_,y_,z_) \ - ({ \ - CCTK_REAL8_VEC const xx=(x_); \ - CCTK_REAL8_VEC const x=xx; \ - CCTK_REAL8_VEC const yy=(y_); \ - CCTK_REAL8_VEC const y=yy; \ - CCTK_REAL8_VEC const zz=(z_); \ - CCTK_REAL8_VEC const z=zz; \ - /* there is no _mm_srai_epi64(x, 63) */ \ - CCTK_REAL8_VEC const imask = \ - (__m128d)_mm_sub_epi64(_mm_srli_epi64((__m128i)x, 63), \ - (__m128i)_mm_set1_pd(k8one)); \ - /* (y & ~mask) | (z & mask); imask = ~mask */ \ - _mm_or_pd(_mm_and_pd(imask, y), _mm_andnot_pd(imask, z)); \ +# define k8ifpos(x_,y_,z_) \ + ({ \ + CCTK_REAL8_VEC const x__=(x_); \ + CCTK_REAL8_VEC const y__=(y_); \ + CCTK_REAL8_VEC const z__=(z_); \ + CCTK_REAL8_VEC const x=x__; \ + CCTK_REAL8_VEC const y=y__; \ + CCTK_REAL8_VEC const z=z__; \ + /* there is no _mm_srai_epi64(x, 63) */ \ + CCTK_REAL8_VEC const imask = \ + (__m128d)_mm_sub_epi64(_mm_srli_epi64((__m128i)x, 63), \ + (__m128i)_mm_set1_pd(k8one)); \ + /* (y & ~mask) | (z & mask); imask = ~mask */ \ + _mm_or_pd(_mm_and_pd(imask, y), _mm_andnot_pd(imask, z)); \ }) #endif diff --git a/src/vectors-8-VSX.h b/src/vectors-8-VSX.h index 7785759..5e64ef4 100644 --- a/src/vectors-8-VSX.h +++ b/src/vectors-8-VSX.h @@ -88,17 +88,17 @@ // Expensive functions #define K8REPL(f,x_) \ ({ \ - CCTK_REAL8_VEC const xx=(x_); \ - CCTK_REAL8_VEC const x=xx; \ + CCTK_REAL8_VEC const x__=(x_); \ + CCTK_REAL8_VEC const x=x__; \ vec8_set(f(vec8_elt0(x)), \ f(vec8_elt1(x))); \ }) #define K8REPL2(f,x_,a_) \ ({ \ - CCTK_REAL8_VEC const xx=(x_); \ - CCTK_REAL8_VEC const x=xx; \ - CCTK_REAL8 const aa=(a_); \ - CCTK_REAL8 const a=aa; \ + CCTK_REAL8_VEC const x__=(x_); \ + CCTK_REAL8 const a__=(a_); \ + CCTK_REAL8_VEC const x=x__; \ + CCTK_REAL8 const a=a__; \ vec8_set(f(vec8_elt0(x),a), \ f(vec8_elt1(x),a)); \ }) -- cgit v1.2.3