From 6005a49ff06f8368b516c168e5a396530c1193d4 Mon Sep 17 00:00:00 2001 From: eschnett Date: Fri, 14 Sep 2012 18:48:46 +0000 Subject: Add support for (dynamic) if-then expressions Add types for holding integers and booleans, and vectors thereof. Add if-then expressions. Add floating point comparisons. Update tests. git-svn-id: https://svn.cct.lsu.edu/repos/numrel/LSUThorns/Vectors/trunk@66 105869f7-3296-0410-a4ea-f4349344b45a --- src/test.cc | 59 +++++++++++++++++++++++++++++++++-- src/vectors-4-Altivec.h | 6 ++++ src/vectors-4-SSE.h | 19 ++++++++++-- src/vectors-4-default.h | 24 +++++++++++++- src/vectors-8-AVX.h | 23 ++++++++++++-- src/vectors-8-DoubleHummer.h | 40 +++++++++++++++++------- src/vectors-8-QPX.h | 18 +++++++++++ src/vectors-8-SSE2.h | 19 ++++++++++-- src/vectors-8-VSX.h | 74 ++++++++++++++++++++++++++++++++++++-------- src/vectors-8-default.h | 24 +++++++++++++- src/vectors.cc | 13 ++++---- src/vectors.h | 37 +++++++++++++++++++++- 12 files changed, 313 insertions(+), 43 deletions(-) diff --git a/src/test.cc b/src/test.cc index 73dd9e2..ed1ab85 100644 --- a/src/test.cc +++ b/src/test.cc @@ -7,6 +7,7 @@ #include #include #include +#include #include using namespace std; @@ -21,17 +22,18 @@ inline CCTK_REAL my_copysign (CCTK_REAL const x, CCTK_REAL const y) inline int my_isnan (CCTK_REAL const x) { - return std::isnan(x); + using namespace std; + return isnan(x); } inline int my_signbit (CCTK_REAL const x) { - return std::signbit(x); + using namespace std; + return signbit(x); } inline CCTK_REAL my_sgn (CCTK_REAL const x) { - using namespace std; return x == (CCTK_REAL)0.0 ? (CCTK_REAL)0.0 : my_copysign((CCTK_REAL)1.0, x); } @@ -83,6 +85,27 @@ inline CCTK_REAL my_sgn (CCTK_REAL const x) } \ } while(0) +#define VECBITTEST(testname, vecexpr, scalarexpr) \ + do { \ + if (verbose) { \ + CCTK_VInfo (CCTK_THORNSTRING, "Test %s...", testname); \ + } \ + CCTK_BOOLEAN_VEC rv = (vecexpr); \ + for (int i=0; i b[i]?kltrue1:klfalse1); + VECBITTEST("kcmpge", kcmpge(av,bv), a[i]>=b[i]?kltrue1:klfalse1); + VECBITTEST("kcmplt", kcmplt(av,bv), a[i]< b[i]?kltrue1:klfalse1); + VECBITTEST("kcmple", kcmple(av,bv), a[i]<=b[i]?kltrue1:klfalse1); if (passed != numtests) { CCTK_VWarn(CCTK_WARN_ALERT, __LINE__, __FILE__, CCTK_THORNSTRING, diff --git a/src/vectors-4-Altivec.h b/src/vectors-4-Altivec.h index d6281c8..3975c77 100644 --- a/src/vectors-4-Altivec.h +++ b/src/vectors-4-Altivec.h @@ -17,6 +17,12 @@ // Number of vector elements in a CCTK_REAL_VEC #define CCTK_REAL4_VEC_SIZE 4 +// Integer and boolean types corresponding to this real type +#define CCTK_INTEGER4 CCTK_REAL4 +#define CCTK_BOOLEAN4 CCTK_REAL4 +#define CCTK_INTEGER4_VEC CCTK_REAL4_VEC +#define CCTK_BOOLEAN4_VEC CCTK_REAL4_VEC + // Create vectors, extract vector elements diff --git a/src/vectors-4-SSE.h b/src/vectors-4-SSE.h index 9af7189..e6ff34e 100644 --- a/src/vectors-4-SSE.h +++ b/src/vectors-4-SSE.h @@ -48,6 +48,12 @@ // Number of vector elements in a CCTK_REAL_VEC #define CCTK_REAL4_VEC_SIZE 4 +// Integer and boolean types corresponding to this real type +#define CCTK_INTEGER4 CCTK_REAL4 +#define CCTK_BOOLEAN4 CCTK_REAL4 +#define CCTK_INTEGER4_VEC CCTK_REAL4_VEC +#define CCTK_BOOLEAN4_VEC CCTK_REAL4_VEC + union k4const_t { @@ -385,8 +391,10 @@ static const k4const_t k4one = { f: { 1.0f, 1.0f, 1.0f, 1.0f, }}; #define k4tan(x) K4REPL(tanf,x) #define k4tanh(x) K4REPL(tanhf,x) -static const k4const_t k4lfalse = {{ +0U, +0U, +0U, +0U, }}; -static const k4const_t k4ltrue = {{ -1U, -1U, -1U, -1U, }}; +static const k4const_t k4lfalse_ = {{ +0U, +0U, +0U, +0U, }}; +static const k4const_t k4ltrue_ = {{ -1U, -1U, -1U, -1U, }}; +#define k4lfalse (k4lfalse_.vf) +#define k4ltrue (k4ltrue_.vf) #define k4lnot(x) (_mm_xor_ps(k4ltrue,x)) #define k4land(x,y) (_mm_and_ps(x,y)) #define k4lor(x,y) (_mm_or_ps(x,y)) @@ -442,3 +450,10 @@ static const k4const_t k4ltrue = {{ -1U, -1U, -1U, -1U, }}; _mm_or_ps(_mm_and_ps(x, y), _mm_andnot_ps(x, z)); \ }) #endif + +#define k4cmpeq(x,y) (_mm_cmpeq_ps(x,y)) +#define k4cmpne(x,y) (_mm_cmpneq_ps(x,y)) +#define k4cmpgt(x,y) (_mm_cmpgt_ps(x,y)) +#define k4cmpge(x,y) (_mm_cmpge_ps(x,y)) +#define k4cmplt(x,y) (_mm_cmplt_ps(x,y)) +#define k4cmple(x,y) (_mm_cmple_ps(x,y)) diff --git a/src/vectors-4-default.h b/src/vectors-4-default.h index d1b2135..9b2f7f6 100644 --- a/src/vectors-4-default.h +++ b/src/vectors-4-default.h @@ -19,6 +19,12 @@ // Number of vector elements in a vector #define CCTK_REAL4_VEC_SIZE 1 +// Integer and boolean types corresponding to this real type +#define CCTK_INTEGER4 CCTK_REAL4 +#define CCTK_BOOLEAN4 CCTK_REAL4 +#define CCTK_INTEGER4_VEC CCTK_REAL4_VEC +#define CCTK_BOOLEAN4_VEC CCTK_REAL4_VEC + // Create a vector replicating a scalar @@ -114,4 +120,20 @@ # define k4signbit(x) (signbit(x)) #endif -#define k4ifthen(x,y,z) (k4signbit(x)?(y):(z)) +#define k4l2r(x_) ({ CCTK_INT4 x__=(x_); CCTK_INT4 x=x__; *(CCTK_REAL4*)&x; }) +#define k4r2l(x_) ({ CCTK_REAL4 x__=(x_); CCTK_REAL4 x=x__; *(CCTK_INT4*)&x; }) +#define k4lfalse k4l2r(0) +#define k4ltrue k4l2r(1) +#define k4lnot(x) k4l2r(!k4r2l(x)) +#define k4land(x,y) k4l2r(k4r2l(x) && k4r2l(y)) +#define k4lor(x,y) k4l2r(k4r2l(x) || k4r2l(y)) +#define k4lxor(x,y) k4l2r(!k4r2l(x) != !k4r2l(y)) + +#define k4ifthen(x,y,z) ((x)?(y):(z)) + +#define k4cmpeq(x,y) k4l2r((x)==(y)) +#define k4cmpne(x,y) k4l2r((x)!=(y)) +#define k4cmpgt(x,y) k4l2r((x)>(y)) +#define k4cmpge(x,y) k4l2r((x)>=(y)) +#define k4cmplt(x,y) k4l2r((x)<(y)) +#define k4cmple(x,y) k4l2r((x)<=(y)) diff --git a/src/vectors-8-AVX.h b/src/vectors-8-AVX.h index 1ba2583..6882523 100644 --- a/src/vectors-8-AVX.h +++ b/src/vectors-8-AVX.h @@ -31,6 +31,12 @@ // Number of vector elements in a CCTK_REAL_VEC #define CCTK_REAL8_VEC_SIZE 4 +// Integer and boolean types corresponding to this real type +#define CCTK_INTEGER8 CCTK_REAL8 +#define CCTK_BOOLEAN8 CCTK_REAL8 +#define CCTK_INTEGER8_VEC CCTK_REAL8_VEC +#define CCTK_BOOLEAN8_VEC CCTK_REAL8_VEC + union k8const_t { @@ -299,10 +305,21 @@ static const k8const_t k8one = { f: { 1.0, 1.0, 1.0, 1.0, }}; #define k8tan(x) K8REPL(tan,x) #define k8tanh(x) K8REPL(tanh,x) -static const k8const_t k8lfalse = {{ K8_ZERO, K8_ZERO, K8_ZERO, K8_ZERO, }}; -static const k8const_t k8ltrue = {{ K8_IMIN, K8_IMIN, K8_IMIN, K8_IMIN, }}; -#define k8lnot(x) (_mm256_xor_pd(k8sign_mask,x)) +static const k8const_t k8lfalse_ = + {{ K8_ZERO, K8_ZERO, K8_ZERO, K8_ZERO, }}; +static const k8const_t k8ltrue_ = + {{ K8_NOTZERO, K8_NOTZERO, K8_NOTZERO, K8_NOTZERO, }}; +#define k8lfalse (k8lfalse_.vf) +#define k8ltrue (k8ltrue_.vf) +#define k8lnot(x) (_mm256_xor_pd(k8ltrue,x)) #define k8land(x,y) (_mm256_and_pd(x,y)) #define k8lor(x,y) (_mm256_or_pd(x,y)) #define k8lxor(x,y) (_mm256_xor_pd(x,y)) #define k8ifthen(x,y,z) (_mm256_blendv_pd(z,y,x)) + +#define k8cmpeq(x,y) (_mm256_cmp_pd(x,y,_CMP_EQ_OQ)) +#define k8cmpne(x,y) (_mm256_cmp_pd(x,y,_CMP_NEQ_OQ)) +#define k8cmpgt(x,y) (_mm256_cmp_pd(x,y,_CMP_GT_OQ)) +#define k8cmpge(x,y) (_mm256_cmp_pd(x,y,_CMP_GE_OQ)) +#define k8cmplt(x,y) (_mm256_cmp_pd(x,y,_CMP_LT_OQ)) +#define k8cmple(x,y) (_mm256_cmp_pd(x,y,_CMP_LE_OQ)) diff --git a/src/vectors-8-DoubleHummer.h b/src/vectors-8-DoubleHummer.h index ffb04b4..0189989 100644 --- a/src/vectors-8-DoubleHummer.h +++ b/src/vectors-8-DoubleHummer.h @@ -3,6 +3,8 @@ // Use the type double _Complex directly, without introducing a wrapper class // Use macros instead of inline functions +// See + #include @@ -21,6 +23,12 @@ // Number of vector elements in a CCTK_REAL_VEC #define CCTK_REAL8_VEC_SIZE 2 +// Integer and boolean types corresponding to this real type +#define CCTK_INTEGER8 CCTK_REAL8 +#define CCTK_BOOLEAN8 CCTK_REAL8 +#define CCTK_INTEGER8_VEC CCTK_REAL8_VEC +#define CCTK_BOOLEAN8_VEC CCTK_REAL8_VEC + union k8const_t { @@ -195,7 +203,9 @@ union k8const_t { #define k8nmsub(x,y,z) (__fpnmsub(z,x,y)) // Cheap functions -#define k8fabs(x) (__fpabs(x)) +// TODO: handle -0 correctly +#define k8copysign(x,y) (k8ifthen(y,k8fabs(x),k8fnabs(x))) +#define k8fabs(x) (__fpabs(x)) #define k8fmax(x_,y_) \ ({ \ CCTK_REAL8_VEC const x__=(x_); \ @@ -220,11 +230,8 @@ static const k8const_t k8mone = {{ -1.0, -1.0, }}; ({ \ CCTK_REAL_VEC x__=(x_); \ CCTK_REAL_VEC x=x__; \ - /* TODO: this assumes that __fpsel says -0>=+0; \ - if this is not so, we need k8abs(x) instead of x for iszero. */ \ - CCTK_REAL_VEC iszero = k8land(__fpsel(x, k8lfalse.vf, k8ltrue.vf), \ - __fpsel(k8neg(x), k8lfalse.vf, k8ltrue.vf)); \ - CCTK_REAL_VEC signedone = __fpsel(x, k8mone.vf, k8one.vf); \ + CCTK_REAL_VEC iszero = k8fnabs(x); \ + CCTK_REAL_VEC signedone = k8ifthen(x, k8one.vf, k8mone.vf); \ k8ifthen(iszero, k8zero.vf, signedone); \ }) // Estimate for reciprocal square root @@ -301,10 +308,21 @@ static const k8const_t k8mone = {{ -1.0, -1.0, }}; #define k8tan(x) K8REPL(tan,x) #define k8tanh(x) K8REPL(tanh,x) -static const k8const_t k8lfalse = {{ -1.0, -1.0, }}; -static const k8const_t k8ltrue = {{ +1.0, +1.0, }}; -#define k8lnot(x) (__fpneg(x)) -#define k8land(x,y) (k8ifthen(x,y,k8lfalse.vf)) -#define k8lor(x,y) (k8ifthen(x,k8ltrue.vf,y)) +// canonical true is +1.0, canonical false is -1.0 +// >=0 is true, -0 is true (?), nan is false (?) +static const k8const_t k8lfalse_ = {{ -1.0, -1.0, }}; +static const k8const_t k8ltrue_ = {{ +1.0, +1.0, }}; +#define k8lfalse (k8lfalse_.vf) +#define k8ltrue (k8ltrue_.vf) +#define k8lnot(x) (k8ifthen(x,k8lfalse,k8ltrue)) +#define k8land(x,y) (k8ifthen(x,y,k8lfalse)) +#define k8lor(x,y) (k8ifthen(x,k8ltrue,y)) #define k8lxor(x,y) (k8ifthen(x,k8lnot(y),y)) #define k8ifthen(x,y,z) (__fpsel(x,z,y)) + +#define k8cmpeq(x,y) (__fpnabs((x)-(y))) +#define k8cmpne(x,y) (k8lnot(__fpnabs((x)-(y)))) +#define k8cmpgt(x,y) (k8lnot(k8cmple(x,y))) +#define k8cmpge(x,y) ((x)-(y)) +#define k8cmplt(x,y) (k8lnot(k8cmpge(x,y))) +#define k8cmple(x,y) ((y)-(x)) diff --git a/src/vectors-8-QPX.h b/src/vectors-8-QPX.h index 80762fe..93a7707 100644 --- a/src/vectors-8-QPX.h +++ b/src/vectors-8-QPX.h @@ -6,6 +6,8 @@ // Note: bgxlC_r does not like const declarations, so we need to cast // them away and/or omit them everywhere +// See + #include @@ -19,11 +21,18 @@ #define vec8_architecture "QPX" // Vector type corresponding to CCTK_REAL +// TODO: Use a typedef to avoid the "const" issue? Or use a struct? #define CCTK_REAL8_VEC vector4double // Number of vector elements in a CCTK_REAL_VEC #define CCTK_REAL8_VEC_SIZE 4 +// Integer and boolean types corresponding to this real type +#define CCTK_INTEGER8 CCTK_REAL8 +#define CCTK_BOOLEAN8 CCTK_REAL8 +#define CCTK_INTEGER8_VEC CCTK_REAL8_VEC +#define CCTK_BOOLEAN8_VEC CCTK_REAL8_VEC + union k8const_t { @@ -300,6 +309,8 @@ union k8const_t { #define k8tan(x) K8REPL(tan,x) #define k8tanh(x) K8REPL(tanh,x) +// canonical true is +1.0, canonical false is -1.0 +// >=0 is true, -0 is true, nan is false #define k8lfalse \ ({ CCTK_REAL8_VEC dummy; vec_logical(dummy,dummy,0x0); }) #define k8ltrue \ @@ -309,3 +320,10 @@ union k8const_t { #define k8lor(x,y) (vec_or(x,y)) #define k8lxor(x,y) (vec_xor(x,y)) #define k8ifthen(x,y,z) (vec_sel(z,x,y)) + +#define k8cmpeq(x,y) (vec_cmpeq(x,y)) +#define k8cmpne(x,y) (vec_not(vec_cmpeq(x,y))) +#define k8cmpgt(x,y) (vec_cmpgt(x,y)) +#define k8cmpge(x,y) (vec_not(vec_cmplt(x,y))) +#define k8cmplt(x,y) (vec_cmplt(x,y)) +#define k8cmple(x,y) (vec_not(vec_cmpgt(x,y))) diff --git a/src/vectors-8-SSE2.h b/src/vectors-8-SSE2.h index 1b6aeb5..cfefbce 100644 --- a/src/vectors-8-SSE2.h +++ b/src/vectors-8-SSE2.h @@ -58,6 +58,12 @@ // Number of vector elements in a CCTK_REAL_VEC #define CCTK_REAL8_VEC_SIZE 2 +// Integer and boolean types corresponding to this real type +#define CCTK_INTEGER8 CCTK_REAL8 +#define CCTK_BOOLEAN8 CCTK_REAL8 +#define CCTK_INTEGER8_VEC CCTK_REAL8_VEC +#define CCTK_BOOLEAN8_VEC CCTK_REAL8_VEC + union k8const_t { @@ -333,8 +339,10 @@ static const k8const_t k8one = { f: { 1.0, 1.0, }}; #define k8tan(x) K8REPL(tan,x) #define k8tanh(x) K8REPL(tanh,x) -static const k8const_t k8lfalse = {{ +0LL, +0LL, }}; -static const k8const_t k8ltrue = {{ -1LL, -1LL, }}; +static const k8const_t k8lfalse_ = {{ +0LL, +0LL, }}; +static const k8const_t k8ltrue_ = {{ -1LL, -1LL, }}; +#define k8lfalse (k8lfalse_.vf) +#define k8ltrue (k8ltrue_.vf) #define k8lnot(x) (_mm_xor_pd(k8ltrue,x)) #define k8land(x,y) (_mm_and_pd(x,y)) #define k8lor(x,y) (_mm_or_pd(x,y)) @@ -412,3 +420,10 @@ static const k8const_t k8ione = {{ 0x1ULL, 0x1ULL, }}; _mm_or_pd(_mm_and_pd(x, y), _mm_andnot_pd(x, z)); \ }) #endif + +#define k8cmpeq(x,y) (_mm_cmpeq_pd(x,y)) +#define k8cmpne(x,y) (_mm_cmpneq_pd(x,y)) +#define k8cmpgt(x,y) (_mm_cmpgt_pd(x,y)) +#define k8cmpge(x,y) (_mm_cmpge_pd(x,y)) +#define k8cmplt(x,y) (_mm_cmplt_pd(x,y)) +#define k8cmple(x,y) (_mm_cmple_pd(x,y)) diff --git a/src/vectors-8-VSX.h b/src/vectors-8-VSX.h index 07f19d6..35af574 100644 --- a/src/vectors-8-VSX.h +++ b/src/vectors-8-VSX.h @@ -3,6 +3,8 @@ // Use the type vector double directly, without introducing a wrapper class // Use macros instead of inline functions +// See + #include @@ -17,6 +19,12 @@ // Number of vector elements in a CCTK_REAL_VEC #define CCTK_REAL8_VEC_SIZE 2 +// Integer and boolean types corresponding to this real type +#define CCTK_INTEGER8 long long +#define CCTK_BOOLEAN8 long long +#define CCTK_INTEGER8_VEC vector long long +#define CCTK_BOOLEAN8_VEC vector bool long long + // Create vectors, extract vector elements @@ -55,6 +63,25 @@ // stvxl instruction doesn't exist for double precision #define vec8_store_nta(p,x) vec8_store(p,x) +// Store a partial vector (aligned and non-temporal) +#define vec8_store_partial_prepare(i,imin,imax) \ + bool const v8stp_lo = (i)>=(imin); \ + bool const v8stp_hi = (i)+CCTK_REAL8_VEC_SIZE-1<(imax) +#define vec8_store_nta_partial(p_,x_) \ + ({ \ + CCTK_REAL8& p__=(p_); \ + CCTK_REAL8& p=p__; \ + CCTK_REAL8_VEC const x__=(x_); \ + CCTK_REAL8_VEC const x=x__; \ + if (CCTK_BUILTIN_EXPECT(v8stp_lo and v8stp_hi, true)) { \ + vec8_store(p,x); \ + } else if (v8stp_lo) { \ + (&p)[0]=vec8_elt0(x); \ + } else if (v8stp_hi) { \ + (&p)[1]=vec8_elt1(x); \ + } \ + }) + // Store a lower or higher partial vector (aligned and non-temporal); // the non-temporal hint is probably ignored #define vec8_store_nta_partial_lo(p,x,n) ((&(p))[0]=(x)[0]) @@ -80,10 +107,20 @@ #define k8nmsub(x,y,z) (vec_nmsub(x,y,z)) // Cheap functions -#define k8fabs(x) (vec_abs(x)) -#define k8fmax(x,y) (vec_max(x,y)) -#define k8fmin(x,y) (vec_min(x,y)) -#define k8fnabs(x) (vec_nabs(x)) +#define k8copysign(x,y) (vec_cpsgn(y,x)) +#define k8fabs(x) (vec_abs(x)) +#define k8fmax(x,y) (vec_max(x,y)) +#define k8fmin(x,y) (vec_min(x,y)) +#define k8fnabs(x) (vec_nabs(x)) +#define k8sgn(x_) \ + ({ \ + CCTK_REAL8_VEC x__=(x_); \ + CCTK_REAL8_VEC x=x__; \ + CCTK_BOOLEAN8_VEC iszero = k8cmpeq(x,vec8_set1((CCTK_REAL8)0.0)); \ + CCTK_REAL8_VEC signedone = k8copysign(vec8_set1((CCTK_REAL8)1.0),x); \ + k8ifthen(iszero, vec8_set1((CCTK_REAL8)0.0), signedone); \ + }) +#define k8sqrt(x) (vec_sqrt(x)) // Expensive functions #define K8REPL(f,x_) \ @@ -129,13 +166,24 @@ #define k8tan(x) K8REPL(tan,x) #define k8tanh(x) K8REPL(tanh,x) -/* #define k8ifmsb(x,y,z) \ */ -/* (vec_sel((z), (y), vec_sra(vec_convert((x), &(vector long long*)0), 63))) */ - -#define k8lfalse (vec_splats(+0LL)) -#define k8ltrue (vec_splats(-1LL)) -#define k8lnot(x) (vec_xor(x,k8ltrue)) -#define k8land(x,y,z) (vec_and(x,y)) -#define k8lor(x,y,z) (vec_or(x,y)) -#define k8lxor(x,y,z) (vec_xor(x,y)) +// canonical true is -1LL, canonical false is 0LL +// truth values are interpreted bit-wise +#define k8lfalse ({ CCTK_BOOLEAN8_VEC dummy; vec_xor(dummy,dummy); }) +#define k8ltrue (k8lnot(k8lfalse)) +#define k8lnot(x_) \ + ({ \ + CCTK_BOOLEAN8_VEC x__=(x_); \ + CCTK_BOOLEAN8_VEC x=x__; \ + vec_nor(x,x); \ + }) +#define k8land(x,y) (vec_and(x,y)) +#define k8lor(x,y) (vec_or(x,y)) +#define k8lxor(x,y) (vec_xor(x,y)) #define k8ifthen(x,y,z) (vec_sel(z,y,x)) + +#define k8cmpeq(x,y) (vec_cmpeq(x,y)) +#define k8cmpne(x,y) (k8lnot(vec_cmpeq(x,y))) +#define k8cmpgt(x,y) (vec_cmpgt(x,y)) +#define k8cmpge(x,y) (vec_cmpge(x,y)) +#define k8cmplt(x,y) (vec_cmplt(x,y)) +#define k8cmple(x,y) (vec_cmple(x,y)) diff --git a/src/vectors-8-default.h b/src/vectors-8-default.h index e9e2734..d55e29e 100644 --- a/src/vectors-8-default.h +++ b/src/vectors-8-default.h @@ -19,6 +19,12 @@ // Number of vector elements in a vector #define CCTK_REAL8_VEC_SIZE 1 +// Integer and boolean types corresponding to this real type +#define CCTK_INTEGER8 CCTK_REAL8 +#define CCTK_BOOLEAN8 CCTK_REAL8 +#define CCTK_INTEGER8_VEC CCTK_REAL8_VEC +#define CCTK_BOOLEAN8_VEC CCTK_REAL8_VEC + // Create a vector replicating a scalar @@ -112,4 +118,20 @@ # define k8signbit(x) (signbit(x)) #endif -#define k8ifthen(x,y,z) (k8signbit(x)?(y):(z)) +#define k8l2r(x_) ({ CCTK_INT8 x__=(x_); CCTK_INT8 x=x__; *(CCTK_REAL8*)&x; }) +#define k8r2l(x_) ({ CCTK_REAL8 x__=(x_); CCTK_REAL8 x=x__; *(CCTK_INT8*)&x; }) +#define k8lfalse k8l2r(0) +#define k8ltrue k8l2r(1) +#define k8lnot(x) k8l2r(!k8r2l(x)) +#define k8land(x,y) k8l2r(k8r2l(x) && k8r2l(y)) +#define k8lor(x,y) k8l2r(k8r2l(x) || k8r2l(y)) +#define k8lxor(x,y) k8l2r(!k8r2l(x) != !k8r2l(y)) + +#define k8ifthen(x,y,z) ((x)?(y):(z)) + +#define k8cmpeq(x,y) k8l2r((x)==(y)) +#define k8cmpne(x,y) k8l2r((x)!=(y)) +#define k8cmpgt(x,y) k8l2r((x)>(y)) +#define k8cmpge(x,y) k8l2r((x)>=(y)) +#define k8cmplt(x,y) k8l2r((x)<(y)) +#define k8cmple(x,y) k8l2r((x)<=(y)) diff --git a/src/vectors.cc b/src/vectors.cc index ffff4be..489429b 100644 --- a/src/vectors.cc +++ b/src/vectors.cc @@ -1,13 +1,14 @@ - -#include "cctk.h" -#include "cctk_Arguments.h" -#include "cctk_Parameters.h" #include "vectors.h" +#include +#include +#include + extern "C" int Vectors_Startup(void) { - CCTK_VInfo(CCTK_THORNSTRING, "Using vector size %d for architecture %s", - CCTK_REAL_VEC_SIZE, vec_architecture); + CCTK_VInfo(CCTK_THORNSTRING, + "Using vector size %d for architecture %s", + CCTK_REAL_VEC_SIZE, vec_architecture); return 0; } diff --git a/src/vectors.h b/src/vectors.h index 9ed8367..c87446e 100644 --- a/src/vectors.h +++ b/src/vectors.h @@ -22,7 +22,7 @@ # else # include "vectors-8-SSE2.h" # endif -# elif defined(_ARCH_QP) // Blue Gene/Q QPX +# elif defined(__bgq__) && defined(__VECTOR4DOUBLE__) // Blue Gene/Q QPX # include "vectors-8-QPX.h" # elif defined(__ALTIVEC__) && defined(_ARCH_PWR7) // Power VSX # include "vectors-8-VSX.h" @@ -50,6 +50,10 @@ # define CCTK_REAL_VEC CCTK_REAL4_VEC # define CCTK_REAL_VEC_SIZE CCTK_REAL4_VEC_SIZE +# define CCTK_INTEGER CCTK_INTEGER4 +# define CCTK_BOOLEAN CCTK_BOOLEAN4 +# define CCTK_INTEGER_VEC CCTK_INTEGER4_VEC +# define CCTK_BOOLEAN_VEC CCTK_BOOLEAN4_VEC # define vec_set1 vec4_set1 # define vec_set vec4_set @@ -105,14 +109,31 @@ # define ktan k4tan # define ktanh k4tanh +# define klfalse k4lfalse +# define kltrue k4ltrue +# define klnot k4lnot +# define kland k4land +# define klor k4lor +# define klxor k4lxor # define kifthen k4ifthen +# define kcmpeq k4cmpeq +# define kcmpne k4cmpne +# define kcmpgt k4cmpgt +# define kcmpge k4cmpge +# define kcmplt k4cmplt +# define kcmple k4cmple + #elif defined(CCTK_REAL_PRECISION_8) # define vec_architecture vec8_architecture # define CCTK_REAL_VEC CCTK_REAL8_VEC # define CCTK_REAL_VEC_SIZE CCTK_REAL8_VEC_SIZE +# define CCTK_INTEGER CCTK_INTEGER8 +# define CCTK_BOOLEAN CCTK_BOOLEAN8 +# define CCTK_INTEGER_VEC CCTK_INTEGER8_VEC +# define CCTK_BOOLEAN_VEC CCTK_BOOLEAN8_VEC # define vec_set1 vec8_set1 # define vec_set vec8_set @@ -168,8 +189,21 @@ # define ktan k8tan # define ktanh k8tanh +# define klfalse k8lfalse +# define kltrue k8ltrue +# define klnot k8lnot +# define kland k8land +# define klor k8lor +# define klxor k8lxor # define kifthen k8ifthen +# define kcmpeq k8cmpeq +# define kcmpne k8cmpne +# define kcmpgt k8cmpgt +# define kcmpge k8cmpge +# define kcmplt k8cmplt +# define kcmple k8cmple + #else # error "Unknown CCTK_REAL_PRECISION" @@ -178,6 +212,7 @@ +// Deprecated #define kifmsb(a,b,c) kifthen(a,b,c) #define kifneg(a,b,c) kifmsb(a,b,c) #define kifpos(a,b,c) kifmsb(a,c,b) -- cgit v1.2.3