aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authoreschnett <eschnett@105869f7-3296-0410-a4ea-f4349344b45a>2012-09-14 18:48:46 +0000
committereschnett <eschnett@105869f7-3296-0410-a4ea-f4349344b45a>2012-09-14 18:48:46 +0000
commit6005a49ff06f8368b516c168e5a396530c1193d4 (patch)
tree83ee6b75212e9f106c9854b3cca7060a63504a03
parentff90dc4b8644b18cb8c01bdba1df2488518652ba (diff)
Add support for (dynamic) if-then expressions
Add types for holding integers and booleans, and vectors thereof. Add if-then expressions. Add floating point comparisons. Update tests. git-svn-id: https://svn.cct.lsu.edu/repos/numrel/LSUThorns/Vectors/trunk@66 105869f7-3296-0410-a4ea-f4349344b45a
-rw-r--r--src/test.cc59
-rw-r--r--src/vectors-4-Altivec.h6
-rw-r--r--src/vectors-4-SSE.h19
-rw-r--r--src/vectors-4-default.h24
-rw-r--r--src/vectors-8-AVX.h23
-rw-r--r--src/vectors-8-DoubleHummer.h40
-rw-r--r--src/vectors-8-QPX.h18
-rw-r--r--src/vectors-8-SSE2.h19
-rw-r--r--src/vectors-8-VSX.h74
-rw-r--r--src/vectors-8-default.h24
-rw-r--r--src/vectors.cc13
-rw-r--r--src/vectors.h37
12 files changed, 313 insertions, 43 deletions
diff --git a/src/test.cc b/src/test.cc
index 73dd9e2..ed1ab85 100644
--- a/src/test.cc
+++ b/src/test.cc
@@ -7,6 +7,7 @@
#include <cassert>
#include <cmath>
#include <cstdio>
+#include <cstring>
#include <limits>
using namespace std;
@@ -21,17 +22,18 @@ inline CCTK_REAL my_copysign (CCTK_REAL const x, CCTK_REAL const y)
inline int my_isnan (CCTK_REAL const x)
{
- return std::isnan(x);
+ using namespace std;
+ return isnan(x);
}
inline int my_signbit (CCTK_REAL const x)
{
- return std::signbit(x);
+ using namespace std;
+ return signbit(x);
}
inline CCTK_REAL my_sgn (CCTK_REAL const x)
{
- using namespace std;
return x == (CCTK_REAL)0.0 ? (CCTK_REAL)0.0 : my_copysign((CCTK_REAL)1.0, x);
}
@@ -83,6 +85,27 @@ inline CCTK_REAL my_sgn (CCTK_REAL const x)
} \
} while(0)
+#define VECBITTEST(testname, vecexpr, scalarexpr) \
+ do { \
+ if (verbose) { \
+ CCTK_VInfo (CCTK_THORNSTRING, "Test %s...", testname); \
+ } \
+ CCTK_BOOLEAN_VEC rv = (vecexpr); \
+ for (int i=0; i<CCTK_REAL_VEC_SIZE; i++) { \
+ CCTK_BOOLEAN res = (scalarexpr); \
+ CCTK_BOOLEAN vecres = vec_elt(rv,i); \
+ if (memcmp(&vecres, &res, sizeof vecres) == 0) { \
+ passed++; \
+ } else { \
+ CCTK_VParamWarn(CCTK_THORNSTRING, \
+ "Failed test %s: " \
+ "for element %d, expected %lld, received %lld", \
+ testname, i, (long long)res, (long long)vecres); \
+ } \
+ numtests++; \
+ } \
+ } while(0)
+
extern "C"
@@ -257,6 +280,7 @@ void Vectors_Test(CCTK_ARGUMENTS)
VECTEST("ktan", ktan(xv), tan(x[i]) );
VECTEST("ktanh", ktanh(xv), tanh(x[i]) );
+#if 0
VECTEST("kifpos positive",
kifpos(av, bv, cv), my_signbit(a[i]) ? c[i] : b[i]);
VECTEST("kifpos negative",
@@ -270,6 +294,35 @@ void Vectors_Test(CCTK_ARGUMENTS)
kifneg(bv, bv, cv), my_signbit(b[i]) ? b[i] : c[i]);
VECTEST("kifneg 0", kifneg(vec_set1(0.),bv,cv), c[i]);
VECTEST("kifneg -0", kifneg(vec_set1(-0.),bv,cv), b[i]);
+#endif
+
+ CCTK_BOOLEAN_VEC testklfalse = klfalse;
+ CCTK_BOOLEAN_VEC testkltrue = kltrue;
+ CCTK_BOOLEAN klfalse1 = vec_elt(testklfalse,0);
+ CCTK_BOOLEAN kltrue1 = vec_elt(testkltrue ,0);
+ VECBITTEST("klnot F", klnot(klfalse), kltrue1 );
+ VECBITTEST("klnot T", klnot(kltrue ), klfalse1);
+ VECBITTEST("kland FF", kland(klfalse,klfalse), klfalse1);
+ VECBITTEST("kland FT", kland(klfalse,kltrue ), klfalse1);
+ VECBITTEST("kland TF", kland(kltrue ,klfalse), klfalse1);
+ VECBITTEST("kland TT", kland(kltrue ,kltrue ), kltrue1 );
+ VECBITTEST("klor FF", klor(klfalse,klfalse), klfalse1);
+ VECBITTEST("klor FT", klor(klfalse,kltrue ), kltrue1 );
+ VECBITTEST("klor TF", klor(kltrue ,klfalse), kltrue1 );
+ VECBITTEST("klor TT", klor(kltrue ,kltrue ), kltrue1 );
+ VECBITTEST("klxor FF", klxor(klfalse,klfalse), klfalse1);
+ VECBITTEST("klxor FT", klxor(klfalse,kltrue ), kltrue1 );
+ VECBITTEST("klxor TF", klxor(kltrue ,klfalse), kltrue1 );
+ VECBITTEST("klxor TT", klxor(kltrue ,kltrue ), klfalse1);
+ VECTEST("kifthen F", kifthen(klfalse,av,bv), b[i]);
+ VECTEST("kifthen T", kifthen(kltrue ,av,bv), a[i]);
+
+ VECBITTEST("kcmpeq", kcmpeq(av,bv), a[i]==b[i]?kltrue1:klfalse1);
+ VECBITTEST("kcmpne", kcmpne(av,bv), a[i]!=b[i]?kltrue1:klfalse1);
+ VECBITTEST("kcmpgt", kcmpgt(av,bv), a[i]> b[i]?kltrue1:klfalse1);
+ VECBITTEST("kcmpge", kcmpge(av,bv), a[i]>=b[i]?kltrue1:klfalse1);
+ VECBITTEST("kcmplt", kcmplt(av,bv), a[i]< b[i]?kltrue1:klfalse1);
+ VECBITTEST("kcmple", kcmple(av,bv), a[i]<=b[i]?kltrue1:klfalse1);
if (passed != numtests) {
CCTK_VWarn(CCTK_WARN_ALERT, __LINE__, __FILE__, CCTK_THORNSTRING,
diff --git a/src/vectors-4-Altivec.h b/src/vectors-4-Altivec.h
index d6281c8..3975c77 100644
--- a/src/vectors-4-Altivec.h
+++ b/src/vectors-4-Altivec.h
@@ -17,6 +17,12 @@
// Number of vector elements in a CCTK_REAL_VEC
#define CCTK_REAL4_VEC_SIZE 4
+// Integer and boolean types corresponding to this real type
+#define CCTK_INTEGER4 CCTK_REAL4
+#define CCTK_BOOLEAN4 CCTK_REAL4
+#define CCTK_INTEGER4_VEC CCTK_REAL4_VEC
+#define CCTK_BOOLEAN4_VEC CCTK_REAL4_VEC
+
// Create vectors, extract vector elements
diff --git a/src/vectors-4-SSE.h b/src/vectors-4-SSE.h
index 9af7189..e6ff34e 100644
--- a/src/vectors-4-SSE.h
+++ b/src/vectors-4-SSE.h
@@ -48,6 +48,12 @@
// Number of vector elements in a CCTK_REAL_VEC
#define CCTK_REAL4_VEC_SIZE 4
+// Integer and boolean types corresponding to this real type
+#define CCTK_INTEGER4 CCTK_REAL4
+#define CCTK_BOOLEAN4 CCTK_REAL4
+#define CCTK_INTEGER4_VEC CCTK_REAL4_VEC
+#define CCTK_BOOLEAN4_VEC CCTK_REAL4_VEC
+
union k4const_t {
@@ -385,8 +391,10 @@ static const k4const_t k4one = { f: { 1.0f, 1.0f, 1.0f, 1.0f, }};
#define k4tan(x) K4REPL(tanf,x)
#define k4tanh(x) K4REPL(tanhf,x)
-static const k4const_t k4lfalse = {{ +0U, +0U, +0U, +0U, }};
-static const k4const_t k4ltrue = {{ -1U, -1U, -1U, -1U, }};
+static const k4const_t k4lfalse_ = {{ +0U, +0U, +0U, +0U, }};
+static const k4const_t k4ltrue_ = {{ -1U, -1U, -1U, -1U, }};
+#define k4lfalse (k4lfalse_.vf)
+#define k4ltrue (k4ltrue_.vf)
#define k4lnot(x) (_mm_xor_ps(k4ltrue,x))
#define k4land(x,y) (_mm_and_ps(x,y))
#define k4lor(x,y) (_mm_or_ps(x,y))
@@ -442,3 +450,10 @@ static const k4const_t k4ltrue = {{ -1U, -1U, -1U, -1U, }};
_mm_or_ps(_mm_and_ps(x, y), _mm_andnot_ps(x, z)); \
})
#endif
+
+#define k4cmpeq(x,y) (_mm_cmpeq_ps(x,y))
+#define k4cmpne(x,y) (_mm_cmpneq_ps(x,y))
+#define k4cmpgt(x,y) (_mm_cmpgt_ps(x,y))
+#define k4cmpge(x,y) (_mm_cmpge_ps(x,y))
+#define k4cmplt(x,y) (_mm_cmplt_ps(x,y))
+#define k4cmple(x,y) (_mm_cmple_ps(x,y))
diff --git a/src/vectors-4-default.h b/src/vectors-4-default.h
index d1b2135..9b2f7f6 100644
--- a/src/vectors-4-default.h
+++ b/src/vectors-4-default.h
@@ -19,6 +19,12 @@
// Number of vector elements in a vector
#define CCTK_REAL4_VEC_SIZE 1
+// Integer and boolean types corresponding to this real type
+#define CCTK_INTEGER4 CCTK_REAL4
+#define CCTK_BOOLEAN4 CCTK_REAL4
+#define CCTK_INTEGER4_VEC CCTK_REAL4_VEC
+#define CCTK_BOOLEAN4_VEC CCTK_REAL4_VEC
+
// Create a vector replicating a scalar
@@ -114,4 +120,20 @@
# define k4signbit(x) (signbit(x))
#endif
-#define k4ifthen(x,y,z) (k4signbit(x)?(y):(z))
+#define k4l2r(x_) ({ CCTK_INT4 x__=(x_); CCTK_INT4 x=x__; *(CCTK_REAL4*)&x; })
+#define k4r2l(x_) ({ CCTK_REAL4 x__=(x_); CCTK_REAL4 x=x__; *(CCTK_INT4*)&x; })
+#define k4lfalse k4l2r(0)
+#define k4ltrue k4l2r(1)
+#define k4lnot(x) k4l2r(!k4r2l(x))
+#define k4land(x,y) k4l2r(k4r2l(x) && k4r2l(y))
+#define k4lor(x,y) k4l2r(k4r2l(x) || k4r2l(y))
+#define k4lxor(x,y) k4l2r(!k4r2l(x) != !k4r2l(y))
+
+#define k4ifthen(x,y,z) ((x)?(y):(z))
+
+#define k4cmpeq(x,y) k4l2r((x)==(y))
+#define k4cmpne(x,y) k4l2r((x)!=(y))
+#define k4cmpgt(x,y) k4l2r((x)>(y))
+#define k4cmpge(x,y) k4l2r((x)>=(y))
+#define k4cmplt(x,y) k4l2r((x)<(y))
+#define k4cmple(x,y) k4l2r((x)<=(y))
diff --git a/src/vectors-8-AVX.h b/src/vectors-8-AVX.h
index 1ba2583..6882523 100644
--- a/src/vectors-8-AVX.h
+++ b/src/vectors-8-AVX.h
@@ -31,6 +31,12 @@
// Number of vector elements in a CCTK_REAL_VEC
#define CCTK_REAL8_VEC_SIZE 4
+// Integer and boolean types corresponding to this real type
+#define CCTK_INTEGER8 CCTK_REAL8
+#define CCTK_BOOLEAN8 CCTK_REAL8
+#define CCTK_INTEGER8_VEC CCTK_REAL8_VEC
+#define CCTK_BOOLEAN8_VEC CCTK_REAL8_VEC
+
union k8const_t {
@@ -299,10 +305,21 @@ static const k8const_t k8one = { f: { 1.0, 1.0, 1.0, 1.0, }};
#define k8tan(x) K8REPL(tan,x)
#define k8tanh(x) K8REPL(tanh,x)
-static const k8const_t k8lfalse = {{ K8_ZERO, K8_ZERO, K8_ZERO, K8_ZERO, }};
-static const k8const_t k8ltrue = {{ K8_IMIN, K8_IMIN, K8_IMIN, K8_IMIN, }};
-#define k8lnot(x) (_mm256_xor_pd(k8sign_mask,x))
+static const k8const_t k8lfalse_ =
+ {{ K8_ZERO, K8_ZERO, K8_ZERO, K8_ZERO, }};
+static const k8const_t k8ltrue_ =
+ {{ K8_NOTZERO, K8_NOTZERO, K8_NOTZERO, K8_NOTZERO, }};
+#define k8lfalse (k8lfalse_.vf)
+#define k8ltrue (k8ltrue_.vf)
+#define k8lnot(x) (_mm256_xor_pd(k8ltrue,x))
#define k8land(x,y) (_mm256_and_pd(x,y))
#define k8lor(x,y) (_mm256_or_pd(x,y))
#define k8lxor(x,y) (_mm256_xor_pd(x,y))
#define k8ifthen(x,y,z) (_mm256_blendv_pd(z,y,x))
+
+#define k8cmpeq(x,y) (_mm256_cmp_pd(x,y,_CMP_EQ_OQ))
+#define k8cmpne(x,y) (_mm256_cmp_pd(x,y,_CMP_NEQ_OQ))
+#define k8cmpgt(x,y) (_mm256_cmp_pd(x,y,_CMP_GT_OQ))
+#define k8cmpge(x,y) (_mm256_cmp_pd(x,y,_CMP_GE_OQ))
+#define k8cmplt(x,y) (_mm256_cmp_pd(x,y,_CMP_LT_OQ))
+#define k8cmple(x,y) (_mm256_cmp_pd(x,y,_CMP_LE_OQ))
diff --git a/src/vectors-8-DoubleHummer.h b/src/vectors-8-DoubleHummer.h
index ffb04b4..0189989 100644
--- a/src/vectors-8-DoubleHummer.h
+++ b/src/vectors-8-DoubleHummer.h
@@ -3,6 +3,8 @@
// Use the type double _Complex directly, without introducing a wrapper class
// Use macros instead of inline functions
+// See <http://publib.boulder.ibm.com/infocenter/compbgpl/v9v111/index.jsp>
+
#include <assert.h>
@@ -21,6 +23,12 @@
// Number of vector elements in a CCTK_REAL_VEC
#define CCTK_REAL8_VEC_SIZE 2
+// Integer and boolean types corresponding to this real type
+#define CCTK_INTEGER8 CCTK_REAL8
+#define CCTK_BOOLEAN8 CCTK_REAL8
+#define CCTK_INTEGER8_VEC CCTK_REAL8_VEC
+#define CCTK_BOOLEAN8_VEC CCTK_REAL8_VEC
+
union k8const_t {
@@ -195,7 +203,9 @@ union k8const_t {
#define k8nmsub(x,y,z) (__fpnmsub(z,x,y))
// Cheap functions
-#define k8fabs(x) (__fpabs(x))
+// TODO: handle -0 correctly
+#define k8copysign(x,y) (k8ifthen(y,k8fabs(x),k8fnabs(x)))
+#define k8fabs(x) (__fpabs(x))
#define k8fmax(x_,y_) \
({ \
CCTK_REAL8_VEC const x__=(x_); \
@@ -220,11 +230,8 @@ static const k8const_t k8mone = {{ -1.0, -1.0, }};
({ \
CCTK_REAL_VEC x__=(x_); \
CCTK_REAL_VEC x=x__; \
- /* TODO: this assumes that __fpsel says -0>=+0; \
- if this is not so, we need k8abs(x) instead of x for iszero. */ \
- CCTK_REAL_VEC iszero = k8land(__fpsel(x, k8lfalse.vf, k8ltrue.vf), \
- __fpsel(k8neg(x), k8lfalse.vf, k8ltrue.vf)); \
- CCTK_REAL_VEC signedone = __fpsel(x, k8mone.vf, k8one.vf); \
+ CCTK_REAL_VEC iszero = k8fnabs(x); \
+ CCTK_REAL_VEC signedone = k8ifthen(x, k8one.vf, k8mone.vf); \
k8ifthen(iszero, k8zero.vf, signedone); \
})
// Estimate for reciprocal square root
@@ -301,10 +308,21 @@ static const k8const_t k8mone = {{ -1.0, -1.0, }};
#define k8tan(x) K8REPL(tan,x)
#define k8tanh(x) K8REPL(tanh,x)
-static const k8const_t k8lfalse = {{ -1.0, -1.0, }};
-static const k8const_t k8ltrue = {{ +1.0, +1.0, }};
-#define k8lnot(x) (__fpneg(x))
-#define k8land(x,y) (k8ifthen(x,y,k8lfalse.vf))
-#define k8lor(x,y) (k8ifthen(x,k8ltrue.vf,y))
+// canonical true is +1.0, canonical false is -1.0
+// >=0 is true, -0 is true (?), nan is false (?)
+static const k8const_t k8lfalse_ = {{ -1.0, -1.0, }};
+static const k8const_t k8ltrue_ = {{ +1.0, +1.0, }};
+#define k8lfalse (k8lfalse_.vf)
+#define k8ltrue (k8ltrue_.vf)
+#define k8lnot(x) (k8ifthen(x,k8lfalse,k8ltrue))
+#define k8land(x,y) (k8ifthen(x,y,k8lfalse))
+#define k8lor(x,y) (k8ifthen(x,k8ltrue,y))
#define k8lxor(x,y) (k8ifthen(x,k8lnot(y),y))
#define k8ifthen(x,y,z) (__fpsel(x,z,y))
+
+#define k8cmpeq(x,y) (__fpnabs((x)-(y)))
+#define k8cmpne(x,y) (k8lnot(__fpnabs((x)-(y))))
+#define k8cmpgt(x,y) (k8lnot(k8cmple(x,y)))
+#define k8cmpge(x,y) ((x)-(y))
+#define k8cmplt(x,y) (k8lnot(k8cmpge(x,y)))
+#define k8cmple(x,y) ((y)-(x))
diff --git a/src/vectors-8-QPX.h b/src/vectors-8-QPX.h
index 80762fe..93a7707 100644
--- a/src/vectors-8-QPX.h
+++ b/src/vectors-8-QPX.h
@@ -6,6 +6,8 @@
// Note: bgxlC_r does not like const declarations, so we need to cast
// them away and/or omit them everywhere
+// See <http://pic.dhe.ibm.com/infocenter/compbg/v121v141/index.jsp>
+
#include <assert.h>
@@ -19,11 +21,18 @@
#define vec8_architecture "QPX"
// Vector type corresponding to CCTK_REAL
+// TODO: Use a typedef to avoid the "const" issue? Or use a struct?
#define CCTK_REAL8_VEC vector4double
// Number of vector elements in a CCTK_REAL_VEC
#define CCTK_REAL8_VEC_SIZE 4
+// Integer and boolean types corresponding to this real type
+#define CCTK_INTEGER8 CCTK_REAL8
+#define CCTK_BOOLEAN8 CCTK_REAL8
+#define CCTK_INTEGER8_VEC CCTK_REAL8_VEC
+#define CCTK_BOOLEAN8_VEC CCTK_REAL8_VEC
+
union k8const_t {
@@ -300,6 +309,8 @@ union k8const_t {
#define k8tan(x) K8REPL(tan,x)
#define k8tanh(x) K8REPL(tanh,x)
+// canonical true is +1.0, canonical false is -1.0
+// >=0 is true, -0 is true, nan is false
#define k8lfalse \
({ CCTK_REAL8_VEC dummy; vec_logical(dummy,dummy,0x0); })
#define k8ltrue \
@@ -309,3 +320,10 @@ union k8const_t {
#define k8lor(x,y) (vec_or(x,y))
#define k8lxor(x,y) (vec_xor(x,y))
#define k8ifthen(x,y,z) (vec_sel(z,x,y))
+
+#define k8cmpeq(x,y) (vec_cmpeq(x,y))
+#define k8cmpne(x,y) (vec_not(vec_cmpeq(x,y)))
+#define k8cmpgt(x,y) (vec_cmpgt(x,y))
+#define k8cmpge(x,y) (vec_not(vec_cmplt(x,y)))
+#define k8cmplt(x,y) (vec_cmplt(x,y))
+#define k8cmple(x,y) (vec_not(vec_cmpgt(x,y)))
diff --git a/src/vectors-8-SSE2.h b/src/vectors-8-SSE2.h
index 1b6aeb5..cfefbce 100644
--- a/src/vectors-8-SSE2.h
+++ b/src/vectors-8-SSE2.h
@@ -58,6 +58,12 @@
// Number of vector elements in a CCTK_REAL_VEC
#define CCTK_REAL8_VEC_SIZE 2
+// Integer and boolean types corresponding to this real type
+#define CCTK_INTEGER8 CCTK_REAL8
+#define CCTK_BOOLEAN8 CCTK_REAL8
+#define CCTK_INTEGER8_VEC CCTK_REAL8_VEC
+#define CCTK_BOOLEAN8_VEC CCTK_REAL8_VEC
+
union k8const_t {
@@ -333,8 +339,10 @@ static const k8const_t k8one = { f: { 1.0, 1.0, }};
#define k8tan(x) K8REPL(tan,x)
#define k8tanh(x) K8REPL(tanh,x)
-static const k8const_t k8lfalse = {{ +0LL, +0LL, }};
-static const k8const_t k8ltrue = {{ -1LL, -1LL, }};
+static const k8const_t k8lfalse_ = {{ +0LL, +0LL, }};
+static const k8const_t k8ltrue_ = {{ -1LL, -1LL, }};
+#define k8lfalse (k8lfalse_.vf)
+#define k8ltrue (k8ltrue_.vf)
#define k8lnot(x) (_mm_xor_pd(k8ltrue,x))
#define k8land(x,y) (_mm_and_pd(x,y))
#define k8lor(x,y) (_mm_or_pd(x,y))
@@ -412,3 +420,10 @@ static const k8const_t k8ione = {{ 0x1ULL, 0x1ULL, }};
_mm_or_pd(_mm_and_pd(x, y), _mm_andnot_pd(x, z)); \
})
#endif
+
+#define k8cmpeq(x,y) (_mm_cmpeq_pd(x,y))
+#define k8cmpne(x,y) (_mm_cmpneq_pd(x,y))
+#define k8cmpgt(x,y) (_mm_cmpgt_pd(x,y))
+#define k8cmpge(x,y) (_mm_cmpge_pd(x,y))
+#define k8cmplt(x,y) (_mm_cmplt_pd(x,y))
+#define k8cmple(x,y) (_mm_cmple_pd(x,y))
diff --git a/src/vectors-8-VSX.h b/src/vectors-8-VSX.h
index 07f19d6..35af574 100644
--- a/src/vectors-8-VSX.h
+++ b/src/vectors-8-VSX.h
@@ -3,6 +3,8 @@
// Use the type vector double directly, without introducing a wrapper class
// Use macros instead of inline functions
+// See <http://pic.dhe.ibm.com/infocenter/comphelp/v111v131/index.jsp>
+
#include <altivec.h>
@@ -17,6 +19,12 @@
// Number of vector elements in a CCTK_REAL_VEC
#define CCTK_REAL8_VEC_SIZE 2
+// Integer and boolean types corresponding to this real type
+#define CCTK_INTEGER8 long long
+#define CCTK_BOOLEAN8 long long
+#define CCTK_INTEGER8_VEC vector long long
+#define CCTK_BOOLEAN8_VEC vector bool long long
+
// Create vectors, extract vector elements
@@ -55,6 +63,25 @@
// stvxl instruction doesn't exist for double precision
#define vec8_store_nta(p,x) vec8_store(p,x)
+// Store a partial vector (aligned and non-temporal)
+#define vec8_store_partial_prepare(i,imin,imax) \
+ bool const v8stp_lo = (i)>=(imin); \
+ bool const v8stp_hi = (i)+CCTK_REAL8_VEC_SIZE-1<(imax)
+#define vec8_store_nta_partial(p_,x_) \
+ ({ \
+ CCTK_REAL8& p__=(p_); \
+ CCTK_REAL8& p=p__; \
+ CCTK_REAL8_VEC const x__=(x_); \
+ CCTK_REAL8_VEC const x=x__; \
+ if (CCTK_BUILTIN_EXPECT(v8stp_lo and v8stp_hi, true)) { \
+ vec8_store(p,x); \
+ } else if (v8stp_lo) { \
+ (&p)[0]=vec8_elt0(x); \
+ } else if (v8stp_hi) { \
+ (&p)[1]=vec8_elt1(x); \
+ } \
+ })
+
// Store a lower or higher partial vector (aligned and non-temporal);
// the non-temporal hint is probably ignored
#define vec8_store_nta_partial_lo(p,x,n) ((&(p))[0]=(x)[0])
@@ -80,10 +107,20 @@
#define k8nmsub(x,y,z) (vec_nmsub(x,y,z))
// Cheap functions
-#define k8fabs(x) (vec_abs(x))
-#define k8fmax(x,y) (vec_max(x,y))
-#define k8fmin(x,y) (vec_min(x,y))
-#define k8fnabs(x) (vec_nabs(x))
+#define k8copysign(x,y) (vec_cpsgn(y,x))
+#define k8fabs(x) (vec_abs(x))
+#define k8fmax(x,y) (vec_max(x,y))
+#define k8fmin(x,y) (vec_min(x,y))
+#define k8fnabs(x) (vec_nabs(x))
+#define k8sgn(x_) \
+ ({ \
+ CCTK_REAL8_VEC x__=(x_); \
+ CCTK_REAL8_VEC x=x__; \
+ CCTK_BOOLEAN8_VEC iszero = k8cmpeq(x,vec8_set1((CCTK_REAL8)0.0)); \
+ CCTK_REAL8_VEC signedone = k8copysign(vec8_set1((CCTK_REAL8)1.0),x); \
+ k8ifthen(iszero, vec8_set1((CCTK_REAL8)0.0), signedone); \
+ })
+#define k8sqrt(x) (vec_sqrt(x))
// Expensive functions
#define K8REPL(f,x_) \
@@ -129,13 +166,24 @@
#define k8tan(x) K8REPL(tan,x)
#define k8tanh(x) K8REPL(tanh,x)
-/* #define k8ifmsb(x,y,z) \ */
-/* (vec_sel((z), (y), vec_sra(vec_convert((x), &(vector long long*)0), 63))) */
-
-#define k8lfalse (vec_splats(+0LL))
-#define k8ltrue (vec_splats(-1LL))
-#define k8lnot(x) (vec_xor(x,k8ltrue))
-#define k8land(x,y,z) (vec_and(x,y))
-#define k8lor(x,y,z) (vec_or(x,y))
-#define k8lxor(x,y,z) (vec_xor(x,y))
+// canonical true is -1LL, canonical false is 0LL
+// truth values are interpreted bit-wise
+#define k8lfalse ({ CCTK_BOOLEAN8_VEC dummy; vec_xor(dummy,dummy); })
+#define k8ltrue (k8lnot(k8lfalse))
+#define k8lnot(x_) \
+ ({ \
+ CCTK_BOOLEAN8_VEC x__=(x_); \
+ CCTK_BOOLEAN8_VEC x=x__; \
+ vec_nor(x,x); \
+ })
+#define k8land(x,y) (vec_and(x,y))
+#define k8lor(x,y) (vec_or(x,y))
+#define k8lxor(x,y) (vec_xor(x,y))
#define k8ifthen(x,y,z) (vec_sel(z,y,x))
+
+#define k8cmpeq(x,y) (vec_cmpeq(x,y))
+#define k8cmpne(x,y) (k8lnot(vec_cmpeq(x,y)))
+#define k8cmpgt(x,y) (vec_cmpgt(x,y))
+#define k8cmpge(x,y) (vec_cmpge(x,y))
+#define k8cmplt(x,y) (vec_cmplt(x,y))
+#define k8cmple(x,y) (vec_cmple(x,y))
diff --git a/src/vectors-8-default.h b/src/vectors-8-default.h
index e9e2734..d55e29e 100644
--- a/src/vectors-8-default.h
+++ b/src/vectors-8-default.h
@@ -19,6 +19,12 @@
// Number of vector elements in a vector
#define CCTK_REAL8_VEC_SIZE 1
+// Integer and boolean types corresponding to this real type
+#define CCTK_INTEGER8 CCTK_REAL8
+#define CCTK_BOOLEAN8 CCTK_REAL8
+#define CCTK_INTEGER8_VEC CCTK_REAL8_VEC
+#define CCTK_BOOLEAN8_VEC CCTK_REAL8_VEC
+
// Create a vector replicating a scalar
@@ -112,4 +118,20 @@
# define k8signbit(x) (signbit(x))
#endif
-#define k8ifthen(x,y,z) (k8signbit(x)?(y):(z))
+#define k8l2r(x_) ({ CCTK_INT8 x__=(x_); CCTK_INT8 x=x__; *(CCTK_REAL8*)&x; })
+#define k8r2l(x_) ({ CCTK_REAL8 x__=(x_); CCTK_REAL8 x=x__; *(CCTK_INT8*)&x; })
+#define k8lfalse k8l2r(0)
+#define k8ltrue k8l2r(1)
+#define k8lnot(x) k8l2r(!k8r2l(x))
+#define k8land(x,y) k8l2r(k8r2l(x) && k8r2l(y))
+#define k8lor(x,y) k8l2r(k8r2l(x) || k8r2l(y))
+#define k8lxor(x,y) k8l2r(!k8r2l(x) != !k8r2l(y))
+
+#define k8ifthen(x,y,z) ((x)?(y):(z))
+
+#define k8cmpeq(x,y) k8l2r((x)==(y))
+#define k8cmpne(x,y) k8l2r((x)!=(y))
+#define k8cmpgt(x,y) k8l2r((x)>(y))
+#define k8cmpge(x,y) k8l2r((x)>=(y))
+#define k8cmplt(x,y) k8l2r((x)<(y))
+#define k8cmple(x,y) k8l2r((x)<=(y))
diff --git a/src/vectors.cc b/src/vectors.cc
index ffff4be..489429b 100644
--- a/src/vectors.cc
+++ b/src/vectors.cc
@@ -1,13 +1,14 @@
-
-#include "cctk.h"
-#include "cctk_Arguments.h"
-#include "cctk_Parameters.h"
#include "vectors.h"
+#include <cctk.h>
+#include <cctk_Arguments.h>
+#include <cctk_Parameters.h>
+
extern "C"
int Vectors_Startup(void)
{
- CCTK_VInfo(CCTK_THORNSTRING, "Using vector size %d for architecture %s",
- CCTK_REAL_VEC_SIZE, vec_architecture);
+ CCTK_VInfo(CCTK_THORNSTRING,
+ "Using vector size %d for architecture %s",
+ CCTK_REAL_VEC_SIZE, vec_architecture);
return 0;
}
diff --git a/src/vectors.h b/src/vectors.h
index 9ed8367..c87446e 100644
--- a/src/vectors.h
+++ b/src/vectors.h
@@ -22,7 +22,7 @@
# else
# include "vectors-8-SSE2.h"
# endif
-# elif defined(_ARCH_QP) // Blue Gene/Q QPX
+# elif defined(__bgq__) && defined(__VECTOR4DOUBLE__) // Blue Gene/Q QPX
# include "vectors-8-QPX.h"
# elif defined(__ALTIVEC__) && defined(_ARCH_PWR7) // Power VSX
# include "vectors-8-VSX.h"
@@ -50,6 +50,10 @@
# define CCTK_REAL_VEC CCTK_REAL4_VEC
# define CCTK_REAL_VEC_SIZE CCTK_REAL4_VEC_SIZE
+# define CCTK_INTEGER CCTK_INTEGER4
+# define CCTK_BOOLEAN CCTK_BOOLEAN4
+# define CCTK_INTEGER_VEC CCTK_INTEGER4_VEC
+# define CCTK_BOOLEAN_VEC CCTK_BOOLEAN4_VEC
# define vec_set1 vec4_set1
# define vec_set vec4_set
@@ -105,14 +109,31 @@
# define ktan k4tan
# define ktanh k4tanh
+# define klfalse k4lfalse
+# define kltrue k4ltrue
+# define klnot k4lnot
+# define kland k4land
+# define klor k4lor
+# define klxor k4lxor
# define kifthen k4ifthen
+# define kcmpeq k4cmpeq
+# define kcmpne k4cmpne
+# define kcmpgt k4cmpgt
+# define kcmpge k4cmpge
+# define kcmplt k4cmplt
+# define kcmple k4cmple
+
#elif defined(CCTK_REAL_PRECISION_8)
# define vec_architecture vec8_architecture
# define CCTK_REAL_VEC CCTK_REAL8_VEC
# define CCTK_REAL_VEC_SIZE CCTK_REAL8_VEC_SIZE
+# define CCTK_INTEGER CCTK_INTEGER8
+# define CCTK_BOOLEAN CCTK_BOOLEAN8
+# define CCTK_INTEGER_VEC CCTK_INTEGER8_VEC
+# define CCTK_BOOLEAN_VEC CCTK_BOOLEAN8_VEC
# define vec_set1 vec8_set1
# define vec_set vec8_set
@@ -168,8 +189,21 @@
# define ktan k8tan
# define ktanh k8tanh
+# define klfalse k8lfalse
+# define kltrue k8ltrue
+# define klnot k8lnot
+# define kland k8land
+# define klor k8lor
+# define klxor k8lxor
# define kifthen k8ifthen
+# define kcmpeq k8cmpeq
+# define kcmpne k8cmpne
+# define kcmpgt k8cmpgt
+# define kcmpge k8cmpge
+# define kcmplt k8cmplt
+# define kcmple k8cmple
+
#else
# error "Unknown CCTK_REAL_PRECISION"
@@ -178,6 +212,7 @@
+// Deprecated
#define kifmsb(a,b,c) kifthen(a,b,c)
#define kifneg(a,b,c) kifmsb(a,b,c)
#define kifpos(a,b,c) kifmsb(a,c,b)