aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authoreschnett <eschnett@105869f7-3296-0410-a4ea-f4349344b45a>2011-06-20 02:18:30 +0000
committereschnett <eschnett@105869f7-3296-0410-a4ea-f4349344b45a>2011-06-20 02:18:30 +0000
commit21f90caa6e9bdfdb9f6243f7c32d1e429977f6bd (patch)
treef0881bbaf0b0883793462a6c0a5cba74a861c278
parenta8dd909e99aa25fe697e46fee37c0cf48f087488 (diff)
Add new API elements "kifthen" and "vec_architecture"
git-svn-id: https://svn.cct.lsu.edu/repos/numrel/LSUThorns/Vectors/trunk@12 105869f7-3296-0410-a4ea-f4349344b45a
-rw-r--r--src/vectors-4-Altivec.h5
-rw-r--r--src/vectors-4-SSE.h26
-rw-r--r--src/vectors-4-default.h4
-rw-r--r--src/vectors-8-AVX.h4
-rw-r--r--src/vectors-8-DoubleHummer.h4
-rw-r--r--src/vectors-8-SSE2.h76
-rw-r--r--src/vectors-8-VSX.h5
-rw-r--r--src/vectors-8-default.h4
-rw-r--r--src/vectors.h8
9 files changed, 99 insertions, 37 deletions
diff --git a/src/vectors-4-Altivec.h b/src/vectors-4-Altivec.h
index b44b492..1544504 100644
--- a/src/vectors-4-Altivec.h
+++ b/src/vectors-4-Altivec.h
@@ -9,6 +9,8 @@
+#define vec4_architecture "Altivec"
+
// Vector type corresponding to CCTK_REAL
#define CCTK_REAL4_VEC vector float
@@ -158,3 +160,6 @@
#define k4log(x) K4REPL(log,x)
#define k4pow(x,a) K4REPL2(pow,x,a)
#define k4sqrt(x) K4REPL(sqrt,x)
+
+#define k4ifthen(x,y,z) /* sign bit of x clear ? y : z; vec_sel(a,b,m) takes b where m bits are set */ \
+ (vec_sel((y), (z), vec_sra(vec_convert((x), &(vector int*)0), 31)))
diff --git a/src/vectors-4-SSE.h b/src/vectors-4-SSE.h
index e6dc735..8319c49 100644
--- a/src/vectors-4-SSE.h
+++ b/src/vectors-4-SSE.h
@@ -6,6 +6,10 @@
#include <xmmintrin.h>
+#ifdef __SSE4_1__
+// Intel's SSE 4.1
+# include <smmintrin.h>
+#endif
#ifdef __SSE4A__
// AMD's SSE 4a
# include <ammintrin.h>
@@ -13,6 +17,8 @@
+#define vec4_architecture "SSE"
+
// Vector type corresponding to CCTK_REAL
#define CCTK_REAL4_VEC __m128
@@ -292,3 +298,23 @@ static const union {
#define k4exp(x) K4REPL(exp,x)
#define k4log(x) K4REPL(log,x)
#define k4pow(x,a) K4REPL2(pow,x,a)
+
+// Choice [sign bit of x clear ? y : z]; tests the sign bit itself, since -0.0 compares equal to zero
+#ifdef __SSE4_1__
+# define k4ifthen(x,y,z) (_mm_blendv_ps(y,z,x))
+#else
+# define k4ifthen(x_,y_,z_) \
+ ({ \
+ CCTK_REAL4_VEC const xx=(x_); \
+ CCTK_REAL4_VEC const x=xx; \
+ CCTK_REAL4_VEC const yy=(y_); \
+ CCTK_REAL4_VEC const y=yy; \
+ CCTK_REAL4_VEC const zz=(z_); \
+ CCTK_REAL4_VEC const z=zz; \
+ int const m = _mm_movemask_ps(x); /* bit i of m = sign bit of element i */ \
+ vec4_set(vec4_elt0(m & 1 ? z : y), \
+ vec4_elt1(m & 2 ? z : y), \
+ vec4_elt2(m & 4 ? z : y), \
+ vec4_elt3(m & 8 ? z : y)); \
+ })
+#endif
diff --git a/src/vectors-4-default.h b/src/vectors-4-default.h
index 1277040..b59db7c 100644
--- a/src/vectors-4-default.h
+++ b/src/vectors-4-default.h
@@ -8,6 +8,8 @@
+#define vec4_architecture "scalar (no vectorisation)"
+
// Use CCTK_REAL4
#define CCTK_REAL4_VEC CCTK_REAL4
@@ -78,3 +80,5 @@
#define k4log(x) (logf(x))
#define k4pow(x,a) (powf(x,a))
#define k4sqrt(x) (sqrtf(x))
+
+#define k4ifthen(x,y,z) ((x)>=0?(y):(z))
diff --git a/src/vectors-8-AVX.h b/src/vectors-8-AVX.h
index 0f08096..ae7a4d5 100644
--- a/src/vectors-8-AVX.h
+++ b/src/vectors-8-AVX.h
@@ -13,6 +13,8 @@
+#define vec8_architecture "AVX"
+
// Vector type corresponding to CCTK_REAL
#define CCTK_REAL8_VEC __m256d
@@ -194,3 +196,5 @@ static const k8const_t k8abs_mask_union =
#define k8exp(x) K8REPL(exp,x)
#define k8log(x) K8REPL(log,x)
#define k8pow(x,a) K8REPL2(pow,x,a)
+
+#define k8ifthen(x,y,z) (_mm256_blendv_pd(y,z,x))
diff --git a/src/vectors-8-DoubleHummer.h b/src/vectors-8-DoubleHummer.h
index 16ed8a0..62b5f93 100644
--- a/src/vectors-8-DoubleHummer.h
+++ b/src/vectors-8-DoubleHummer.h
@@ -9,6 +9,8 @@
+#define vec8_architecture "Double Hummer"
+
// Vector type corresponding to CCTK_REAL
#define CCTK_REAL8_VEC double _Complex
@@ -204,3 +206,5 @@
#define k8log(x) K8REPL(log,x)
#define k8pow(x,a) K8REPL2(pow,x,a)
#define k8sqrt(x) K8REPL(sqrt,x)
+
+#define k8ifthen(x,y,z) fpsel(y,z,x)
diff --git a/src/vectors-8-SSE2.h b/src/vectors-8-SSE2.h
index 4a3f4e2..a559cf8 100644
--- a/src/vectors-8-SSE2.h
+++ b/src/vectors-8-SSE2.h
@@ -17,6 +17,8 @@
+#define vec8_architecture "SSE2"
+
// Vector type corresponding to CCTK_REAL
#define CCTK_REAL8_VEC __m128d
@@ -156,43 +158,6 @@ static const union {
} k8abs_mask_union = {{ 0x7fffffffffffffffULL, 0x7fffffffffffffffULL }};
#define k8abs_mask (k8sign_mask_union.v)
-// Choice [sign(x)>0 ? y : z]
-#ifdef __SSE4_1__
-# define k8ifthen(x,y,z) (_mm_blendv_pd(y,z,x))
-#elif 0
-# define k8ifthen(x,y,z) \
- ({ \
- CCTK_REAL8_VEC const xx=(x_); \
- CCTK_REAL8_VEC const x=xx; \
- CCTK_REAL8_VEC const yy=(y_); \
- CCTK_REAL8_VEC const y=yy; \
- CCTK_REAL8_VEC const zz=(z_); \
- CCTK_REAL8_VEC const z=zz; \
- int const m = _mm_movemask_pd(x); \
- CCTK_REAL8_VEC r; \
- switch (m) { \
- case 0: r = y; break; \
- case 1: r = _mm_move_sd(y,z); break; \
- case 2: r = _mm_move_sd(z,y); break; \
- case 3: r = z; break; \
- } \
- r; \
- })
-#else
-# define k8ifthen(x,y,z) \
- ({ \
- CCTK_REAL8_VEC const xx=(x_); \
- CCTK_REAL8_VEC const x=xx; \
- CCTK_REAL8_VEC const yy=(y_); \
- CCTK_REAL8_VEC const y=yy; \
- CCTK_REAL8_VEC const zz=(z_); \
- CCTK_REAL8_VEC const z=zz; \
- CCTK_REAL8_VEC const c = _mm_and_pd(x,k8sign_mask); \
- vec8_set(not vec8_elt0(c) ? vec8_elt0(y) : vec8_elt0(z), \
- not vec8_elt1(c) ? vec8_elt1(y) : vec8_elt1(z)); \
- })
-#endif
-
// Operators
#define k8pos(x) (x)
#define k8neg(x) (_mm_xor_pd(x,k8sign_mask))
@@ -236,3 +201,40 @@ static const union {
#define k8exp(x) K8REPL(exp,x)
#define k8log(x) K8REPL(log,x)
#define k8pow(x,a) K8REPL2(pow,x,a)
+
+// Choice [sign bit of x clear ? y : z]; tests the sign bit itself, since -0.0 compares equal to zero
+#ifdef __SSE4_1__
+# define k8ifthen(x,y,z) (_mm_blendv_pd(y,z,x))
+#elif 0
+# define k8ifthen(x_,y_,z_) \
+ ({ \
+ CCTK_REAL8_VEC const xx=(x_); \
+ CCTK_REAL8_VEC const x=xx; \
+ CCTK_REAL8_VEC const yy=(y_); \
+ CCTK_REAL8_VEC const y=yy; \
+ CCTK_REAL8_VEC const zz=(z_); \
+ CCTK_REAL8_VEC const z=zz; \
+ int const m = _mm_movemask_pd(x); \
+ CCTK_REAL8_VEC r; \
+ switch (m) { \
+ case 0: r = y; break; \
+ case 1: r = _mm_move_sd(y,z); break; \
+ case 2: r = _mm_move_sd(z,y); break; \
+ case 3: r = z; break; \
+ } \
+ r; \
+ })
+#else
+# define k8ifthen(x_,y_,z_) \
+ ({ \
+ CCTK_REAL8_VEC const xx=(x_); \
+ CCTK_REAL8_VEC const x=xx; \
+ CCTK_REAL8_VEC const yy=(y_); \
+ CCTK_REAL8_VEC const y=yy; \
+ CCTK_REAL8_VEC const zz=(z_); \
+ CCTK_REAL8_VEC const z=zz; \
+ int const m = _mm_movemask_pd(x); /* bit i of m = sign bit of element i */ \
+ vec8_set(vec8_elt0(m & 1 ? z : y), \
+ vec8_elt1(m & 2 ? z : y)); \
+ })
+#endif
diff --git a/src/vectors-8-VSX.h b/src/vectors-8-VSX.h
index f47d1df..d01c22a 100644
--- a/src/vectors-8-VSX.h
+++ b/src/vectors-8-VSX.h
@@ -9,6 +9,8 @@
+#define vec8_architecture "VSX"
+
// Vector type corresponding to CCTK_REAL
#define CCTK_REAL8_VEC vector double
@@ -106,3 +108,6 @@
#define k8log(x) K8REPL(log,x)
#define k8pow(x,a) K8REPL2(pow,x,a)
#define k8sqrt(x) K8REPL(sqrt,x)
+
+#define k8ifthen(x,y,z) /* sign bit of x clear ? y : z; vec_sel(a,b,m) takes b where m bits are set */ \
+ (vec_sel((y), (z), vec_sra(vec_convert((x), &(vector long long*)0), 63)))
diff --git a/src/vectors-8-default.h b/src/vectors-8-default.h
index bbe0150..6f81b91 100644
--- a/src/vectors-8-default.h
+++ b/src/vectors-8-default.h
@@ -8,6 +8,8 @@
+#define vec8_architecture "scalar (no vectorisation)"
+
// Use CCTK_REAL8
#define CCTK_REAL8_VEC CCTK_REAL8
@@ -78,3 +80,5 @@
#define k8log(x) (log(x))
#define k8pow(x,a) (pow(x,a))
#define k8sqrt(x) (sqrt(x))
+
+#define k8ifthen(x,y,z) ((x)>=0?(y):(z))
diff --git a/src/vectors.h b/src/vectors.h
index 03296e7..2a4b04e 100644
--- a/src/vectors.h
+++ b/src/vectors.h
@@ -44,6 +44,8 @@
#if defined(CCTK_REAL_PRECISION_4)
+# define vec_architecture vec4_architecture
+
# define CCTK_REAL_VEC CCTK_REAL4_VEC
# define CCTK_REAL_VEC_SIZE CCTK_REAL4_VEC_SIZE
@@ -85,8 +87,12 @@
# define kpow k4pow
# define ksqrt k4sqrt
+# define kifthen k4ifthen
+
#elif defined(CCTK_REAL_PRECISION_8)
+# define vec_architecture vec8_architecture
+
# define CCTK_REAL_VEC CCTK_REAL8_VEC
# define CCTK_REAL_VEC_SIZE CCTK_REAL8_VEC_SIZE
@@ -128,6 +134,8 @@
# define kpow k8pow
# define ksqrt k8sqrt
+# define kifthen k8ifthen
+
#else
# error "Unknown CCTK_REAL_PRECISION"