aboutsummaryrefslogtreecommitdiff
path: root/src/vectors-4-SSE.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/vectors-4-SSE.h')
-rw-r--r--src/vectors-4-SSE.h26
1 files changed, 26 insertions, 0 deletions
diff --git a/src/vectors-4-SSE.h b/src/vectors-4-SSE.h
index e6dc735..8319c49 100644
--- a/src/vectors-4-SSE.h
+++ b/src/vectors-4-SSE.h
@@ -6,6 +6,10 @@
#include <xmmintrin.h>
+#ifdef __SSE4_1__
+// Intel's SSE 4.1
+# include <smmintrin.h>
+#endif
#ifdef __SSE4A__
// AMD's SSE 4a
# include <ammintrin.h>
@@ -13,6 +17,8 @@
+#define vec4_architecture "SSE"
+
// Vector type corresponding to CCTK_REAL
#define CCTK_REAL4_VEC __m128
@@ -292,3 +298,23 @@ static const union {
#define k4exp(x) K4REPL(exp,x)
#define k4log(x) K4REPL(log,x)
#define k4pow(x,a) K4REPL2(pow,x,a)
+
+// Choice, per vector element [sign bit of x clear ? y : z]
+#ifdef __SSE4_1__
+# define k4ifthen(x,y,z) (_mm_blendv_ps(y,z,x)) /* takes z where x's sign bit is set */
+#else
+# define k4ifthen(x_,y_,z_) \
+ ({ \
+ CCTK_REAL4_VEC const xx=(x_); /* each argument is evaluated exactly once */ \
+ CCTK_REAL4_VEC const x=xx; \
+ CCTK_REAL4_VEC const yy=(y_); \
+ CCTK_REAL4_VEC const y=yy; \
+ CCTK_REAL4_VEC const zz=(z_); \
+ CCTK_REAL4_VEC const z=zz; \
+ int const neg = _mm_movemask_ps(x); /* bit i = sign bit of element i; -0.0f counts as set */ \
+ vec4_set(vec4_elt0((neg & 1) ? z : y), \
+ vec4_elt1((neg & 2) ? z : y), \
+ vec4_elt2((neg & 4) ? z : y), \
+ vec4_elt3((neg & 8) ? z : y)); \
+ })
+#endif