aboutsummaryrefslogtreecommitdiff
path: root/src/vectors.h
diff options
context:
space:
mode:
author eschnett <eschnett@105869f7-3296-0410-a4ea-f4349344b45a> 2011-06-06 10:11:44 +0000
committer eschnett <eschnett@105869f7-3296-0410-a4ea-f4349344b45a> 2011-06-06 10:11:44 +0000
commit 2ab4d61cd4b632c0e991c781f3c15f3b054d1bbd (patch)
tree 6664b1e9ee360ee0abf9df6b9a5562eb5bdc88c5 /src/vectors.h
parent 5d4858e0736a0c0881c65b9e9ac0983d3b5bb24b (diff)
Introduce Cactus options for vectorisation
Introduce configuration-time options for vectorisation, including options to allow architecture-specific choices that may influence performance. Introduce "middle" masked stores for large vector sizes and small loops. Clean up and simplify some of the implementation code. git-svn-id: https://svn.cct.lsu.edu/repos/numrel/LSUThorns/Vectors/trunk@10 105869f7-3296-0410-a4ea-f4349344b45a
Diffstat (limited to 'src/vectors.h')
-rw-r--r-- src/vectors.h 52
1 files changed, 28 insertions, 24 deletions
diff --git a/src/vectors.h b/src/vectors.h
index a3cad46..03296e7 100644
--- a/src/vectors.h
+++ b/src/vectors.h
@@ -5,22 +5,19 @@
-#undef EMULATE_AVX
-
-
-
-#if defined(KRANC_VECTORS)
+#if VECTORISE
+/* TODO: support AVX */
# if defined(__SSE__) // Intel SSE
# include "vectors-4-SSE.h"
# elif defined(__ALTIVEC__) // Power Altivec
# include "vectors-4-Altivec.h"
# endif
-# if defined(__AVX__) // Intel AVX
+# if defined(__AVX__) // Intel AVX
# include "vectors-8-AVX.h"
-# elif defined(__SSE2__) // Intel SSE2
-# if defined(EMULATE_AVX)
+# elif defined(__SSE2__) // Intel SSE2
+# if VECTORISE_EMULATE_AVX
# include "vectors-8-AVX.h"
# else
# include "vectors-8-SSE2.h"
@@ -56,14 +53,15 @@
# define vec_elt0 vec4_elt0
# define vec_elt vec4_elt
-# define vec_load vec4_load
-# define vec_loadu vec4_loadu
-# define vec_loadu_maybe vec4_loadu_maybe
-# define vec_loadu_maybe3 vec4_loadu_maybe3
-# define vec_store vec4_store
-# define vec_store_nta vec4_store_nta
-# define vec_store_nta_partial_lo vec4_store_nta_partial_lo
-# define vec_store_nta_partial_hi vec4_store_nta_partial_hi
+# define vec_load vec4_load
+# define vec_loadu vec4_loadu
+# define vec_loadu_maybe vec4_loadu_maybe
+# define vec_loadu_maybe3 vec4_loadu_maybe3
+# define vec_store vec4_store
+# define vec_store_nta vec4_store_nta
+# define vec_store_nta_partial_lo vec4_store_nta_partial_lo
+# define vec_store_nta_partial_hi vec4_store_nta_partial_hi
+# define vec_store_nta_partial_mid vec4_store_nta_partial_mid
# define kpos k4pos
# define kneg k4neg
@@ -98,14 +96,15 @@
# define vec_elt0 vec8_elt0
# define vec_elt vec8_elt
-# define vec_load vec8_load
-# define vec_loadu vec8_loadu
-# define vec_loadu_maybe vec8_loadu_maybe
-# define vec_loadu_maybe3 vec8_loadu_maybe3
-# define vec_store vec8_store
-# define vec_store_nta vec8_store_nta
-# define vec_store_nta_partial_lo vec8_store_nta_partial_lo
-# define vec_store_nta_partial_hi vec8_store_nta_partial_hi
+# define vec_load vec8_load
+# define vec_loadu vec8_loadu
+# define vec_loadu_maybe vec8_loadu_maybe
+# define vec_loadu_maybe3 vec8_loadu_maybe3
+# define vec_store vec8_store
+# define vec_store_nta vec8_store_nta
+# define vec_store_nta_partial_lo vec8_store_nta_partial_lo
+# define vec_store_nta_partial_hi vec8_store_nta_partial_hi
+# define vec_store_nta_partial_mid vec8_store_nta_partial_mid
# define kpos k8pos
# define kneg k8neg
@@ -285,6 +284,11 @@ struct vecprops<CCTK_REAL8> {
// For Kranc
+#undef KRANC_DIFF_FUNCTIONS
+#if ! VECTORISE_INLINE
+# define KRANC_DIFF_FUNCTIONS
+#endif
+
#undef ToReal
#define ToReal(x) (vec_set1((CCTK_REAL)(x)))