diff options
author | eschnett <eschnett@105869f7-3296-0410-a4ea-f4349344b45a> | 2011-06-06 10:11:44 +0000 |
---|---|---|
committer | eschnett <eschnett@105869f7-3296-0410-a4ea-f4349344b45a> | 2011-06-06 10:11:44 +0000 |
commit | 2ab4d61cd4b632c0e991c781f3c15f3b054d1bbd (patch) | |
tree | 6664b1e9ee360ee0abf9df6b9a5562eb5bdc88c5 /src/vectors.h | |
parent | 5d4858e0736a0c0881c65b9e9ac0983d3b5bb24b (diff) |
Introduce Cactus options for vectorisation

Introduce configuration-time options for vectorisation, including
options to allow architecture-specific choices that may influence
performance.

Introduce "middle" masked stores for large vector sizes and small
loops.

Clean up and simplify some of the implementation code.

git-svn-id: https://svn.cct.lsu.edu/repos/numrel/LSUThorns/Vectors/trunk@10 105869f7-3296-0410-a4ea-f4349344b45a
Diffstat (limited to 'src/vectors.h')
-rw-r--r-- | src/vectors.h | 52 |
1 files changed, 28 insertions, 24 deletions
```diff
diff --git a/src/vectors.h b/src/vectors.h
index a3cad46..03296e7 100644
--- a/src/vectors.h
+++ b/src/vectors.h
@@ -5,22 +5,19 @@
-#undef EMULATE_AVX
-
-
-
-#if defined(KRANC_VECTORS)
+#if VECTORISE
+/* TOOD: support AVX */
 # if defined(__SSE__) // Intel SSE
 # include "vectors-4-SSE.h"
 # elif defined(__ALTIVEC__) // Power Altivec
 # include "vectors-4-Altivec.h"
 # endif
-# if defined(__AVX__) // Intel AVX
+# if defined(__AVX__) // Intel AVX
 # include "vectors-8-AVX.h"
-# elif defined(__SSE2__) // Intel SSE2
-# if defined(EMULATE_AVX)
+# elif defined(__SSE2__) // Intel SSE2
+# if VECTORISE_EMULATE_AVX
 # include "vectors-8-AVX.h"
 # else
 # include "vectors-8-SSE2.h"
@@ -56,14 +53,15 @@
 # define vec_elt0 vec4_elt0
 # define vec_elt vec4_elt
-# define vec_load vec4_load
-# define vec_loadu vec4_loadu
-# define vec_loadu_maybe vec4_loadu_maybe
-# define vec_loadu_maybe3 vec4_loadu_maybe3
-# define vec_store vec4_store
-# define vec_store_nta vec4_store_nta
-# define vec_store_nta_partial_lo vec4_store_nta_partial_lo
-# define vec_store_nta_partial_hi vec4_store_nta_partial_hi
+# define vec_load vec4_load
+# define vec_loadu vec4_loadu
+# define vec_loadu_maybe vec4_loadu_maybe
+# define vec_loadu_maybe3 vec4_loadu_maybe3
+# define vec_store vec4_store
+# define vec_store_nta vec4_store_nta
+# define vec_store_nta_partial_lo vec4_store_nta_partial_lo
+# define vec_store_nta_partial_hi vec4_store_nta_partial_hi
+# define vec_store_nta_partial_mid vec4_store_nta_partial_mid
 # define kpos k4pos
 # define kneg k4neg
@@ -98,14 +96,15 @@
 # define vec_elt0 vec8_elt0
 # define vec_elt vec8_elt
-# define vec_load vec8_load
-# define vec_loadu vec8_loadu
-# define vec_loadu_maybe vec8_loadu_maybe
-# define vec_loadu_maybe3 vec8_loadu_maybe3
-# define vec_store vec8_store
-# define vec_store_nta vec8_store_nta
-# define vec_store_nta_partial_lo vec8_store_nta_partial_lo
-# define vec_store_nta_partial_hi vec8_store_nta_partial_hi
+# define vec_load vec8_load
+# define vec_loadu vec8_loadu
+# define vec_loadu_maybe vec8_loadu_maybe
+# define vec_loadu_maybe3 vec8_loadu_maybe3
+# define vec_store vec8_store
+# define vec_store_nta vec8_store_nta
+# define vec_store_nta_partial_lo vec8_store_nta_partial_lo
+# define vec_store_nta_partial_hi vec8_store_nta_partial_hi
+# define vec_store_nta_partial_mid vec8_store_nta_partial_mid
 # define kpos k8pos
 # define kneg k8neg
@@ -285,6 +284,11 @@ struct vecprops<CCTK_REAL8> {
 // For Kranc
+#undef KRANC_DIFF_FUNCTIONS
+#if ! VECTORISE_INLINE
+# define KRANC_DIFF_FUNCTIONS
+#endif
+
 #undef ToReal
 #define ToReal(x) (vec_set1((CCTK_REAL)(x)))
```