diff options
author | Erik Schnetter <schnetter@cct.lsu.edu> | 2010-12-07 10:06:29 -0600 |
---|---|---|
committer | Erik Schnetter <schnetter@cct.lsu.edu> | 2010-12-07 10:06:29 -0600 |
commit | 486369b20142c236b27dfc1d49a4d1c42a62c179 (patch) | |
tree | df3f426cfdb3629e03ae13bc3217244b9e50c79e /Auxiliary/Cactus/KrancNumericalTools/GenericFD/src | |
parent | 15a542847a134f51fdd94a9a1ea46cf883de8226 (diff) |
Remove outdated vectorisation implementation. An equivalent functionality is now available in LSUThorns/Vectors.
Diffstat (limited to 'Auxiliary/Cactus/KrancNumericalTools/GenericFD/src')
10 files changed, 0 insertions, 1523 deletions
diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-SSE2-direct.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-SSE2-direct.hh deleted file mode 100644 index 12cd6e8..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-SSE2-direct.hh +++ /dev/null @@ -1,135 +0,0 @@ -// Vectorise using Intel's or AMD's SSE2 - -// Use the type __m128d directly, without introducing a wrapper class -// Use macros instead of inline functions - - - -#include <emmintrin.h> - -// Vector type corresponding to CCTK_REAL -typedef __m128d CCTK_REAL_VEC; - -// Number of vector elements in a CCTK_REAL_VEC -static -int const CCTK_REAL_VEC_SIZE = sizeof(CCTK_REAL_VEC) / sizeof(CCTK_REAL); - - - -// Create vectors, extract vector elements - -#define vec_set1(a) (_mm_set1_pd(a)) -#define vec_set(a,b) (_mm_set_pd(b,a)) - -// Get a scalar from the vector -#if defined(__PGI) && defined (__amd64__) -// _mm_cvtsd_f64 does not exist on PGI compilers -// # define vec_elt0(x) (*(CCTK_REAL const*)&(x)) -# define vec_elt0(x) ({ CCTK_REAL a_elt0; asm ("" : "=x" (a_elt0) : "0" (x)); a_elt0; }) -#else -// this is a no-op -# define vec_elt0(x) (_mm_cvtsd_f64(x)) -#endif -#define vec_elt1(x_) ({ CCTK_REAL_VEC const x_elt1=(x_); vec_elt0(_mm_unpackhi_pd(x_elt1,x_elt1)); }) - - - -// Load and store vectors - -// Load a vector from memory (aligned and unaligned); this loads from -// a reference to a scalar -#define vec_load(p) (_mm_load_pd(&(p))) -#define vec_loadu(p) (_mm_loadu_pd(&(p))) - -// Load a vector from memory that may or may not be aligned, as -// decided by the offset off and the vector size -// Implementation: Always use unaligned load -#define vec_loadu_maybe(off,p) (vec_loadu(p)) -#define vec_loadu_maybe3(off1,off2,off3,p) (vec_loadu(p)) -#if 0 -#define vec_loadu_maybe(off,p) \ - (!((off)&(CCTK_REAL_VEC_SIZE-1)) ? \ - vec_load(p) : vec_loadu(p)) -#define vec_loadu_maybe3(off1,off2,off3,p) \ - (!((off1)&(CCTK_REAL_VEC_SIZE-1)) && \ - !((off2)&(CCTK_REAL_VEC_SIZE-1)) && \ - !((off3)&(CCTK_REAL_VEC_SIZE-1)) ? \ - vec_load(p) : vec_loadu(p)) -#endif - -// Store a vector to memory (aligned and non-temporal); this stores to -// a reference to a scalar -#define vec_store(p,x) (_mm_store_pd(&(p),x)) -#define vec_storeu(p,x) (_mm_storeu_pd(&(p),x)) -#if defined(KRANC_CACHE) -# define vec_store_nta(p,x) (_mm_stream_pd(&(p),x)) -#else -# define vec_store_nta(p,x) (_mm_store_pd(&(p),x)) -#endif - -// Store a lower or higher partial vector (aligned and non-temporal); -// the non-temporal hint is probably ignored -#define vec_store_nta_partial_lo(p,x,n) (_mm_storel_pd(&(p),x)) -#define vec_store_nta_partial_hi(p,x,n) (_mm_storeh_pd((&(p))+1,x)) - - - -// Functions and operators - -// Operators -#undef fneg -#undef fmul -#undef fdiv -#undef fadd -#undef fsub -#if defined(__PGI) && defined (__amd64__) -// The PGI compiler does not understand __m128d literals -static union { - unsigned long long s[CCTK_REAL_VEC_SIZE]; - CCTK_REAL_VEC v; -} vec_neg_mask_impl = {0x8000000000000000ULL, 0x8000000000000000ULL}; -# define vec_neg_mask (vec_neg_mask_impl.v) -#else -# define vec_neg_mask ((CCTK_REAL_VEC)(__m128i){0x8000000000000000ULL, 0x8000000000000000ULL}) -#endif -#define fneg(x) (_mm_xor_pd(x,vec_neg_mask)) -#define fmul(x,y) (_mm_mul_pd(x,y)) -#define fdiv(x,y) (_mm_div_pd(x,y)) -#define fadd(x,y) (_mm_add_pd(x,y)) -#define fsub(x,y) (_mm_sub_pd(x,y)) - -// Cheap functions -#undef kfabs -#undef kfmax -#undef kfmin -#undef ksqrt -#if defined(__PGI) && defined (__amd64__) -// The PGI compiler does not understand __m128d literals -static union { - unsigned long long s[CCTK_REAL_VEC_SIZE]; - CCTK_REAL_VEC v; -} vec_fabs_mask_impl = {0x7fffffffffffffffULL, 0x7fffffffffffffffULL}; -# define vec_fabs_mask (vec_fabs_mask_impl.v) -#else -# define vec_fabs_mask ((CCTK_REAL_VEC)(__m128i){0x7fffffffffffffffULL, 0x7fffffffffffffffULL}) -#endif -#define kfabs(x) (_mm_and_pd(x,vec_fabs_mask)) -#define kfmax(x,y) (_mm_max_pd(x,y)) -#define kfmin(x,y) (_mm_min_pd(x,y)) -#define ksqrt(x) (_mm_sqrt_pd(x)) - -// Expensive functions -#undef kexp -#undef klog -#undef kpow -#define kexp(x_) ({ CCTK_REAL_VEC const x_exp=(x_); vec_set(exp(vec_elt0(x_exp)),exp(vec_elt1(x_exp))); }) -#define klog(x_) ({ CCTK_REAL_VEC const x_log=(x_); vec_set(log(vec_elt0(x_log)),log(vec_elt1(x_log))); }) -#define kpow(x_,a_) ({ CCTK_REAL_VEC const x_pow=(x_); CCTK_REAL const a_pow=(a_); vec_set(pow(vec_elt0(x_pow),a_pow),pow(vec_elt1(x_pow),a_pow)); }) - - - -#undef Sign -#define Sign(x) (42) - -#undef ToReal -#define ToReal(x) (vec_set1(x)) diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-SSE2.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-SSE2.hh deleted file mode 100644 index b74fac0..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-SSE2.hh +++ /dev/null @@ -1,201 +0,0 @@ -// Vectorise using Intel's or AMD's SSE2 - - - -#include <emmintrin.h> - -// Vector type corresponding to CCTK_REAL -struct CCTK_REAL_VEC { - // Underlying scalar and vector types - typedef double S; - typedef __m128d V; - - // Payload - V v; - - // Empty constructur - inline CCTK_REAL_VEC() { } - - // Convert from and to the underlying vector type - inline CCTK_REAL_VEC(V const v_): v(v_) { } - inline operator V const() const { return v; } - - // Convert from the underlying scalar type - inline CCTK_REAL_VEC(S const& a): v(_mm_set1_pd(a)) { } - inline CCTK_REAL_VEC(int const& a): v(_mm_set1_pd(S(a))) { } - - // Copy constructor - inline CCTK_REAL_VEC(CCTK_REAL_VEC const& x): v(x) { } -}; - -// Number of vector elements in a CCTK_REAL_VEC -static -int const CCTK_REAL_VEC_SIZE = sizeof(CCTK_REAL_VEC) / sizeof(CCTK_REAL); - - - -// Create vectors, extract vector elements - -DEFINE_FUNCTION_R_V(vec_set1,_mm_set1_pd(a)) -DEFINE_FUNCTION_RR_V(vec_set,_mm_set_pd(b,a)) - -// Get a scalar from the vector -#if defined(__PGI) && defined (__amd64__) -// _mm_cvtsd_f64 does not exist on PGI compilers -// DEFINE_FUNCTION_V_R(vec_elt0,({ CCTK_REAL a; _mm_store_sd(&a,x); a; })) -// DEFINE_FUNCTION_V_R(vec_elt0,(*(CCTK_REAL const*)&x)) -// This generates the fastest code with PGI compilers -DEFINE_FUNCTION_V_R(vec_elt0,({ CCTK_REAL a; asm ("" : "=x" (a) : "0" (x)); a; })) -#else -DEFINE_FUNCTION_V_R(vec_elt0,_mm_cvtsd_f64(x)) // this is a no-op -#endif -DEFINE_FUNCTION_V_R(vec_elt1,vec_elt0(_mm_unpackhi_pd(x,x))) - - - -// Load and store vectors - -// Load a vector from memory (aligned and unaligned); this loads from -// a reference to a scalar -DEFINE_FUNCTION_PR_V(vec_load,_mm_load_pd(&p)) -DEFINE_FUNCTION_PR_V(vec_loadu,_mm_loadu_pd(&p)) - -// Load a vector from memory that may or may not be aligned, as -// decided by the offset off and the vector size -// Implementation: default to unaligned load -template<int mod> -DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl,vec_loadu(p)) -template<int mod1,int mod2,int mod3> -DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl3,vec_loadu(p)) -// Implementation: load aligned if the modulus is zero -template<> -inline -CCTK_REAL_VEC vec_loadu_maybe_impl<0> (CCTK_REAL const& p) -{ - return vec_load(p); -} -template<> -inline -CCTK_REAL_VEC vec_loadu_maybe_impl3<0,0,0> (CCTK_REAL const& p) -{ - return vec_load(p); -} -// Call the implementation with the modulus -#define vec_loadu_maybe(off,p) \ - (vec_loadu_maybe_impl<(off)&(CCTK_REAL_VEC_SIZE-1>(p))) -#define vec_loadu_maybe3(off1,off2,off3,p) \ - (vec_loadu_maybe_impl3<(off1)&(CCTK_REAL_VEC_SIZE-1), \ - (off2)&(CCTK_REAL_VEC_SIZE-1), \ - (off3)&(CCTK_REAL_VEC_SIZE-1)>(p)) - -// Store a vector to memory (aligned and non-temporal); this stores to -// a reference to a scalar -DEFINE_FUNCTION_PRV(vec_store,_mm_store_pd(&p,x)) -DEFINE_FUNCTION_PRV(vec_storeu,_mm_storeu_pd(&p,x)) -#if defined(KRANC_CACHE) -DEFINE_FUNCTION_PRV(vec_store_nta,_mm_stream_pd(&p,x)) -#else -DEFINE_FUNCTION_PRV(vec_store_nta,_mm_store_pd(&p,x)) -#endif - -// Store a lower or higher partial vector (aligned and non-temporal); -// the non-temporal hint is probably ignored -static inline -void vec_store_nta_partial_lo (CCTK_REAL& p, CCTK_REAL_VEC const x, int const n) -{ - switch (n) { - case 1: _mm_storel_pd(&p,x); break; - default: assert(0); - } -} -static inline -void vec_store_nta_partial_hi (CCTK_REAL& p, CCTK_REAL_VEC const x, int const n) -{ - switch (n) { - case 1: _mm_storeh_pd((&p)+1,x); break; - default: assert(0); - } -} - - - -// Functions and operators - -// Single-argument operators -#if 0 -DEFINE_FUNCTION_V_V(operator+,x) -static CCTK_REAL_VEC const vec_neg_mask = - (CCTK_REAL_VEC::V)(__m128i) { 0x8000000000000000ULL, 0x8000000000000000ULL }; -DEFINE_FUNCTION_V_V(operator-,_mm_xor_pd(x,vec_neg_mask)) -#endif -DEFINE_FUNCTION_V_V(operator+,+x.v) -DEFINE_FUNCTION_V_V(operator-,-x.v) - -// Double-argument operators, both vectors -#if 0 -DEFINE_FUNCTION_VV_V(operator+,_mm_add_pd(x,y)) -DEFINE_FUNCTION_VV_V(operator-,_mm_sub_pd(x,y)) -DEFINE_FUNCTION_VV_V(operator*,_mm_mul_pd(x,y)) -DEFINE_FUNCTION_VV_V(operator/,_mm_div_pd(x,y)) -#endif -DEFINE_FUNCTION_VV_V(operator+,x.v+y.v) -DEFINE_FUNCTION_VV_V(operator-,x.v-y.v) -DEFINE_FUNCTION_VV_V(operator*,x.v*y.v) -DEFINE_FUNCTION_VV_V(operator/,x.v/y.v) - -// Double-argument operators, vector and scalar -DEFINE_FUNCTION_VR_V(operator+,x+vec_set1(a)) -DEFINE_FUNCTION_VR_V(operator-,x-vec_set1(a)) -DEFINE_FUNCTION_VR_V(operator*,x*vec_set1(a)) -DEFINE_FUNCTION_VR_V(operator/,x/vec_set1(a)) - -// Double-argument operators, scalar and vector -DEFINE_FUNCTION_RV_V(operator+,vec_set1(a)+x) -DEFINE_FUNCTION_RV_V(operator-,vec_set1(a)-x) -DEFINE_FUNCTION_RV_V(operator*,vec_set1(a)*x) -DEFINE_FUNCTION_RV_V(operator/,vec_set1(a)/x) - -// Cheap functions -#if defined(__PGI) && defined (__amd64__) -// The PGI compiler does not understand __m128d literals -static union { - CCTK_REAL_VEC::S s[CCTK_REAL_VEC_SIZE]; - CCTK_REAL_VEC::V v; -} vec_fabs_mask_impl = { 0x7fffffffffffffffULL, 0x7fffffffffffffffULL }; -# define vec_fabs_mask (vec_fabs_mask_impl.v) -#else -static CCTK_REAL_VEC const vec_fabs_mask = - (CCTK_REAL_VEC::V)(__m128i) { 0x7fffffffffffffffULL, 0x7fffffffffffffffULL }; -#endif -DEFINE_FUNCTION_V_V(fabs,_mm_and_pd(x,vec_fabs_mask)) -DEFINE_FUNCTION_VV_V(fmax,_mm_max_pd(x,y)) -DEFINE_FUNCTION_VV_V(fmin,_mm_min_pd(x,y)) -DEFINE_FUNCTION_V_V(sqrt,_mm_sqrt_pd(x)) - -// Expensive functions -DEFINE_FUNCTION_V_V(exp,vec_set(exp(vec_elt0(x)),exp(vec_elt1(x)))) -DEFINE_FUNCTION_V_V(log,vec_set(log(vec_elt0(x)),log(vec_elt1(x)))) -DEFINE_FUNCTION_VR_V(pow,vec_set(pow(vec_elt0(x),a),pow(vec_elt1(x),a))) - - - -#undef Sign -#define Sign(x) (42) - -// #undef ToReal -// #define ToReal(x) vec_set1(x) - -#if defined(__PGI) && defined (__amd64__) -// Special case for PGI 9.0.4 to avoid an internal compiler error -#undef IfThen -static inline -CCTK_REAL_VEC IfThen (bool const cond, CCTK_REAL_VEC const x, CCTK_REAL_VEC const y) -{ - union { - __m128i vi; - CCTK_REAL_VEC::V v; - } mask; - mask.vi = _mm_set1_epi64x(-(long long)cond); - return _mm_or_pd(_mm_and_pd(x.v, mask.v), _mm_andnot_pd(mask.v, y.v)); -} -#endif diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-VSX-direct.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-VSX-direct.hh deleted file mode 100644 index 7e06017..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-VSX-direct.hh +++ /dev/null @@ -1,111 +0,0 @@ -// Vectorise using IBM's Altivec - -// Use the type vector double directly, without introducing a wrapper class -// Use macros instead of inline functions - - - -#include <altivec.h> - -// Vector type corresponding to CCTK_REAL -typedef vector double CCTK_REAL_VEC; - -// Number of vector elements in a CCTK_REAL_VEC -static -int const CCTK_REAL_VEC_SIZE = sizeof(CCTK_REAL_VEC) / sizeof(CCTK_REAL); - - - -// Create vectors, extract vector elements - -#define vec_set1(a) (vec_splats(a)) -#if defined(__GNUC__) -// GNU doesn't support array indices on vectors -union vec_mask { - double elts[2]; - vector double v; -}; -# define vec_set(a,b) ({ vec_mask x_set; x_set.elts[0]=(a); x_set.elts[1]=(b); x_set.v; }) -#else -# define vec_set(a,b) ({ CCTK_REAL_VEC x_set; x_set[0]=(a); x_set[1]=(b); x_set; }) -#endif - -// Get a scalar from the vector -#if defined(__GNUC__) -// GNU doesn't support array indices on vectors -# define vec_elt0(x) ({ vec_mask x_elt0; x_elt0.v=(x); x_elt0.elts[0]; }) -# define vec_elt1(x) ({ vec_mask x_elt1; x_elt1.v=(x); x_elt1.elts[1]; }) -#else -# define vec_elt0(x) ((x)[0]) -# define vec_elt1(x) ((x)[1]) -#endif - - - -// Load and store vectors - -// Load a vector from memory (aligned and unaligned); this loads from -// a reference to a scalar -#define vec_load(p) (*(CCTK_REAL_VEC const*)&(p)) -#define vec_loadu(p) (vec_load(p)) - -// Load a vector from memory that may or may not be aligned, as -// decided by the offset off and the vector size -#define vec_loadu_maybe(off,p) (vec_load(p)) -#define vec_loadu_maybe3(off1,off2,off3,p) (vec_load(p)) - -// Store a vector to memory (aligned and non-temporal); this stores to -// a reference to a scalar -#define vec_store(p,x) (*(CCTK_REAL_VEC*)&(p)=(x)) -#define vec_storeu(p,x) (*(CCTK_REAL_VEC*)&(p)=(x)) -// TODO: Use stvxl instruction? -#define vec_store_nta(p,x) vec_store(p,x) - -// Store a lower or higher partial vector (aligned and non-temporal); -// the non-temporal hint is probably ignored -#define vec_store_nta_partial_lo(p,x,n) ((p)=vec_elt0(x)) -#define vec_store_nta_partial_hi(p,x,n) ((&(p))[1]=vec_elt1(x)) - - - -// Functions and operators - -// Other Altivec functions are: -// nabs: -abs a -// madd msub nmadd nmsub: [+-]a*b[+-]c - -// Triple-argument operators, all vectors -#undef fmadd -#undef fmsub -#undef fnmadd -#undef fnmsub -#define fmadd(x,y,z) (vec_madd(x,y,z)) -#define fmsub(x,y,z) (vec_msub(x,y,z)) -#define fnmadd(x,y,z) (vec_nmadd(x,y,z)) -#define fnmsub(x,y,z) (vec_nmsub(x,y,z)) - -// Cheap functions -#undef kfabs -#undef kfmax -#undef kfmin -#define kfabs(x) (vec_abs(x)) -#define kfmax(x,y) (vec_max(x,y)) -#define kfmin(x,y) (vec_min(x,y)) - -// Expensive functions -#undef kexp -#undef klog -#undef kpow -#undef ksqrt -#define kexp(x_) ({ CCTK_REAL_VEC const x_exp=(x_); vec_set(exp(vec_elt0(x_exp)),exp(vec_elt1(x_exp))); }) -#define klog(x_) ({ CCTK_REAL_VEC const x_log=(x_); vec_set(log(vec_elt0(x_log)),log(vec_elt1(x_log))); }) -#define kpow(x_,a_) ({ CCTK_REAL_VEC const x_pow=(x_); CCTK_REAL const a_pow=(a_); vec_set(pow(vec_elt0(x_pow),a_pow),pow(vec_elt1(x_pow),a_pow)); }) -#define ksqrt(x_) ({ CCTK_REAL_VEC const x_sqrt=(x_); vec_set(sqrt(vec_elt0(x_sqrt)),sqrt(vec_elt1(x_sqrt))); }) - - - -#undef Sign -#define Sign(x) (42) - -#undef ToReal -#define ToReal(x) (vec_set1((CCTK_REAL)(x))) diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-VSX.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-VSX.hh deleted file mode 100644 index 3fc97f6..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-VSX.hh +++ /dev/null @@ -1,212 +0,0 @@ -// Vectorise using IBM's Altivec - - - -#include <altivec.h> - -// Vector type corresponding to CCTK_REAL -struct CCTK_REAL_VEC { - // The underlying scalar and vector types - typedef double S; - typedef vector double V; - V v; - union vec_mask { - S elts[2]; - V v; - }; - - // Set a vector from scalars -#if 0 - // IBM - inline CCTK_REAL_VEC(S const a, S const b) { v[0]=a; v[1]=b; } -#endif -#if 0 - inline CCTK_REAL_VEC(S const a, S const b): - v(vec_mergel(vec_splats(a), vec_splats(b))) { } -#endif - inline CCTK_REAL_VEC(S const a, S const b) - { - vec_mask x; - x.elts[0] = a; - x.elts[1] = b; - v = x.v; - } - - // Set a vector from a scalar, replicating the scalar - // Note: Could also use vec_xlds instead - inline CCTK_REAL_VEC(S const a): v(vec_splats(a)) { } - - // Convert from and to the underlying vector type - inline CCTK_REAL_VEC(V const v_): v(v_) { } - inline operator V const() const { return v; } - - inline CCTK_REAL_VEC() { } - - // Copy constructor - inline CCTK_REAL_VEC(CCTK_REAL_VEC const& x): v(x) { } -}; - -// Number of vector elements in a CCTK_REAL_VEC -static -int const CCTK_REAL_VEC_SIZE = sizeof(CCTK_REAL_VEC) / sizeof(CCTK_REAL); - - - -// Create vectors, extract vector elements -DEFINE_FUNCTION_R_V(vec_set1,CCTK_REAL_VEC(a)) -DEFINE_FUNCTION_RR_V(vec_set,CCTK_REAL_VEC(a,b)) - -// Get a scalar from the vector -#if 0 -// IBM -DEFINE_FUNCTION_V_R(vec_elt0,x.v[0]) -DEFINE_FUNCTION_V_R(vec_elt1,x.v[1]) -#endif -static inline CCTK_REAL vec_elt0(CCTK_REAL_VEC const x) -{ - CCTK_REAL_VEC::vec_mask x1; - x1.v = x; - return x1.elts[0]; -} -static inline CCTK_REAL vec_elt1(CCTK_REAL_VEC const x) -{ - CCTK_REAL_VEC::vec_mask x1; - x1.v = x; - return x1.elts[1]; -} - - - -// Load and store vectors - -// Load a vector from memory (aligned and unaligned); this loads from -// a reference to a scalar -DEFINE_FUNCTION_PR_V(vec_load,p) -#if 0 -// IBM -DEFINE_FUNCTION_PR_V(vec_loadu,vec_xld2(0,const_cast<CCTK_REAL*>(&p))) -#endif -DEFINE_FUNCTION_PR_V(vec_loadu,p) - -// Load a vector from memory that may or may not be aligned, as -// decided by the offset off and the vector size -// Implementation: default to unaligned load -template<int mod> -DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl,vec_loadu(p)) -template<int mod1,int mod2,int mod3> -DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl3,vec_loadu(p)) -// Implementation: load aligned if the modulus is zero -template<> -inline -CCTK_REAL_VEC vec_loadu_maybe_impl<0> (CCTK_REAL const& p) -{ - return vec_load(p); -} -template<> -inline -CCTK_REAL_VEC vec_loadu_maybe_impl3<0,0,0> (CCTK_REAL const& p) -{ - return vec_load(p); -} -// Call the implementation with the modulus -#define vec_loadu_maybe(off,p) \ - (vec_loadu_maybe_impl<(off)&(CCTK_REAL_VEC_SIZE-1>(p))) -#define vec_loadu_maybe3(off1,off2,off3,p) \ - (vec_loadu_maybe_impl3<(off1)&(CCTK_REAL_VEC_SIZE-1), \ - (off2)&(CCTK_REAL_VEC_SIZE-1), \ - (off3)&(CCTK_REAL_VEC_SIZE-1)>(p)) - -// Store a vector to memory (aligned and non-temporal); this stores to -// a reference to a scalar -DEFINE_FUNCTION_PRV(vec_store,*(CCTK_REAL_VEC::V*)&p=x) -DEFINE_FUNCTION_PRV(vec_storeu,*(CCTK_REAL_VEC::V*)&p=x) -// TODO: Use stvxl instruction? -DEFINE_FUNCTION_PRV(vec_store_nta,*(CCTK_REAL_VEC::V*)&p=x) - -// Store a lower or higher partial vector (aligned and non-temporal); -// the non-temporal hint is probably ignored -static inline -void vec_store_nta_partial_lo (CCTK_REAL& p, CCTK_REAL_VEC const x, int const n) -{ - switch (n) { - case 1: p=vec_elt0(x); break; - default: assert(0); - } -} -static inline -void vec_store_nta_partial_hi (CCTK_REAL& p, CCTK_REAL_VEC const x, int const n) -{ - switch (n) { - case 1: (&p)[1]=vec_elt1(x); break; - default: assert(0); - } -} - - - -// Functions and operators - -// Other Altivec functions are: -// nabs: -abs a -// madd msub nmadd nmsub: [+-]a*b[+-]c - -// Single-argument operators -#if 0 -DEFINE_FUNCTION_V_V(operator+,x) -DEFINE_FUNCTION_V_V(operator-,vec_neg(x)) -#endif -DEFINE_FUNCTION_V_V(operator+,+x.v) -DEFINE_FUNCTION_V_V(operator-,-x.v) - -// Double-argument operators, both vectors -#if 0 -DEFINE_FUNCTION_VV_V(operator+,vec_add(x,y)) -DEFINE_FUNCTION_VV_V(operator-,vec_sub(x,y)) -DEFINE_FUNCTION_VV_V(operator*,vec_mul(x,y)) -DEFINE_FUNCTION_VV_V(operator/,vec_div(x,y)) -#endif -DEFINE_FUNCTION_VV_V(operator+,x.v+y.v) -DEFINE_FUNCTION_VV_V(operator-,x.v-y.v) -DEFINE_FUNCTION_VV_V(operator*,x.v*y.v) -DEFINE_FUNCTION_VV_V(operator/,x.v/y.v) - -// Double-argument operators, vector and scalar -DEFINE_FUNCTION_VR_V(operator+,x+vec_set1(a)) -DEFINE_FUNCTION_VR_V(operator-,x-vec_set1(a)) -DEFINE_FUNCTION_VR_V(operator*,x*vec_set1(a)) -DEFINE_FUNCTION_VR_V(operator/,x/vec_set1(a)) - -// Double-argument operators, scalar and vector -DEFINE_FUNCTION_RV_V(operator+,vec_set1(a)+x) -DEFINE_FUNCTION_RV_V(operator-,vec_set1(a)-x) -DEFINE_FUNCTION_RV_V(operator*,vec_set1(a)*x) -DEFINE_FUNCTION_RV_V(operator/,vec_set1(a)/x) - -// Triple-argument operators, all vectors -#undef fmadd -#undef fmsub -#undef fnmadd -#undef fnmsub -DEFINE_FUNCTION_VVV_V(fmadd,vec_madd(x.v,y.v,z.v)) -DEFINE_FUNCTION_VVV_V(fmsub,vec_msub(x.v,y.v,z.v)) -DEFINE_FUNCTION_VVV_V(fnmadd,vec_nmadd(x.v,y.v,z.v)) -DEFINE_FUNCTION_VVV_V(fnmsub,vec_nmsub(x.v,y.v,z.v)) - -// Cheap functions -DEFINE_FUNCTION_V_V(fabs,vec_abs(x.v)) -DEFINE_FUNCTION_VV_V(fmax,vec_max(x.v,y.v)) -DEFINE_FUNCTION_VV_V(fmin,vec_min(x.v,y.v)) - -// Expensive functions -DEFINE_FUNCTION_V_V(exp,vec_set(exp(vec_elt0(x)),exp(vec_elt1(x)))) -DEFINE_FUNCTION_V_V(log,vec_set(log(vec_elt0(x)),log(vec_elt1(x)))) -DEFINE_FUNCTION_VR_V(pow,vec_set(pow(vec_elt0(x),a),pow(vec_elt1(x),a))) -DEFINE_FUNCTION_V_V(sqrt,vec_set(sqrt(vec_elt0(x)),sqrt(vec_elt1(x)))) - - - -#undef Sign -#define Sign(x) (42) - -// #undef ToReal -// #define ToReal(x) (vec_set1(x)) diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-default.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-default.hh deleted file mode 100644 index f928ed8..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-default.hh +++ /dev/null @@ -1,31 +0,0 @@ -// Fallback vectorisation implementation: Do not vectorise - - - -// Use CCTK_REAL -typedef CCTK_REAL CCTK_REAL_VEC; - -// Number of vector elements in a CCTK_REAL_VEC -static int const CCTK_REAL_VEC_SIZE = 1; - - - -// We use macros here, so that we are not surprised by compilers which -// don't like to inline functions (e.g. PGI). This should also make -// debug builds (which may not inline) more efficient. - -#define vec_load(p) (p) -#define vec_loadu(p) (p) - -// Load a vector from memory that may or may not be aligned, as -// decided by the offset off and the vector size -#define vec_loadu_maybe(off,p) (p) -#define vec_loadu_maybe3(off1,off2,off3,p) (p) - -#define vec_store(p,x) ((p)=(x)) -#define vec_store_nta(p,x) ((p)=(x)) - -// Store a lower or higher partial vector (aligned and non-temporal); -// the non-temporal hint is probably ignored -#define vec_store_nta_partial_lo(p,x,n) (assert(0)) -#define vec_store_nta_partial_hi(p,x,n) (assert(0)) diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-define.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-define.hh deleted file mode 100644 index f5c0b22..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-define.hh +++ /dev/null @@ -1,104 +0,0 @@ -// Define some macros that simplify defining short function that are -// supposed to be inlined - - - -// Letters defining the prototype (argument and return value types): -// I: i,j: integer -// R: a,b: real -// V: x,y: vector (of real) -// P: p,q: pointer (i.e. const reference) to something -// L: l,m: L-value (i.e. non-const reference) to something - - - -// Load and store - -#define DEFINE_FUNCTION_PR_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL const& p) \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_PRV(name,expr) \ -static inline \ -void name (CCTK_REAL& p, CCTK_REAL_VEC const x) \ -{ \ - expr; \ -} - -#define DEFINE_FUNCTION_PVR(name,expr) \ -static inline \ -void name (CCTK_REAL_VEC& p, CCTK_REAL const a) \ -{ \ - expr; \ -} - - - -// Functions and operators - -#define DEFINE_FUNCTION_V_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL_VEC const x) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_V_R(name,expr) \ -static inline \ -CCTK_REAL name (CCTK_REAL_VEC const x) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_R_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL const a) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_VV_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL_VEC const x, CCTK_REAL_VEC const y) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_VR_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL_VEC const x, CCTK_REAL const a) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_RV_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL const a, CCTK_REAL_VEC const x) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_RR_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL const a, CCTK_REAL const b) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_VVV_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL_VEC const x, CCTK_REAL_VEC const y, CCTK_REAL_VEC const z) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-outdated.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-outdated.hh deleted file mode 100644 index df83b3a..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-outdated.hh +++ /dev/null @@ -1,591 +0,0 @@ -#ifndef VECTORS_HH -#define VECTORS_HH - - - -// Vectorisation - -#include <assert.h> -#include <math.h> -#include <stdlib.h> - -#include <cctk.h> - - - -// I: i,j: integer -// R: a,b: real -// V: x,y: vector (of real) -// P: p,q: pointer (i.e. const reference) to something -// L: l,m: L-value (i.e. non-const reference) to something - -#define DEFINE_FUNCTION_PR_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL const& p) \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_PRV(name,expr) \ -static inline \ -void name (CCTK_REAL& p, CCTK_REAL_VEC const& x) \ -{ \ - expr; \ -} - -#define DEFINE_FUNCTION_PVR(name,expr) \ -static inline \ -void name (CCTK_REAL_VEC& p, CCTK_REAL const& a) \ -{ \ - expr; \ -} - -#define DEFINE_FUNCTION_V_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL_VEC const& x) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_V_R(name,expr) \ -static inline \ -CCTK_REAL name (CCTK_REAL_VEC const& x) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_R_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL const& a) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_VV_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL_VEC const& x, CCTK_REAL_VEC const& y) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_VR_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL_VEC const& x, CCTK_REAL const& a) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_RV_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL const& a, CCTK_REAL_VEC const& x) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_RR_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL const& a, CCTK_REAL const& b) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - - - -// Intel, double -#if defined(KRANC_VECTORS) && defined(__SSE2__) && defined(CCTK_REAL_PRECISION_8) - -#include <emmintrin.h> - -// Vector type corresponding to CCTK_REAL -struct CCTK_REAL_VEC { - // The underlying scalar and vector types - typedef double S; - typedef __m128d V; - V v; - - // Set a vector from scalars - inline CCTK_REAL_VEC(S const& a, S const& b): v(_mm_set_pd(b,a)) { } - - // Set a vector from a scalar, replicating the scalar - inline CCTK_REAL_VEC(S const& a): v(_mm_set1_pd(a)) { } - - // Convert from and to the underlying vector type - inline CCTK_REAL_VEC(V const& v_): v(v_) { } - inline operator V const() const { return v; } - - inline CCTK_REAL_VEC() { } - - // Copy constructor - inline CCTK_REAL_VEC(CCTK_REAL_VEC const& x): v(x) { } -}; - -union vec_mask { - unsigned long long bits[2]; - CCTK_REAL_VEC::V v; -}; - -DEFINE_FUNCTION_R_V(vec_set1,_mm_set1_pd(a)) -DEFINE_FUNCTION_RR_V(vec_set,_mm_set_pd(b,a)) - -// Get a scalar from the vector -#if defined(__PGI) && defined (__amd64__) -// _mm_cvtsd_f64 does not exist on PGI compilers -static inline -CCTK_REAL vec_elt0 (CCTK_REAL_VEC const& x) -{ - CCTK_REAL a; _mm_store_sd(&a,x); return a; -} -#else -DEFINE_FUNCTION_V_R(vec_elt0,_mm_cvtsd_f64(x)) //this is a no-op -#endif - -#if 0 -DEFINE_FUNCTION_V_R(vec_elt1,vec_elt0(_mm_shuffle_pd(x,x,_MM_SHUFFLE2(1,1)))) -#endif -static inline -CCTK_REAL vec_elt1 (CCTK_REAL_VEC const& x) -{ - CCTK_REAL a; _mm_storeh_pd(&a,x); return a; -} - -// Load a vector from memory (aligned and unaligned); this loads from -// a reference to a scalar -DEFINE_FUNCTION_PR_V(vec_load,_mm_load_pd(&p)) -DEFINE_FUNCTION_PR_V(vec_loadu,_mm_loadu_pd(&p)) - -#if 0 -// Load a partial vector (duplicating the last loaded element to fill -// the remaining elements) -// TODO: Should this be aligned or unaligned? -static inline -CCTK_REAL_VEC vec_load_partial (CCTK_REAL const& p, int const n) -{ - switch (n) { - case 1: return _mm_load1_pd(p); - default: assert(0); - } -} -#endif - -// Load a vector from memory that may or may not be aligned, as -// decided by the offset off and the vector size -// Implementation: default to unaligned load -template<int mod> -DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl,vec_loadu(p)) -template<int mod1,int mod2,int mod3> -DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl3,vec_loadu(p)) -// Implementation: load aligned if the modulus is zero -template<> -inline -CCTK_REAL_VEC vec_loadu_maybe_impl<0> (CCTK_REAL const& p) -{ - return vec_load(p); -} -template<> -inline -CCTK_REAL_VEC vec_loadu_maybe_impl3<0,0,0> (CCTK_REAL const& p) -{ - return vec_load(p); -} -// Call the implementation with the modulus -template<int off> -static inline -CCTK_REAL_VEC vec_loadu_maybe (CCTK_REAL const& p) -{ - return vec_loadu_maybe_impl<off&(2-1)>(p); -} -template<int off1,int off2, int off3> -static inline -CCTK_REAL_VEC vec_loadu_maybe3 (CCTK_REAL const& p) -{ - return vec_loadu_maybe_impl3<off1&(2-1),off2&(2-1),off3&(2-1)>(p); -} - -// Store a vector to memory (aligned and non-temporal); this stores to -// a reference to a scalar -DEFINE_FUNCTION_PRV(vec_store,_mm_store_pd(&p,x)) -DEFINE_FUNCTION_PRV(vec_store_nta,_mm_stream_pd(&p,x)) - -// Store a lower or higher partial vector (aligned and non-temporal); -// the non-temporal hint is probably ignored -static inline -void vec_storel_partial (CCTK_REAL& p, CCTK_REAL_VEC const& x, int const n) -{ - switch (n) { - case 1: _mm_storel_pd(&p,x); break; - default: assert(0); - } -} -static inline -void vec_storeh_partial (CCTK_REAL& p, CCTK_REAL_VEC const& x, int const n) -{ - switch (n) { - case 1: _mm_storeh_pd((&p)+1,x); break; - default: assert(0); - } -} - -// Double-argument operators, both vectors -DEFINE_FUNCTION_VV_V(operator+,_mm_add_pd(x,y)) -DEFINE_FUNCTION_VV_V(operator-,_mm_sub_pd(x,y)) -DEFINE_FUNCTION_VV_V(operator*,_mm_mul_pd(x,y)) -DEFINE_FUNCTION_VV_V(operator/,_mm_div_pd(x,y)) - -// Double-argument operators, vector and scalar -DEFINE_FUNCTION_VR_V(operator+,x+vec_set1(a)) -DEFINE_FUNCTION_VR_V(operator-,x-vec_set1(a)) -DEFINE_FUNCTION_VR_V(operator*,x*vec_set1(a)) -DEFINE_FUNCTION_VR_V(operator/,x/vec_set1(a)) - -// Double-argument operators, scalar and vector -DEFINE_FUNCTION_RV_V(operator+,vec_set1(a)+x) -DEFINE_FUNCTION_RV_V(operator-,vec_set1(a)-x) -DEFINE_FUNCTION_RV_V(operator*,vec_set1(a)*x) -DEFINE_FUNCTION_RV_V(operator/,vec_set1(a)/x) - -// Single-argument operators -DEFINE_FUNCTION_V_V(operator+,x) -#if 0 -DEFINE_FUNCTION_V_V(operator-,vec_set(0.0,0.0)-x) -#endif -static vec_mask const vec_neg_mask = -{ { 0x8000000000000000ULL, 0x8000000000000000ULL } }; -DEFINE_FUNCTION_V_V(operator-,_mm_xor_pd(x,vec_neg_mask.v)) - -// Cheap functions -static vec_mask const vec_fabs_mask = -{ { 0x7fffffffffffffffULL, 0x7fffffffffffffffULL } }; -DEFINE_FUNCTION_V_V(fabs,_mm_and_pd(x,vec_fabs_mask.v)) -DEFINE_FUNCTION_VV_V(fmax,_mm_max_pd(x,y)) -DEFINE_FUNCTION_VV_V(fmin,_mm_min_pd(x,y)) -DEFINE_FUNCTION_V_V(sqrt,_mm_sqrt_pd(x)) - -// Expensive functions -DEFINE_FUNCTION_V_V(exp,vec_set(exp(vec_elt0(x)),exp(vec_elt1(x)))) -DEFINE_FUNCTION_V_V(log,vec_set(log(vec_elt0(x)),log(vec_elt1(x)))) -DEFINE_FUNCTION_VR_V(pow,vec_set(pow(vec_elt0(x),a),pow(vec_elt1(x),a))) - -// Special case for PGI to avoid internal compiler error -#if defined(__PGI) && defined (__amd64__) -#undef IfThen -CCTK_REAL_VEC IfThen (bool const cond, CCTK_REAL_VEC const& x, CCTK_REAL_VEC co\ -nst& y) -{ - return cond*x + (not cond)*y; -} -#endif - - - -#if 0 -// Try to use the __m128d type directly. - -// This does not really work, because it is not possible to define -// automatic conversion operators from double to __m128d, so that -// explicit conversions are required. This makes the code look more -// clumsy. - -// Vector type corresponding to CCTK_REAL -typedef __m128d CCTK_REAL_VEC; - -DEFINE_FUNCTION_R_V(vec_set1,_mm_set1_pd(a)) -DEFINE_FUNCTION_RR_V(vec_set,_mm_set_pd(b,a)) - -// Get a scalar from the vector -static inline -CCTK_REAL vec_elt0 (CCTK_REAL_VEC const& x) -{ -#if 0 - // _mm_cvtsd_f64 does not exist on PGI compilers - return _mm_cvtsd_f64(x); // this is a no-op -#endif - CCTK_REAL a; _mm_store_sd(&a,x); return a; -} - -DEFINE_FUNCTION_V_R(vec_elt1,vec_elt0(_mm_shuffle_pd(x,x,_MM_SHUFFLE2(1,1)))) - -// Load a vector from memory (aligned and unaligned); this loads from -// a reference to a scalar -DEFINE_FUNCTION_PR_V(vec_load,_mm_load_pd(&p)) -DEFINE_FUNCTION_PR_V(vec_loadu,_mm_loadu_pd(&p)) - -// Store a vector to memory (aligned and non-temporal); this stores to -// a reference to a scalar -DEFINE_FUNCTION_PRV(vec_store,_mm_store_pd(&p,x)) -DEFINE_FUNCTION_PRV(vec_store_nta,_mm_stream_pd(&p,x)) - -// Cheap functions -static vec_mask const vec_fabs_mask = -{ { 0x7fffffffffffffffULL, 0x7fffffffffffffffULL } }; -DEFINE_FUNCTION_V_V(fabs,_mm_and_pd(x,vec_fabs_mask.v)) -DEFINE_FUNCTION_VV_V(fmax,_mm_max_pd(x,y)) -DEFINE_FUNCTION_VV_V(fmin,_mm_min_pd(x,y)) -DEFINE_FUNCTION_V_V(sqrt,_mm_sqrt_pd(x)) - -// Expensive functions -DEFINE_FUNCTION_V_V(exp,set(exp(vec_elt0(x)),exp(vec_elt1(x)))) -DEFINE_FUNCTION_V_V(log,set(log(vec_elt0(x)),log(vec_elt1(x)))) -DEFINE_FUNCTION_VR_V(pow,set(pow(vec_elt0(x),a),pow(vec_elt1(x),a))) - -#endif - - - -// Intel, float -#elif defined(KRANC_VECTORS) && defined(__SSE__) && defined(CCTK_REAL_PRECISION_4) - -#include <xmmintrin.h> - -// A vector type corresponding to CCTK_REAL -typedef __m128 CCTK_REAL_VEC; - - - -// Power, double -#elif defined(KRANC_VECTORS) && defined(__ALTIVEC__) && defined(_ARCH_PWR7) && defined(CCTK_REAL_PRECISION_8) - -#include <altivec.h> - -// Vector type corresponding to CCTK_REAL -struct CCTK_REAL_VEC { - // The underlying scalar and vector types - typedef double S; - typedef vector double V; - V v; - - // vec_insert, vec_extract, vec_splat - - // Set a vector from scalars - inline CCTK_REAL_VEC(S const& a, S const& b) { v[0]=a; v[1]=b; } - - // Set a vector from a scalar, replicating the scalar - inline CCTK_REAL_VEC(S const& a): v(vec_splats(a)) { } - - // Convert from and to the underlying vector type - inline CCTK_REAL_VEC(V const& v_): v(v_) { } - inline operator V const() const { return v; } - - inline CCTK_REAL_VEC() { } - - // Copy constructor - inline CCTK_REAL_VEC(CCTK_REAL_VEC const& x): v(x) { } -}; - -DEFINE_FUNCTION_R_V(vec_set1,CCTK_REAL_VEC(a)) -DEFINE_FUNCTION_RR_V(vec_set,CCTK_REAL_VEC(a,b)) - -// Get a scalar from the vector -DEFINE_FUNCTION_V_R(vec_elt0,x.v[0]) -DEFINE_FUNCTION_V_R(vec_elt1,x.v[1]) - -// Load a vector from memory (aligned and unaligned); this loads from -// a reference to a scalar -DEFINE_FUNCTION_PR_V(vec_load,p) -DEFINE_FUNCTION_PR_V(vec_loadu,vec_xld2(0,const_cast<CCTK_REAL*>(&p))) -// vec_xlds - -// Load a vector from memory that may or may not be aligned, as -// decided by the offset off and the vector size -// Implementation: default to unaligned load -template<int mod> -DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl,vec_loadu(p)) -// Implementation: load aligned if the modulus is zero -#define static -template<> -DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl<0>,vec_load(p)) -#undef static -// Call the implementation with the modulus -template<int off> -DEFINE_FUNCTION_PR_V(vec_loadu_maybe,vec_loadu_maybe_impl<off&(2-1)>(p)) - -// Store a vector to memory (aligned and non-temporal); this stores to -// a reference to a scalar -DEFINE_FUNCTION_PRV(vec_store,*(CCTK_REAL_VEC::V*)&p=x) -DEFINE_FUNCTION_PRV(vec_store_nta,*(CCTK_REAL_VEC::V*)&p=x) - -// Store a lower or higher partial vector (aligned and non-temporal); -// the non-temporal hint is probably ignored -static inline -void vec_storel_partial (CCTK_REAL& p, CCTK_REAL_VEC const& x, int const n) -{ - switch (n) { - case 1: p=x.v[0]; break; - default: assert(0); - } -} -static inline -void vec_storeh_partial (CCTK_REAL& p, CCTK_REAL_VEC const& x, int const n) -{ - switch (n) { - case 1: (&p)[1]=x.v[1]; break; - default: assert(0); - } -} - -// Double-argument operators, both vectors -DEFINE_FUNCTION_VV_V(operator+,vec_add(x,y)) -DEFINE_FUNCTION_VV_V(operator-,vec_sub(x,y)) -DEFINE_FUNCTION_VV_V(operator*,vec_mul(x,y)) -DEFINE_FUNCTION_VV_V(operator/,vec_div(x,y)) - -// Double-argument operators, vector and scalar -DEFINE_FUNCTION_VR_V(operator+,x+vec_set1(a)) -DEFINE_FUNCTION_VR_V(operator-,x-vec_set1(a)) -DEFINE_FUNCTION_VR_V(operator*,x*vec_set1(a)) -DEFINE_FUNCTION_VR_V(operator/,x/vec_set1(a)) - -// Double-argument operators, scalar and vector -DEFINE_FUNCTION_RV_V(operator+,vec_set1(a)+x) -DEFINE_FUNCTION_RV_V(operator-,vec_set1(a)-x) -DEFINE_FUNCTION_RV_V(operator*,vec_set1(a)*x) -DEFINE_FUNCTION_RV_V(operator/,vec_set1(a)/x) - -// Single-argument operators -DEFINE_FUNCTION_V_V(operator+,x) -DEFINE_FUNCTION_V_V(operator-,vec_neg(x)) - -// Cheap functions -DEFINE_FUNCTION_V_V(fabs,vec_abs(x)) -DEFINE_FUNCTION_VV_V(fmax,vec_max(x,y)) -DEFINE_FUNCTION_VV_V(fmin,vec_min(x,y)) - -// Expensive functions -DEFINE_FUNCTION_V_V(exp,vec_set(exp(vec_elt0(x)),exp(vec_elt1(x)))) -DEFINE_FUNCTION_V_V(log,vec_set(log(vec_elt0(x)),log(vec_elt1(x)))) -DEFINE_FUNCTION_VR_V(pow,vec_set(pow(vec_elt0(x),a),pow(vec_elt1(x),a))) -DEFINE_FUNCTION_V_V(sqrt,vec_set(sqrt(vec_elt0(x)),sqrt(vec_elt1(x)))) - - - -// Fallback: pseudo-vectorisation -#elif 0 - -// There is no vector type corresponding to CCTK_REAL -struct CCTK_REAL_VEC { - // The underlying scalar and vector types - CCTK_REAL v, w; - - // Set a vector from scalars - inline CCTK_REAL_VEC(CCTK_REAL const& a, CCTK_REAL const& b): v(a), w(b) { } - - // Set a vector from a scalar, replicating the scalar - inline CCTK_REAL_VEC(CCTK_REAL const& a): v(a), w(a) { } - - inline CCTK_REAL_VEC() { } - - // Copy constructor - inline CCTK_REAL_VEC(CCTK_REAL_VEC const& x): v(x.v), w(x.w) { } -}; - - - -DEFINE_FUNCTION_PR_V(vec_load,*(CCTK_REAL_VEC const* restrict)&p) -DEFINE_FUNCTION_PR_V(vec_loadu,vec_load(p)) -// Load a vector from memory that may or may not be aligned, as -// decided by the offset off and the vector size -template<int off> -DEFINE_FUNCTION_PR_V(vec_loadm,vec_load(p)) - -DEFINE_FUNCTION_PRV(vec_store,*(CCTK_REAL_VEC* restrict)&p=x) -DEFINE_FUNCTION_PRV(vec_store_nta,vec_store(p,x)) - -// Double-argument operators, both vectors -DEFINE_FUNCTION_VV_V(operator+,CCTK_REAL_VEC(x.v+y.v,x.w+y.w)) -DEFINE_FUNCTION_VV_V(operator-,CCTK_REAL_VEC(x.v-y.v,x.w-y.w)) -DEFINE_FUNCTION_VV_V(operator*,CCTK_REAL_VEC(x.v*y.v,x.w*y.w)) -DEFINE_FUNCTION_VV_V(operator/,CCTK_REAL_VEC(x.v/y.v,x.w/y.w)) - -// Double-argument operators, vector and scalar -DEFINE_FUNCTION_VR_V(operator+,CCTK_REAL_VEC(x.v+a,x.w+a)) -DEFINE_FUNCTION_VR_V(operator-,CCTK_REAL_VEC(x.v-a,x.w-a)) -DEFINE_FUNCTION_VR_V(operator*,CCTK_REAL_VEC(x.v*a,x.w*a)) -DEFINE_FUNCTION_VR_V(operator/,CCTK_REAL_VEC(x.v/a,x.w/a)) - -// Double-argument operators, scalar and vector -DEFINE_FUNCTION_RV_V(operator+,CCTK_REAL_VEC(a+x.v,a+x.w)) -DEFINE_FUNCTION_RV_V(operator-,CCTK_REAL_VEC(a-x.v,a-x.w)) -DEFINE_FUNCTION_RV_V(operator*,CCTK_REAL_VEC(a*x.v,a*x.w)) -DEFINE_FUNCTION_RV_V(operator/,CCTK_REAL_VEC(a/x.v,a/x.w)) - -// Single-argument operators -DEFINE_FUNCTION_V_V(operator+,x) -DEFINE_FUNCTION_V_V(operator-,CCTK_REAL_VEC(-x.v,-x.w)) - -// Cheap functions -DEFINE_FUNCTION_V_V(fabs,CCTK_REAL_VEC(fabs(x.v),fabs(x.w))) -DEFINE_FUNCTION_VV_V(fmax,CCTK_REAL_VEC(fmax(x.v,y.v),fmax(x.w,y.w))) -DEFINE_FUNCTION_VV_V(fmin,CCTK_REAL_VEC(fmin(x.v,y.v),fmin(x.w,y.w))) -DEFINE_FUNCTION_V_V(sqrt,CCTK_REAL_VEC(sqrt(x.v),sqrt(x.w))) - -// Expensive functions -DEFINE_FUNCTION_V_V(exp,CCTK_REAL_VEC(exp(x.v),exp(x.w))) -DEFINE_FUNCTION_V_V(log,CCTK_REAL_VEC(log(x.v),log(x.w))) -DEFINE_FUNCTION_VR_V(pow,CCTK_REAL_VEC(pow(x.v,a),pow(x.w,a))) - - - -// Fallback: no vectorisation -#else - -// There is no vector type corresponding to CCTK_REAL -typedef CCTK_REAL CCTK_REAL_VEC; - - - -DEFINE_FUNCTION_PR_V(vec_load,p) -DEFINE_FUNCTION_PR_V(vec_loadu,p) -// Load a vector from memory that may or may not be aligned, as -// decided by the offset off and the vector size -// Implementation: default to unaligned load -template<int off> -DEFINE_FUNCTION_PR_V(vec_loadu_maybe,p) -template<int off1,int off2, int off3> -DEFINE_FUNCTION_PR_V(vec_loadu_maybe3,p) - -DEFINE_FUNCTION_PRV(vec_store,p=x) -DEFINE_FUNCTION_PRV(vec_store_nta,p=x) - -// Store a lower or higher partial vector (aligned and non-temporal); -// the non-temporal hint is probably ignored -static inline -void vec_storel_partial (CCTK_REAL& p, CCTK_REAL_VEC const& x, int const n) -{ - assert(0); -} -static inline -void vec_storeh_partial (CCTK_REAL& p, CCTK_REAL_VEC const& x, int const n) -{ - assert(0); -} - - - -#endif - - - -#undef DEFINE_FUNCTION_PR_V -#undef DEFINE_FUNCTION_PRV -#undef DEFINE_FUNCTION_V_V -#undef DEFINE_FUNCTION_R_V -#undef DEFINE_FUNCTION_VV_V -#undef DEFINE_FUNCTION_VR_V -#undef DEFINE_FUNCTION_RV_V -#undef DEFINE_FUNCTION_RR_V - - - -// Number of vector elements in a CCTK_REAL_VEC -static -int const CCTK_REAL_VEC_SIZE = sizeof(CCTK_REAL_VEC) / sizeof(CCTK_REAL); - - - -#endif // #ifndef VECTORS_HH diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-pseudo.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-pseudo.hh deleted file mode 100644 index f439c9b..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-pseudo.hh +++ /dev/null @@ -1,72 +0,0 @@ -// Pseudo vectorisation using scalar operations - - - -// Number of vector elements in a CCTK_REAL_VEC -static int const CCTK_REAL_VEC_SIZE = 2; - -// There is no vector type corresponding to CCTK_REAL -struct CCTK_REAL_VEC { - // The underlying scalar and vector types - CCTK_REAL v[CCTK_REAL_VEC_SIZE]; - - // Set a vector from scalars - inline CCTK_REAL_VEC(CCTK_REAL const& a, CCTK_REAL const& b): v(a), w(b) { } - - // Set a vector from a scalar, replicating the scalar - inline CCTK_REAL_VEC(CCTK_REAL const& a): v(a), w(a) { } - - inline CCTK_REAL_VEC() { } - - // Copy constructor - inline CCTK_REAL_VEC(CCTK_REAL_VEC const& x) { v[0]=x.v[0]; v[1]=x.v[1]; } -}; - - - -// Load and store vectors - -DEFINE_FUNCTION_PR_V(vec_load,*(CCTK_REAL_VEC const* restrict)&p) -DEFINE_FUNCTION_PR_V(vec_loadu,vec_load(p)) -// Load a vector from memory that may or may not be aligned, as -// decided by the offset off and the vector size -#define vec_loadu_maybe(off,p) (vec_load(p)) -#define vec_loadu_maybe3(off1,off2,off3,p) (vec_load(p)) - -DEFINE_FUNCTION_PRV(vec_store,*(CCTK_REAL_VEC* restrict)&p=x) -DEFINE_FUNCTION_PRV(vec_store_nta,vec_store(p,x)) - - - -// Functions and operators - -// Double-argument operators, both vectors -DEFINE_FUNCTION_VV_V(operator+,CCTK_REAL_VEC(x.v[0]+y.v[0],x.v[1]+y.v[1])) -DEFINE_FUNCTION_VV_V(operator-,CCTK_REAL_VEC(x.v[0]-y.v[0],x.v[1]-y.v[1])) -DEFINE_FUNCTION_VV_V(operator*,CCTK_REAL_VEC(x.v[0]*y.v[0],x.v[1]*y.v[1])) -DEFINE_FUNCTION_VV_V(operator/,CCTK_REAL_VEC(x.v[0]/y.v[0],x.v[1]/y.v[1])) - -// Double-argument operators, vector and scalar -DEFINE_FUNCTION_VR_V(operator+,CCTK_REAL_VEC(x.v[0]+a,x.v[1]+a)) -DEFINE_FUNCTION_VR_V(operator-,CCTK_REAL_VEC(x.v[0]-a,x.v[1]-a)) -DEFINE_FUNCTION_VR_V(operator*,CCTK_REAL_VEC(x.v[0]*a,x.v[1]*a)) -DEFINE_FUNCTION_VR_V(operator/,CCTK_REAL_VEC(x.v[0]/a,x.v[1]/a)) - -// Double-argument operators, scalar and vector -DEFINE_FUNCTION_RV_V(operator+,CCTK_REAL_VEC(a+x.v[0],a+x.v[1])) -DEFINE_FUNCTION_RV_V(operator-,CCTK_REAL_VEC(a-x.v[0],a-x.v[1])) -DEFINE_FUNCTION_RV_V(operator*,CCTK_REAL_VEC(a*x.v[0],a*x.v[1])) -DEFINE_FUNCTION_RV_V(operator/,CCTK_REAL_VEC(a/x.v[0],a/x.v[1])) - -// Single-argument operators -DEFINE_FUNCTION_V_V(operator+,x) -DEFINE_FUNCTION_V_V(operator-,CCTK_REAL_VEC(-x.v[0],-x.v[1])) - -// Functions -DEFINE_FUNCTION_V_V(exp,CCTK_REAL_VEC(exp(x.v[0]),exp(x.v[1]))) -DEFINE_FUNCTION_V_V(fabs,CCTK_REAL_VEC(fabs(x.v[0]),fabs(x.v[1]))) -DEFINE_FUNCTION_VV_V(fmax,CCTK_REAL_VEC(fmax(x.v[0],y.v[0]),fmax(x.v[1],y.v[1]))) -DEFINE_FUNCTION_VV_V(fmin,CCTK_REAL_VEC(fmin(x.v[0],y.v[0]),fmin(x.v[1],y.v[1]))) -DEFINE_FUNCTION_V_V(log,CCTK_REAL_VEC(log(x.v[0]),log(x.v[1]))) -DEFINE_FUNCTION_VR_V(pow,CCTK_REAL_VEC(pow(x.v[0],a),pow(x.v[1],a))) -DEFINE_FUNCTION_V_V(sqrt,CCTK_REAL_VEC(sqrt(x.v[0]),sqrt(x.v[1]))) diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-undefine.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-undefine.hh deleted file mode 100644 index 0d950c7..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-undefine.hh +++ /dev/null @@ -1,14 +0,0 @@ -// Undefine all macros defined in "Vectors-define.hh", so that we -// leave a clean namespace - - - -#undef DEFINE_FUNCTION_PR_V -#undef DEFINE_FUNCTION_PRV -#undef DEFINE_FUNCTION_V_V -#undef DEFINE_FUNCTION_R_V -#undef DEFINE_FUNCTION_VV_V -#undef DEFINE_FUNCTION_VR_V -#undef DEFINE_FUNCTION_RV_V -#undef DEFINE_FUNCTION_RR_V -#undef DEFINE_FUNCTION_VVV_V diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors.hh deleted file mode 100644 index d32afb2..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors.hh +++ /dev/null @@ -1,52 +0,0 @@ -#ifndef VECTORS_HH -#define VECTORS_HH - - - -// Vectorisation - -#include <assert.h> -#include <math.h> -#include <stdlib.h> - -#include <cctk.h> - - - -#include "Vectors-define.hh" - -#if defined(KRANC_VECTORS) -// Vectorise - -# if ! defined(CCTK_REAL_PRECISION_8) -# error "Vectorisation is currently only supported for double precision" -# endif - -# if defined(__SSE2__) // SSE2 (Intel) -# if defined(KRANC_DIRECT) -# include "Vectors-SSE2-direct.hh" -# else -# include "Vectors-SSE2.hh" -# endif -# elif defined(__ALTIVEC__) && defined(_ARCH_PWR7) // Altivec (Power) -# if defined(KRANC_DIRECT) -# include "Vectors-VSX-direct.hh" -# else -# include "Vectors-VSX.hh" -# endif -# else -# include "Vectors-pseudo.hh" -# endif - -#else -// Don't vectorise - -# include "Vectors-default.hh" - -#endif - -#include "Vectors-undefine.hh" - - - -#endif // #ifndef VECTORS_HH |