Diffstat (limited to 'src/indirect/vectors-intel.hh')
-rw-r--r-- | src/indirect/vectors-intel.hh | 390
1 file changed, 390 insertions(+), 0 deletions(-)
diff --git a/src/indirect/vectors-intel.hh b/src/indirect/vectors-intel.hh
new file mode 100644
index 0000000..29da84f
--- /dev/null
+++ b/src/indirect/vectors-intel.hh
@@ -0,0 +1,390 @@
+#include <cmath>
+#include <cstdlib>
+using namespace std;
+
+
+
+#if defined(__SSE__)            // SSE (Intel)
+
+#include <xmmintrin.h>
+
+template<>
+struct vec_t<float> {
+  typedef float scalar_t;
+  typedef __m128 impl_t;
+  impl_t v;
+
+  static inline size_t size()
+  {
+    return sizeof(impl_t)/sizeof(scalar_t);
+  }
+
+  inline vec_t ()
+  {
+  }
+  inline vec_t (scalar_t const& a)
+    : v(_mm_set1_ps(a))
+  {
+  }
+  inline vec_t (scalar_t const& a0, scalar_t const& a1,
+                scalar_t const& a2, scalar_t const& a3)
+    : v(_mm_set_ps(a3,a2,a1,a0)) // reverse order!
+  {
+  }
+
+  inline vec_t (impl_t const& w)
+    : v(w)
+  {
+  }
+  inline operator impl_t () const
+  {
+    return v;
+  }
+
+private:
+  static inline scalar_t elt0 (impl_t const& v)
+  {
+    return _mm_cvtss_f32(v);    // this is a no-op
+  }
+public:
+  inline scalar_t operator[] (size_t const d) const
+  {
+    // move element d into position 0, then extract it
+    switch (d) {
+    case 0: return elt0(v);
+    case 1: return elt0(_mm_shuffle_ps(v,v,_MM_SHUFFLE(1,1,1,1)));
+    case 2: return elt0(_mm_unpackhi_ps(v,v));
+    case 3: return elt0(_mm_shuffle_ps(v,v,_MM_SHUFFLE(3,3,3,3)));
+    }
+    return scalar_t(0);         // not reached for d < size()
+  }
+
+  static inline vec_t load (scalar_t const& a)
+  {
+    return _mm_load_ps(&a);
+  }
+  static inline vec_t loadu (scalar_t const& a)
+  {
+    return _mm_loadu_ps(&a);
+  }
+  // Load a vector from memory that may or may not be aligned, as
+  // decided by the offset and the vector size
+  static inline vec_t loadu_maybe (int const off, scalar_t const& p)
+  {
+    if (off % size() == 0) {
+      return load(p);
+    } else {
+      return loadu(p);
+    }
+  }
+  static inline vec_t loadu_maybe3 (int const off0, int const off1,
+                                    int const off2,
+                                    scalar_t const& p)
+  {
+    if (off0 % size() == 0 and off1 % size() == 0 and off2 % size() == 0) {
+      return load(p);
+    } else {
+      return loadu(p);
+    }
+  }
+  inline void store (scalar_t& p) const
+  {
+    _mm_store_ps(&p,v);
+  }
+  inline void storeu (scalar_t& p) const
+  {
+    _mm_storeu_ps(&p,v);
+  }
+  inline void store_nta (scalar_t& p) const
+  {
+    _mm_stream_ps(&p,v);
+  }
+  inline void store_nta_partial_lo (scalar_t& p, size_t const cnt) const
+  {
+    switch (cnt) {
+    case 4: store_nta(p); break;
+    case 3: (&p)[2]=(*this)[2]; // fall through
+    case 2: (&p)[1]=(*this)[1]; // fall through
+    case 1: (&p)[0]=(*this)[0];
+    }
+  }
+  inline void store_nta_partial_hi (scalar_t& p, size_t const cnt) const
+  {
+    switch (cnt) {
+    case 4: store_nta(p); break;
+    case 3: (&p)[1]=(*this)[1]; // fall through
+    case 2: (&p)[2]=(*this)[2]; // fall through
+    case 1: (&p)[3]=(*this)[3];
+    }
+  }
+
+  inline vec_t operator+ () const
+  {
+    return +v;
+  }
+  inline vec_t operator- () const
+  {
+    return -v;
+  }
+  inline vec_t operator+ (vec_t const& x) const
+  {
+    return v+x.v;
+  }
+  inline vec_t operator- (vec_t const& x) const
+  {
+    return v-x.v;
+  }
+  inline vec_t operator* (vec_t const& x) const
+  {
+    return v*x.v;
+  }
+  inline vec_t operator/ (vec_t const& x) const
+  {
+    return v/x.v;
+  }
+  inline vec_t& operator+= (vec_t const& x)
+  {
+    return *this=*this+x;
+  }
+  inline vec_t& operator-= (vec_t const& x)
+  {
+    return *this=*this-x;
+  }
+  inline vec_t& operator*= (vec_t const& x)
+  {
+    return *this=*this*x;
+  }
+  inline vec_t& operator/= (vec_t const& x)
+  {
+    return *this=*this/x;
+  }
+};
+
+inline vec_t<float> exp (vec_t<float> const& x)
+{
+  return vec_t<float>(exp(x[0]), exp(x[1]), exp(x[2]), exp(x[3]));
+}
+inline vec_t<float> fabs (vec_t<float> const& x)
+{
+  // clear the sign bit of each element; -0.0f has only the sign bit set
+  return _mm_andnot_ps(_mm_set1_ps(-0.0f), x.v);
+}
+inline vec_t<float> fmax (vec_t<float> const& x, vec_t<float> const& y)
+{
+  return _mm_max_ps(x.v, y.v);
+}
+inline vec_t<float> fmin (vec_t<float> const& x, vec_t<float> const& y)
+{
+  return _mm_min_ps(x.v, y.v);
+}
+inline vec_t<float> ifthen (bool const b, vec_t<float> const& x,
+                            vec_t<float> const& y)
+{
+  return b ? x : y;
+}
+inline vec_t<float> log (vec_t<float> const& x)
+{
+  return vec_t<float>(log(x[0]), log(x[1]), log(x[2]), log(x[3]));
+}
+inline vec_t<float> pow (vec_t<float> const& x, float const& a)
+{
+  return vec_t<float>(pow(x[0],a), pow(x[1],a), pow(x[2],a), pow(x[3],a));
+}
+inline vec_t<float> sqrt (vec_t<float> const& x)
+{
+  return _mm_sqrt_ps(x.v);
+}
+
+#endif
+
+
+
+#if defined(__SSE2__)           // SSE2 (Intel)
+
+#include <emmintrin.h>
+
+template<>
+struct vec_t<double> {
+  typedef double scalar_t;
+  typedef __m128d impl_t;
+  impl_t v;
+
+  static inline size_t size()
+  {
+    return sizeof(impl_t)/sizeof(scalar_t);
+  }
+
+  inline vec_t ()
+  {
+  }
+  inline vec_t (scalar_t const& a)
+    : v(_mm_set1_pd(a))
+  {
+  }
+  inline vec_t (scalar_t const& a0, scalar_t const& a1)
+    : v(_mm_set_pd(a1,a0))      // reverse order!
+  {
+  }
+
+  inline vec_t (impl_t const& w)
+    : v(w)
+  {
+  }
+  inline operator impl_t () const
+  {
+    return v;
+  }
+
+private:
+  static inline scalar_t elt0 (impl_t const& v)
+  {
+    return _mm_cvtsd_f64(v);    // this is a no-op
+  }
+public:
+  inline scalar_t operator[] (size_t const d) const
+  {
+    switch (d) {
+    case 0: return elt0(v);
+    case 1: return elt0(_mm_unpackhi_pd(v,v));
+    }
+    return scalar_t(0);         // not reached for d < size()
+  }
+
+  static inline vec_t load (scalar_t const& a)
+  {
+    return _mm_load_pd(&a);
+  }
+  static inline vec_t loadu (scalar_t const& a)
+  {
+    return _mm_loadu_pd(&a);
+  }
+  // Load a vector from memory that may or may not be aligned, as
+  // decided by the offset and the vector size
+  static inline vec_t loadu_maybe (int const off, scalar_t const& p)
+  {
+    if (off % size() == 0) {
+      return load(p);
+    } else {
+      return loadu(p);
+    }
+  }
+  static inline vec_t loadu_maybe3 (int const off0, int const off1,
+                                    int const off2,
+                                    scalar_t const& p)
+  {
+    if (off0 % size() == 0 and off1 % size() == 0 and off2 % size() == 0) {
+      return load(p);
+    } else {
+      return loadu(p);
+    }
+  }
+  inline void store (scalar_t& p) const
+  {
+    _mm_store_pd(&p,v);
+  }
+  inline void storeu (scalar_t& p) const
+  {
+    _mm_storeu_pd(&p,v);
+  }
+  inline void store_nta (scalar_t& p) const
+  {
+    _mm_stream_pd(&p,v);
+  }
+  inline void store_nta_partial_lo (scalar_t& p, size_t const cnt) const
+  {
+    switch (cnt) {
+    case 2: store_nta(p); break;
+    case 1: (&p)[0]=(*this)[0];
+    }
+  }
+  inline void store_nta_partial_hi (scalar_t& p, size_t const cnt) const
+  {
+    switch (cnt) {
+    case 2: store_nta(p); break;
+    case 1: (&p)[1]=(*this)[1];
+    }
+  }
+
+  inline vec_t operator+ () const
+  {
+    return +v;
+  }
+  inline vec_t operator- () const
+  {
+    return -v;
+  }
+  inline vec_t operator+ (vec_t const& x) const
+  {
+    return v+x.v;
+  }
+  inline vec_t operator- (vec_t const& x) const
+  {
+    return v-x.v;
+  }
+  inline vec_t operator* (vec_t const& x) const
+  {
+    return v*x.v;
+  }
+  inline vec_t operator/ (vec_t const& x) const
+  {
+    return v/x.v;
+  }
+  inline vec_t& operator+= (vec_t const& x)
+  {
+    return *this=*this+x;
+  }
+  inline vec_t& operator-= (vec_t const& x)
+  {
+    return *this=*this-x;
+  }
+  inline vec_t& operator*= (vec_t const& x)
+  {
+    return *this=*this*x;
+  }
+  inline vec_t& operator/= (vec_t const& x)
+  {
+    return *this=*this/x;
+  }
+};
+
+inline vec_t<double> exp (vec_t<double> const& x)
+{
+  return vec_t<double>(exp(x[0]), exp(x[1]));
+}
+inline vec_t<double> fabs (vec_t<double> const& x)
+{
+  // clear the sign bit of each element; -0.0 has only the sign bit set
+  return _mm_andnot_pd(_mm_set1_pd(-0.0), x.v);
+}
+inline vec_t<double> fmax (vec_t<double> const& x, vec_t<double> const& y)
+{
+  return _mm_max_pd(x.v, y.v);
+}
+inline vec_t<double> fmin (vec_t<double> const& x, vec_t<double> const& y)
+{
+  return _mm_min_pd(x.v, y.v);
+}
+inline vec_t<double> ifthen (bool const b, vec_t<double> const& x,
+                             vec_t<double> const& y)
+{
+  return b ? x : y;
+}
+inline vec_t<double> log (vec_t<double> const& x)
+{
+  return vec_t<double>(log(x[0]), log(x[1]));
+}
+inline vec_t<double> pow (vec_t<double> const& x, double const& a)
+{
+  return vec_t<double>(pow(x[0],a), pow(x[1],a));
+}
+inline vec_t<double> sqrt (vec_t<double> const& x)
+{
+  return _mm_sqrt_pd(x.v);
+}
+
+#endif
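
For orientation, a minimal usage sketch of the API this header adds (not part of the commit). It assumes the primary template `template<typename T> struct vec_t;` is declared elsewhere in src/indirect/, that both arrays are 16-byte aligned, and that n is a multiple of vec_t<float>::size(); the function name `scale` is illustrative only.

    #include <cstddef>

    // Multiply an aligned float array by a constant, one vector at a time.
    void scale (float* dst, float const* src, float const alpha,
                std::size_t const n)
    {
      vec_t<float> const valpha(alpha);                // broadcast alpha
      for (std::size_t i=0; i<n; i+=vec_t<float>::size()) {
        vec_t<float> x = vec_t<float>::load(src[i]);   // aligned load
        x *= valpha;
        x.store(dst[i]);                               // aligned store
      }
    }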
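And a hedged sketch of the unaligned and remainder paths: `loadu_maybe` takes the element index relative to an aligned base and falls back to an unaligned load only when needed, while `store_nta_partial_lo` with cnt < vector size does plain scalar stores. The names `copy_window` and `i0` are illustrative, and the trailing full-vector load assumes the source array is padded to a whole vector.

    #include <cstddef>

    // Copy n floats starting at a possibly unaligned index i0.
    void copy_window (float* dst, float const* src,
                      std::size_t const i0, std::size_t const n)
    {
      std::size_t const vs = vec_t<float>::size();
      std::size_t i = 0;
      for (; i+vs <= n; i+=vs) {
        // aligned load whenever (i0+i) is a multiple of the vector size
        vec_t<float> const x =
          vec_t<float>::loadu_maybe(int(i0+i), src[i0+i]);
        x.storeu(dst[i0+i]);
      }
      if (i < n) {
        // remainder: cnt = n-i < vs, so only scalar stores are issued
        vec_t<float> const x = vec_t<float>::loadu(src[i0+i]);
        x.store_nta_partial_lo(dst[i0+i], n-i);
      }
    }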