diff options
author | eschnett <eschnett@105869f7-3296-0410-a4ea-f4349344b45a> | 2010-11-29 16:40:49 +0000 |
---|---|---|
committer | eschnett <eschnett@105869f7-3296-0410-a4ea-f4349344b45a> | 2010-11-29 16:40:49 +0000 |
commit | 96b656fc2d33f2d5238db7e8de17b11c5893084b (patch) | |
tree | 3857f6f068ec1368adf8bd0d5ca012e7e705f0ef | |
parent | 312c4501b28f9d0b43c14451f383b3341b96c759 (diff) |
Add initial implementation
git-svn-id: https://svn.cct.lsu.edu/repos/numrel/LSUThorns/Vectors/trunk@3 105869f7-3296-0410-a4ea-f4349344b45a
-rw-r--r-- | doc/documentation.tex | 144 | ||||
-rw-r--r-- | src/indirect/vectors-default.hh | 226 | ||||
-rw-r--r-- | src/indirect/vectors-intel.hh | 390 | ||||
-rw-r--r-- | src/indirect/vectors-power.hh | 360 | ||||
-rw-r--r-- | src/indirect/vectors-pseudo.hh | 183 | ||||
-rw-r--r-- | src/indirect/vectors.hh | 19 |
6 files changed, 1322 insertions, 0 deletions
diff --git a/doc/documentation.tex b/doc/documentation.tex new file mode 100644 index 0000000..55a3762 --- /dev/null +++ b/doc/documentation.tex @@ -0,0 +1,144 @@ +% *======================================================================* +% Cactus Thorn template for ThornGuide documentation +% Author: Ian Kelley +% Date: Sun Jun 02, 2002 +% $Header$ +% +% Thorn documentation in the latex file doc/documentation.tex +% will be included in ThornGuides built with the Cactus make system. +% The scripts employed by the make system automatically include +% pages about variables, parameters and scheduling parsed from the +% relevant thorn CCL files. +% +% This template contains guidelines which help to assure that your +% documentation will be correctly added to ThornGuides. More +% information is available in the Cactus UsersGuide. +% +% Guidelines: +% - Do not change anything before the line +% % START CACTUS THORNGUIDE", +% except for filling in the title, author, date, etc. fields. +% - Each of these fields should only be on ONE line. +% - Author names should be separated with a \\ or a comma. +% - You can define your own macros, but they must appear after +% the START CACTUS THORNGUIDE line, and must not redefine standard +% latex commands. +% - To avoid name clashes with other thorns, 'labels', 'citations', +% 'references', and 'image' names should conform to the following +% convention: +% ARRANGEMENT_THORN_LABEL +% For example, an image wave.eps in the arrangement CactusWave and +% thorn WaveToyC should be renamed to CactusWave_WaveToyC_wave.eps +% - Graphics should only be included using the graphicx package. +% More specifically, with the "\includegraphics" command. Do +% not specify any graphic file extensions in your .tex file. This +% will allow us to create a PDF version of the ThornGuide +% via pdflatex. +% - References should be included with the latex "\bibitem" command. 
+% - Use \begin{abstract}...\end{abstract} instead of \abstract{...} +% - Do not use \appendix, instead include any appendices you need as +% standard sections. +% - For the benefit of our Perl scripts, and for future extensions, +% please use simple latex. +% +% *======================================================================* +% +% Example of including a graphic image: +% \begin{figure}[ht] +% \begin{center} +% \includegraphics[width=6cm]{MyArrangement_MyThorn_MyFigure} +% \end{center} +% \caption{Illustration of this and that} +% \label{MyArrangement_MyThorn_MyLabel} +% \end{figure} +% +% Example of using a label: +% \label{MyArrangement_MyThorn_MyLabel} +% +% Example of a citation: +% \cite{MyArrangement_MyThorn_Author99} +% +% Example of including a reference +% \bibitem{MyArrangement_MyThorn_Author99} +% {J. Author, {\em The Title of the Book, Journal, or periodical}, 1 (1999), +% 1--16. {\tt http://www.nowhere.com/}} +% +% *======================================================================* + +% If you are using CVS use this line to give version information +% $Header$ + +\documentclass{article} + +% Use the Cactus ThornGuide style file +% (Automatically used from Cactus distribution, if you have a +% thorn without the Cactus Flesh download this from the Cactus +% homepage at www.cactuscode.org) +\usepackage{../../../../doc/latex/cactus} + +\begin{document} + +% The author of the documentation +\author{Erik Schnetter \textless schnetter@cct.lsu.edu\textgreater} + +% The title of the document (not necessarily the name of the Thorn) +\title{Vectors} + +% the date your document was last changed, if your document is in CVS, +% please use: +% \date{$ $Date: 2004-01-07 14:12:39 -0600 (Wed, 07 Jan 2004) $ $} +\date{November 24 2010} + +\maketitle + +% Do not delete next line +% START CACTUS THORNGUIDE + +% Add all definitions used in this documentation here +% \def\mydef etc + +% Add an abstract for this thorn's documentation +\begin{abstract} + 
/*
 * ---- tail of doc/documentation.tex (preserved verbatim) ----
 *
\end{abstract}

% The following sections are suggestive only.
% Remove them or add your own.

\section{Introduction}

\section{Physical System}

\section{Numerical Implementation}

\section{Using This Thorn}

\subsection{Obtaining This Thorn}

\subsection{Basic Usage}

\subsection{Special Behaviour}

\subsection{Interaction With Other Thorns}

\subsection{Examples}

\subsection{Support and Feedback}

\section{History}

\subsection{Thorn Source Code}

\subsection{Thorn Documentation}

\subsection{Acknowledgements}


\begin{thebibliography}{9}

\end{thebibliography}

% Do not delete next line
% END CACTUS THORNGUIDE

\end{document}
 *
 * diff --git a/src/indirect/vectors-default.hh b/src/indirect/vectors-default.hh
 * new file mode 100644 index 0000000..9a1e1d3 --- /dev/null
 * +++ b/src/indirect/vectors-default.hh @@ -0,0 +1,226 @@
 */

using namespace std;



// A class template that provides a vectorised type for the underlying
// scalar type T.  This implementation does "nothing", i.e. just
// provides a "vector" class with a vector size of 1, forwarding all
// operations to the scalar type.
//
// This implementation uses small integers in several places, e.g. in
// the [] operator.  This is efficient only if these integers are
// compile time constants, so that the compiler can remove the
// corresponding if and switch statements.
template<typename T>
struct vec_t {
  
  // Names for the underlying scalar type, and for the vector type
  // used to implement this class.  For example, with SSE2, it would
  // be scalar_t=double, and impl_t=__m128d.
  typedef T scalar_t;
  typedef T impl_t;
  
  // The payload -- the actual vector content
  impl_t v;
  
  // Vector size (number of elements); always 1 here
  static inline size_t size()
  {
    return sizeof(impl_t)/sizeof(scalar_t);
  }
  
  // Constructors
  inline vec_t ()
  {
  }
  inline vec_t (scalar_t const& a)
    : v(a)
  {
  }
  
  // Convert to the implementation vector type
  inline operator impl_t ()
  {
    return v;
  }
  
  // Access individual vector elements; d can only be 0 for a
  // one-element vector, so d is ignored
  inline scalar_t operator[] (size_t const d) const
  {
    return v;
  }
  
  // Load vectors from memory.  For convenience when using this class,
  // these accept references to the scalar type instead of pointers to
  // the vector type.  These routines are static members of the class,
  // so that they can be used as VEC::load(p); if they were
  // stand-alone functions, one would have to write load<SCALAR>(p)
  // instead.
  //
  // Aligned load
  static inline vec_t load (scalar_t const& p)
  {
    return p;
  }
  // Unaligned load
  static inline vec_t loadu (scalar_t const& p)
  {
    return p;
  }
  // Load a vector from memory that may or may not be aligned, as
  // decided by the offset and the vector size.  These functions are
  // useful e.g. for loading neighbouring grid points while
  // evaluating finite differencing stencils.
  static inline vec_t loadu_maybe (int const off, scalar_t const& p)
  {
    return p;
  }
  static inline vec_t loadu_maybe3 (int const off0, int const off1,
                                    int const off2,
                                    scalar_t const& p)
  {
    return p;
  }
};

// Store vectors to memory.  These routines are stand-alone functions,
// so that they can be used as vec_store(p,x); if they were class
// members, one would have to write x.store(p) instead, or possibly
// VEC::store(p,x).
//
// Aligned store
template<typename T>
inline void vec_store (typename vec_t<T>::scalar_t& p, vec_t<T> const& x)
{
  p=x.v;
}
// Unaligned store
template<typename T>
inline void vec_storeu (typename vec_t<T>::scalar_t& p, vec_t<T> const& x)
{
  p=x.v;
}
// Non-temporal store, i.e. a store that bypasses the cache
template<typename T>
inline void vec_store_nta (typename vec_t<T>::scalar_t& p, vec_t<T> const& x)
{
  p=x.v;
}
// Store the cnt lower elements of a vector, bypassing the cache if
// possible.  For this one-element implementation cnt can only be 0
// (store nothing) or 1 (store the whole vector), so this is
// implementable directly instead of aborting via assert(0).
template<typename T>
inline void vec_store_nta_partial_lo (typename vec_t<T>::scalar_t& p,
                                      vec_t<T> const& x,
                                      size_t const cnt)
{
  assert(cnt <= vec_t<T>::size());
  if (cnt) p=x.v;
}
// Store the cnt higher elements of a vector, bypassing the cache if
// possible.  This stores the vector elements into memory locations as
// if element 0 were stored at p.  With a vector size of 1 this is the
// same as the partial-lo store above.
template<typename T>
inline void vec_store_nta_partial_hi (typename vec_t<T>::scalar_t& p,
                                      vec_t<T> const& x,
                                      size_t const cnt)
{
  assert(cnt <= vec_t<T>::size());
  if (cnt) p=x.v;
}

// Unary operators, forwarded to the scalar type
template<typename T>
inline vec_t<T> operator+ (vec_t<T> const& x)
{
  return +x.v;
}
template<typename T>
inline vec_t<T> operator- (vec_t<T> const& x)
{
  return -x.v;
}

// Binary operators, forwarded to the scalar type
template<typename T>
inline vec_t<T> operator+ (vec_t<T> const& x, vec_t<T> const& y)
{
  return x.v + y.v;
}
template<typename T>
inline vec_t<T> operator- (vec_t<T> const& x, vec_t<T> const& y)
{
  return x.v - y.v;
}
template<typename T>
inline vec_t<T> operator* (vec_t<T> const& x, vec_t<T> const& y)
{
  return x.v * y.v;
}
template<typename T>
inline vec_t<T> operator/ (vec_t<T> const& x, vec_t<T> const& y)
{
  return x.v / y.v;
}

// Compound assignment operators
template<typename T>
inline vec_t<T>& operator+= (vec_t<T>& x, vec_t<T> const& y)
{
  x.v += y.v;
  return x;
}
template<typename T>
inline vec_t<T>& operator-= (vec_t<T>& x, vec_t<T> const& y)
{
  x.v -= y.v;
  return x;
}
template<typename T>
inline vec_t<T>& operator*= (vec_t<T>& x, vec_t<T> const& y)
{
  x.v *= y.v;
  return x;
}
+template<typename T> +inline vec_t<T>& operator/= (vec_t<T>& x, vec_t<T> const& y) +{ + x.v /= y.v; + return x; +} + +template<typename T> +inline vec_t<T> exp (vec_t<T> const& x) +{ + return exp(x.v); +} +template<typename T> +inline vec_t<T> fabs (vec_t<T> const& x) +{ + return fabs(x.v); +} +template<typename T> +inline vec_t<T> log (vec_t<T> const& x) +{ + return log(x.v); +} +template<typename T> +inline vec_t<T> sqrt (vec_t<T> const& x) +{ + return sqrt(x.v); +} + +template<typename T> +inline vec_t<T> fmax (vec_t<T> const& x, vec_t<T> const& y) +{ + return fmax(x.v, y.v); +} +template<typename T> +inline vec_t<T> fmin (vec_t<T> const& x, vec_t<T> const& y) +{ + return fmin(x.v, y.v); +} +template<typename T> +inline vec_t<T> pow (vec_t<T> const& x, typename vec_t<T>::scalar_t const& a) +{ + return pow(x.v, a); +} +template<typename T> +inline vec_t<T> pow (vec_t<T> const& x, int const& i) +{ + return pow(x.v, i); +} diff --git a/src/indirect/vectors-intel.hh b/src/indirect/vectors-intel.hh new file mode 100644 index 0000000..29da84f --- /dev/null +++ b/src/indirect/vectors-intel.hh @@ -0,0 +1,390 @@ +#include <cmath> +#include <cstdlib> +using namespace std; + + + +#if defined(__SSE__) // SSE (Intel) + +#include <xmmintrin.h> + +template<> +struct vec_T<float> { + typedef float scalar_t; + typedef __m128 impl_t; + impl_t v; + + static inline size_t size() + { + return sizeof(impl_t)/sizeof(scalar_t); + } + + inline vec_t () + { + } + inline vec_t (scalar_t const& a) + : v(_mm_set1_ps(a)) + { + } + inline vec_t (scalar_t const& a0, scalar_t const& a1, + scalar_t const& a2, scalar_t const& a3) + : v(_mm_set_ps(a3,a2,a1,a0)) // reverse order! 
+ { + } + + inline vec_t (impl_t const& w) + : v(w) + { + } + inline operator impl_t () + { + return v; + } + +private: + static inline scalar_t elt0 (impl_t const& v) + { + return _mm_cvtss_f32(v); // this is a no-op + } +public: + inline scalar_t operator[] (size_t const d) + { + switch (d) { + case 0: return elt0(v); + case 1: return elt0(_mm_shuffle_ps(v,v,_MM_SHUFFLE(1,0,3,2))); + case 2: return elt0(_mm_unpackhi_ps(v,v)); + case 3: return elt0(_mm_shuffle_ps(v,v,_MM_SHUFFLE(3,2,1,0))); + } + } + + static inline vec_t load (scalar_t const& a) + { + return _mm_load_ps(&a); + } + static inline vec_t loadu (scalar_t const& a) + { + return _mm_loadu_ps(&a); + } + // Load a vector from memory that may or may not be aligned, as + // decided by the offset and the vector size + static inline vec_t loadu_maybe (int const off, scalar_t const& p) + { + if (off % size() == 0) { + return load(p); + } else { + return loadu(p); + } + } + static inline vec_t loadu_maybe3 (int const off0, int const off1, + int const off2, + scalar_t const& p) + { + if (off0 % size() == 0 and off1 % size() == 0 and off2 % size() == 0) { + return load(p); + } else { + return loadu(p); + } + } + inline void store (scalar_t& p) const + { + _mm_store_ps(&p,v); + } + inline void storeu (scalar_t& p) const + { + _mm_storeu_ps(&p,v); + } + inline void store_nta (scalar_t& p) const + { + _mm_stream_ps(&p,v); + } + inline void store_nta_partial_lo (scalar_t& p, size_t const cnt) const + { + switch (cnt) { + case 4: store_nta(p); break; + case 3: (&p)[2]=v[2]; + case 2: (&p)[1]=v[1]; + case 1: (&p)[0]=v[0]; + } + } + inline void store_nta_partial_hi (scalar_t& p, size_t const cnt) const + { + switch (cnt) { + case 4: store_nta(p); break; + case 3: (&p)[1]=v[1]; + case 2: (&p)[2]=v[2]; + case 1: (&p)[3]=v[3]; + } + } + + inline vec_t operator+ () const + { + return +v; + } + inline vec_t operator- () const + { + return -v; + } + inline vec_t operator+ (vec_t const& x) const + { + return v+x.v; + } + 
inline vec_t operator- (vec_t const& x) const + { + return v-x.v; + } + inline vec_t operator* (vec_t const& x) const + { + return v*x.v; + } + inline vec_t operator/ (vec_t const& x) const + { + return v/x.v; + } + inline vec_t& operator+= (vec_t const& x) + { + return *this=*this+x; + } + inline vec_t& operator-= (vec_t const& x) + { + return *this=*this+x; + } + inline vec_t& operator*= (vec_t const& x) + { + return *this=*this-x; + } + inline vec_t& operator/= (vec_t const& x) + { + return *this=/this+x; + } +}; + +template<typename T> +vec_t<T> exp (vec_t<T> const& x) +{ + return vec_t(exp(x.v[0]), exp(x.v[1]), exp(x.v[2]), exp(x.v[3])); +} +template<typename T> +vec_t<T> fabs (vec_t<T> const& x) +{ + return _mm_and_ps(v,_mm_set1_pi32(0x7fffffffU)); +} +template<typename T> +vec_t<T> fmax (vec_t<T> const& x, vec_t<T> const& y) +{ + return _mm_max_ps(x.v, y.v); +} +template<typename T> +vec_t<T> fmin (vec_t<T> const& x, vec_t<T> const& y) +{ + return _mm_min_ps(x.v, y.v); +} +template<typename T> +vec_t<T> ifthen (bool const b, vec_t<T> const& x, vec_t<T> const& y) +{ + return b ? x : y; +} +vec_t<T> log (vec_t<T> const& x) +{ + return vec_t(log(x.v[0]), log(x.v[1]), log(x.v[2]), log(x.v[3])); +} +template<typename T> +vec_t<T> pow (vec_t<T> const& x, typename vec_t<T>::scalar_t const& a) +{ + return vec_t(pow(x.v[0],a), pow(x.v[1],a), pow(x.v[2],a), pow(x.v[3],a)); +} +vec_t<T> sqrt (vec_t<T> const& x) +{ + return _mm_sqrt_ps(x.v); +} + +#endif + + + +#if defined(__SSE2__) // SSE2 (Intel) + +#include <emmintrin.h> + +template<> +struct vec_T<double> { + typedef double scalar_t; + typedef __m128d impl_t; + impl_t v; + + static inline size_t size() + { + return sizeof(impl_t)/sizeof(scalar_t); + } + + inline vec_t () + { + } + inline vec_t (scalar_t const& a) + : v(_mm_set1_pd(a)) + { + } + inline vec_t (scalar_t const& a0, scalar_t const& a1) + : v(_mm_set_pd(a1,a0)) // reverse order! 
+ { + } + + inline vec_t (impl_t const& w) + : v(w) + { + } + inline operator impl_t () + { + return v; + } + +private: + static inline scalar_t elt0 (impl_t const& v) + { + return _mm_cvtss_f64(v); // this is a no-op + } +public: + inline scalar_t operator[] (size_t const d) + { + switch (d) { + case 0: return elt0(v); + case 1: return elt0(_mm_unpackhi_pd(v,v)); + } + } + + static inline vec_t load (scalar_t const& a) + { + return _mm_load_pd(&a); + } + static inline vec_t loadu (scalar_t const& a) + { + return _mm_loadu_pd(&a); + } + // Load a vector from memory that may or may not be aligned, as + // decided by the offset and the vector size + static inline vec_t loadu_maybe (int const off, scalar_t const& p) + { + if (off % size() == 0) { + return load(p); + } else { + return loadu(p); + } + } + static inline vec_t loadu_maybe3 (int const off0, int const off1, + int const off2, + scalar_t const& p) + { + if (off0 % size() == 0 and off1 % size() == 0 and off2 % size() == 0) { + return load(p); + } else { + return loadu(p); + } + } + inline void store (scalar_t& p) const + { + _mm_store_pd(&p,v); + } + inline void storeu (scalar_t& p) const + { + _mm_storeu_pd(&p,v); + } + inline void store_nta (scalar_t& p) const + { + _mm_stream_pd(&p,v); + } + inline void store_nta_partial_lo (scalar_t& p, size_t const cnt) const + { + switch (cnt) { + case 2: store_nta(p); break; + case 1: (&p)[0]=v[0]; + } + } + inline void store_nta_partial_hi (scalar_t& p, size_t const cnt) const + { + switch (cnt) { + case 2: store_nta(p); break; + case 1: (&p)[1]=v[1]; + } + } + + inline vec_t operator+ () const + { + return +v; + } + inline vec_t operator- () const + { + return -v; + } + inline vec_t operator+ (vec_t const& x) const + { + return v+x.v; + } + inline vec_t operator- (vec_t const& x) const + { + return v-x.v; + } + inline vec_t operator* (vec_t const& x) const + { + return v*x.v; + } + inline vec_t operator/ (vec_t const& x) const + { + return v/x.v; + } + inline vec_t& 
operator+= (vec_t const& x) + { + return *this=*this+x; + } + inline vec_t& operator-= (vec_t const& x) + { + return *this=*this+x; + } + inline vec_t& operator*= (vec_t const& x) + { + return *this=*this-x; + } + inline vec_t& operator/= (vec_t const& x) + { + return *this=/this+x; + } +}; + +template<typename T> +vec_t<T> exp (vec_t<T> const& x) +{ + return vec_t(exp(x.v[0]), exp(x.v[1])); +} +template<typename T> +vec_t<T> fabs (vec_t<T> const& x) +{ + return _mm_and_pd(v,_mm_set1_epi64(0x7fffffffffffffffULL)); +} +template<typename T> +vec_t<T> fmax (vec_t<T> const& x, vec_t<T> const& y) +{ + return _mm_max_pd(x.v, y.v); +} +template<typename T> +vec_t<T> fmin (vec_t<T> const& x, vec_t<T> const& y) +{ + return _mm_min_pd(x.v, y.v); +} +template<typename T> +vec_t<T> ifthen (bool const b, vec_t<T> const& x, vec_t<T> const& y) +{ + return b ? x : y; +} +vec_t<T> log (vec_t<T> const& x) +{ + return vec_t(log(x.v[0]), log(x.v[1])); +} +template<typename T> +vec_t<T> pow (vec_t<T> const& x, typename vec_t<T>::scalar_t const& a) +{ + return vec_t(pow(x.v[0],a), pow(x.v[1],a)); +} +vec_t<T> sqrt (vec_t<T> const& x) +{ + return _mm_sqrt_pd(x.v); +} + +#endif diff --git a/src/indirect/vectors-power.hh b/src/indirect/vectors-power.hh new file mode 100644 index 0000000..37a8786 --- /dev/null +++ b/src/indirect/vectors-power.hh @@ -0,0 +1,360 @@ +#include <cmath> +#include <cstdlib> +using namespace std; + + + +#if defined(__ALTIVEC__) // Altivec (Power) + +#include <altivec.h> + +template<> +struct vec_t<float> { + typedef float scalar_t; + typedef vector float impl_t; + + static inline size_t size() + { + return sizeof(impl_t)/sizeof(scalar_t); + } + + inline vec_t () + { + } + inline vec_t (scalar_t const& a) + : v(vec_splats(a)) + { + } + inline vec_t (scalar_t const& a0, scalar_t const& a1, + scalar_t const& a2, scalar_t const& a3) + { + v[0]=a0; v[1]=a1; v[2]=a2; v[3]=a3; + } + + inline vec_t (impl_t const& w) + : v(w) + { + } + inline operator impl_t () + { + return 
v; + } + + static inline vec_t load (scalar_t const& a) + { + return *(impl_t const*)&a; + } + static inline vec_t loadu (scalar_t const& a) + { + return vec_load(&a); + } + // Load a vector from memory that may or may not be aligned, as + // decided by the offset and the vector size + static inline vec_t loadu_maybe (int const off, scalar_t const& p) + { + if (off % size() == 0) { + return load(p); + } else { + return loadu(p); + } + } + static inline vec_t loadu_maybe3 (int const off0, int const off1, + int const off2, + scalar_t const& p) + { + if (off0 % size() == 0 and off1 % size() == 0 and off2 % size() == 0) { + return load(p); + } else { + return loadu(p); + } + } + inline void store (scalar_t& p) const + { + *(impl_t*)p = v; + } + inline void storeu (scalar_t& p) const + { + store(p); + } + inline void store_nta (scalar_t& p) const + { + // TODO: Use stvxl instruction? + store(p); + } + inline void store_nta_partial_lo (scalar_t& p, size_t const cnt) const + { + switch (cnt) { + case 4: store_nta(p); break; + case 3: (&p)[2]=v[2]; + case 2: (&p)[1]=v[1]; + case 1: (&p)[0]=v[0]; + } + } + inline void store_nta_partial_hi (scalar_t& p, size_t const cnt) const + { + switch (cnt) { + case 4: store_nta(p); break; + case 3: (&p)[1]=v[1]; + case 2: (&p)[2]=v[2]; + case 1: (&p)[3]=v[3]; + } + } + + inline vec_t operator+ () const + { + return +v; + } + inline vec_t operator- () const + { + return -v; + } + inline vec_t operator+ (vec_t const& x) const + { + return v+x.v; + } + inline vec_t operator- (vec_t const& x) const + { + return v-x.v; + } + inline vec_t operator* (vec_t const& x) const + { + return v*x.v; + } + inline vec_t operator/ (vec_t const& x) const + { + return v/x.v; + } + inline vec_t& operator+= (vec_t const& x) + { + return *this=*this+x; + } + inline vec_t& operator-= (vec_t const& x) + { + return *this=*this+x; + } + inline vec_t& operator*= (vec_t const& x) + { + return *this=*this-x; + } + inline vec_t& operator/= (vec_t const& x) + { + 
return *this=/this+x; + } +}; + +template<typename T> +vec_t<T> exp (vec_t<T> const& x) +{ + return vec_t(exp(x.v[0]), exp(x.v[1]), exp(x.v[2]), exp(x.v[3])); +} +template<typename T> +vec_t<T> fabs (vec_t<T> const& x) +{ + return vec_abs(x.v); +} +template<typename T> +vec_t<T> fmax (vec_t<T> const& x, vec_t<T> const& y) +{ + return vec_max(x.v, y.v); +} +template<typename T> +vec_t<T> fmin (vec_t<T> const& x, vec_t<T> const& y) +{ + return vec_min(x.v, y.v); +} +template<typename T> +vec_t<T> ifthen (bool const b, vec_t<T> const& x, vec_t<T> const& y) +{ + return b ? x : y; +} +vec_t<T> log (vec_t<T> const& x) +{ + return vec_t(log(x.v[0]), log(x.v[1]), log(x.v[2]), log(x.v[3])); +} +template<typename T> +vec_t<T> pow (vec_t<T> const& x, typename vec_t<T>::scalar_t const& a) +{ + return vec_t(pow(x.v[0],a), pow(x.v[1],a), pow(x.v[2],a), pow(x.v[3],a)); +} +vec_t<T> sqrt (vec_t<T> const& x) +{ + return vec_t(sqrt(x.v[0]), sqrt(x.v[1]), sqrt(x.v[2]), sqrt(x.v[3])); +} + +#endif + + + +#if defined(__ALTIVEC__) && defined(_ARCH_PWR7) // Altivec VSX (Power) + +#include <altivec.h> + +template<> +struct vec_t<double> { + typedef double scalar_t; + typedef vector double impl_t; + + static inline size_t size() + { + return sizeof(impl_t)/sizeof(scalar_t); + } + + inline vec_t () + { + } + inline vec_t (scalar_t const& a) + : v(vec_splats(a)) + { + } + inline vec_t (scalar_t const& a0, scalar_t const& a1) + { + v[0]=a0; v[1]=a1; + } + + inline vec_t (impl_t const& w) + : v(w) + { + } + inline operator impl_t () + { + return v; + } + + static inline vec_t load (scalar_t const& a) + { + return *(impl_t const*)&a; + } + static inline vec_t loadu (scalar_t const& a) + { + return vec_load(&a); + } + // Load a vector from memory that may or may not be aligned, as + // decided by the offset and the vector size + static inline vec_t loadu_maybe (int const off, scalar_t const& p) + { + if (off % size() == 0) { + return load(p); + } else { + return loadu(p); + } + } + static inline 
vec_t loadu_maybe3 (int const off0, int const off1, + int const off2, + scalar_t const& p) + { + if (off0 % size() == 0 and off1 % size() == 0 and off2 % size() == 0) { + return load(p); + } else { + return loadu(p); + } + } + inline void store (scalar_t& p) const + { + *(impl_t*)p = v; + } + inline void storeu (scalar_t& p) const + { + store(p); + } + inline void store_nta (scalar_t& p) const + { + // TODO: Use stvxl instruction? + store(p); + } + inline void store_nta_partial_lo (scalar_t& p, size_t const cnt) const + { + switch (cnt) { + case 2: store_nta(p); break; + case 1: (&p)[0]=v[0]; + } + } + inline void store_nta_partial_hi (scalar_t& p, size_t const cnt) const + { + switch (cnt) { + case 2: store_nta(p); break; + case 1: (&p)[1]=v[1]; + } + } + + inline vec_t operator+ () const + { + return +v; + } + inline vec_t operator- () const + { + return -v; + } + inline vec_t operator+ (vec_t const& x) const + { + return v+x.v; + } + inline vec_t operator- (vec_t const& x) const + { + return v-x.v; + } + inline vec_t operator* (vec_t const& x) const + { + return v*x.v; + } + inline vec_t operator/ (vec_t const& x) const + { + return v/x.v; + } + inline vec_t& operator+= (vec_t const& x) + { + return *this=*this+x; + } + inline vec_t& operator-= (vec_t const& x) + { + return *this=*this+x; + } + inline vec_t& operator*= (vec_t const& x) + { + return *this=*this-x; + } + inline vec_t& operator/= (vec_t const& x) + { + return *this=/this+x; + } +}; + +template<typename T> +vec_t<T> exp (vec_t<T> const& x) +{ + return vec_t(exp(x.v[0]), exp(x.v[1])); +} +template<typename T> +vec_t<T> fabs (vec_t<T> const& x) +{ + return vec_abs(x.v); +} +template<typename T> +vec_t<T> fmax (vec_t<T> const& x, vec_t<T> const& y) +{ + return vec_max(x.v, y.v); +} +template<typename T> +vec_t<T> fmin (vec_t<T> const& x, vec_t<T> const& y) +{ + return vec_min(x.v, y.v); +} +template<typename T> +vec_t<T> ifthen (bool const b, vec_t<T> const& x, vec_t<T> const& y) +{ + return b ? 
x : y; +} +vec_t<T> log (vec_t<T> const& x) +{ + return vec_t(log(x.v[0]), log(x.v[1])); +} +template<typename T> +vec_t<T> pow (vec_t<T> const& x, typename vec_t<T>::scalar_t const& a) +{ + return vec_t(pow(x.v[0],a), pow(x.v[1],a)); +} +vec_t<T> sqrt (vec_t<T> const& x) +{ + return vec_t(sqrt(x.v[0]), sqrt(x.v[1])); +} + +#endif diff --git a/src/indirect/vectors-pseudo.hh b/src/indirect/vectors-pseudo.hh new file mode 100644 index 0000000..a5cca83 --- /dev/null +++ b/src/indirect/vectors-pseudo.hh @@ -0,0 +1,183 @@ +#include <cmath> +#include <cstdlib> +using namespace std; + +template<typename T> +struct vec_t { + size_t const D = 2; + typedef T scalar_t; + typedef T impl_t; + impl_t v[D]; + + static inline size_t size() + { + return D; + } + + inline vec_t () + { + } + inline vec_t (scalar_t const& a) + { + for (size_t d=0; d<D; ++d) v[d]=a; + } + + inline operator impl_t () + { + return v; + } + + inline scalar_t operator[] (size_t const d) const + { + return v[d]; + } + + static inline vec_t load (scalar_t const& a) + { + vec_t r; + for (size_t d=0; d<D; ++d) r.v[d]=(&a)[d]; + return r; + } + static inline vec_t loadu (scalar_t const& a) + { + return load(a); + } + // Load a vector from memory that may or may not be aligned, as + // decided by the offset and the vector size + static inline vec_t loadu_maybe (int const off, scalar_t const& p) + { + return load(a); + } + static inline vec_t loadu_maybe3 (int const off0, int const off1, + int const off2, + scalar_t const& p) + { + return load(a); + } + inline void store (scalar_t& p) const + { + for (size_t d=0; d<D; ++d) (&p)[d]=v[d]; + } + inline void storeu (scalar_t& p) const + { + store(p); + } + inline void store_nta (scalar_t& p) const + { + store(p); + } + inline void store_nta_partial_lo (scalar_t& p, size_t const cnt) const + { + for (size_t d=0; d<cnt; ++d) (&p)[d]=v[d]; + } + inline void store_nta_partial_hi (scalar_t& p, size_t const cnt) const + { + for (size_t d=D-cnt; d<D; ++d) (&p)[d]=v[d]; + } 
+ + inline vec_t operator+ () const + { + vec_t r=*this; + for (size_t d=0; d<D; ++d) r.v[d]=+v[d]; + return r; + } + inline vec_t operator- () const + { + vec_t r=*this; + for (size_t d=0; d<D; ++d) r.v[d]=-v[d]; + return r; + } + inline vec_t operator+ (vec_t const& x) const + { + vec_t r=*this; + return r+=x; + } + inline vec_t operator- (vec_t const& x) const + { + vec_t r=*this; + return r-=x; + } + inline vec_t operator* (vec_t const& x) const + { + vec_t r=*this; + return r*=x; + } + inline vec_t operator/ (vec_t const& x) const + { + vec_t r=*this; + return r/=x; + } + inline vec_t& operator+= (vec_t const& x) + { + for (size_t d=0; d<D; ++d) v[d]+=x.v[d]; + return *this; + } + inline vec_t& operator-= (vec_t const& x) + { + for (size_t d=0; d<D; ++d) v[d]-=x.v[d]; + return *this; + } + inline vec_t& operator*= (vec_t const& x) + { + for (size_t d=0; d<D; ++d) v[d]*=x.v[d]; + return *this; + } + inline vec_t& operator/= (vec_t const& x) + { + for (size_t d=0; d<D; ++d) v[d]/=x.v[d]; + return *this; + } +}; + +template<typename T> +vec_t<T> exp (vec_t<T> const& x) +{ + vec_t r; + for (size_t d=0; d<D; ++d) r.v[d]=exp(x.v[d]); + return r; +} +template<typename T> +vec_t<T> fabs (vec_t<T> const& x) +{ + vec_t r; + for (size_t d=0; d<D; ++d) r.v[d]=fabs(x.v[d]); + return r; +} +template<typename T> +vec_t<T> fmax (vec_t<T> const& x, vec_t<T> const& y) +{ + vec_t r; + for (size_t d=0; d<D; ++d) r.v[d]=fmax(x.v[d],y.v[d]); + return r; +} +template<typename T> +vec_t<T> fmin (vec_t<T> const& x, vec_t<T> const& y) +{ + vec_t r; + for (size_t d=0; d<D; ++d) r.v[d]=fmin(x.v[d],y.v[d]); + return r; +} +template<typename T> +vec_t<T> ifthen (bool const b, vec_t<T> const& x, vec_t<T> const& y) +{ + return b ? 
x : y; +} +vec_t<T> log (vec_t<T> const& x) +{ + vec_t r; + for (size_t d=0; d<D; ++d) r.v[d]=log(x.v[d]); + return r; +} +template<typename T> +vec_t<T> pow (vec_t<T> const& x, typename vec_t<T>::scalar_t const& a) +{ + vec_t r; + for (size_t d=0; d<D; ++d) r.v[d]=pow(x.v[d],a); + return r; +} +vec_t<T> sqrt (vec_t<T> const& x) +{ + vec_t r; + for (size_t d=0; d<D; ++d) r.v[d]=sqrt(x.v[d]); + return r; +} diff --git a/src/indirect/vectors.hh b/src/indirect/vectors.hh new file mode 100644 index 0000000..a9f51db --- /dev/null +++ b/src/indirect/vectors.hh @@ -0,0 +1,19 @@ +#ifndef VECTORS_HH +#define VECTORS_HH + +#include <cassert> +#include <cmath> +#include <cstdlib> + + // Default vector implementation, does not vectorise +#include "vectors-default.hh" + +#if 0 + // Intel SSE vector instructions +#include "vectors-intel.hh" + + // Power (Altivec) vector instructions +#include "vectors-power.hh" +#endif + +#endif // #ifndef VECTORS_HH |