aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authoreschnett <eschnett@105869f7-3296-0410-a4ea-f4349344b45a>2010-11-29 16:40:49 +0000
committereschnett <eschnett@105869f7-3296-0410-a4ea-f4349344b45a>2010-11-29 16:40:49 +0000
commit96b656fc2d33f2d5238db7e8de17b11c5893084b (patch)
tree3857f6f068ec1368adf8bd0d5ca012e7e705f0ef
parent312c4501b28f9d0b43c14451f383b3341b96c759 (diff)
Add initial implementation
git-svn-id: https://svn.cct.lsu.edu/repos/numrel/LSUThorns/Vectors/trunk@3 105869f7-3296-0410-a4ea-f4349344b45a
-rw-r--r--doc/documentation.tex144
-rw-r--r--src/indirect/vectors-default.hh226
-rw-r--r--src/indirect/vectors-intel.hh390
-rw-r--r--src/indirect/vectors-power.hh360
-rw-r--r--src/indirect/vectors-pseudo.hh183
-rw-r--r--src/indirect/vectors.hh19
6 files changed, 1322 insertions, 0 deletions
diff --git a/doc/documentation.tex b/doc/documentation.tex
new file mode 100644
index 0000000..55a3762
--- /dev/null
+++ b/doc/documentation.tex
@@ -0,0 +1,144 @@
+% *======================================================================*
+% Cactus Thorn template for ThornGuide documentation
+% Author: Ian Kelley
+% Date: Sun Jun 02, 2002
+% $Header$
+%
+% Thorn documentation in the latex file doc/documentation.tex
+% will be included in ThornGuides built with the Cactus make system.
+% The scripts employed by the make system automatically include
+% pages about variables, parameters and scheduling parsed from the
+% relevant thorn CCL files.
+%
+% This template contains guidelines which help to assure that your
+% documentation will be correctly added to ThornGuides. More
+% information is available in the Cactus UsersGuide.
+%
+% Guidelines:
+% - Do not change anything before the line
+% % START CACTUS THORNGUIDE",
+% except for filling in the title, author, date, etc. fields.
+% - Each of these fields should only be on ONE line.
+% - Author names should be separated with a \\ or a comma.
+% - You can define your own macros, but they must appear after
+% the START CACTUS THORNGUIDE line, and must not redefine standard
+% latex commands.
+% - To avoid name clashes with other thorns, 'labels', 'citations',
+% 'references', and 'image' names should conform to the following
+% convention:
+% ARRANGEMENT_THORN_LABEL
+% For example, an image wave.eps in the arrangement CactusWave and
+% thorn WaveToyC should be renamed to CactusWave_WaveToyC_wave.eps
+% - Graphics should only be included using the graphicx package.
+% More specifically, with the "\includegraphics" command. Do
+% not specify any graphic file extensions in your .tex file. This
+% will allow us to create a PDF version of the ThornGuide
+% via pdflatex.
+% - References should be included with the latex "\bibitem" command.
+% - Use \begin{abstract}...\end{abstract} instead of \abstract{...}
+% - Do not use \appendix, instead include any appendices you need as
+% standard sections.
+% - For the benefit of our Perl scripts, and for future extensions,
+% please use simple latex.
+%
+% *======================================================================*
+%
+% Example of including a graphic image:
+% \begin{figure}[ht]
+% \begin{center}
+% \includegraphics[width=6cm]{MyArrangement_MyThorn_MyFigure}
+% \end{center}
+% \caption{Illustration of this and that}
+% \label{MyArrangement_MyThorn_MyLabel}
+% \end{figure}
+%
+% Example of using a label:
+% \label{MyArrangement_MyThorn_MyLabel}
+%
+% Example of a citation:
+% \cite{MyArrangement_MyThorn_Author99}
+%
+% Example of including a reference
+% \bibitem{MyArrangement_MyThorn_Author99}
+% {J. Author, {\em The Title of the Book, Journal, or periodical}, 1 (1999),
+% 1--16. {\tt http://www.nowhere.com/}}
+%
+% *======================================================================*
+
+% If you are using CVS use this line to give version information
+% $Header$
+
+\documentclass{article}
+
+% Use the Cactus ThornGuide style file
+% (Automatically used from Cactus distribution, if you have a
+% thorn without the Cactus Flesh download this from the Cactus
+% homepage at www.cactuscode.org)
+\usepackage{../../../../doc/latex/cactus}
+
+\begin{document}
+
+% The author of the documentation
+\author{Erik Schnetter \textless schnetter@cct.lsu.edu\textgreater}
+
+% The title of the document (not necessarily the name of the Thorn)
+\title{Vectors}
+
+% the date your document was last changed, if your document is in CVS,
+% please use:
+% \date{$ $Date: 2004-01-07 14:12:39 -0600 (Wed, 07 Jan 2004) $ $}
+\date{November 24 2010}
+
+\maketitle
+
+% Do not delete next line
+% START CACTUS THORNGUIDE
+
+% Add all definitions used in this documentation here
+% \def\mydef etc
+
+% Add an abstract for this thorn's documentation
+\begin{abstract}
+This thorn provides vectorised data types that map onto the SIMD
+instructions of the host CPU (SSE/SSE2 on Intel, Altivec/VSX on
+Power), together with a scalar fallback implementation.
+\end{abstract}
+
+% The following sections are suggestive only.
+% Remove them or add your own.
+
+\section{Introduction}
+
+\section{Physical System}
+
+\section{Numerical Implementation}
+
+\section{Using This Thorn}
+
+\subsection{Obtaining This Thorn}
+
+\subsection{Basic Usage}
+
+\subsection{Special Behaviour}
+
+\subsection{Interaction With Other Thorns}
+
+\subsection{Examples}
+
+\subsection{Support and Feedback}
+
+\section{History}
+
+\subsection{Thorn Source Code}
+
+\subsection{Thorn Documentation}
+
+\subsection{Acknowledgements}
+
+
+\begin{thebibliography}{9}
+
+\end{thebibliography}
+
+% Do not delete next line
+% END CACTUS THORNGUIDE
+
+\end{document}
diff --git a/src/indirect/vectors-default.hh b/src/indirect/vectors-default.hh
new file mode 100644
index 0000000..9a1e1d3
--- /dev/null
+++ b/src/indirect/vectors-default.hh
@@ -0,0 +1,226 @@
+using namespace std;
+
+
+
+// A class template that provides a vectorised type for the underlying
+// scalar type T. This implementation does "nothing", i.e. just
+// provides a "vector" class with a vector size of 1, forwarding all
+// operations to the scalar type.
+//
+// This implementation uses small integers in several places, e.g. in
+// the [] operator. This is efficient only if these integers are
+// compile time constants, so that the compiler can remove the
+// corresponding if and switch statements.
+template<typename T>
+struct vec_t {
+
+  // Names for the underlying scalar type, and for the vector type
+  // used to implement this class. For example, with SSE2, it would be
+  // scalar_t=double, and impl_t=__m128d.
+  typedef T scalar_t;
+  typedef T impl_t;
+
+  // The payload -- the actual vector content
+  impl_t v;
+
+  // Vector size (number of elements); 1 for this default
+  // implementation, since impl_t==scalar_t
+  static inline size_t size()
+  {
+    return sizeof(impl_t)/sizeof(scalar_t);
+  }
+
+  // Constructors
+  // Default constructor: leaves v uninitialised, mirroring the
+  // behaviour of the underlying scalar type
+  inline vec_t ()
+  {
+  }
+  // Broadcast a scalar into all (here: the single) vector element
+  inline vec_t (scalar_t const& a)
+    : v(a)
+  {
+  }
+
+  // Convert to the implementation vector type
+  inline operator impl_t ()
+  {
+    return v;
+  }
+
+  // Access individual vector elements; the index d is ignored since
+  // there is only one element (d should always be 0 here)
+  inline scalar_t operator[] (size_t const d) const
+  {
+    return v;
+  }
+
+  // Load vectors from memory. For convenience when using this class,
+  // these accept references to the scalar type instead of pointers to
+  // the vector type. These routines are static members of the class,
+  // so that they can be used as VEC::load(p); if they were
+  // stand-alone functions, one would have to write load<SCALAR>(p)
+  // instead.
+  //
+  // Aligned load
+  static inline vec_t load (scalar_t const& p)
+  {
+    return p;
+  }
+  // Unaligned load
+  static inline vec_t loadu (scalar_t const& p)
+  {
+    return p;
+  }
+  // Load a vector from memory that may or may not be aligned, as
+  // decided by the offset and the vector size. These functions are
+  // useful e.g. for loading neighbouring grid points while
+  // evaluating finite differencing stencils. For a vector size of 1
+  // every load is aligned, so these simply forward to load.
+  static inline vec_t loadu_maybe (int const off, scalar_t const& p)
+  {
+    return p;
+  }
+  static inline vec_t loadu_maybe3 (int const off0, int const off1,
+                                    int const off2,
+                                    scalar_t const& p)
+  {
+    return p;
+  }
+};
+
+// Store vectors to memory. These routines are stand-alone functions,
+// so that they can be used as vec_store(p,x); if they were class
+// members, one would have to write x.store(p) instead, or possibly
+// VEC::store(p,x).
+//
+// Aligned store
+template<typename T>
+inline void vec_store (typename vec_t<T>::scalar_t& p, vec_t<T> const& x)
+{
+  p=x.v;
+}
+// Unaligned store
+template<typename T>
+inline void vec_storeu (typename vec_t<T>::scalar_t& p, vec_t<T> const& x)
+{
+  p=x.v;
+}
+// Non-temporal store, i.e. a store that bypasses the cache
+template<typename T>
+inline void vec_store_nta (typename vec_t<T>::scalar_t& p, vec_t<T> const& x)
+{
+  p=x.v;
+}
+// Store the cnt lower elements of a vector, bypassing the cache if
+// possible
+template<typename T>
+inline void vec_store_nta_partial_lo (typename vec_t<T>::scalar_t& p,
+                                      vec_t<T> const& x,
+                                      size_t const cnt)
+{
+  // Unreachable for this implementation: partial stores only make
+  // sense for 0 < cnt < size(), and size() is 1 here
+  assert(0);
+}
+// Store the cnt higher elements of a vector, bypassing the cache if
+// possible. This stores the vector elements into memory locations as
+// if element 0 were stored at p.
+template<typename T>
+inline void vec_store_nta_partial_hi (typename vec_t<T>::scalar_t& p,
+                                      vec_t<T> const& x,
+                                      size_t const cnt)
+{
+  // Unreachable for this implementation (see above)
+  assert(0);
+}
+
+// Arithmetic operators, forwarding to the scalar type. The results
+// are converted back to vec_t<T> via its scalar constructor.
+//
+// Unary operators
+template<typename T>
+inline vec_t<T> operator+ (vec_t<T> const& x)
+{
+  return +x.v;
+}
+template<typename T>
+inline vec_t<T> operator- (vec_t<T> const& x)
+{
+  return -x.v;
+}
+
+// Binary operators
+template<typename T>
+inline vec_t<T> operator+ (vec_t<T> const& x, vec_t<T> const& y)
+{
+  return x.v + y.v;
+}
+template<typename T>
+inline vec_t<T> operator- (vec_t<T> const& x, vec_t<T> const& y)
+{
+  return x.v - y.v;
+}
+template<typename T>
+inline vec_t<T> operator* (vec_t<T> const& x, vec_t<T> const& y)
+{
+  return x.v * y.v;
+}
+template<typename T>
+inline vec_t<T> operator/ (vec_t<T> const& x, vec_t<T> const& y)
+{
+  return x.v / y.v;
+}
+
+// Compound assignment operators
+template<typename T>
+inline vec_t<T>& operator+= (vec_t<T>& x, vec_t<T> const& y)
+{
+  x.v += y.v;
+  return x;
+}
+template<typename T>
+inline vec_t<T>& operator-= (vec_t<T>& x, vec_t<T> const& y)
+{
+  x.v -= y.v;
+  return x;
+}
+template<typename T>
+inline vec_t<T>& operator*= (vec_t<T>& x, vec_t<T> const& y)
+{
+  x.v *= y.v;
+  return x;
+}
+template<typename T>
+inline vec_t<T>& operator/= (vec_t<T>& x, vec_t<T> const& y)
+{
+  x.v /= y.v;
+  return x;
+}
+
+// Elementwise mathematical functions, forwarding to the scalar
+// versions (e.g. from <cmath>)
+template<typename T>
+inline vec_t<T> exp (vec_t<T> const& x)
+{
+  return exp(x.v);
+}
+template<typename T>
+inline vec_t<T> fabs (vec_t<T> const& x)
+{
+  return fabs(x.v);
+}
+template<typename T>
+inline vec_t<T> log (vec_t<T> const& x)
+{
+  return log(x.v);
+}
+template<typename T>
+inline vec_t<T> sqrt (vec_t<T> const& x)
+{
+  return sqrt(x.v);
+}
+
+template<typename T>
+inline vec_t<T> fmax (vec_t<T> const& x, vec_t<T> const& y)
+{
+  return fmax(x.v, y.v);
+}
+template<typename T>
+inline vec_t<T> fmin (vec_t<T> const& x, vec_t<T> const& y)
+{
+  return fmin(x.v, y.v);
+}
+template<typename T>
+inline vec_t<T> pow (vec_t<T> const& x, typename vec_t<T>::scalar_t const& a)
+{
+  return pow(x.v, a);
+}
+// Overload for integer exponents
+template<typename T>
+inline vec_t<T> pow (vec_t<T> const& x, int const& i)
+{
+  return pow(x.v, i);
+}
diff --git a/src/indirect/vectors-intel.hh b/src/indirect/vectors-intel.hh
new file mode 100644
index 0000000..29da84f
--- /dev/null
+++ b/src/indirect/vectors-intel.hh
@@ -0,0 +1,390 @@
+#include <cmath>
+#include <cstdlib>
+using namespace std;
+
+
+
+#if defined(__SSE__) // SSE (Intel)
+
+#include <xmmintrin.h>
+
+// Specialisation for float using SSE (4 floats per vector)
+template<>
+struct vec_t<float> {
+  typedef float scalar_t;
+  typedef __m128 impl_t;
+  impl_t v;
+
+  // Vector size (number of elements)
+  static inline size_t size()
+  {
+    return sizeof(impl_t)/sizeof(scalar_t);
+  }
+
+  // Constructors
+  inline vec_t ()
+  {
+  }
+  inline vec_t (scalar_t const& a)
+    : v(_mm_set1_ps(a))
+  {
+  }
+  inline vec_t (scalar_t const& a0, scalar_t const& a1,
+                scalar_t const& a2, scalar_t const& a3)
+    : v(_mm_set_ps(a3,a2,a1,a0)) // reverse order!
+  {
+  }
+
+  inline vec_t (impl_t const& w)
+    : v(w)
+  {
+  }
+  inline operator impl_t ()
+  {
+    return v;
+  }
+
+private:
+  // Extract element 0 of a vector
+  static inline scalar_t elt0 (impl_t const& v)
+  {
+    return _mm_cvtss_f32(v); // this is a no-op
+  }
+public:
+  // Access individual vector elements by moving the requested element
+  // into position 0 and extracting it. _MM_SHUFFLE's last argument
+  // selects the element that ends up in position 0.
+  inline scalar_t operator[] (size_t const d)
+  {
+    switch (d) {
+    case 0: return elt0(v);
+    case 1: return elt0(_mm_shuffle_ps(v,v,_MM_SHUFFLE(1,1,1,1)));
+    case 2: return elt0(_mm_unpackhi_ps(v,v));
+    default: return elt0(_mm_shuffle_ps(v,v,_MM_SHUFFLE(3,3,3,3)));
+    }
+  }
+
+  static inline vec_t load (scalar_t const& a)
+  {
+    return _mm_load_ps(&a);
+  }
+  static inline vec_t loadu (scalar_t const& a)
+  {
+    return _mm_loadu_ps(&a);
+  }
+  // Load a vector from memory that may or may not be aligned, as
+  // decided by the offset and the vector size
+  static inline vec_t loadu_maybe (int const off, scalar_t const& p)
+  {
+    if (off % size() == 0) {
+      return load(p);
+    } else {
+      return loadu(p);
+    }
+  }
+  static inline vec_t loadu_maybe3 (int const off0, int const off1,
+                                    int const off2,
+                                    scalar_t const& p)
+  {
+    if (off0 % size() == 0 and off1 % size() == 0 and off2 % size() == 0) {
+      return load(p);
+    } else {
+      return loadu(p);
+    }
+  }
+  inline void store (scalar_t& p) const
+  {
+    _mm_store_ps(&p,v);
+  }
+  inline void storeu (scalar_t& p) const
+  {
+    _mm_storeu_ps(&p,v);
+  }
+  inline void store_nta (scalar_t& p) const
+  {
+    _mm_stream_ps(&p,v);
+  }
+  // Store the cnt lower elements; the case labels fall through
+  // deliberately. (Subscripting v relies on GCC vector extensions.)
+  inline void store_nta_partial_lo (scalar_t& p, size_t const cnt) const
+  {
+    switch (cnt) {
+    case 4: store_nta(p); break;
+    case 3: (&p)[2]=v[2];
+    case 2: (&p)[1]=v[1];
+    case 1: (&p)[0]=v[0];
+    }
+  }
+  // Store the cnt higher elements, as if element 0 were stored at p
+  inline void store_nta_partial_hi (scalar_t& p, size_t const cnt) const
+  {
+    switch (cnt) {
+    case 4: store_nta(p); break;
+    case 3: (&p)[1]=v[1];
+    case 2: (&p)[2]=v[2];
+    case 1: (&p)[3]=v[3];
+    }
+  }
+
+  inline vec_t operator+ () const
+  {
+    return +v;
+  }
+  inline vec_t operator- () const
+  {
+    return -v;
+  }
+  inline vec_t operator+ (vec_t const& x) const
+  {
+    return v+x.v;
+  }
+  inline vec_t operator- (vec_t const& x) const
+  {
+    return v-x.v;
+  }
+  inline vec_t operator* (vec_t const& x) const
+  {
+    return v*x.v;
+  }
+  inline vec_t operator/ (vec_t const& x) const
+  {
+    return v/x.v;
+  }
+  // Compound assignments (the original versions used the wrong
+  // operation: -= used +, *= used -, /= did not compile)
+  inline vec_t& operator+= (vec_t const& x)
+  {
+    return *this=*this+x;
+  }
+  inline vec_t& operator-= (vec_t const& x)
+  {
+    return *this=*this-x;
+  }
+  inline vec_t& operator*= (vec_t const& x)
+  {
+    return *this=*this*x;
+  }
+  inline vec_t& operator/= (vec_t const& x)
+  {
+    return *this=*this/x;
+  }
+};
+
+// Elementwise mathematical functions. These are non-template
+// overloads for vec_t<float>: the original template versions did not
+// compile (missing template headers, references to undeclared names)
+// and would have clashed with the generic default implementation.
+// Subscripting x.v relies on GCC vector extensions; transcendental
+// functions fall back to per-element libm calls.
+inline vec_t<float> exp (vec_t<float> const& x)
+{
+  return vec_t<float>(exp(x.v[0]), exp(x.v[1]), exp(x.v[2]), exp(x.v[3]));
+}
+inline vec_t<float> fabs (vec_t<float> const& x)
+{
+  // Clear the sign bit; -0.0f has only the sign bit set
+  return _mm_andnot_ps(_mm_set1_ps(-0.0f), x.v);
+}
+inline vec_t<float> fmax (vec_t<float> const& x, vec_t<float> const& y)
+{
+  return _mm_max_ps(x.v, y.v);
+}
+inline vec_t<float> fmin (vec_t<float> const& x, vec_t<float> const& y)
+{
+  return _mm_min_ps(x.v, y.v);
+}
+// Select x or y depending on a (scalar) condition
+inline vec_t<float> ifthen (bool const b, vec_t<float> const& x,
+                            vec_t<float> const& y)
+{
+  return b ? x : y;
+}
+inline vec_t<float> log (vec_t<float> const& x)
+{
+  return vec_t<float>(log(x.v[0]), log(x.v[1]), log(x.v[2]), log(x.v[3]));
+}
+inline vec_t<float> pow (vec_t<float> const& x, float const& a)
+{
+  return vec_t<float>(pow(x.v[0],a), pow(x.v[1],a),
+                      pow(x.v[2],a), pow(x.v[3],a));
+}
+inline vec_t<float> sqrt (vec_t<float> const& x)
+{
+  return _mm_sqrt_ps(x.v);
+}
+
+
+
+#if defined(__SSE2__) // SSE2 (Intel)
+
+#include <emmintrin.h>
+
+// Specialisation for double using SSE2 (2 doubles per vector)
+template<>
+struct vec_t<double> {
+  typedef double scalar_t;
+  typedef __m128d impl_t;
+  impl_t v;
+
+  // Vector size (number of elements)
+  static inline size_t size()
+  {
+    return sizeof(impl_t)/sizeof(scalar_t);
+  }
+
+  // Constructors
+  inline vec_t ()
+  {
+  }
+  inline vec_t (scalar_t const& a)
+    : v(_mm_set1_pd(a))
+  {
+  }
+  inline vec_t (scalar_t const& a0, scalar_t const& a1)
+    : v(_mm_set_pd(a1,a0)) // reverse order!
+  {
+  }
+
+  inline vec_t (impl_t const& w)
+    : v(w)
+  {
+  }
+  inline operator impl_t ()
+  {
+    return v;
+  }
+
+private:
+  // Extract element 0 of a vector
+  static inline scalar_t elt0 (impl_t const& v)
+  {
+    return _mm_cvtsd_f64(v); // this is a no-op
+  }
+public:
+  // Access individual vector elements (d must be 0 or 1)
+  inline scalar_t operator[] (size_t const d)
+  {
+    switch (d) {
+    case 0:  return elt0(v);
+    default: return elt0(_mm_unpackhi_pd(v,v));
+    }
+  }
+
+  static inline vec_t load (scalar_t const& a)
+  {
+    return _mm_load_pd(&a);
+  }
+  static inline vec_t loadu (scalar_t const& a)
+  {
+    return _mm_loadu_pd(&a);
+  }
+  // Load a vector from memory that may or may not be aligned, as
+  // decided by the offset and the vector size
+  static inline vec_t loadu_maybe (int const off, scalar_t const& p)
+  {
+    if (off % size() == 0) {
+      return load(p);
+    } else {
+      return loadu(p);
+    }
+  }
+  static inline vec_t loadu_maybe3 (int const off0, int const off1,
+                                    int const off2,
+                                    scalar_t const& p)
+  {
+    if (off0 % size() == 0 and off1 % size() == 0 and off2 % size() == 0) {
+      return load(p);
+    } else {
+      return loadu(p);
+    }
+  }
+  inline void store (scalar_t& p) const
+  {
+    _mm_store_pd(&p,v);
+  }
+  inline void storeu (scalar_t& p) const
+  {
+    _mm_storeu_pd(&p,v);
+  }
+  inline void store_nta (scalar_t& p) const
+  {
+    _mm_stream_pd(&p,v);
+  }
+  // Store the cnt lower elements (subscripting v relies on GCC
+  // vector extensions)
+  inline void store_nta_partial_lo (scalar_t& p, size_t const cnt) const
+  {
+    switch (cnt) {
+    case 2: store_nta(p); break;
+    case 1: (&p)[0]=v[0];
+    }
+  }
+  // Store the cnt higher elements, as if element 0 were stored at p
+  inline void store_nta_partial_hi (scalar_t& p, size_t const cnt) const
+  {
+    switch (cnt) {
+    case 2: store_nta(p); break;
+    case 1: (&p)[1]=v[1];
+    }
+  }
+
+  inline vec_t operator+ () const
+  {
+    return +v;
+  }
+  inline vec_t operator- () const
+  {
+    return -v;
+  }
+  inline vec_t operator+ (vec_t const& x) const
+  {
+    return v+x.v;
+  }
+  inline vec_t operator- (vec_t const& x) const
+  {
+    return v-x.v;
+  }
+  inline vec_t operator* (vec_t const& x) const
+  {
+    return v*x.v;
+  }
+  inline vec_t operator/ (vec_t const& x) const
+  {
+    return v/x.v;
+  }
+  // Compound assignments (the original versions used the wrong
+  // operation: -= used +, *= used -, /= did not compile)
+  inline vec_t& operator+= (vec_t const& x)
+  {
+    return *this=*this+x;
+  }
+  inline vec_t& operator-= (vec_t const& x)
+  {
+    return *this=*this-x;
+  }
+  inline vec_t& operator*= (vec_t const& x)
+  {
+    return *this=*this*x;
+  }
+  inline vec_t& operator/= (vec_t const& x)
+  {
+    return *this=*this/x;
+  }
+};
+
+// Elementwise mathematical functions; non-template overloads for
+// vec_t<double> (the original template versions did not compile, and
+// _mm_set1_epi64 is an MMX intrinsic taking __m64).
+inline vec_t<double> exp (vec_t<double> const& x)
+{
+  return vec_t<double>(exp(x.v[0]), exp(x.v[1]));
+}
+inline vec_t<double> fabs (vec_t<double> const& x)
+{
+  // Clear the sign bit; -0.0 has only the sign bit set
+  return _mm_andnot_pd(_mm_set1_pd(-0.0), x.v);
+}
+inline vec_t<double> fmax (vec_t<double> const& x, vec_t<double> const& y)
+{
+  return _mm_max_pd(x.v, y.v);
+}
+inline vec_t<double> fmin (vec_t<double> const& x, vec_t<double> const& y)
+{
+  return _mm_min_pd(x.v, y.v);
+}
+// Select x or y depending on a (scalar) condition
+inline vec_t<double> ifthen (bool const b, vec_t<double> const& x,
+                             vec_t<double> const& y)
+{
+  return b ? x : y;
+}
+inline vec_t<double> log (vec_t<double> const& x)
+{
+  return vec_t<double>(log(x.v[0]), log(x.v[1]));
+}
+inline vec_t<double> pow (vec_t<double> const& x, double const& a)
+{
+  return vec_t<double>(pow(x.v[0],a), pow(x.v[1],a));
+}
+inline vec_t<double> sqrt (vec_t<double> const& x)
+{
+  return _mm_sqrt_pd(x.v);
+}
diff --git a/src/indirect/vectors-power.hh b/src/indirect/vectors-power.hh
new file mode 100644
index 0000000..37a8786
--- /dev/null
+++ b/src/indirect/vectors-power.hh
@@ -0,0 +1,360 @@
+#include <cmath>
+#include <cstdlib>
+using namespace std;
+
+
+
+#if defined(__ALTIVEC__) // Altivec (Power)
+
+#include <altivec.h>
+
+// Specialisation for float using Altivec (4 floats per vector)
+template<>
+struct vec_t<float> {
+  typedef float scalar_t;
+  typedef vector float impl_t;
+  // The payload (this member was missing in the original)
+  impl_t v;
+
+  // Vector size (number of elements)
+  static inline size_t size()
+  {
+    return sizeof(impl_t)/sizeof(scalar_t);
+  }
+
+  // Constructors
+  inline vec_t ()
+  {
+  }
+  inline vec_t (scalar_t const& a)
+    : v(vec_splats(a))
+  {
+  }
+  // Element-wise initialisation (subscripting v relies on GCC vector
+  // extensions)
+  inline vec_t (scalar_t const& a0, scalar_t const& a1,
+                scalar_t const& a2, scalar_t const& a3)
+  {
+    v[0]=a0; v[1]=a1; v[2]=a2; v[3]=a3;
+  }
+
+  inline vec_t (impl_t const& w)
+    : v(w)
+  {
+  }
+  inline operator impl_t ()
+  {
+    return v;
+  }
+
+  static inline vec_t load (scalar_t const& a)
+  {
+    return *(impl_t const*)&a;
+  }
+  static inline vec_t loadu (scalar_t const& a)
+  {
+    // Altivec has no single unaligned load instruction; fall back to
+    // an element-wise load.
+    // TODO: use vec_lvsl/vec_perm for a faster unaligned load
+    return vec_t((&a)[0], (&a)[1], (&a)[2], (&a)[3]);
+  }
+  // Load a vector from memory that may or may not be aligned, as
+  // decided by the offset and the vector size
+  static inline vec_t loadu_maybe (int const off, scalar_t const& p)
+  {
+    if (off % size() == 0) {
+      return load(p);
+    } else {
+      return loadu(p);
+    }
+  }
+  static inline vec_t loadu_maybe3 (int const off0, int const off1,
+                                    int const off2,
+                                    scalar_t const& p)
+  {
+    if (off0 % size() == 0 and off1 % size() == 0 and off2 % size() == 0) {
+      return load(p);
+    } else {
+      return loadu(p);
+    }
+  }
+  inline void store (scalar_t& p) const
+  {
+    // Take the address of the reference (the original cast p itself)
+    *(impl_t*)&p = v;
+  }
+  inline void storeu (scalar_t& p) const
+  {
+    store(p);
+  }
+  inline void store_nta (scalar_t& p) const
+  {
+    // TODO: Use stvxl instruction?
+    store(p);
+  }
+  // Store the cnt lower elements; the case labels fall through
+  // deliberately
+  inline void store_nta_partial_lo (scalar_t& p, size_t const cnt) const
+  {
+    switch (cnt) {
+    case 4: store_nta(p); break;
+    case 3: (&p)[2]=v[2];
+    case 2: (&p)[1]=v[1];
+    case 1: (&p)[0]=v[0];
+    }
+  }
+  // Store the cnt higher elements, as if element 0 were stored at p
+  inline void store_nta_partial_hi (scalar_t& p, size_t const cnt) const
+  {
+    switch (cnt) {
+    case 4: store_nta(p); break;
+    case 3: (&p)[1]=v[1];
+    case 2: (&p)[2]=v[2];
+    case 1: (&p)[3]=v[3];
+    }
+  }
+
+  inline vec_t operator+ () const
+  {
+    return +v;
+  }
+  inline vec_t operator- () const
+  {
+    return -v;
+  }
+  inline vec_t operator+ (vec_t const& x) const
+  {
+    return v+x.v;
+  }
+  inline vec_t operator- (vec_t const& x) const
+  {
+    return v-x.v;
+  }
+  inline vec_t operator* (vec_t const& x) const
+  {
+    return v*x.v;
+  }
+  inline vec_t operator/ (vec_t const& x) const
+  {
+    return v/x.v;
+  }
+  // Compound assignments (the original versions used the wrong
+  // operation: -= used +, *= used -, /= did not compile)
+  inline vec_t& operator+= (vec_t const& x)
+  {
+    return *this=*this+x;
+  }
+  inline vec_t& operator-= (vec_t const& x)
+  {
+    return *this=*this-x;
+  }
+  inline vec_t& operator*= (vec_t const& x)
+  {
+    return *this=*this*x;
+  }
+  inline vec_t& operator/= (vec_t const& x)
+  {
+    return *this=*this/x;
+  }
+};
+
+// Elementwise mathematical functions; non-template overloads for
+// vec_t<float> (the original template versions did not compile).
+// Transcendental functions fall back to per-element libm calls.
+inline vec_t<float> exp (vec_t<float> const& x)
+{
+  return vec_t<float>(exp(x.v[0]), exp(x.v[1]), exp(x.v[2]), exp(x.v[3]));
+}
+inline vec_t<float> fabs (vec_t<float> const& x)
+{
+  return vec_abs(x.v);
+}
+inline vec_t<float> fmax (vec_t<float> const& x, vec_t<float> const& y)
+{
+  return vec_max(x.v, y.v);
+}
+inline vec_t<float> fmin (vec_t<float> const& x, vec_t<float> const& y)
+{
+  return vec_min(x.v, y.v);
+}
+// Select x or y depending on a (scalar) condition
+inline vec_t<float> ifthen (bool const b, vec_t<float> const& x,
+                            vec_t<float> const& y)
+{
+  return b ? x : y;
+}
+inline vec_t<float> log (vec_t<float> const& x)
+{
+  return vec_t<float>(log(x.v[0]), log(x.v[1]), log(x.v[2]), log(x.v[3]));
+}
+inline vec_t<float> pow (vec_t<float> const& x, float const& a)
+{
+  return vec_t<float>(pow(x.v[0],a), pow(x.v[1],a),
+                      pow(x.v[2],a), pow(x.v[3],a));
+}
+inline vec_t<float> sqrt (vec_t<float> const& x)
+{
+  return vec_t<float>(sqrt(x.v[0]), sqrt(x.v[1]),
+                      sqrt(x.v[2]), sqrt(x.v[3]));
+}
+
+
+
+#if defined(__ALTIVEC__) && defined(_ARCH_PWR7) // Altivec VSX (Power)
+
+#include <altivec.h>
+
+// Specialisation for double using VSX (2 doubles per vector)
+template<>
+struct vec_t<double> {
+  typedef double scalar_t;
+  typedef vector double impl_t;
+  // The payload (this member was missing in the original)
+  impl_t v;
+
+  // Vector size (number of elements)
+  static inline size_t size()
+  {
+    return sizeof(impl_t)/sizeof(scalar_t);
+  }
+
+  // Constructors
+  inline vec_t ()
+  {
+  }
+  inline vec_t (scalar_t const& a)
+    : v(vec_splats(a))
+  {
+  }
+  // Element-wise initialisation (subscripting v relies on GCC vector
+  // extensions)
+  inline vec_t (scalar_t const& a0, scalar_t const& a1)
+  {
+    v[0]=a0; v[1]=a1;
+  }
+
+  inline vec_t (impl_t const& w)
+    : v(w)
+  {
+  }
+  inline operator impl_t ()
+  {
+    return v;
+  }
+
+  static inline vec_t load (scalar_t const& a)
+  {
+    return *(impl_t const*)&a;
+  }
+  static inline vec_t loadu (scalar_t const& a)
+  {
+    // No single unaligned load instruction; fall back to an
+    // element-wise load.
+    // TODO: use vec_lvsl/vec_perm for a faster unaligned load
+    return vec_t((&a)[0], (&a)[1]);
+  }
+  // Load a vector from memory that may or may not be aligned, as
+  // decided by the offset and the vector size
+  static inline vec_t loadu_maybe (int const off, scalar_t const& p)
+  {
+    if (off % size() == 0) {
+      return load(p);
+    } else {
+      return loadu(p);
+    }
+  }
+  static inline vec_t loadu_maybe3 (int const off0, int const off1,
+                                    int const off2,
+                                    scalar_t const& p)
+  {
+    if (off0 % size() == 0 and off1 % size() == 0 and off2 % size() == 0) {
+      return load(p);
+    } else {
+      return loadu(p);
+    }
+  }
+  inline void store (scalar_t& p) const
+  {
+    // Take the address of the reference (the original cast p itself)
+    *(impl_t*)&p = v;
+  }
+  inline void storeu (scalar_t& p) const
+  {
+    store(p);
+  }
+  inline void store_nta (scalar_t& p) const
+  {
+    // TODO: Use stvxl instruction?
+    store(p);
+  }
+  // Store the cnt lower elements
+  inline void store_nta_partial_lo (scalar_t& p, size_t const cnt) const
+  {
+    switch (cnt) {
+    case 2: store_nta(p); break;
+    case 1: (&p)[0]=v[0];
+    }
+  }
+  // Store the cnt higher elements, as if element 0 were stored at p
+  inline void store_nta_partial_hi (scalar_t& p, size_t const cnt) const
+  {
+    switch (cnt) {
+    case 2: store_nta(p); break;
+    case 1: (&p)[1]=v[1];
+    }
+  }
+
+  inline vec_t operator+ () const
+  {
+    return +v;
+  }
+  inline vec_t operator- () const
+  {
+    return -v;
+  }
+  inline vec_t operator+ (vec_t const& x) const
+  {
+    return v+x.v;
+  }
+  inline vec_t operator- (vec_t const& x) const
+  {
+    return v-x.v;
+  }
+  inline vec_t operator* (vec_t const& x) const
+  {
+    return v*x.v;
+  }
+  inline vec_t operator/ (vec_t const& x) const
+  {
+    return v/x.v;
+  }
+  // Compound assignments (the original versions used the wrong
+  // operation: -= used +, *= used -, /= did not compile)
+  inline vec_t& operator+= (vec_t const& x)
+  {
+    return *this=*this+x;
+  }
+  inline vec_t& operator-= (vec_t const& x)
+  {
+    return *this=*this-x;
+  }
+  inline vec_t& operator*= (vec_t const& x)
+  {
+    return *this=*this*x;
+  }
+  inline vec_t& operator/= (vec_t const& x)
+  {
+    return *this=*this/x;
+  }
+};
+
+// Elementwise mathematical functions; non-template overloads for
+// vec_t<double> (the original template versions did not compile).
+// Transcendental functions fall back to per-element libm calls.
+inline vec_t<double> exp (vec_t<double> const& x)
+{
+  return vec_t<double>(exp(x.v[0]), exp(x.v[1]));
+}
+inline vec_t<double> fabs (vec_t<double> const& x)
+{
+  return vec_abs(x.v);
+}
+inline vec_t<double> fmax (vec_t<double> const& x, vec_t<double> const& y)
+{
+  return vec_max(x.v, y.v);
+}
+inline vec_t<double> fmin (vec_t<double> const& x, vec_t<double> const& y)
+{
+  return vec_min(x.v, y.v);
+}
+// Select x or y depending on a (scalar) condition
+inline vec_t<double> ifthen (bool const b, vec_t<double> const& x,
+                             vec_t<double> const& y)
+{
+  return b ? x : y;
+}
+inline vec_t<double> log (vec_t<double> const& x)
+{
+  return vec_t<double>(log(x.v[0]), log(x.v[1]));
+}
+inline vec_t<double> pow (vec_t<double> const& x, double const& a)
+{
+  return vec_t<double>(pow(x.v[0],a), pow(x.v[1],a));
+}
+inline vec_t<double> sqrt (vec_t<double> const& x)
+{
+  return vec_t<double>(sqrt(x.v[0]), sqrt(x.v[1]));
+}
diff --git a/src/indirect/vectors-pseudo.hh b/src/indirect/vectors-pseudo.hh
new file mode 100644
index 0000000..a5cca83
--- /dev/null
+++ b/src/indirect/vectors-pseudo.hh
@@ -0,0 +1,183 @@
+#include <cmath>
+#include <cstdlib>
+using namespace std;
+
+// Pseudo-vectorisation: a fixed-size array of scalars, operated on
+// element by element. Useful for testing the vector interface.
+template<typename T>
+struct vec_t {
+  // Vector size; static so that it is a compile-time constant usable
+  // as an array bound (the original non-static member was not)
+  static size_t const D = 2;
+  typedef T scalar_t;
+  typedef T impl_t;
+  // The payload: D scalars
+  impl_t v[D];
+
+  static inline size_t size()
+  {
+    return D;
+  }
+
+  inline vec_t ()
+  {
+  }
+  inline vec_t (scalar_t const& a)
+  {
+    for (size_t d=0; d<D; ++d) v[d]=a;
+  }
+
+  // NOTE(review): the original conversion operator to impl_t was
+  // removed: the payload is an array of D elements, so there is no
+  // meaningful single impl_t to convert to (it did not compile).
+
+  inline scalar_t operator[] (size_t const d) const
+  {
+    return v[d];
+  }
+
+  static inline vec_t load (scalar_t const& a)
+  {
+    vec_t r;
+    for (size_t d=0; d<D; ++d) r.v[d]=(&a)[d];
+    return r;
+  }
+  static inline vec_t loadu (scalar_t const& a)
+  {
+    return load(a);
+  }
+  // Load a vector from memory that may or may not be aligned, as
+  // decided by the offset and the vector size
+  static inline vec_t loadu_maybe (int const off, scalar_t const& p)
+  {
+    return load(p);             // was load(a): no "a" in scope
+  }
+  static inline vec_t loadu_maybe3 (int const off0, int const off1,
+                                    int const off2,
+                                    scalar_t const& p)
+  {
+    return load(p);             // was load(a): no "a" in scope
+  }
+  inline void store (scalar_t& p) const
+  {
+    for (size_t d=0; d<D; ++d) (&p)[d]=v[d];
+  }
+  inline void storeu (scalar_t& p) const
+  {
+    store(p);
+  }
+  inline void store_nta (scalar_t& p) const
+  {
+    store(p);
+  }
+  // Store the cnt lower elements
+  inline void store_nta_partial_lo (scalar_t& p, size_t const cnt) const
+  {
+    for (size_t d=0; d<cnt; ++d) (&p)[d]=v[d];
+  }
+  // Store the cnt higher elements, as if element 0 were stored at p
+  inline void store_nta_partial_hi (scalar_t& p, size_t const cnt) const
+  {
+    for (size_t d=D-cnt; d<D; ++d) (&p)[d]=v[d];
+  }
+
+  inline vec_t operator+ () const
+  {
+    vec_t r=*this;
+    for (size_t d=0; d<D; ++d) r.v[d]=+v[d];
+    return r;
+  }
+  inline vec_t operator- () const
+  {
+    vec_t r=*this;
+    for (size_t d=0; d<D; ++d) r.v[d]=-v[d];
+    return r;
+  }
+  inline vec_t operator+ (vec_t const& x) const
+  {
+    vec_t r=*this;
+    return r+=x;
+  }
+  inline vec_t operator- (vec_t const& x) const
+  {
+    vec_t r=*this;
+    return r-=x;
+  }
+  inline vec_t operator* (vec_t const& x) const
+  {
+    vec_t r=*this;
+    return r*=x;
+  }
+  inline vec_t operator/ (vec_t const& x) const
+  {
+    vec_t r=*this;
+    return r/=x;
+  }
+  inline vec_t& operator+= (vec_t const& x)
+  {
+    for (size_t d=0; d<D; ++d) v[d]+=x.v[d];
+    return *this;
+  }
+  inline vec_t& operator-= (vec_t const& x)
+  {
+    for (size_t d=0; d<D; ++d) v[d]-=x.v[d];
+    return *this;
+  }
+  inline vec_t& operator*= (vec_t const& x)
+  {
+    for (size_t d=0; d<D; ++d) v[d]*=x.v[d];
+    return *this;
+  }
+  inline vec_t& operator/= (vec_t const& x)
+  {
+    for (size_t d=0; d<D; ++d) v[d]/=x.v[d];
+    return *this;
+  }
+};
+
+// Elementwise mathematical functions, forwarding to the scalar
+// versions. (The original bodies used "vec_t r" without template
+// arguments and the member constant D, which is not in scope here;
+// log and sqrt were also missing their template headers.)
+template<typename T>
+vec_t<T> exp (vec_t<T> const& x)
+{
+  vec_t<T> r;
+  for (size_t d=0; d<vec_t<T>::size(); ++d) r.v[d]=exp(x.v[d]);
+  return r;
+}
+template<typename T>
+vec_t<T> fabs (vec_t<T> const& x)
+{
+  vec_t<T> r;
+  for (size_t d=0; d<vec_t<T>::size(); ++d) r.v[d]=fabs(x.v[d]);
+  return r;
+}
+template<typename T>
+vec_t<T> fmax (vec_t<T> const& x, vec_t<T> const& y)
+{
+  vec_t<T> r;
+  for (size_t d=0; d<vec_t<T>::size(); ++d) r.v[d]=fmax(x.v[d],y.v[d]);
+  return r;
+}
+template<typename T>
+vec_t<T> fmin (vec_t<T> const& x, vec_t<T> const& y)
+{
+  vec_t<T> r;
+  for (size_t d=0; d<vec_t<T>::size(); ++d) r.v[d]=fmin(x.v[d],y.v[d]);
+  return r;
+}
+// Select x or y depending on a (scalar) condition
+template<typename T>
+vec_t<T> ifthen (bool const b, vec_t<T> const& x, vec_t<T> const& y)
+{
+  return b ? x : y;
+}
+template<typename T>
+vec_t<T> log (vec_t<T> const& x)
+{
+  vec_t<T> r;
+  for (size_t d=0; d<vec_t<T>::size(); ++d) r.v[d]=log(x.v[d]);
+  return r;
+}
+template<typename T>
+vec_t<T> pow (vec_t<T> const& x, typename vec_t<T>::scalar_t const& a)
+{
+  vec_t<T> r;
+  for (size_t d=0; d<vec_t<T>::size(); ++d) r.v[d]=pow(x.v[d],a);
+  return r;
+}
+template<typename T>
+vec_t<T> sqrt (vec_t<T> const& x)
+{
+  vec_t<T> r;
+  for (size_t d=0; d<vec_t<T>::size(); ++d) r.v[d]=sqrt(x.v[d]);
+  return r;
+}
diff --git a/src/indirect/vectors.hh b/src/indirect/vectors.hh
new file mode 100644
index 0000000..a9f51db
--- /dev/null
+++ b/src/indirect/vectors.hh
@@ -0,0 +1,19 @@
+#ifndef VECTORS_HH
+#define VECTORS_HH
+
+#include <cassert>
+#include <cmath>
+#include <cstdlib>
+
+  // Default vector implementation, does not vectorise
+#include "vectors-default.hh"
+
+// NOTE(review): the architecture-specific specialisations below are
+// currently disabled (#if 0); only the scalar default implementation
+// is active. Enable once the specialisations compile.
+#if 0
+  // Intel SSE vector instructions
+#include "vectors-intel.hh"
+
+  // Power (Altivec) vector instructions
+#include "vectors-power.hh"
+#endif
+
+#endif // #ifndef VECTORS_HH