From 96b656fc2d33f2d5238db7e8de17b11c5893084b Mon Sep 17 00:00:00 2001 From: eschnett Date: Mon, 29 Nov 2010 16:40:49 +0000 Subject: Add initial implementation git-svn-id: https://svn.cct.lsu.edu/repos/numrel/LSUThorns/Vectors/trunk@3 105869f7-3296-0410-a4ea-f4349344b45a --- doc/documentation.tex | 144 +++++++++++++++ src/indirect/vectors-default.hh | 226 +++++++++++++++++++++++ src/indirect/vectors-intel.hh | 390 ++++++++++++++++++++++++++++++++++++++++ src/indirect/vectors-power.hh | 360 +++++++++++++++++++++++++++++++++++++ src/indirect/vectors-pseudo.hh | 183 +++++++++++++++++++ src/indirect/vectors.hh | 19 ++ 6 files changed, 1322 insertions(+) create mode 100644 doc/documentation.tex create mode 100644 src/indirect/vectors-default.hh create mode 100644 src/indirect/vectors-intel.hh create mode 100644 src/indirect/vectors-power.hh create mode 100644 src/indirect/vectors-pseudo.hh create mode 100644 src/indirect/vectors.hh diff --git a/doc/documentation.tex b/doc/documentation.tex new file mode 100644 index 0000000..55a3762 --- /dev/null +++ b/doc/documentation.tex @@ -0,0 +1,144 @@ +% *======================================================================* +% Cactus Thorn template for ThornGuide documentation +% Author: Ian Kelley +% Date: Sun Jun 02, 2002 +% $Header$ +% +% Thorn documentation in the latex file doc/documentation.tex +% will be included in ThornGuides built with the Cactus make system. +% The scripts employed by the make system automatically include +% pages about variables, parameters and scheduling parsed from the +% relevant thorn CCL files. +% +% This template contains guidelines which help to assure that your +% documentation will be correctly added to ThornGuides. More +% information is available in the Cactus UsersGuide. +% +% Guidelines: +% - Do not change anything before the line +% % START CACTUS THORNGUIDE", +% except for filling in the title, author, date, etc. fields. +% - Each of these fields should only be on ONE line. +% - Author names should be separated with a \\ or a comma. +% - You can define your own macros, but they must appear after +% the START CACTUS THORNGUIDE line, and must not redefine standard +% latex commands. +% - To avoid name clashes with other thorns, 'labels', 'citations', +% 'references', and 'image' names should conform to the following +% convention: +% ARRANGEMENT_THORN_LABEL +% For example, an image wave.eps in the arrangement CactusWave and +% thorn WaveToyC should be renamed to CactusWave_WaveToyC_wave.eps +% - Graphics should only be included using the graphicx package. +% More specifically, with the "\includegraphics" command. Do +% not specify any graphic file extensions in your .tex file. This +% will allow us to create a PDF version of the ThornGuide +% via pdflatex. +% - References should be included with the latex "\bibitem" command. +% - Use \begin{abstract}...\end{abstract} instead of \abstract{...} +% - Do not use \appendix, instead include any appendices you need as +% standard sections. +% - For the benefit of our Perl scripts, and for future extensions, +% please use simple latex. 
+%
+% *======================================================================*
+%
+% Example of including a graphic image:
+%    \begin{figure}[ht]
+%      \begin{center}
+%        \includegraphics[width=6cm]{MyArrangement_MyThorn_MyFigure}
+%      \end{center}
+%      \caption{Illustration of this and that}
+%      \label{MyArrangement_MyThorn_MyLabel}
+%    \end{figure}
+%
+% Example of using a label:
+%   \label{MyArrangement_MyThorn_MyLabel}
+%
+% Example of a citation:
+%   \cite{MyArrangement_MyThorn_Author99}
+%
+% Example of including a reference
+%   \bibitem{MyArrangement_MyThorn_Author99}
+%   {J. Author, {\em The Title of the Book, Journal, or periodical}, 1 (1999),
+%   1--16. {\tt http://www.nowhere.com/}}
+%
+% *======================================================================*
+
+% If you are using CVS use this line to give version information
+% $Header$
+
+\documentclass{article}
+
+% Use the Cactus ThornGuide style file
+% (Automatically used from Cactus distribution, if you have a
+%  thorn without the Cactus Flesh download this from the Cactus
+%  homepage at www.cactuscode.org)
+\usepackage{../../../../doc/latex/cactus}
+
+\begin{document}
+
+% The author of the documentation
+\author{Erik Schnetter \textless schnetter@cct.lsu.edu\textgreater}
+
+% The title of the document (not necessarily the name of the Thorn)
+\title{Vectors}
+
+% the date your document was last changed, if your document is in CVS,
+% please use:
+%    \date{$ $Date: 2004-01-07 14:12:39 -0600 (Wed, 07 Jan 2004) $ $}
+\date{November 24, 2010}
+
+\maketitle
+
+% Do not delete next line
+% START CACTUS THORNGUIDE
+
+% Add all definitions used in this documentation here
+%   \def\mydef etc
+
+% Add an abstract for this thorn's documentation
+\begin{abstract}
+
+\end{abstract}
+
+% The following sections are suggestive only.
+% Remove them or add your own.
+
+\section{Introduction}
+
+\section{Physical System}
+
+\section{Numerical Implementation}
+
+\section{Using This Thorn}
+
+\subsection{Obtaining This Thorn}
+
+\subsection{Basic Usage}
+
+\subsection{Special Behaviour}
+
+\subsection{Interaction With Other Thorns}
+
+\subsection{Examples}
+
+\subsection{Support and Feedback}
+
+\section{History}
+
+\subsection{Thorn Source Code}
+
+\subsection{Thorn Documentation}
+
+\subsection{Acknowledgements}
+
+
+\begin{thebibliography}{9}
+
+\end{thebibliography}
+
+% Do not delete next line
+% END CACTUS THORNGUIDE
+
+\end{document}
diff --git a/doc/documentation.tex b/doc/documentation.tex
deleted? no
diff --git a/src/indirect/vectors-default.hh b/src/indirect/vectors-default.hh
new file mode 100644
index 0000000..9a1e1d3
--- /dev/null
+++ b/src/indirect/vectors-default.hh
@@ -0,0 +1,226 @@
+#include <cassert>
+#include <cmath>
+#include <cstddef>
+
+using namespace std;
+
+
+
+// A class template that provides a vectorised type for the underlying
+// scalar type T. This implementation does "nothing", i.e. it just
+// provides a "vector" class with a vector size of 1, forwarding all
+// operations to the scalar type.
+//
+// This implementation uses small integers in several places, e.g. in
+// the [] operator. This is efficient only if these integers are
+// compile time constants, so that the compiler can remove the
+// corresponding if and switch statements.
+template<typename T>
+struct vec_t {
+
+  // Names for the underlying scalar type, and for the vector type
+  // used to implement this class. For example, with SSE2, it would
+  // be scalar_t=double and impl_t=__m128d.
+  typedef T scalar_t;
+  typedef T impl_t;
+
+  // The payload -- the actual vector content
+  impl_t v;
+
+  // Vector size (number of elements)
+  static inline size_t size()
+  {
+    return sizeof(impl_t)/sizeof(scalar_t);
+  }
+
+  // Constructors
+  inline vec_t ()
+  {
+  }
+  inline vec_t (scalar_t const& a)
+    : v(a)
+  {
+  }
+
+  // Convert to the implementation vector type
+  inline operator impl_t () const
+  {
+    return v;
+  }
+
+  // Access individual vector elements
+  inline scalar_t operator[] (size_t const d) const
+  {
+    return v;
+  }
+
+  // Load vectors from memory. For convenience when using this class,
+  // these accept references to the scalar type instead of pointers to
+  // the vector type. These routines are static members of the class,
+  // so that they can be used as VEC::load(p); if they were
+  // stand-alone functions, one would have to write load<VEC>(p)
+  // instead.
+  //
+  // Aligned load
+  static inline vec_t load (scalar_t const& p)
+  {
+    return p;
+  }
+  // Unaligned load
+  static inline vec_t loadu (scalar_t const& p)
+  {
+    return p;
+  }
+  // Load a vector from memory that may or may not be aligned, as
+  // decided by the offset and the vector size. These functions are
+  // useful e.g. for loading neighbouring grid points while
+  // evaluating finite differencing stencils.
+  static inline vec_t loadu_maybe (int const off, scalar_t const& p)
+  {
+    return p;
+  }
+  static inline vec_t loadu_maybe3 (int const off0, int const off1,
+                                    int const off2,
+                                    scalar_t const& p)
+  {
+    return p;
+  }
+};
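+
+// Usage sketch (not compiled in): evaluating a centred first
+// derivative in the x direction with loadu_maybe3, so that the
+// aligned load path is taken whenever the x offset permits it. The
+// index helper "ind3d" and the grid array "u" are hypothetical names
+// for illustration only; CCTK_REAL is the Cactus scalar type.
+#if 0
+typedef vec_t<CCTK_REAL> VEC;
+VEC diffx (CCTK_REAL const* u, int const i, int const j, int const k,
+           int const di, CCTK_REAL const idx)
+{
+  // the two loads are offset by +/-di in the x direction only
+  VEC const up = VEC::loadu_maybe3(+di,0,0, u[ind3d(i+di,j,k)]);
+  VEC const um = VEC::loadu_maybe3(-di,0,0, u[ind3d(i-di,j,k)]);
+  return VEC(0.5*idx) * (up - um);
+}
+#endif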
+
+// Store vectors to memory. These routines are stand-alone functions,
+// so that they can be used as vec_store(p,x); if they were class
+// members, one would have to write x.store(p) instead, or possibly
+// VEC::store(p,x).
+//
+// Aligned store
+template<typename T>
+inline void vec_store (typename vec_t<T>::scalar_t& p, vec_t<T> const& x)
+{
+  p=x.v;
+}
+// Unaligned store
+template<typename T>
+inline void vec_storeu (typename vec_t<T>::scalar_t& p, vec_t<T> const& x)
+{
+  p=x.v;
+}
+// Non-temporal store, i.e. a store that bypasses the cache
+template<typename T>
+inline void vec_store_nta (typename vec_t<T>::scalar_t& p, vec_t<T> const& x)
+{
+  p=x.v;
+}
+// Store the cnt lower elements of a vector, bypassing the cache if
+// possible. Since this implementation has a vector size of 1, a
+// partial store with 0<cnt<size() can never occur, hence the assert.
+template<typename T>
+inline void vec_store_nta_partial_lo (typename vec_t<T>::scalar_t& p,
+                                      vec_t<T> const& x,
+                                      size_t const cnt)
+{
+  assert(0);
+}
+// Store the cnt higher elements of a vector, bypassing the cache if
+// possible. This stores the vector elements into memory locations as
+// if element 0 were stored at p.
+template<typename T>
+inline void vec_store_nta_partial_hi (typename vec_t<T>::scalar_t& p,
+                                      vec_t<T> const& x,
+                                      size_t const cnt)
+{
+  assert(0);
+}
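+
+// Usage sketch (not compiled in): copying an array in full vectors,
+// then handling the remainder with a partial store. This assumes the
+// arrays are padded to a full vector and suitably aligned for the
+// non-temporal store; the vectorised implementations support partial
+// stores, while the scalar fallback above only asserts.
+#if 0
+void vec_copy (CCTK_REAL* dst, CCTK_REAL const* src, size_t const n)
+{
+  typedef vec_t<CCTK_REAL> VEC;
+  size_t i;
+  for (i=0; i+VEC::size()<=n; i+=VEC::size()) {
+    vec_store_nta(dst[i], VEC::load(src[i]));
+  }
+  // store only the remaining n-i low elements of the last vector
+  if (i<n) vec_store_nta_partial_lo(dst[i], VEC::load(src[i]), n-i);
+}
+#endif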
+
+template<typename T>
+inline vec_t<T> operator+ (vec_t<T> const& x)
+{
+  return +x.v;
+}
+template<typename T>
+inline vec_t<T> operator- (vec_t<T> const& x)
+{
+  return -x.v;
+}
+
+template<typename T>
+inline vec_t<T> operator+ (vec_t<T> const& x, vec_t<T> const& y)
+{
+  return x.v + y.v;
+}
+template<typename T>
+inline vec_t<T> operator- (vec_t<T> const& x, vec_t<T> const& y)
+{
+  return x.v - y.v;
+}
+template<typename T>
+inline vec_t<T> operator* (vec_t<T> const& x, vec_t<T> const& y)
+{
+  return x.v * y.v;
+}
+template<typename T>
+inline vec_t<T> operator/ (vec_t<T> const& x, vec_t<T> const& y)
+{
+  return x.v / y.v;
+}
+
+template<typename T>
+inline vec_t<T>& operator+= (vec_t<T>& x, vec_t<T> const& y)
+{
+  x.v += y.v;
+  return x;
+}
+template<typename T>
+inline vec_t<T>& operator-= (vec_t<T>& x, vec_t<T> const& y)
+{
+  x.v -= y.v;
+  return x;
+}
+template<typename T>
+inline vec_t<T>& operator*= (vec_t<T>& x, vec_t<T> const& y)
+{
+  x.v *= y.v;
+  return x;
+}
+template<typename T>
+inline vec_t<T>& operator/= (vec_t<T>& x, vec_t<T> const& y)
+{
+  x.v /= y.v;
+  return x;
+}
+
+// Choose between two vectors based on a scalar condition (provided
+// here as well so that all implementations offer the same interface)
+template<typename T>
+inline vec_t<T> ifthen (bool const b, vec_t<T> const& x, vec_t<T> const& y)
+{
+  return b ? x : y;
+}
+
+template<typename T>
+inline vec_t<T> exp (vec_t<T> const& x)
+{
+  return exp(x.v);
+}
+template<typename T>
+inline vec_t<T> fabs (vec_t<T> const& x)
+{
+  return fabs(x.v);
+}
+template<typename T>
+inline vec_t<T> log (vec_t<T> const& x)
+{
+  return log(x.v);
+}
+template<typename T>
+inline vec_t<T> sqrt (vec_t<T> const& x)
+{
+  return sqrt(x.v);
+}
+
+template<typename T>
+inline vec_t<T> fmax (vec_t<T> const& x, vec_t<T> const& y)
+{
+  return fmax(x.v, y.v);
+}
+template<typename T>
+inline vec_t<T> fmin (vec_t<T> const& x, vec_t<T> const& y)
+{
+  return fmin(x.v, y.v);
+}
+template<typename T>
+inline vec_t<T> pow (vec_t<T> const& x, typename vec_t<T>::scalar_t const& a)
+{
+  return pow(x.v, a);
+}
+template<typename T>
+inline vec_t<T> pow (vec_t<T> const& x, int const& i)
+{
+  return pow(x.v, i);
+}
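+
+// Minimal self-test sketch (not compiled in): the scalar fallback
+// must behave exactly like the underlying scalar type.
+#if 0
+inline void vec_t_self_test ()
+{
+  vec_t<double> a(2.0);
+  vec_t<double> const b(3.0);
+  a += b;                       // every element becomes 5.0
+  assert(a[0] == 5.0);
+  assert(fmax(a,b)[0] == 5.0);
+  assert(pow(b,2)[0] == 9.0);
+}
+#endif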
diff --git a/src/indirect/vectors-intel.hh b/src/indirect/vectors-intel.hh
new file mode 100644
index 0000000..29da84f
--- /dev/null
+++ b/src/indirect/vectors-intel.hh
@@ -0,0 +1,390 @@
+#include <cassert>
+#include <cmath>
+#include <cstddef>
+
+using namespace std;
+
+
+
+#if defined(__SSE__)            // SSE (Intel)
+
+#include <xmmintrin.h>
+
+template<>
+struct vec_t<float> {
+  typedef float scalar_t;
+  typedef __m128 impl_t;
+  impl_t v;
+
+  static inline size_t size()
+  {
+    return sizeof(impl_t)/sizeof(scalar_t);
+  }
+
+  inline vec_t ()
+  {
+  }
+  inline vec_t (scalar_t const& a)
+    : v(_mm_set1_ps(a))
+  {
+  }
+  inline vec_t (scalar_t const& a0, scalar_t const& a1,
+                scalar_t const& a2, scalar_t const& a3)
+    : v(_mm_set_ps(a3,a2,a1,a0)) // reverse order!
+  {
+  }
+
+  inline vec_t (impl_t const& w)
+    : v(w)
+  {
+  }
+  inline operator impl_t () const
+  {
+    return v;
+  }
+
+private:
+  static inline scalar_t elt0 (impl_t const& v)
+  {
+    return _mm_cvtss_f32(v);    // this is a no-op
+  }
+public:
+  inline scalar_t operator[] (size_t const d) const
+  {
+    switch (d) {
+    case 0: return elt0(v);
+    case 1: return elt0(_mm_shuffle_ps(v,v,_MM_SHUFFLE(1,1,1,1)));
+    case 2: return elt0(_mm_unpackhi_ps(v,v));
+    case 3: return elt0(_mm_shuffle_ps(v,v,_MM_SHUFFLE(3,3,3,3)));
+    }
+    assert(0);
+    return 0.0f;
+  }
+
+  static inline vec_t load (scalar_t const& a)
+  {
+    return _mm_load_ps(&a);
+  }
+  static inline vec_t loadu (scalar_t const& a)
+  {
+    return _mm_loadu_ps(&a);
+  }
+  // Load a vector from memory that may or may not be aligned, as
+  // decided by the offset and the vector size
+  static inline vec_t loadu_maybe (int const off, scalar_t const& p)
+  {
+    if (off % size() == 0) {
+      return load(p);
+    } else {
+      return loadu(p);
+    }
+  }
+  static inline vec_t loadu_maybe3 (int const off0, int const off1,
+                                    int const off2,
+                                    scalar_t const& p)
+  {
+    if (off0 % size() == 0 and off1 % size() == 0 and off2 % size() == 0) {
+      return load(p);
+    } else {
+      return loadu(p);
+    }
+  }
+  inline void store (scalar_t& p) const
+  {
+    _mm_store_ps(&p,v);
+  }
+  inline void storeu (scalar_t& p) const
+  {
+    _mm_storeu_ps(&p,v);
+  }
+  inline void store_nta (scalar_t& p) const
+  {
+    _mm_stream_ps(&p,v);
+  }
+  inline void store_nta_partial_lo (scalar_t& p, size_t const cnt) const
+  {
+    switch (cnt) {
+    case 4: store_nta(p); break;
+    case 3: (&p)[2]=(*this)[2]; // fall through
+    case 2: (&p)[1]=(*this)[1]; // fall through
+    case 1: (&p)[0]=(*this)[0];
+    }
+  }
+  inline void store_nta_partial_hi (scalar_t& p, size_t const cnt) const
+  {
+    switch (cnt) {
+    case 4: store_nta(p); break;
+    case 3: (&p)[1]=(*this)[1]; // fall through
+    case 2: (&p)[2]=(*this)[2]; // fall through
+    case 1: (&p)[3]=(*this)[3];
+    }
+  }
+
+  inline vec_t operator+ () const
+  {
+    return *this;
+  }
+  inline vec_t operator- () const
+  {
+    // flip the sign bit of every element
+    return _mm_xor_ps(_mm_set1_ps(-0.0f), v);
+  }
+  inline vec_t operator+ (vec_t const& x) const
+  {
+    return _mm_add_ps(v,x.v);
+  }
+  inline vec_t operator- (vec_t const& x) const
+  {
+    return _mm_sub_ps(v,x.v);
+  }
+  inline vec_t operator* (vec_t const& x) const
+  {
+    return _mm_mul_ps(v,x.v);
+  }
+  inline vec_t operator/ (vec_t const& x) const
+  {
+    return _mm_div_ps(v,x.v);
+  }
+  inline vec_t& operator+= (vec_t const& x)
+  {
+    return *this=*this+x;
+  }
+  inline vec_t& operator-= (vec_t const& x)
+  {
+    return *this=*this-x;
+  }
+  inline vec_t& operator*= (vec_t const& x)
+  {
+    return *this=*this*x;
+  }
+  inline vec_t& operator/= (vec_t const& x)
+  {
+    return *this=*this/x;
+  }
+};
+
+// SSE has no transcendental instructions; fall back to element-wise
+// evaluation
+inline vec_t<float> exp (vec_t<float> const& x)
+{
+  return vec_t<float>(exp(x[0]), exp(x[1]), exp(x[2]), exp(x[3]));
+}
+inline vec_t<float> fabs (vec_t<float> const& x)
+{
+  // clear the sign bit of every element
+  return _mm_andnot_ps(_mm_set1_ps(-0.0f), x.v);
+}
+inline vec_t<float> fmax (vec_t<float> const& x, vec_t<float> const& y)
+{
+  return _mm_max_ps(x.v, y.v);
+}
+inline vec_t<float> fmin (vec_t<float> const& x, vec_t<float> const& y)
+{
+  return _mm_min_ps(x.v, y.v);
+}
+inline vec_t<float> ifthen (bool const b, vec_t<float> const& x,
+                            vec_t<float> const& y)
+{
+  return b ? x : y;
+}
+inline vec_t<float> log (vec_t<float> const& x)
+{
+  return vec_t<float>(log(x[0]), log(x[1]), log(x[2]), log(x[3]));
+}
+inline vec_t<float> pow (vec_t<float> const& x,
+                         vec_t<float>::scalar_t const& a)
+{
+  return vec_t<float>(pow(x[0],a), pow(x[1],a), pow(x[2],a), pow(x[3],a));
+}
+inline vec_t<float> sqrt (vec_t<float> const& x)
+{
+  return _mm_sqrt_ps(x.v);
+}
+
+#endif
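+
+// Debugging sketch (not compiled in): extracting all lanes with
+// operator[]. Lane extraction is slow and intended only for boundary
+// code and debugging, never for inner loops.
+#if 0
+#include <cstdio>
+inline void vec_print (vec_t<float> const& x)
+{
+  for (size_t d=0; d<vec_t<float>::size(); ++d) {
+    printf("%g ", (double)x[d]);
+  }
+  printf("\n");
+}
+#endif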
+
+
+#if defined(__SSE2__)           // SSE2 (Intel)
+
+#include <emmintrin.h>
+
+template<>
+struct vec_t<double> {
+  typedef double scalar_t;
+  typedef __m128d impl_t;
+  impl_t v;
+
+  static inline size_t size()
+  {
+    return sizeof(impl_t)/sizeof(scalar_t);
+  }
+
+  inline vec_t ()
+  {
+  }
+  inline vec_t (scalar_t const& a)
+    : v(_mm_set1_pd(a))
+  {
+  }
+  inline vec_t (scalar_t const& a0, scalar_t const& a1)
+    : v(_mm_set_pd(a1,a0))      // reverse order!
+  {
+  }
+
+  inline vec_t (impl_t const& w)
+    : v(w)
+  {
+  }
+  inline operator impl_t () const
+  {
+    return v;
+  }
+
+private:
+  static inline scalar_t elt0 (impl_t const& v)
+  {
+    return _mm_cvtsd_f64(v);    // this is a no-op
+  }
+public:
+  inline scalar_t operator[] (size_t const d) const
+  {
+    switch (d) {
+    case 0: return elt0(v);
+    case 1: return elt0(_mm_unpackhi_pd(v,v));
+    }
+    assert(0);
+    return 0.0;
+  }
+
+  static inline vec_t load (scalar_t const& a)
+  {
+    return _mm_load_pd(&a);
+  }
+  static inline vec_t loadu (scalar_t const& a)
+  {
+    return _mm_loadu_pd(&a);
+  }
+  // Load a vector from memory that may or may not be aligned, as
+  // decided by the offset and the vector size
+  static inline vec_t loadu_maybe (int const off, scalar_t const& p)
+  {
+    if (off % size() == 0) {
+      return load(p);
+    } else {
+      return loadu(p);
+    }
+  }
+  static inline vec_t loadu_maybe3 (int const off0, int const off1,
+                                    int const off2,
+                                    scalar_t const& p)
+  {
+    if (off0 % size() == 0 and off1 % size() == 0 and off2 % size() == 0) {
+      return load(p);
+    } else {
+      return loadu(p);
+    }
+  }
+  inline void store (scalar_t& p) const
+  {
+    _mm_store_pd(&p,v);
+  }
+  inline void storeu (scalar_t& p) const
+  {
+    _mm_storeu_pd(&p,v);
+  }
+  inline void store_nta (scalar_t& p) const
+  {
+    _mm_stream_pd(&p,v);
+  }
+  inline void store_nta_partial_lo (scalar_t& p, size_t const cnt) const
+  {
+    switch (cnt) {
+    case 2: store_nta(p); break;
+    case 1: (&p)[0]=(*this)[0];
+    }
+  }
+  inline void store_nta_partial_hi (scalar_t& p, size_t const cnt) const
+  {
+    switch (cnt) {
+    case 2: store_nta(p); break;
+    case 1: (&p)[1]=(*this)[1];
+    }
+  }
+
+  inline vec_t operator+ () const
+  {
+    return *this;
+  }
+  inline vec_t operator- () const
+  {
+    // flip the sign bit of every element
+    return _mm_xor_pd(_mm_set1_pd(-0.0), v);
+  }
+  inline vec_t operator+ (vec_t const& x) const
+  {
+    return _mm_add_pd(v,x.v);
+  }
+  inline vec_t operator- (vec_t const& x) const
+  {
+    return _mm_sub_pd(v,x.v);
+  }
+  inline vec_t operator* (vec_t const& x) const
+  {
+    return _mm_mul_pd(v,x.v);
+  }
+  inline vec_t operator/ (vec_t const& x) const
+  {
+    return _mm_div_pd(v,x.v);
+  }
+  inline vec_t& operator+= (vec_t const& x)
+  {
+    return *this=*this+x;
+  }
+  inline vec_t& operator-= (vec_t const& x)
+  {
+    return *this=*this-x;
+  }
+  inline vec_t& operator*= (vec_t const& x)
+  {
+    return *this=*this*x;
+  }
+  inline vec_t& operator/= (vec_t const& x)
+  {
+    return *this=*this/x;
+  }
+};
+
+// SSE2 has no transcendental instructions; fall back to element-wise
+// evaluation
+inline vec_t<double> exp (vec_t<double> const& x)
+{
+  return vec_t<double>(exp(x[0]), exp(x[1]));
+}
+inline vec_t<double> fabs (vec_t<double> const& x)
+{
+  // clear the sign bit of every element
+  return _mm_andnot_pd(_mm_set1_pd(-0.0), x.v);
+}
+inline vec_t<double> fmax (vec_t<double> const& x, vec_t<double> const& y)
+{
+  return _mm_max_pd(x.v, y.v);
+}
+inline vec_t<double> fmin (vec_t<double> const& x, vec_t<double> const& y)
+{
+  return _mm_min_pd(x.v, y.v);
+}
+inline vec_t<double> ifthen (bool const b, vec_t<double> const& x,
+                             vec_t<double> const& y)
+{
+  return b ? x : y;
+}
+inline vec_t<double> log (vec_t<double> const& x)
+{
+  return vec_t<double>(log(x[0]), log(x[1]));
+}
+inline vec_t<double> pow (vec_t<double> const& x,
+                          vec_t<double>::scalar_t const& a)
+{
+  return vec_t<double>(pow(x[0],a), pow(x[1],a));
+}
+inline vec_t<double> sqrt (vec_t<double> const& x)
+{
+  return _mm_sqrt_pd(x.v);
+}
+
+#endif
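+
+// Usage sketch (not compiled in): clamping a vector into [lo,hi]
+// with fmin/fmax maps directly onto the minpd/maxpd instructions and
+// avoids per-element branches.
+#if 0
+inline vec_t<double> clamp (vec_t<double> const& x,
+                            double const lo, double const hi)
+{
+  return fmax(vec_t<double>(lo), fmin(vec_t<double>(hi), x));
+}
+#endif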
diff --git a/src/indirect/vectors-power.hh b/src/indirect/vectors-power.hh
new file mode 100644
index 0000000..37a8786
--- /dev/null
+++ b/src/indirect/vectors-power.hh
@@ -0,0 +1,360 @@
+#include <cassert>
+#include <cmath>
+#include <cstddef>
+
+using namespace std;
+
+
+
+#if defined(__ALTIVEC__)        // Altivec (Power)
+
+#include <altivec.h>
+
+template<>
+struct vec_t<float> {
+  typedef float scalar_t;
+  typedef vector float impl_t;
+
+  // The payload -- the actual vector content
+  impl_t v;
+
+  static inline size_t size()
+  {
+    return sizeof(impl_t)/sizeof(scalar_t);
+  }
+
+  inline vec_t ()
+  {
+  }
+  inline vec_t (scalar_t const& a)
+    : v(vec_splats(a))
+  {
+  }
+  inline vec_t (scalar_t const& a0, scalar_t const& a1,
+                scalar_t const& a2, scalar_t const& a3)
+  {
+    scalar_t* const p = (scalar_t*)&v;
+    p[0]=a0; p[1]=a1; p[2]=a2; p[3]=a3;
+  }
+
+  inline vec_t (impl_t const& w)
+    : v(w)
+  {
+  }
+  inline operator impl_t () const
+  {
+    return v;
+  }
+
+  inline scalar_t operator[] (size_t const d) const
+  {
+    return ((scalar_t const*)&v)[d];
+  }
+
+  static inline vec_t load (scalar_t const& a)
+  {
+    return *(impl_t const*)&a;
+  }
+  static inline vec_t loadu (scalar_t const& a)
+  {
+    // Altivec has no single unaligned load instruction; fall back to
+    // element-wise loads
+    scalar_t const* const p = &a;
+    return vec_t(p[0], p[1], p[2], p[3]);
+  }
+  // Load a vector from memory that may or may not be aligned, as
+  // decided by the offset and the vector size
+  static inline vec_t loadu_maybe (int const off, scalar_t const& p)
+  {
+    if (off % size() == 0) {
+      return load(p);
+    } else {
+      return loadu(p);
+    }
+  }
+  static inline vec_t loadu_maybe3 (int const off0, int const off1,
+                                    int const off2,
+                                    scalar_t const& p)
+  {
+    if (off0 % size() == 0 and off1 % size() == 0 and off2 % size() == 0) {
+      return load(p);
+    } else {
+      return loadu(p);
+    }
+  }
+  inline void store (scalar_t& p) const
+  {
+    *(impl_t*)&p = v;
+  }
+  inline void storeu (scalar_t& p) const
+  {
+    store(p);
+  }
+  inline void store_nta (scalar_t& p) const
+  {
+    // TODO: Use the stvxl instruction?
+    store(p);
+  }
+  inline void store_nta_partial_lo (scalar_t& p, size_t const cnt) const
+  {
+    switch (cnt) {
+    case 4: store_nta(p); break;
+    case 3: (&p)[2]=(*this)[2]; // fall through
+    case 2: (&p)[1]=(*this)[1]; // fall through
+    case 1: (&p)[0]=(*this)[0];
+    }
+  }
+  inline void store_nta_partial_hi (scalar_t& p, size_t const cnt) const
+  {
+    switch (cnt) {
+    case 4: store_nta(p); break;
+    case 3: (&p)[1]=(*this)[1]; // fall through
+    case 2: (&p)[2]=(*this)[2]; // fall through
+    case 1: (&p)[3]=(*this)[3];
+    }
+  }
+
+  // These operators rely on the compiler's support for arithmetic on
+  // vector types (a GCC extension)
+  inline vec_t operator+ () const
+  {
+    return +v;
+  }
+  inline vec_t operator- () const
+  {
+    return -v;
+  }
+  inline vec_t operator+ (vec_t const& x) const
+  {
+    return v+x.v;
+  }
+  inline vec_t operator- (vec_t const& x) const
+  {
+    return v-x.v;
+  }
+  inline vec_t operator* (vec_t const& x) const
+  {
+    return v*x.v;
+  }
+  inline vec_t operator/ (vec_t const& x) const
+  {
+    return v/x.v;
+  }
+  inline vec_t& operator+= (vec_t const& x)
+  {
+    return *this=*this+x;
+  }
+  inline vec_t& operator-= (vec_t const& x)
+  {
+    return *this=*this-x;
+  }
+  inline vec_t& operator*= (vec_t const& x)
+  {
+    return *this=*this*x;
+  }
+  inline vec_t& operator/= (vec_t const& x)
+  {
+    return *this=*this/x;
+  }
+};
+
+// Altivec has no transcendental instructions; fall back to
+// element-wise evaluation
+inline vec_t<float> exp (vec_t<float> const& x)
+{
+  return vec_t<float>(exp(x[0]), exp(x[1]), exp(x[2]), exp(x[3]));
+}
+inline vec_t<float> fabs (vec_t<float> const& x)
+{
+  return vec_abs(x.v);
+}
+inline vec_t<float> fmax (vec_t<float> const& x, vec_t<float> const& y)
+{
+  return vec_max(x.v, y.v);
+}
+inline vec_t<float> fmin (vec_t<float> const& x, vec_t<float> const& y)
+{
+  return vec_min(x.v, y.v);
+}
+inline vec_t<float> ifthen (bool const b, vec_t<float> const& x,
+                            vec_t<float> const& y)
+{
+  return b ? x : y;
+}
+inline vec_t<float> log (vec_t<float> const& x)
+{
+  return vec_t<float>(log(x[0]), log(x[1]), log(x[2]), log(x[3]));
+}
+inline vec_t<float> pow (vec_t<float> const& x,
+                         vec_t<float>::scalar_t const& a)
+{
+  return vec_t<float>(pow(x[0],a), pow(x[1],a), pow(x[2],a), pow(x[3],a));
+}
+inline vec_t<float> sqrt (vec_t<float> const& x)
+{
+  return vec_t<float>(sqrt(x[0]), sqrt(x[1]), sqrt(x[2]), sqrt(x[3]));
+}
+
+#endif
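+
+// Possible optimisation (sketch, untested): instead of the
+// element-wise loadu fallback above, the classic Altivec sequence
+// for unaligned loads combines two aligned loads with a permute:
+#if 0
+static inline vector float altivec_loadu (float const* p)
+{
+  vector float const lo = vec_ld( 0, p);   // aligned load at or below p
+  vector float const hi = vec_ld(15, p);   // aligned load above p
+  return vec_perm(lo, hi, vec_lvsl(0, p)); // shift into place
+}
+#endif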
+
+
+#if defined(__ALTIVEC__) && defined(_ARCH_PWR7) // Altivec VSX (Power)
+
+#include <altivec.h>
+
+template<>
+struct vec_t<double> {
+  typedef double scalar_t;
+  typedef vector double impl_t;
+
+  // The payload -- the actual vector content
+  impl_t v;
+
+  static inline size_t size()
+  {
+    return sizeof(impl_t)/sizeof(scalar_t);
+  }
+
+  inline vec_t ()
+  {
+  }
+  inline vec_t (scalar_t const& a)
+    : v(vec_splats(a))
+  {
+  }
+  inline vec_t (scalar_t const& a0, scalar_t const& a1)
+  {
+    scalar_t* const p = (scalar_t*)&v;
+    p[0]=a0; p[1]=a1;
+  }
+
+  inline vec_t (impl_t const& w)
+    : v(w)
+  {
+  }
+  inline operator impl_t () const
+  {
+    return v;
+  }
+
+  inline scalar_t operator[] (size_t const d) const
+  {
+    return ((scalar_t const*)&v)[d];
+  }
+
+  static inline vec_t load (scalar_t const& a)
+  {
+    return *(impl_t const*)&a;
+  }
+  static inline vec_t loadu (scalar_t const& a)
+  {
+    // fall back to element-wise loads
+    scalar_t const* const p = &a;
+    return vec_t(p[0], p[1]);
+  }
+  // Load a vector from memory that may or may not be aligned, as
+  // decided by the offset and the vector size
+  static inline vec_t loadu_maybe (int const off, scalar_t const& p)
+  {
+    if (off % size() == 0) {
+      return load(p);
+    } else {
+      return loadu(p);
+    }
+  }
+  static inline vec_t loadu_maybe3 (int const off0, int const off1,
+                                    int const off2,
+                                    scalar_t const& p)
+  {
+    if (off0 % size() == 0 and off1 % size() == 0 and off2 % size() == 0) {
+      return load(p);
+    } else {
+      return loadu(p);
+    }
+  }
+  inline void store (scalar_t& p) const
+  {
+    *(impl_t*)&p = v;
+  }
+  inline void storeu (scalar_t& p) const
+  {
+    store(p);
+  }
+  inline void store_nta (scalar_t& p) const
+  {
+    // TODO: Use the stvxl instruction?
+    store(p);
+  }
+  inline void store_nta_partial_lo (scalar_t& p, size_t const cnt) const
+  {
+    switch (cnt) {
+    case 2: store_nta(p); break;
+    case 1: (&p)[0]=(*this)[0];
+    }
+  }
+  inline void store_nta_partial_hi (scalar_t& p, size_t const cnt) const
+  {
+    switch (cnt) {
+    case 2: store_nta(p); break;
+    case 1: (&p)[1]=(*this)[1];
+    }
+  }
+
+  // These operators rely on the compiler's support for arithmetic on
+  // vector types (a GCC extension)
+  inline vec_t operator+ () const
+  {
+    return +v;
+  }
+  inline vec_t operator- () const
+  {
+    return -v;
+  }
+  inline vec_t operator+ (vec_t const& x) const
+  {
+    return v+x.v;
+  }
+  inline vec_t operator- (vec_t const& x) const
+  {
+    return v-x.v;
+  }
+  inline vec_t operator* (vec_t const& x) const
+  {
+    return v*x.v;
+  }
+  inline vec_t operator/ (vec_t const& x) const
+  {
+    return v/x.v;
+  }
+  inline vec_t& operator+= (vec_t const& x)
+  {
+    return *this=*this+x;
+  }
+  inline vec_t& operator-= (vec_t const& x)
+  {
+    return *this=*this-x;
+  }
+  inline vec_t& operator*= (vec_t const& x)
+  {
+    return *this=*this*x;
+  }
+  inline vec_t& operator/= (vec_t const& x)
+  {
+    return *this=*this/x;
+  }
+};
+
+// VSX has no transcendental instructions; fall back to element-wise
+// evaluation
+inline vec_t<double> exp (vec_t<double> const& x)
+{
+  return vec_t<double>(exp(x[0]), exp(x[1]));
+}
+inline vec_t<double> fabs (vec_t<double> const& x)
+{
+  return vec_abs(x.v);
+}
+inline vec_t<double> fmax (vec_t<double> const& x, vec_t<double> const& y)
+{
+  return vec_max(x.v, y.v);
+}
+inline vec_t<double> fmin (vec_t<double> const& x, vec_t<double> const& y)
+{
+  return vec_min(x.v, y.v);
+}
+inline vec_t<double> ifthen (bool const b, vec_t<double> const& x,
+                             vec_t<double> const& y)
+{
+  return b ? x : y;
+}
+inline vec_t<double> log (vec_t<double> const& x)
+{
+  return vec_t<double>(log(x[0]), log(x[1]));
+}
+inline vec_t<double> pow (vec_t<double> const& x,
+                          vec_t<double>::scalar_t const& a)
+{
+  return vec_t<double>(pow(x[0],a), pow(x[1],a));
+}
+inline vec_t<double> sqrt (vec_t<double> const& x)
+{
+  return vec_t<double>(sqrt(x[0]), sqrt(x[1]));
+}
+
+#endif
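+
+// Possible optimisation (sketch, untested): newer compilers expose a
+// VSX unaligned load intrinsic (vec_xl) that could replace the
+// element-wise loadu fallback above:
+#if 0
+static inline vector double vsx_loadu (double const* p)
+{
+  return vec_xl(0, p);
+}
+#endif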
diff --git a/src/indirect/vectors-pseudo.hh b/src/indirect/vectors-pseudo.hh
new file mode 100644
index 0000000..a5cca83
--- /dev/null
+++ b/src/indirect/vectors-pseudo.hh
@@ -0,0 +1,183 @@
+#include <cmath>
+#include <cstddef>
+
+using namespace std;
+
+
+
+// A pseudo-vector implementation: an array of D scalars, with all
+// operations applied element-wise. This does not vectorise, but it
+// exercises the same code paths as a true vector implementation and
+// is therefore useful for testing.
+template<typename T>
+struct vec_t {
+  static size_t const D = 2;
+  typedef T scalar_t;
+  typedef T impl_t;
+  impl_t v[D];
+
+  static inline size_t size()
+  {
+    return D;
+  }
+
+  inline vec_t ()
+  {
+  }
+  inline vec_t (scalar_t const& a)
+  {
+    for (size_t d=0; d<D; ++d) v[d]=a;
+  }
+  inline vec_t (scalar_t const& a0, scalar_t const& a1)
+  {
+    v[0]=a0; v[1]=a1;
+  }
+
+  inline scalar_t operator[] (size_t const d) const
+  {
+    return v[d];
+  }
+
+  static inline vec_t load (scalar_t const& p)
+  {
+    vec_t r;
+    for (size_t d=0; d<D; ++d) r.v[d]=(&p)[d];
+    return r;
+  }
+  static inline vec_t loadu (scalar_t const& p)
+  {
+    return load(p);
+  }
+  static inline vec_t loadu_maybe (int const off, scalar_t const& p)
+  {
+    return load(p);
+  }
+  static inline vec_t loadu_maybe3 (int const off0, int const off1,
+                                    int const off2,
+                                    scalar_t const& p)
+  {
+    return load(p);
+  }
+  inline void store (scalar_t& p) const
+  {
+    for (size_t d=0; d<D; ++d) (&p)[d]=v[d];
+  }
+  inline void storeu (scalar_t& p) const
+  {
+    store(p);
+  }
+  inline void store_nta (scalar_t& p) const
+  {
+    store(p);
+  }
+  inline void store_nta_partial_lo (scalar_t& p, size_t const cnt) const
+  {
+    for (size_t d=0; d<cnt; ++d) (&p)[d]=v[d];
+  }
+  inline void store_nta_partial_hi (scalar_t& p, size_t const cnt) const
+  {
+    for (size_t d=D-cnt; d<D; ++d) (&p)[d]=v[d];
+  }
+
+  inline vec_t operator+ () const
+  {
+    return *this;
+  }
+  inline vec_t operator- () const
+  {
+    vec_t r;
+    for (size_t d=0; d<D; ++d) r.v[d]=-v[d];
+    return r;
+  }
+  inline vec_t operator+ (vec_t const& x) const
+  {
+    vec_t r;
+    for (size_t d=0; d<D; ++d) r.v[d]=v[d]+x.v[d];
+    return r;
+  }
+  inline vec_t operator- (vec_t const& x) const
+  {
+    vec_t r;
+    for (size_t d=0; d<D; ++d) r.v[d]=v[d]-x.v[d];
+    return r;
+  }
+  inline vec_t operator* (vec_t const& x) const
+  {
+    vec_t r;
+    for (size_t d=0; d<D; ++d) r.v[d]=v[d]*x.v[d];
+    return r;
+  }
+  inline vec_t operator/ (vec_t const& x) const
+  {
+    vec_t r;
+    for (size_t d=0; d<D; ++d) r.v[d]=v[d]/x.v[d];
+    return r;
+  }
+  inline vec_t& operator+= (vec_t const& x)
+  {
+    return *this=*this+x;
+  }
+  inline vec_t& operator-= (vec_t const& x)
+  {
+    return *this=*this-x;
+  }
+  inline vec_t& operator*= (vec_t const& x)
+  {
+    return *this=*this*x;
+  }
+  inline vec_t& operator/= (vec_t const& x)
+  {
+    return *this=*this/x;
+  }
+};
+
+template<typename T>
+vec_t<T> exp (vec_t<T> const& x)
+{
+  vec_t<T> r;
+  for (size_t d=0; d<vec_t<T>::size(); ++d) r.v[d]=exp(x.v[d]);
+  return r;
+}
+template<typename T>
+vec_t<T> fabs (vec_t<T> const& x)
+{
+  vec_t<T> r;
+  for (size_t d=0; d<vec_t<T>::size(); ++d) r.v[d]=fabs(x.v[d]);
+  return r;
+}
+template<typename T>
+vec_t<T> fmax (vec_t<T> const& x, vec_t<T> const& y)
+{
+  vec_t<T> r;
+  for (size_t d=0; d<vec_t<T>::size(); ++d) r.v[d]=fmax(x.v[d], y.v[d]);
+  return r;
+}
+template<typename T>
+vec_t<T> fmin (vec_t<T> const& x, vec_t<T> const& y)
+{
+  vec_t<T> r;
+  for (size_t d=0; d<vec_t<T>::size(); ++d) r.v[d]=fmin(x.v[d], y.v[d]);
+  return r;
+}
+template<typename T>
+vec_t<T> ifthen (bool const b, vec_t<T> const& x, vec_t<T> const& y)
+{
+  return b ? x : y;
+}
+template<typename T>
+vec_t<T> log (vec_t<T> const& x)
+{
+  vec_t<T> r;
+  for (size_t d=0; d<vec_t<T>::size(); ++d) r.v[d]=log(x.v[d]);
+  return r;
+}
+template<typename T>
+vec_t<T> pow (vec_t<T> const& x, typename vec_t<T>::scalar_t const& a)
+{
+  vec_t<T> r;
+  for (size_t d=0; d<vec_t<T>::size(); ++d) r.v[d]=pow(x.v[d], a);
+  return r;
+}
+template<typename T>
+vec_t<T> sqrt (vec_t<T> const& x)
+{
+  vec_t<T> r;
+  for (size_t d=0; d<vec_t<T>::size(); ++d) r.v[d]=sqrt(x.v[d]);
+  return r;
+}
diff --git a/src/indirect/vectors.hh b/src/indirect/vectors.hh
new file mode 100644
--- /dev/null
+++ b/src/indirect/vectors.hh
@@ -0,0 +1,19 @@
+#ifndef VECTORS_HH
+#define VECTORS_HH
+
+#include <cassert>
+#include <cmath>
+#include <cstddef>
+
+  // Default vector implementation, does not vectorise
+#include "vectors-default.hh"
+
+#if 0
+  // Intel SSE vector instructions
+#include "vectors-intel.hh"
+
+  // Power (Altivec) vector instructions
+#include "vectors-power.hh"
+#endif
+
+#endif // #ifndef VECTORS_HH
--
cgit v1.2.3