using namespace std; // A class template that provides a vectorised type for the underlying // scalar type T. This implementation does "nothing", i.e. just // provides a "vector" class with a vector size of 1, forwarding all // operations to the scalar type. // // This implementation uses small integers in several places, e.g. in // the [] operator. This is efficient only if these integers are // compile time constants, so that the compiler can remove the // corresponding if and switch statements. template struct vec_t { // Names for the underlying scalar type, and for the vector type // used to implement this class. For example, with SSE2, it would be // scalar_t=double, and impl_t=__m128d. typedef T scalar_t; typedef T impl_t; // The payload -- the actual vector content impl_t v; // Vector size (number of elements) static inline size_t size() { return sizeof(impl_t)/sizeof(scalar_t); } // Constructors inline vec_t () { } inline vec_t (scalar_t const& a) : v(a) { } // Convert to the implementation vector type inline operator impl_t () { return v; } // Access individual vector elements inline scalar_t operator[] (size_t const d) const { return v; } // Load vectors from memory. For convenience when using this class, // these accept references to the scalar type instead of pointers to // the vector type. These routines are static members of the class, // so that they can be used as VEC::load(p); if they were // stand-alone functions, one would have to write load(p) // instead. // // Aligned load static inline vec_t load (scalar_t const& p) { return p; } // Unaligned load static inline vec_t loadu (scalar_t const& p) { return p; } // Load a vector from memory that may or may not be aligned, as // decided by the offset and the vector size. These functions are // useful e.g. for loading neightbouring grid points while // evaluating finite differencing stencils. static inline vec_t loadu_maybe (int const off, scalar_t const& p) { return p; } static inline vec_t loadu_maybe3 (int const off0, int const off1, int const off2, scalar_t const& p) { return p; } }; // Store vectors to memory. These routines are stand-alone functions, // so that they can be used as vec_store(p,x); if they were class // members, one would have to write x.store(p) instead, or possibly // VEC::store(p,x). // // Aligned store template inline void vec_store (typename vec_t::scalar_t& p, vec_t const& x) { p=x.v; } // Unaligned store template inline void vec_storeu (typename vec_t::scalar_t& p, vec_t const& x) { p=x.v; } // Non-temporal store, i.e. a store that bypasses the cache template inline void vec_store_nta (typename vec_t::scalar_t& p, vec_t const& x) { p=x.v; } // Store the cnt lower elements of a vector, bypassing the cache if // possible template inline void vec_store_nta_partial_lo (typename vec_t::scalar_t& p, vec_t const& x, size_t const cnt) { assert(0); } // Store the cnt higher elements of a vector, bypassing the cache if // possible. This stores the vector elements into memory locations as // if element 0 were stored at p. template inline void vec_store_nta_partial_hi (typename vec_t::scalar_t& p, vec_t const& x, size_t const cnt) { assert(0); } template inline vec_t operator+ (vec_t const& x) { return +x.v; } template inline vec_t operator- (vec_t const& x) { return -x.v; } template inline vec_t operator+ (vec_t const& x, vec_t const& y) { return x.v + y.v; } template inline vec_t operator- (vec_t const& x, vec_t const& y) { return x.v - y.v; } template inline vec_t operator* (vec_t const& x, vec_t const& y) { return x.v * y.v; } template inline vec_t operator/ (vec_t const& x, vec_t const& y) { return x.v / y.v; } template inline vec_t& operator+= (vec_t& x, vec_t const& y) { x.v += y.v; return x; } template inline vec_t& operator-= (vec_t& x, vec_t const& y) { x.v -= y.v; return x; } template inline vec_t& operator*= (vec_t& x, vec_t const& y) { x.v *= y.v; return x; } template inline vec_t& operator/= (vec_t& x, vec_t const& y) { x.v /= y.v; return x; } template inline vec_t exp (vec_t const& x) { return exp(x.v); } template inline vec_t fabs (vec_t const& x) { return fabs(x.v); } template inline vec_t log (vec_t const& x) { return log(x.v); } template inline vec_t sqrt (vec_t const& x) { return sqrt(x.v); } template inline vec_t fmax (vec_t const& x, vec_t const& y) { return fmax(x.v, y.v); } template inline vec_t fmin (vec_t const& x, vec_t const& y) { return fmin(x.v, y.v); } template inline vec_t pow (vec_t const& x, typename vec_t::scalar_t const& a) { return pow(x.v, a); } template inline vec_t pow (vec_t const& x, int const& i) { return pow(x.v, i); }