aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIan Hinder <ian.hinder@aei.mpg.de>2010-11-22 11:20:48 +0100
committerIan Hinder <ian.hinder@aei.mpg.de>2010-11-23 00:19:37 +0100
commitd6c4d4c2131107ef3a4004692823e2041394acd6 (patch)
treebbdd18d5c45e9d4b4f722c8792c4c7d1f02ced19
parentc9431558d69ee1b8769c918044a45bdd50520b46 (diff)
Implement vectorisation
This is Erik's vectorisation working tree from 13-Oct-2010
-rw-r--r--Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/MathematicaCompat.h59
-rw-r--r--Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-SSE2-direct.hh135
-rw-r--r--Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-SSE2.hh194
-rw-r--r--Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-VSX-direct.hh111
-rw-r--r--Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-VSX.hh212
-rw-r--r--Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-default.hh31
-rw-r--r--Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-define.hh104
-rw-r--r--Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-outdated.hh591
-rw-r--r--Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-pseudo.hh72
-rw-r--r--Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-undefine.hh14
-rw-r--r--Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors.hh238
-rw-r--r--Tools/CodeGen/CalculationFunction.m28
-rw-r--r--Tools/CodeGen/CodeGen.m229
-rw-r--r--Tools/CodeGen/Differencing.m120
-rw-r--r--Tools/CodeGen/Kranc.m6
-rw-r--r--Tools/CodeGen/Thorn.m29
16 files changed, 1767 insertions, 406 deletions
diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/MathematicaCompat.h b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/MathematicaCompat.h
index 29c89d1..ee6a3b7 100644
--- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/MathematicaCompat.h
+++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/MathematicaCompat.h
@@ -1,17 +1,17 @@
-#define Power(x, y) (pow((x), (y)))
+#define Power(x, y) (pow(x,y))
#define Sqrt(x) (sqrt(x))
#ifdef KRANC_C
-#define Abs(x) (fabs(x))
-#define Min(x, y) (fmin((x), (y)))
-#define Max(x, y) (fmax((x), (y)))
-#define IfThen(x,y,z) ((x) ? (y) : (z))
+# define Abs(x) (fabs(x))
+# define Min(x, y) (fmin(x,y))
+# define Max(x, y) (fmax(x,y))
+# define IfThen(x,y,z) ((x) ? (y) : (z))
#else
-#define Abs(x) (abs(x))
-#define Min(x, y) (min((x), (y)))
-#define Max(x, y) (max((x), (y)))
+# define Abs(x) (abs(x))
+# define Min(x, y) (min(x,y))
+# define Max(x, y) (max(x,y))
/* IfThen cannot be expressed in Fortran */
#endif
@@ -31,17 +31,46 @@
#define Tanh(x) (tanh(x))
#ifdef KRANC_C
-#define Sign(x) (signbit(x)?-1:+1)
+# define Sign(x) ((x)<0?-1:+1)
+# define ToReal(x) ((CCTK_REAL)(x))
#else
-#define Sign(x) (sgn(x))
+# define Sign(x) (sgn(x))
+# define ToReal(x) (real((x),kind(khalf)))
#endif
+/* TODO: use fma(x,y,z) to implement fmadd and friends? Note that fma
+ may be unsupported, or may be slow. */
+
+/* #define fmadd(x,y,z) ((x)*(y)+(z)) */
+/* #define fmsub(x,y,z) ((x)*(y)-(z)) */
+/* #define fnmadd(x,y,z) (-(z)-(x)*(y)) */
+/* #define fnmsub(x,y,z) (+(z)-(x)*(y)) */
+
+#define fneg(x) (-(x))
+#define fmul(x,y) ((x)*(y))
+#define fdiv(x,y) ((x)/(y))
+#define fadd(x,y) ((x)+(y))
+#define fsub(x,y) ((x)-(y))
+
+#define fmadd(x,y,z) (fadd(fmul(x,y),z))
+#define fmsub(x,y,z) (fsub(fmul(x,y),z))
+#define fnmadd(x,y,z) (fsub(fneg(z),fmul(x,y)))
+#define fnmsub(x,y,z) (fsub(z,fmul(x,y)))
+
+#define kexp(x) (exp(x))
+#define kfabs(x) (fabs(x))
+#define kfmax(x,y) (fmax(x,y))
+#define kfmin(x,y) (fmin(x,y))
+#define klog(x) (log(x))
+#define kpow(x,y) (pow(x,y))
+#define ksqrt(x) (sqrt(x))
+
#ifdef KRANC_C
-#define E M_E
-#define Pi M_PI
+# define E M_E
+# define Pi M_PI
#else
-#define E 2.71828182845904523536029d0
-#define Pi 3.14159265358979323846264d0
+# define E 2.71828182845904523536029d0
+# define Pi 3.14159265358979323846264d0
#endif
-#define UnitStep(x) ( (x) > 0 ? 1 : 0 )
+#define UnitStep(x) ((x)>0)
diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-SSE2-direct.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-SSE2-direct.hh
new file mode 100644
index 0000000..12cd6e8
--- /dev/null
+++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-SSE2-direct.hh
@@ -0,0 +1,135 @@
+// Vectorise using Intel's or AMD's SSE2
+
+// Use the type __m128d directly, without introducing a wrapper class
+// Use macros instead of inline functions
+
+
+
+#include <emmintrin.h>
+
+// Vector type corresponding to CCTK_REAL
+typedef __m128d CCTK_REAL_VEC;
+
+// Number of vector elements in a CCTK_REAL_VEC
+static
+int const CCTK_REAL_VEC_SIZE = sizeof(CCTK_REAL_VEC) / sizeof(CCTK_REAL);
+
+
+
+// Create vectors, extract vector elements
+
+#define vec_set1(a) (_mm_set1_pd(a))
+#define vec_set(a,b) (_mm_set_pd(b,a))
+
+// Get a scalar from the vector
+#if defined(__PGI) && defined (__amd64__)
+// _mm_cvtsd_f64 does not exist on PGI compilers
+// # define vec_elt0(x) (*(CCTK_REAL const*)&(x))
+# define vec_elt0(x) ({ CCTK_REAL a_elt0; asm ("" : "=x" (a_elt0) : "0" (x)); a_elt0; })
+#else
+// this is a no-op
+# define vec_elt0(x) (_mm_cvtsd_f64(x))
+#endif
+#define vec_elt1(x_) ({ CCTK_REAL_VEC const x_elt1=(x_); vec_elt0(_mm_unpackhi_pd(x_elt1,x_elt1)); })
+
+
+
+// Load and store vectors
+
+// Load a vector from memory (aligned and unaligned); this loads from
+// a reference to a scalar
+#define vec_load(p) (_mm_load_pd(&(p)))
+#define vec_loadu(p) (_mm_loadu_pd(&(p)))
+
+// Load a vector from memory that may or may not be aligned, as
+// decided by the offset off and the vector size
+// Implementation: Always use unaligned load
+#define vec_loadu_maybe(off,p) (vec_loadu(p))
+#define vec_loadu_maybe3(off1,off2,off3,p) (vec_loadu(p))
+#if 0
+#define vec_loadu_maybe(off,p) \
+ (!((off)&(CCTK_REAL_VEC_SIZE-1)) ? \
+ vec_load(p) : vec_loadu(p))
+#define vec_loadu_maybe3(off1,off2,off3,p) \
+ (!((off1)&(CCTK_REAL_VEC_SIZE-1)) && \
+ !((off2)&(CCTK_REAL_VEC_SIZE-1)) && \
+ !((off3)&(CCTK_REAL_VEC_SIZE-1)) ? \
+ vec_load(p) : vec_loadu(p))
+#endif
+
+// Store a vector to memory (aligned and non-temporal); this stores to
+// a reference to a scalar
+#define vec_store(p,x) (_mm_store_pd(&(p),x))
+#define vec_storeu(p,x) (_mm_storeu_pd(&(p),x))
+#if defined(KRANC_CACHE)
+# define vec_store_nta(p,x) (_mm_stream_pd(&(p),x))
+#else
+# define vec_store_nta(p,x) (_mm_store_pd(&(p),x))
+#endif
+
+// Store a lower or higher partial vector (aligned and non-temporal);
+// the non-temporal hint is probably ignored
+#define vec_store_nta_partial_lo(p,x,n) (_mm_storel_pd(&(p),x))
+#define vec_store_nta_partial_hi(p,x,n) (_mm_storeh_pd((&(p))+1,x))
+
+
+
+// Functions and operators
+
+// Operators
+#undef fneg
+#undef fmul
+#undef fdiv
+#undef fadd
+#undef fsub
+#if defined(__PGI) && defined (__amd64__)
+// The PGI compiler does not understand __m128d literals
+static union {
+ unsigned long long s[CCTK_REAL_VEC_SIZE];
+ CCTK_REAL_VEC v;
+} vec_neg_mask_impl = {0x8000000000000000ULL, 0x8000000000000000ULL};
+# define vec_neg_mask (vec_neg_mask_impl.v)
+#else
+# define vec_neg_mask ((CCTK_REAL_VEC)(__m128i){0x8000000000000000ULL, 0x8000000000000000ULL})
+#endif
+#define fneg(x) (_mm_xor_pd(x,vec_neg_mask))
+#define fmul(x,y) (_mm_mul_pd(x,y))
+#define fdiv(x,y) (_mm_div_pd(x,y))
+#define fadd(x,y) (_mm_add_pd(x,y))
+#define fsub(x,y) (_mm_sub_pd(x,y))
+
+// Cheap functions
+#undef kfabs
+#undef kfmax
+#undef kfmin
+#undef ksqrt
+#if defined(__PGI) && defined (__amd64__)
+// The PGI compiler does not understand __m128d literals
+static union {
+ unsigned long long s[CCTK_REAL_VEC_SIZE];
+ CCTK_REAL_VEC v;
+} vec_fabs_mask_impl = {0x7fffffffffffffffULL, 0x7fffffffffffffffULL};
+# define vec_fabs_mask (vec_fabs_mask_impl.v)
+#else
+# define vec_fabs_mask ((CCTK_REAL_VEC)(__m128i){0x7fffffffffffffffULL, 0x7fffffffffffffffULL})
+#endif
+#define kfabs(x) (_mm_and_pd(x,vec_fabs_mask))
+#define kfmax(x,y) (_mm_max_pd(x,y))
+#define kfmin(x,y) (_mm_min_pd(x,y))
+#define ksqrt(x) (_mm_sqrt_pd(x))
+
+// Expensive functions
+#undef kexp
+#undef klog
+#undef kpow
+#define kexp(x_) ({ CCTK_REAL_VEC const x_exp=(x_); vec_set(exp(vec_elt0(x_exp)),exp(vec_elt1(x_exp))); })
+#define klog(x_) ({ CCTK_REAL_VEC const x_log=(x_); vec_set(log(vec_elt0(x_log)),log(vec_elt1(x_log))); })
+#define kpow(x_,a_) ({ CCTK_REAL_VEC const x_pow=(x_); CCTK_REAL const a_pow=(a_); vec_set(pow(vec_elt0(x_pow),a_pow),pow(vec_elt1(x_pow),a_pow)); })
+
+
+
+#undef Sign
+#define Sign(x) (42)
+
+#undef ToReal
+#define ToReal(x) (vec_set1(x))
diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-SSE2.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-SSE2.hh
new file mode 100644
index 0000000..4a4eea6
--- /dev/null
+++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-SSE2.hh
@@ -0,0 +1,194 @@
+// Vectorise using Intel's or AMD's SSE2
+
+
+
+#include <emmintrin.h>
+
+// Vector type corresponding to CCTK_REAL
+struct CCTK_REAL_VEC {
+ // The underlying scalar and vector types
+ typedef double S;
+ typedef __m128d V;
+ V v;
+
+ // Convert from and to the underlying vector type
+ inline CCTK_REAL_VEC(V const v_): v(v_) { }
+ inline operator V const() const { return v; }
+
+ inline CCTK_REAL_VEC() { }
+
+ // Copy constructor
+ inline CCTK_REAL_VEC(CCTK_REAL_VEC const& x): v(x) { }
+};
+
+// Number of vector elements in a CCTK_REAL_VEC
+static
+int const CCTK_REAL_VEC_SIZE = sizeof(CCTK_REAL_VEC) / sizeof(CCTK_REAL);
+
+
+
+// Create vectors, extract vector elements
+
+DEFINE_FUNCTION_R_V(vec_set1,_mm_set1_pd(a))
+DEFINE_FUNCTION_RR_V(vec_set,_mm_set_pd(b,a))
+
+// Get a scalar from the vector
+#if defined(__PGI) && defined (__amd64__)
+// _mm_cvtsd_f64 does not exist on PGI compilers
+// DEFINE_FUNCTION_V_R(vec_elt0,({ CCTK_REAL a; _mm_store_sd(&a,x); a; }))
+// DEFINE_FUNCTION_V_R(vec_elt0,(*(CCTK_REAL const*)&x))
+// This generates the fastest code with PGI compilers
+DEFINE_FUNCTION_V_R(vec_elt0,({ CCTK_REAL a; asm ("" : "=x" (a) : "0" (x)); a; }))
+#else
+DEFINE_FUNCTION_V_R(vec_elt0,_mm_cvtsd_f64(x)) // this is a no-op
+#endif
+DEFINE_FUNCTION_V_R(vec_elt1,vec_elt0(_mm_unpackhi_pd(x,x)))
+
+
+
+// Load and store vectors
+
+// Load a vector from memory (aligned and unaligned); this loads from
+// a reference to a scalar
+DEFINE_FUNCTION_PR_V(vec_load,_mm_load_pd(&p))
+DEFINE_FUNCTION_PR_V(vec_loadu,_mm_loadu_pd(&p))
+
+// Load a vector from memory that may or may not be aligned, as
+// decided by the offset off and the vector size
+// Implementation: default to unaligned load
+template<int mod>
+DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl,vec_loadu(p))
+template<int mod1,int mod2,int mod3>
+DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl3,vec_loadu(p))
+// Implementation: load aligned if the modulus is zero
+template<>
+inline
+CCTK_REAL_VEC vec_loadu_maybe_impl<0> (CCTK_REAL const& p)
+{
+ return vec_load(p);
+}
+template<>
+inline
+CCTK_REAL_VEC vec_loadu_maybe_impl3<0,0,0> (CCTK_REAL const& p)
+{
+ return vec_load(p);
+}
+// Call the implementation with the modulus
+#define vec_loadu_maybe(off,p) \
+ (vec_loadu_maybe_impl<(off)&(CCTK_REAL_VEC_SIZE-1>(p)))
+#define vec_loadu_maybe3(off1,off2,off3,p) \
+ (vec_loadu_maybe_impl3<(off1)&(CCTK_REAL_VEC_SIZE-1), \
+ (off2)&(CCTK_REAL_VEC_SIZE-1), \
+ (off3)&(CCTK_REAL_VEC_SIZE-1)>(p))
+
+// Store a vector to memory (aligned and non-temporal); this stores to
+// a reference to a scalar
+DEFINE_FUNCTION_PRV(vec_store,_mm_store_pd(&p,x))
+DEFINE_FUNCTION_PRV(vec_storeu,_mm_storeu_pd(&p,x))
+#if defined(KRANC_CACHE)
+DEFINE_FUNCTION_PRV(vec_store_nta,_mm_stream_pd(&p,x))
+#else
+DEFINE_FUNCTION_PRV(vec_store_nta,_mm_store_pd(&p,x))
+#endif
+
+// Store a lower or higher partial vector (aligned and non-temporal);
+// the non-temporal hint is probably ignored
+static inline
+void vec_store_nta_partial_lo (CCTK_REAL& p, CCTK_REAL_VEC const x, int const n)
+{
+ switch (n) {
+ case 1: _mm_storel_pd(&p,x); break;
+ default: assert(0);
+ }
+}
+static inline
+void vec_store_nta_partial_hi (CCTK_REAL& p, CCTK_REAL_VEC const x, int const n)
+{
+ switch (n) {
+ case 1: _mm_storeh_pd((&p)+1,x); break;
+ default: assert(0);
+ }
+}
+
+
+
+// Functions and operators
+
+// Single-argument operators
+#if 0
+DEFINE_FUNCTION_V_V(operator+,x)
+static CCTK_REAL_VEC const vec_neg_mask =
+ (CCTK_REAL_VEC::V)(__m128i) { 0x8000000000000000ULL, 0x8000000000000000ULL };
+DEFINE_FUNCTION_V_V(operator-,_mm_xor_pd(x,vec_neg_mask))
+#endif
+DEFINE_FUNCTION_V_V(operator+,+x.v)
+DEFINE_FUNCTION_V_V(operator-,-x.v)
+
+// Double-argument operators, both vectors
+#if 0
+DEFINE_FUNCTION_VV_V(operator+,_mm_add_pd(x,y))
+DEFINE_FUNCTION_VV_V(operator-,_mm_sub_pd(x,y))
+DEFINE_FUNCTION_VV_V(operator*,_mm_mul_pd(x,y))
+DEFINE_FUNCTION_VV_V(operator/,_mm_div_pd(x,y))
+#endif
+DEFINE_FUNCTION_VV_V(operator+,x.v+y.v)
+DEFINE_FUNCTION_VV_V(operator-,x.v-y.v)
+DEFINE_FUNCTION_VV_V(operator*,x.v*y.v)
+DEFINE_FUNCTION_VV_V(operator/,x.v/y.v)
+
+// Double-argument operators, vector and scalar
+DEFINE_FUNCTION_VR_V(operator+,x+vec_set1(a))
+DEFINE_FUNCTION_VR_V(operator-,x-vec_set1(a))
+DEFINE_FUNCTION_VR_V(operator*,x*vec_set1(a))
+DEFINE_FUNCTION_VR_V(operator/,x/vec_set1(a))
+
+// Double-argument operators, scalar and vector
+DEFINE_FUNCTION_RV_V(operator+,vec_set1(a)+x)
+DEFINE_FUNCTION_RV_V(operator-,vec_set1(a)-x)
+DEFINE_FUNCTION_RV_V(operator*,vec_set1(a)*x)
+DEFINE_FUNCTION_RV_V(operator/,vec_set1(a)/x)
+
+// Cheap functions
+#if defined(__PGI) && defined (__amd64__)
+// The PGI compiler does not understand __m128d literals
+static union {
+ CCTK_REAL_VEC::S s[CCTK_REAL_VEC_SIZE];
+ CCTK_REAL_VEC::V v;
+} vec_fabs_mask_impl = { 0x7fffffffffffffffULL, 0x7fffffffffffffffULL };
+# define vec_fabs_mask (vec_fabs_mask_impl.v)
+#else
+static CCTK_REAL_VEC const vec_fabs_mask =
+ (CCTK_REAL_VEC::V)(__m128i) { 0x7fffffffffffffffULL, 0x7fffffffffffffffULL };
+#endif
+DEFINE_FUNCTION_V_V(fabs,_mm_and_pd(x,vec_fabs_mask))
+DEFINE_FUNCTION_VV_V(fmax,_mm_max_pd(x,y))
+DEFINE_FUNCTION_VV_V(fmin,_mm_min_pd(x,y))
+DEFINE_FUNCTION_V_V(sqrt,_mm_sqrt_pd(x))
+
+// Expensive functions
+DEFINE_FUNCTION_V_V(exp,vec_set(exp(vec_elt0(x)),exp(vec_elt1(x))))
+DEFINE_FUNCTION_V_V(log,vec_set(log(vec_elt0(x)),log(vec_elt1(x))))
+DEFINE_FUNCTION_VR_V(pow,vec_set(pow(vec_elt0(x),a),pow(vec_elt1(x),a)))
+
+
+
+#undef Sign
+#define Sign(x) (42)
+
+#undef ToReal
+#define ToReal(x) vec_set1(x)
+
+#if defined(__PGI) && defined (__amd64__)
+// Special case for PGI 9.0.4 to avoid an internal compiler error
+#undef IfThen
+static inline
+CCTK_REAL_VEC IfThen (bool const cond, CCTK_REAL_VEC const x, CCTK_REAL_VEC const y)
+{
+ union {
+ __m128i vi;
+ CCTK_REAL_VEC::V v;
+ } mask;
+ mask.vi = _mm_set1_epi64x(-(long long)cond);
+ return _mm_or_pd(_mm_and_pd(x.v, mask.v), _mm_andnot_pd(mask.v, y.v));
+}
+#endif
diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-VSX-direct.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-VSX-direct.hh
new file mode 100644
index 0000000..7e06017
--- /dev/null
+++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-VSX-direct.hh
@@ -0,0 +1,111 @@
+// Vectorise using IBM's Altivec
+
+// Use the type vector double directly, without introducing a wrapper class
+// Use macros instead of inline functions
+
+
+
+#include <altivec.h>
+
+// Vector type corresponding to CCTK_REAL
+typedef vector double CCTK_REAL_VEC;
+
+// Number of vector elements in a CCTK_REAL_VEC
+static
+int const CCTK_REAL_VEC_SIZE = sizeof(CCTK_REAL_VEC) / sizeof(CCTK_REAL);
+
+
+
+// Create vectors, extract vector elements
+
+#define vec_set1(a) (vec_splats(a))
+#if defined(__GNUC__)
+// GNU doesn't support array indices on vectors
+union vec_mask {
+ double elts[2];
+ vector double v;
+};
+# define vec_set(a,b) ({ vec_mask x_set; x_set.elts[0]=(a); x_set.elts[1]=(b); x_set.v; })
+#else
+# define vec_set(a,b) ({ CCTK_REAL_VEC x_set; x_set[0]=(a); x_set[1]=(b); x_set; })
+#endif
+
+// Get a scalar from the vector
+#if defined(__GNUC__)
+// GNU doesn't support array indices on vectors
+# define vec_elt0(x) ({ vec_mask x_elt0; x_elt0.v=(x); x_elt0.elts[0]; })
+# define vec_elt1(x) ({ vec_mask x_elt1; x_elt1.v=(x); x_elt1.elts[1]; })
+#else
+# define vec_elt0(x) ((x)[0])
+# define vec_elt1(x) ((x)[1])
+#endif
+
+
+
+// Load and store vectors
+
+// Load a vector from memory (aligned and unaligned); this loads from
+// a reference to a scalar
+#define vec_load(p) (*(CCTK_REAL_VEC const*)&(p))
+#define vec_loadu(p) (vec_load(p))
+
+// Load a vector from memory that may or may not be aligned, as
+// decided by the offset off and the vector size
+#define vec_loadu_maybe(off,p) (vec_load(p))
+#define vec_loadu_maybe3(off1,off2,off3,p) (vec_load(p))
+
+// Store a vector to memory (aligned and non-temporal); this stores to
+// a reference to a scalar
+#define vec_store(p,x) (*(CCTK_REAL_VEC*)&(p)=(x))
+#define vec_storeu(p,x) (*(CCTK_REAL_VEC*)&(p)=(x))
+// TODO: Use stvxl instruction?
+#define vec_store_nta(p,x) vec_store(p,x)
+
+// Store a lower or higher partial vector (aligned and non-temporal);
+// the non-temporal hint is probably ignored
+#define vec_store_nta_partial_lo(p,x,n) ((p)=vec_elt0(x))
+#define vec_store_nta_partial_hi(p,x,n) ((&(p))[1]=vec_elt1(x))
+
+
+
+// Functions and operators
+
+// Other Altivec functions are:
+// nabs: -abs a
+// madd msub nmadd nmsub: [+-]a*b[+-]c
+
+// Triple-argument operators, all vectors
+#undef fmadd
+#undef fmsub
+#undef fnmadd
+#undef fnmsub
+#define fmadd(x,y,z) (vec_madd(x,y,z))
+#define fmsub(x,y,z) (vec_msub(x,y,z))
+#define fnmadd(x,y,z) (vec_nmadd(x,y,z))
+#define fnmsub(x,y,z) (vec_nmsub(x,y,z))
+
+// Cheap functions
+#undef kfabs
+#undef kfmax
+#undef kfmin
+#define kfabs(x) (vec_abs(x))
+#define kfmax(x,y) (vec_max(x,y))
+#define kfmin(x,y) (vec_min(x,y))
+
+// Expensive functions
+#undef kexp
+#undef klog
+#undef kpow
+#undef ksqrt
+#define kexp(x_) ({ CCTK_REAL_VEC const x_exp=(x_); vec_set(exp(vec_elt0(x_exp)),exp(vec_elt1(x_exp))); })
+#define klog(x_) ({ CCTK_REAL_VEC const x_log=(x_); vec_set(log(vec_elt0(x_log)),log(vec_elt1(x_log))); })
+#define kpow(x_,a_) ({ CCTK_REAL_VEC const x_pow=(x_); CCTK_REAL const a_pow=(a_); vec_set(pow(vec_elt0(x_pow),a_pow),pow(vec_elt1(x_pow),a_pow)); })
+#define ksqrt(x_) ({ CCTK_REAL_VEC const x_sqrt=(x_); vec_set(sqrt(vec_elt0(x_sqrt)),sqrt(vec_elt1(x_sqrt))); })
+
+
+
+#undef Sign
+#define Sign(x) (42)
+
+#undef ToReal
+#define ToReal(x) (vec_set1((CCTK_REAL)(x)))
diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-VSX.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-VSX.hh
new file mode 100644
index 0000000..f591647
--- /dev/null
+++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-VSX.hh
@@ -0,0 +1,212 @@
+// Vectorise using IBM's Altivec
+
+
+
+#include <altivec.h>
+
+// Vector type corresponding to CCTK_REAL
+struct CCTK_REAL_VEC {
+ // The underlying scalar and vector types
+ typedef double S;
+ typedef vector double V;
+ V v;
+ union vec_mask {
+ S elts[2];
+ V v;
+ };
+
+ // Set a vector from scalars
+#if 0
+ // IBM
+ inline CCTK_REAL_VEC(S const a, S const b) { v[0]=a; v[1]=b; }
+#endif
+#if 0
+ inline CCTK_REAL_VEC(S const a, S const b):
+ v(vec_mergel(vec_splats(a), vec_splats(b))) { }
+#endif
+ inline CCTK_REAL_VEC(S const a, S const b)
+ {
+ vec_mask x;
+ x.elts[0] = a;
+ x.elts[1] = b;
+ v = x.v;
+ }
+
+ // Set a vector from a scalar, replicating the scalar
+ // Note: Could also use vec_xlds instead
+ inline CCTK_REAL_VEC(S const a): v(vec_splats(a)) { }
+
+ // Convert from and to the underlying vector type
+ inline CCTK_REAL_VEC(V const v_): v(v_) { }
+ inline operator V const() const { return v; }
+
+ inline CCTK_REAL_VEC() { }
+
+ // Copy constructor
+ inline CCTK_REAL_VEC(CCTK_REAL_VEC const& x): v(x) { }
+};
+
+// Number of vector elements in a CCTK_REAL_VEC
+static
+int const CCTK_REAL_VEC_SIZE = sizeof(CCTK_REAL_VEC) / sizeof(CCTK_REAL);
+
+
+
+// Create vectors, extract vector elements
+DEFINE_FUNCTION_R_V(vec_set1,CCTK_REAL_VEC(a))
+DEFINE_FUNCTION_RR_V(vec_set,CCTK_REAL_VEC(a,b))
+
+// Get a scalar from the vector
+#if 0
+// IBM
+DEFINE_FUNCTION_V_R(vec_elt0,x.v[0])
+DEFINE_FUNCTION_V_R(vec_elt1,x.v[1])
+#endif
+static inline CCTK_REAL vec_elt0(CCTK_REAL_VEC const x)
+{
+ CCTK_REAL_VEC::vec_mask x1;
+ x1.v = x;
+ return x1.elts[0];
+}
+static inline CCTK_REAL vec_elt1(CCTK_REAL_VEC const x)
+{
+ CCTK_REAL_VEC::vec_mask x1;
+ x1.v = x;
+ return x1.elts[1];
+}
+
+
+
+// Load and store vectors
+
+// Load a vector from memory (aligned and unaligned); this loads from
+// a reference to a scalar
+DEFINE_FUNCTION_PR_V(vec_load,p)
+#if 0
+// IBM
+DEFINE_FUNCTION_PR_V(vec_loadu,vec_xld2(0,const_cast<CCTK_REAL*>(&p)))
+#endif
+DEFINE_FUNCTION_PR_V(vec_loadu,p)
+
+// Load a vector from memory that may or may not be aligned, as
+// decided by the offset off and the vector size
+// Implementation: default to unaligned load
+template<int mod>
+DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl,vec_loadu(p))
+template<int mod1,int mod2,int mod3>
+DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl3,vec_loadu(p))
+// Implementation: load aligned if the modulus is zero
+template<>
+inline
+CCTK_REAL_VEC vec_loadu_maybe_impl<0> (CCTK_REAL const& p)
+{
+ return vec_load(p);
+}
+template<>
+inline
+CCTK_REAL_VEC vec_loadu_maybe_impl3<0,0,0> (CCTK_REAL const& p)
+{
+ return vec_load(p);
+}
+// Call the implementation with the modulus
+#define vec_loadu_maybe(off,p) \
+ (vec_loadu_maybe_impl<(off)&(CCTK_REAL_VEC_SIZE-1>(p)))
+#define vec_loadu_maybe3(off1,off2,off3,p) \
+ (vec_loadu_maybe_impl3<(off1)&(CCTK_REAL_VEC_SIZE-1), \
+ (off2)&(CCTK_REAL_VEC_SIZE-1), \
+ (off3)&(CCTK_REAL_VEC_SIZE-1)>(p))
+
+// Store a vector to memory (aligned and non-temporal); this stores to
+// a reference to a scalar
+DEFINE_FUNCTION_PRV(vec_store,*(CCTK_REAL_VEC::V*)&p=x)
+DEFINE_FUNCTION_PRV(vec_storeu,*(CCTK_REAL_VEC::V*)&p=x)
+// TODO: Use stvxl instruction?
+DEFINE_FUNCTION_PRV(vec_store_nta,*(CCTK_REAL_VEC::V*)&p=x)
+
+// Store a lower or higher partial vector (aligned and non-temporal);
+// the non-temporal hint is probably ignored
+static inline
+void vec_store_nta_partial_lo (CCTK_REAL& p, CCTK_REAL_VEC const x, int const n)
+{
+ switch (n) {
+ case 1: p=vec_elt0(x); break;
+ default: assert(0);
+ }
+}
+static inline
+void vec_store_nta_partial_hi (CCTK_REAL& p, CCTK_REAL_VEC const x, int const n)
+{
+ switch (n) {
+ case 1: (&p)[1]=vec_elt1(x); break;
+ default: assert(0);
+ }
+}
+
+
+
+// Functions and operators
+
+// Other Altivec functions are:
+// nabs: -abs a
+// madd msub nmadd nmsub: [+-]a*b[+-]c
+
+// Single-argument operators
+#if 0
+DEFINE_FUNCTION_V_V(operator+,x)
+DEFINE_FUNCTION_V_V(operator-,vec_neg(x))
+#endif
+DEFINE_FUNCTION_V_V(operator+,+x.v)
+DEFINE_FUNCTION_V_V(operator-,-x.v)
+
+// Double-argument operators, both vectors
+#if 0
+DEFINE_FUNCTION_VV_V(operator+,vec_add(x,y))
+DEFINE_FUNCTION_VV_V(operator-,vec_sub(x,y))
+DEFINE_FUNCTION_VV_V(operator*,vec_mul(x,y))
+DEFINE_FUNCTION_VV_V(operator/,vec_div(x,y))
+#endif
+DEFINE_FUNCTION_VV_V(operator+,x.v+y.v)
+DEFINE_FUNCTION_VV_V(operator-,x.v-y.v)
+DEFINE_FUNCTION_VV_V(operator*,x.v*y.v)
+DEFINE_FUNCTION_VV_V(operator/,x.v/y.v)
+
+// Double-argument operators, vector and scalar
+DEFINE_FUNCTION_VR_V(operator+,x+vec_set1(a))
+DEFINE_FUNCTION_VR_V(operator-,x-vec_set1(a))
+DEFINE_FUNCTION_VR_V(operator*,x*vec_set1(a))
+DEFINE_FUNCTION_VR_V(operator/,x/vec_set1(a))
+
+// Double-argument operators, scalar and vector
+DEFINE_FUNCTION_RV_V(operator+,vec_set1(a)+x)
+DEFINE_FUNCTION_RV_V(operator-,vec_set1(a)-x)
+DEFINE_FUNCTION_RV_V(operator*,vec_set1(a)*x)
+DEFINE_FUNCTION_RV_V(operator/,vec_set1(a)/x)
+
+// Triple-argument operators, all vectors
+#undef fmadd
+#undef fmsub
+#undef fnmadd
+#undef fnmsub
+DEFINE_FUNCTION_VVV_V(fmadd,vec_madd(x.v,y.v,z.v))
+DEFINE_FUNCTION_VVV_V(fmsub,vec_msub(x.v,y.v,z.v))
+DEFINE_FUNCTION_VVV_V(fnmadd,vec_nmadd(x.v,y.v,z.v))
+DEFINE_FUNCTION_VVV_V(fnmsub,vec_nmsub(x.v,y.v,z.v))
+
+// Cheap functions
+DEFINE_FUNCTION_V_V(fabs,vec_abs(x.v))
+DEFINE_FUNCTION_VV_V(fmax,vec_max(x.v,y.v))
+DEFINE_FUNCTION_VV_V(fmin,vec_min(x.v,y.v))
+
+// Expensive functions
+DEFINE_FUNCTION_V_V(exp,vec_set(exp(vec_elt0(x)),exp(vec_elt1(x))))
+DEFINE_FUNCTION_V_V(log,vec_set(log(vec_elt0(x)),log(vec_elt1(x))))
+DEFINE_FUNCTION_VR_V(pow,vec_set(pow(vec_elt0(x),a),pow(vec_elt1(x),a)))
+DEFINE_FUNCTION_V_V(sqrt,vec_set(sqrt(vec_elt0(x)),sqrt(vec_elt1(x))))
+
+
+
+#undef Sign
+#define Sign(x) (42)
+
+#undef ToReal
+#define ToReal(x) (vec_set1(x))
diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-default.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-default.hh
new file mode 100644
index 0000000..f928ed8
--- /dev/null
+++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-default.hh
@@ -0,0 +1,31 @@
+// Fallback vectorisation implementation: Do not vectorise
+
+
+
+// Use CCTK_REAL
+typedef CCTK_REAL CCTK_REAL_VEC;
+
+// Number of vector elements in a CCTK_REAL_VEC
+static int const CCTK_REAL_VEC_SIZE = 1;
+
+
+
+// We use macros here, so that we are not surprised by compilers which
+// don't like to inline functions (e.g. PGI). This should also make
+// debug builds (which may not inline) more efficient.
+
+#define vec_load(p) (p)
+#define vec_loadu(p) (p)
+
+// Load a vector from memory that may or may not be aligned, as
+// decided by the offset off and the vector size
+#define vec_loadu_maybe(off,p) (p)
+#define vec_loadu_maybe3(off1,off2,off3,p) (p)
+
+#define vec_store(p,x) ((p)=(x))
+#define vec_store_nta(p,x) ((p)=(x))
+
+// Store a lower or higher partial vector (aligned and non-temporal);
+// the non-temporal hint is probably ignored
+#define vec_store_nta_partial_lo(p,x,n) (assert(0))
+#define vec_store_nta_partial_hi(p,x,n) (assert(0))
diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-define.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-define.hh
new file mode 100644
index 0000000..f5c0b22
--- /dev/null
+++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-define.hh
@@ -0,0 +1,104 @@
+// Define some macros that simplify defining short function that are
+// supposed to be inlined
+
+
+
+// Letters defining the prototype (argument and return value types):
+// I: i,j: integer
+// R: a,b: real
+// V: x,y: vector (of real)
+// P: p,q: pointer (i.e. const reference) to something
+// L: l,m: L-value (i.e. non-const reference) to something
+
+
+
+// Load and store
+
+#define DEFINE_FUNCTION_PR_V(name,expr) \
+static inline \
+CCTK_REAL_VEC name (CCTK_REAL const& p) \
+{ \
+ return expr; \
+}
+
+#define DEFINE_FUNCTION_PRV(name,expr) \
+static inline \
+void name (CCTK_REAL& p, CCTK_REAL_VEC const x) \
+{ \
+ expr; \
+}
+
+#define DEFINE_FUNCTION_PVR(name,expr) \
+static inline \
+void name (CCTK_REAL_VEC& p, CCTK_REAL const a) \
+{ \
+ expr; \
+}
+
+
+
+// Functions and operators
+
+#define DEFINE_FUNCTION_V_V(name,expr) \
+static inline \
+CCTK_REAL_VEC name (CCTK_REAL_VEC const x) \
+ CCTK_ATTRIBUTE_PURE \
+{ \
+ return expr; \
+}
+
+#define DEFINE_FUNCTION_V_R(name,expr) \
+static inline \
+CCTK_REAL name (CCTK_REAL_VEC const x) \
+ CCTK_ATTRIBUTE_PURE \
+{ \
+ return expr; \
+}
+
+#define DEFINE_FUNCTION_R_V(name,expr) \
+static inline \
+CCTK_REAL_VEC name (CCTK_REAL const a) \
+ CCTK_ATTRIBUTE_PURE \
+{ \
+ return expr; \
+}
+
+#define DEFINE_FUNCTION_VV_V(name,expr) \
+static inline \
+CCTK_REAL_VEC name (CCTK_REAL_VEC const x, CCTK_REAL_VEC const y) \
+ CCTK_ATTRIBUTE_PURE \
+{ \
+ return expr; \
+}
+
+#define DEFINE_FUNCTION_VR_V(name,expr) \
+static inline \
+CCTK_REAL_VEC name (CCTK_REAL_VEC const x, CCTK_REAL const a) \
+ CCTK_ATTRIBUTE_PURE \
+{ \
+ return expr; \
+}
+
+#define DEFINE_FUNCTION_RV_V(name,expr) \
+static inline \
+CCTK_REAL_VEC name (CCTK_REAL const a, CCTK_REAL_VEC const x) \
+ CCTK_ATTRIBUTE_PURE \
+{ \
+ return expr; \
+}
+
+#define DEFINE_FUNCTION_RR_V(name,expr) \
+static inline \
+CCTK_REAL_VEC name (CCTK_REAL const a, CCTK_REAL const b) \
+ CCTK_ATTRIBUTE_PURE \
+{ \
+ return expr; \
+}
+
+#define DEFINE_FUNCTION_VVV_V(name,expr) \
+static inline \
+CCTK_REAL_VEC name (CCTK_REAL_VEC const x, CCTK_REAL_VEC const y, CCTK_REAL_VEC const z) \
+ CCTK_ATTRIBUTE_PURE \
+{ \
+ return expr; \
+}
diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-outdated.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-outdated.hh
new file mode 100644
index 0000000..df83b3a
--- /dev/null
+++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-outdated.hh
@@ -0,0 +1,591 @@
+#ifndef VECTORS_HH
+#define VECTORS_HH
+
+
+
+// Vectorisation
+
+#include <assert.h>
+#include <math.h>
+#include <stdlib.h>
+
+#include <cctk.h>
+
+
+
+// I: i,j: integer
+// R: a,b: real
+// V: x,y: vector (of real)
+// P: p,q: pointer (i.e. const reference) to something
+// L: l,m: L-value (i.e. non-const reference) to something
+
+#define DEFINE_FUNCTION_PR_V(name,expr) \
+static inline \
+CCTK_REAL_VEC name (CCTK_REAL const& p) \
+{ \
+ return expr; \
+}
+
+#define DEFINE_FUNCTION_PRV(name,expr) \
+static inline \
+void name (CCTK_REAL& p, CCTK_REAL_VEC const& x) \
+{ \
+ expr; \
+}
+
+#define DEFINE_FUNCTION_PVR(name,expr) \
+static inline \
+void name (CCTK_REAL_VEC& p, CCTK_REAL const& a) \
+{ \
+ expr; \
+}
+
+#define DEFINE_FUNCTION_V_V(name,expr) \
+static inline \
+CCTK_REAL_VEC name (CCTK_REAL_VEC const& x) \
+ CCTK_ATTRIBUTE_PURE \
+{ \
+ return expr; \
+}
+
+#define DEFINE_FUNCTION_V_R(name,expr) \
+static inline \
+CCTK_REAL name (CCTK_REAL_VEC const& x) \
+ CCTK_ATTRIBUTE_PURE \
+{ \
+ return expr; \
+}
+
+#define DEFINE_FUNCTION_R_V(name,expr) \
+static inline \
+CCTK_REAL_VEC name (CCTK_REAL const& a) \
+ CCTK_ATTRIBUTE_PURE \
+{ \
+ return expr; \
+}
+
+#define DEFINE_FUNCTION_VV_V(name,expr) \
+static inline \
+CCTK_REAL_VEC name (CCTK_REAL_VEC const& x, CCTK_REAL_VEC const& y) \
+ CCTK_ATTRIBUTE_PURE \
+{ \
+ return expr; \
+}
+
+#define DEFINE_FUNCTION_VR_V(name,expr) \
+static inline \
+CCTK_REAL_VEC name (CCTK_REAL_VEC const& x, CCTK_REAL const& a) \
+ CCTK_ATTRIBUTE_PURE \
+{ \
+ return expr; \
+}
+
+#define DEFINE_FUNCTION_RV_V(name,expr) \
+static inline \
+CCTK_REAL_VEC name (CCTK_REAL const& a, CCTK_REAL_VEC const& x) \
+ CCTK_ATTRIBUTE_PURE \
+{ \
+ return expr; \
+}
+
+#define DEFINE_FUNCTION_RR_V(name,expr) \
+static inline \
+CCTK_REAL_VEC name (CCTK_REAL const& a, CCTK_REAL const& b) \
+ CCTK_ATTRIBUTE_PURE \
+{ \
+ return expr; \
+}
+
+
+
+// Intel, double
+#if defined(KRANC_VECTORS) && defined(__SSE2__) && defined(CCTK_REAL_PRECISION_8)
+
+#include <emmintrin.h>
+
+// Vector type corresponding to CCTK_REAL
+struct CCTK_REAL_VEC {
+ // The underlying scalar and vector types
+ typedef double S;
+ typedef __m128d V;
+ V v;
+
+ // Set a vector from scalars
+ inline CCTK_REAL_VEC(S const& a, S const& b): v(_mm_set_pd(b,a)) { }
+
+ // Set a vector from a scalar, replicating the scalar
+ inline CCTK_REAL_VEC(S const& a): v(_mm_set1_pd(a)) { }
+
+ // Convert from and to the underlying vector type
+ inline CCTK_REAL_VEC(V const& v_): v(v_) { }
+ inline operator V const() const { return v; }
+
+ inline CCTK_REAL_VEC() { }
+
+ // Copy constructor
+ inline CCTK_REAL_VEC(CCTK_REAL_VEC const& x): v(x) { }
+};
+
+union vec_mask {
+ unsigned long long bits[2];
+ CCTK_REAL_VEC::V v;
+};
+
+DEFINE_FUNCTION_R_V(vec_set1,_mm_set1_pd(a))
+DEFINE_FUNCTION_RR_V(vec_set,_mm_set_pd(b,a))
+
+// Get a scalar from the vector
+#if defined(__PGI) && defined (__amd64__)
+// _mm_cvtsd_f64 does not exist on PGI compilers
+static inline
+CCTK_REAL vec_elt0 (CCTK_REAL_VEC const& x)
+{
+ CCTK_REAL a; _mm_store_sd(&a,x); return a;
+}
+#else
+DEFINE_FUNCTION_V_R(vec_elt0,_mm_cvtsd_f64(x)) //this is a no-op
+#endif
+
+#if 0
+DEFINE_FUNCTION_V_R(vec_elt1,vec_elt0(_mm_shuffle_pd(x,x,_MM_SHUFFLE2(1,1))))
+#endif
+static inline
+CCTK_REAL vec_elt1 (CCTK_REAL_VEC const& x)
+{
+ CCTK_REAL a; _mm_storeh_pd(&a,x); return a;
+}
+
+// Load a vector from memory (aligned and unaligned); this loads from
+// a reference to a scalar
+DEFINE_FUNCTION_PR_V(vec_load,_mm_load_pd(&p))
+DEFINE_FUNCTION_PR_V(vec_loadu,_mm_loadu_pd(&p))
+
+#if 0
+// Load a partial vector (duplicating the last loaded element to fill
+// the remaining elements)
+// TODO: Should this be aligned or unaligned?
+static inline
+CCTK_REAL_VEC vec_load_partial (CCTK_REAL const& p, int const n)
+{
+ switch (n) {
+ case 1: return _mm_load1_pd(p);
+ default: assert(0);
+ }
+}
+#endif
+
+// Load a vector from memory that may or may not be aligned, as
+// decided by the offset off and the vector size
+// Implementation: default to unaligned load
+template<int mod>
+DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl,vec_loadu(p))
+template<int mod1,int mod2,int mod3>
+DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl3,vec_loadu(p))
+// Implementation: load aligned if the modulus is zero
+template<>
+inline
+CCTK_REAL_VEC vec_loadu_maybe_impl<0> (CCTK_REAL const& p)
+{
+ return vec_load(p);
+}
+template<>
+inline
+CCTK_REAL_VEC vec_loadu_maybe_impl3<0,0,0> (CCTK_REAL const& p)
+{
+ return vec_load(p);
+}
+// Call the implementation with the modulus
+template<int off>
+static inline
+CCTK_REAL_VEC vec_loadu_maybe (CCTK_REAL const& p)
+{
+ return vec_loadu_maybe_impl<off&(2-1)>(p);
+}
+template<int off1,int off2, int off3>
+static inline
+CCTK_REAL_VEC vec_loadu_maybe3 (CCTK_REAL const& p)
+{
+ return vec_loadu_maybe_impl3<off1&(2-1),off2&(2-1),off3&(2-1)>(p);
+}
+
+// Store a vector to memory (aligned and non-temporal); this stores to
+// a reference to a scalar
+DEFINE_FUNCTION_PRV(vec_store,_mm_store_pd(&p,x))
+DEFINE_FUNCTION_PRV(vec_store_nta,_mm_stream_pd(&p,x))
+
+// Store a lower or higher partial vector (aligned and non-temporal);
+// the non-temporal hint is probably ignored
+static inline
+void vec_storel_partial (CCTK_REAL& p, CCTK_REAL_VEC const& x, int const n)
+{
+ switch (n) {
+ case 1: _mm_storel_pd(&p,x); break;
+ default: assert(0);
+ }
+}
+static inline
+void vec_storeh_partial (CCTK_REAL& p, CCTK_REAL_VEC const& x, int const n)
+{
+ switch (n) {
+ case 1: _mm_storeh_pd((&p)+1,x); break;
+ default: assert(0);
+ }
+}
+
+// Double-argument operators, both vectors
+DEFINE_FUNCTION_VV_V(operator+,_mm_add_pd(x,y))
+DEFINE_FUNCTION_VV_V(operator-,_mm_sub_pd(x,y))
+DEFINE_FUNCTION_VV_V(operator*,_mm_mul_pd(x,y))
+DEFINE_FUNCTION_VV_V(operator/,_mm_div_pd(x,y))
+
+// Double-argument operators, vector and scalar
+DEFINE_FUNCTION_VR_V(operator+,x+vec_set1(a))
+DEFINE_FUNCTION_VR_V(operator-,x-vec_set1(a))
+DEFINE_FUNCTION_VR_V(operator*,x*vec_set1(a))
+DEFINE_FUNCTION_VR_V(operator/,x/vec_set1(a))
+
+// Double-argument operators, scalar and vector
+DEFINE_FUNCTION_RV_V(operator+,vec_set1(a)+x)
+DEFINE_FUNCTION_RV_V(operator-,vec_set1(a)-x)
+DEFINE_FUNCTION_RV_V(operator*,vec_set1(a)*x)
+DEFINE_FUNCTION_RV_V(operator/,vec_set1(a)/x)
+
+// Single-argument operators
+DEFINE_FUNCTION_V_V(operator+,x)
+#if 0
+DEFINE_FUNCTION_V_V(operator-,vec_set(0.0,0.0)-x)
+#endif
+static vec_mask const vec_neg_mask =
+{ { 0x8000000000000000ULL, 0x8000000000000000ULL } };
+DEFINE_FUNCTION_V_V(operator-,_mm_xor_pd(x,vec_neg_mask.v))
+
+// Cheap functions
+static vec_mask const vec_fabs_mask =
+{ { 0x7fffffffffffffffULL, 0x7fffffffffffffffULL } };
+DEFINE_FUNCTION_V_V(fabs,_mm_and_pd(x,vec_fabs_mask.v))
+DEFINE_FUNCTION_VV_V(fmax,_mm_max_pd(x,y))
+DEFINE_FUNCTION_VV_V(fmin,_mm_min_pd(x,y))
+DEFINE_FUNCTION_V_V(sqrt,_mm_sqrt_pd(x))
+
+// Expensive functions
+DEFINE_FUNCTION_V_V(exp,vec_set(exp(vec_elt0(x)),exp(vec_elt1(x))))
+DEFINE_FUNCTION_V_V(log,vec_set(log(vec_elt0(x)),log(vec_elt1(x))))
+DEFINE_FUNCTION_VR_V(pow,vec_set(pow(vec_elt0(x),a),pow(vec_elt1(x),a)))
+
+// Special case for PGI to avoid internal compiler error
+#if defined(__PGI) && defined (__amd64__)
+#undef IfThen
+CCTK_REAL_VEC IfThen (bool const cond, CCTK_REAL_VEC const& x, CCTK_REAL_VEC co\
+nst& y)
+{
+ return cond*x + (not cond)*y;
+}
+#endif
+
+
+
+#if 0
+// Try to use the __m128d type directly.
+
+// This does not really work, because it is not possible to define
+// automatic conversion operators from double to __m128d, so that
+// explicit conversions are required. This makes the code look more
+// clumsy.
+
+// Vector type corresponding to CCTK_REAL
+typedef __m128d CCTK_REAL_VEC;
+
+DEFINE_FUNCTION_R_V(vec_set1,_mm_set1_pd(a))
+DEFINE_FUNCTION_RR_V(vec_set,_mm_set_pd(b,a))
+
+// Get a scalar from the vector
+static inline
+CCTK_REAL vec_elt0 (CCTK_REAL_VEC const& x)
+{
+#if 0
+ // _mm_cvtsd_f64 does not exist on PGI compilers
+ return _mm_cvtsd_f64(x); // this is a no-op
+#endif
+ CCTK_REAL a; _mm_store_sd(&a,x); return a;
+}
+
+DEFINE_FUNCTION_V_R(vec_elt1,vec_elt0(_mm_shuffle_pd(x,x,_MM_SHUFFLE2(1,1))))
+
+// Load a vector from memory (aligned and unaligned); this loads from
+// a reference to a scalar
+DEFINE_FUNCTION_PR_V(vec_load,_mm_load_pd(&p))
+DEFINE_FUNCTION_PR_V(vec_loadu,_mm_loadu_pd(&p))
+
+// Store a vector to memory (aligned and non-temporal); this stores to
+// a reference to a scalar
+DEFINE_FUNCTION_PRV(vec_store,_mm_store_pd(&p,x))
+DEFINE_FUNCTION_PRV(vec_store_nta,_mm_stream_pd(&p,x))
+
+// Cheap functions
+static vec_mask const vec_fabs_mask =
+{ { 0x7fffffffffffffffULL, 0x7fffffffffffffffULL } };
+DEFINE_FUNCTION_V_V(fabs,_mm_and_pd(x,vec_fabs_mask.v))
+DEFINE_FUNCTION_VV_V(fmax,_mm_max_pd(x,y))
+DEFINE_FUNCTION_VV_V(fmin,_mm_min_pd(x,y))
+DEFINE_FUNCTION_V_V(sqrt,_mm_sqrt_pd(x))
+
+// Expensive functions
+DEFINE_FUNCTION_V_V(exp,set(exp(vec_elt0(x)),exp(vec_elt1(x))))
+DEFINE_FUNCTION_V_V(log,set(log(vec_elt0(x)),log(vec_elt1(x))))
+DEFINE_FUNCTION_VR_V(pow,set(pow(vec_elt0(x),a),pow(vec_elt1(x),a)))
+
+#endif
+
+
+
+// Intel, float
+#elif defined(KRANC_VECTORS) && defined(__SSE__) && defined(CCTK_REAL_PRECISION_4)
+
+#include <xmmintrin.h>
+
+// A vector type corresponding to CCTK_REAL
+typedef __m128 CCTK_REAL_VEC;
+
+
+
+// Power, double
+#elif defined(KRANC_VECTORS) && defined(__ALTIVEC__) && defined(_ARCH_PWR7) && defined(CCTK_REAL_PRECISION_8)
+
+#include <altivec.h>
+
+// Vector type corresponding to CCTK_REAL
+struct CCTK_REAL_VEC {
+ // The underlying scalar and vector types
+ typedef double S;
+ typedef vector double V;
+ V v;
+
+ // vec_insert, vec_extract, vec_splat
+
+ // Set a vector from scalars
+ inline CCTK_REAL_VEC(S const& a, S const& b) { v[0]=a; v[1]=b; }
+
+ // Set a vector from a scalar, replicating the scalar
+ inline CCTK_REAL_VEC(S const& a): v(vec_splats(a)) { }
+
+ // Convert from and to the underlying vector type
+ inline CCTK_REAL_VEC(V const& v_): v(v_) { }
+ inline operator V const() const { return v; }
+
+ inline CCTK_REAL_VEC() { }
+
+ // Copy constructor
+ inline CCTK_REAL_VEC(CCTK_REAL_VEC const& x): v(x) { }
+};
+
+DEFINE_FUNCTION_R_V(vec_set1,CCTK_REAL_VEC(a))
+DEFINE_FUNCTION_RR_V(vec_set,CCTK_REAL_VEC(a,b))
+
+// Get a scalar from the vector
+DEFINE_FUNCTION_V_R(vec_elt0,x.v[0])
+DEFINE_FUNCTION_V_R(vec_elt1,x.v[1])
+
+// Load a vector from memory (aligned and unaligned); this loads from
+// a reference to a scalar
+DEFINE_FUNCTION_PR_V(vec_load,p)
+DEFINE_FUNCTION_PR_V(vec_loadu,vec_xld2(0,const_cast<CCTK_REAL*>(&p)))
+// vec_xlds
+
+// Load a vector from memory that may or may not be aligned, as
+// decided by the offset off and the vector size
+// Implementation: default to unaligned load
+template<int mod>
+DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl,vec_loadu(p))
+// Implementation: load aligned if the modulus is zero
+#define static
+template<>
+DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl<0>,vec_load(p))
+#undef static
+// Call the implementation with the modulus
+template<int off>
+DEFINE_FUNCTION_PR_V(vec_loadu_maybe,vec_loadu_maybe_impl<off&(2-1)>(p))
+
+// Store a vector to memory (aligned and non-temporal); this stores to
+// a reference to a scalar
+DEFINE_FUNCTION_PRV(vec_store,*(CCTK_REAL_VEC::V*)&p=x)
+DEFINE_FUNCTION_PRV(vec_store_nta,*(CCTK_REAL_VEC::V*)&p=x)
+
+// Store a lower or higher partial vector (aligned and non-temporal);
+// the non-temporal hint is probably ignored
+static inline
+void vec_storel_partial (CCTK_REAL& p, CCTK_REAL_VEC const& x, int const n)
+{
+ switch (n) {
+ case 1: p=x.v[0]; break;
+ default: assert(0);
+ }
+}
+static inline
+void vec_storeh_partial (CCTK_REAL& p, CCTK_REAL_VEC const& x, int const n)
+{
+ switch (n) {
+ case 1: (&p)[1]=x.v[1]; break;
+ default: assert(0);
+ }
+}
+
+// Double-argument operators, both vectors
+DEFINE_FUNCTION_VV_V(operator+,vec_add(x,y))
+DEFINE_FUNCTION_VV_V(operator-,vec_sub(x,y))
+DEFINE_FUNCTION_VV_V(operator*,vec_mul(x,y))
+DEFINE_FUNCTION_VV_V(operator/,vec_div(x,y))
+
+// Double-argument operators, vector and scalar
+DEFINE_FUNCTION_VR_V(operator+,x+vec_set1(a))
+DEFINE_FUNCTION_VR_V(operator-,x-vec_set1(a))
+DEFINE_FUNCTION_VR_V(operator*,x*vec_set1(a))
+DEFINE_FUNCTION_VR_V(operator/,x/vec_set1(a))
+
+// Double-argument operators, scalar and vector
+DEFINE_FUNCTION_RV_V(operator+,vec_set1(a)+x)
+DEFINE_FUNCTION_RV_V(operator-,vec_set1(a)-x)
+DEFINE_FUNCTION_RV_V(operator*,vec_set1(a)*x)
+DEFINE_FUNCTION_RV_V(operator/,vec_set1(a)/x)
+
+// Single-argument operators
+DEFINE_FUNCTION_V_V(operator+,x)
+DEFINE_FUNCTION_V_V(operator-,vec_neg(x))
+
+// Cheap functions
+DEFINE_FUNCTION_V_V(fabs,vec_abs(x))
+DEFINE_FUNCTION_VV_V(fmax,vec_max(x,y))
+DEFINE_FUNCTION_VV_V(fmin,vec_min(x,y))
+
+// Expensive functions
+DEFINE_FUNCTION_V_V(exp,vec_set(exp(vec_elt0(x)),exp(vec_elt1(x))))
+DEFINE_FUNCTION_V_V(log,vec_set(log(vec_elt0(x)),log(vec_elt1(x))))
+DEFINE_FUNCTION_VR_V(pow,vec_set(pow(vec_elt0(x),a),pow(vec_elt1(x),a)))
+DEFINE_FUNCTION_V_V(sqrt,vec_set(sqrt(vec_elt0(x)),sqrt(vec_elt1(x))))
+
+
+
+// Fallback: pseudo-vectorisation
+#elif 0
+
+// There is no vector type corresponding to CCTK_REAL
+struct CCTK_REAL_VEC {
+ // The underlying scalar and vector types
+ CCTK_REAL v, w;
+
+ // Set a vector from scalars
+ inline CCTK_REAL_VEC(CCTK_REAL const& a, CCTK_REAL const& b): v(a), w(b) { }
+
+ // Set a vector from a scalar, replicating the scalar
+ inline CCTK_REAL_VEC(CCTK_REAL const& a): v(a), w(a) { }
+
+ inline CCTK_REAL_VEC() { }
+
+ // Copy constructor
+ inline CCTK_REAL_VEC(CCTK_REAL_VEC const& x): v(x.v), w(x.w) { }
+};
+
+
+
+DEFINE_FUNCTION_PR_V(vec_load,*(CCTK_REAL_VEC const* restrict)&p)
+DEFINE_FUNCTION_PR_V(vec_loadu,vec_load(p))
+// Load a vector from memory that may or may not be aligned, as
+// decided by the offset off and the vector size
+template<int off>
+DEFINE_FUNCTION_PR_V(vec_loadm,vec_load(p))
+
+DEFINE_FUNCTION_PRV(vec_store,*(CCTK_REAL_VEC* restrict)&p=x)
+DEFINE_FUNCTION_PRV(vec_store_nta,vec_store(p,x))
+
+// Double-argument operators, both vectors
+DEFINE_FUNCTION_VV_V(operator+,CCTK_REAL_VEC(x.v+y.v,x.w+y.w))
+DEFINE_FUNCTION_VV_V(operator-,CCTK_REAL_VEC(x.v-y.v,x.w-y.w))
+DEFINE_FUNCTION_VV_V(operator*,CCTK_REAL_VEC(x.v*y.v,x.w*y.w))
+DEFINE_FUNCTION_VV_V(operator/,CCTK_REAL_VEC(x.v/y.v,x.w/y.w))
+
+// Double-argument operators, vector and scalar
+DEFINE_FUNCTION_VR_V(operator+,CCTK_REAL_VEC(x.v+a,x.w+a))
+DEFINE_FUNCTION_VR_V(operator-,CCTK_REAL_VEC(x.v-a,x.w-a))
+DEFINE_FUNCTION_VR_V(operator*,CCTK_REAL_VEC(x.v*a,x.w*a))
+DEFINE_FUNCTION_VR_V(operator/,CCTK_REAL_VEC(x.v/a,x.w/a))
+
+// Double-argument operators, scalar and vector
+DEFINE_FUNCTION_RV_V(operator+,CCTK_REAL_VEC(a+x.v,a+x.w))
+DEFINE_FUNCTION_RV_V(operator-,CCTK_REAL_VEC(a-x.v,a-x.w))
+DEFINE_FUNCTION_RV_V(operator*,CCTK_REAL_VEC(a*x.v,a*x.w))
+DEFINE_FUNCTION_RV_V(operator/,CCTK_REAL_VEC(a/x.v,a/x.w))
+
+// Single-argument operators
+DEFINE_FUNCTION_V_V(operator+,x)
+DEFINE_FUNCTION_V_V(operator-,CCTK_REAL_VEC(-x.v,-x.w))
+
+// Cheap functions
+DEFINE_FUNCTION_V_V(fabs,CCTK_REAL_VEC(fabs(x.v),fabs(x.w)))
+DEFINE_FUNCTION_VV_V(fmax,CCTK_REAL_VEC(fmax(x.v,y.v),fmax(x.w,y.w)))
+DEFINE_FUNCTION_VV_V(fmin,CCTK_REAL_VEC(fmin(x.v,y.v),fmin(x.w,y.w)))
+DEFINE_FUNCTION_V_V(sqrt,CCTK_REAL_VEC(sqrt(x.v),sqrt(x.w)))
+
+// Expensive functions
+DEFINE_FUNCTION_V_V(exp,CCTK_REAL_VEC(exp(x.v),exp(x.w)))
+DEFINE_FUNCTION_V_V(log,CCTK_REAL_VEC(log(x.v),log(x.w)))
+DEFINE_FUNCTION_VR_V(pow,CCTK_REAL_VEC(pow(x.v,a),pow(x.w,a)))
+
+
+
+// Fallback: no vectorisation
+#else
+
+// There is no vector type corresponding to CCTK_REAL
+typedef CCTK_REAL CCTK_REAL_VEC;
+
+
+
+DEFINE_FUNCTION_PR_V(vec_load,p)
+DEFINE_FUNCTION_PR_V(vec_loadu,p)
+// Load a vector from memory that may or may not be aligned, as
+// decided by the offset off and the vector size
+// Implementation: default to unaligned load
+template<int off>
+DEFINE_FUNCTION_PR_V(vec_loadu_maybe,p)
+template<int off1,int off2, int off3>
+DEFINE_FUNCTION_PR_V(vec_loadu_maybe3,p)
+
+DEFINE_FUNCTION_PRV(vec_store,p=x)
+DEFINE_FUNCTION_PRV(vec_store_nta,p=x)
+
+// Store a lower or higher partial vector (aligned and non-temporal);
+// the non-temporal hint is probably ignored
+static inline
+void vec_storel_partial (CCTK_REAL& p, CCTK_REAL_VEC const& x, int const n)
+{
+ assert(0);
+}
+static inline
+void vec_storeh_partial (CCTK_REAL& p, CCTK_REAL_VEC const& x, int const n)
+{
+ assert(0);
+}
+
+
+
+#endif
+
+
+
+#undef DEFINE_FUNCTION_PR_V
+#undef DEFINE_FUNCTION_PRV
+#undef DEFINE_FUNCTION_V_V
+#undef DEFINE_FUNCTION_R_V
+#undef DEFINE_FUNCTION_VV_V
+#undef DEFINE_FUNCTION_VR_V
+#undef DEFINE_FUNCTION_RV_V
+#undef DEFINE_FUNCTION_RR_V
+
+
+
+// Number of vector elements in a CCTK_REAL_VEC
+static
+int const CCTK_REAL_VEC_SIZE = sizeof(CCTK_REAL_VEC) / sizeof(CCTK_REAL);
+
+
+
+#endif // #ifndef VECTORS_HH
diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-pseudo.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-pseudo.hh
new file mode 100644
index 0000000..f439c9b
--- /dev/null
+++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-pseudo.hh
@@ -0,0 +1,72 @@
+// Pseudo vectorisation using scalar operations
+
+
+
+// Number of vector elements in a CCTK_REAL_VEC
+static int const CCTK_REAL_VEC_SIZE = 2;
+
+// There is no vector type corresponding to CCTK_REAL
+struct CCTK_REAL_VEC {
+ // The underlying scalar and vector types
+ CCTK_REAL v[CCTK_REAL_VEC_SIZE];
+
+ // Set a vector from scalars
+ inline CCTK_REAL_VEC(CCTK_REAL const& a, CCTK_REAL const& b): v(a), w(b) { }
+
+ // Set a vector from a scalar, replicating the scalar
+ inline CCTK_REAL_VEC(CCTK_REAL const& a): v(a), w(a) { }
+
+ inline CCTK_REAL_VEC() { }
+
+ // Copy constructor
+ inline CCTK_REAL_VEC(CCTK_REAL_VEC const& x) { v[0]=x.v[0]; v[1]=x.v[1]; }
+};
+
+
+
+// Load and store vectors
+
+DEFINE_FUNCTION_PR_V(vec_load,*(CCTK_REAL_VEC const* restrict)&p)
+DEFINE_FUNCTION_PR_V(vec_loadu,vec_load(p))
+// Load a vector from memory that may or may not be aligned, as
+// decided by the offset off and the vector size
+#define vec_loadu_maybe(off,p) (vec_load(p))
+#define vec_loadu_maybe3(off1,off2,off3,p) (vec_load(p))
+
+DEFINE_FUNCTION_PRV(vec_store,*(CCTK_REAL_VEC* restrict)&p=x)
+DEFINE_FUNCTION_PRV(vec_store_nta,vec_store(p,x))
+
+
+
+// Functions and operators
+
+// Double-argument operators, both vectors
+DEFINE_FUNCTION_VV_V(operator+,CCTK_REAL_VEC(x.v[0]+y.v[0],x.v[1]+y.v[1]))
+DEFINE_FUNCTION_VV_V(operator-,CCTK_REAL_VEC(x.v[0]-y.v[0],x.v[1]-y.v[1]))
+DEFINE_FUNCTION_VV_V(operator*,CCTK_REAL_VEC(x.v[0]*y.v[0],x.v[1]*y.v[1]))
+DEFINE_FUNCTION_VV_V(operator/,CCTK_REAL_VEC(x.v[0]/y.v[0],x.v[1]/y.v[1]))
+
+// Double-argument operators, vector and scalar
+DEFINE_FUNCTION_VR_V(operator+,CCTK_REAL_VEC(x.v[0]+a,x.v[1]+a))
+DEFINE_FUNCTION_VR_V(operator-,CCTK_REAL_VEC(x.v[0]-a,x.v[1]-a))
+DEFINE_FUNCTION_VR_V(operator*,CCTK_REAL_VEC(x.v[0]*a,x.v[1]*a))
+DEFINE_FUNCTION_VR_V(operator/,CCTK_REAL_VEC(x.v[0]/a,x.v[1]/a))
+
+// Double-argument operators, scalar and vector
+DEFINE_FUNCTION_RV_V(operator+,CCTK_REAL_VEC(a+x.v[0],a+x.v[1]))
+DEFINE_FUNCTION_RV_V(operator-,CCTK_REAL_VEC(a-x.v[0],a-x.v[1]))
+DEFINE_FUNCTION_RV_V(operator*,CCTK_REAL_VEC(a*x.v[0],a*x.v[1]))
+DEFINE_FUNCTION_RV_V(operator/,CCTK_REAL_VEC(a/x.v[0],a/x.v[1]))
+
+// Single-argument operators
+DEFINE_FUNCTION_V_V(operator+,x)
+DEFINE_FUNCTION_V_V(operator-,CCTK_REAL_VEC(-x.v[0],-x.v[1]))
+
+// Functions
+DEFINE_FUNCTION_V_V(exp,CCTK_REAL_VEC(exp(x.v[0]),exp(x.v[1])))
+DEFINE_FUNCTION_V_V(fabs,CCTK_REAL_VEC(fabs(x.v[0]),fabs(x.v[1])))
+DEFINE_FUNCTION_VV_V(fmax,CCTK_REAL_VEC(fmax(x.v[0],y.v[0]),fmax(x.v[1],y.v[1])))
+DEFINE_FUNCTION_VV_V(fmin,CCTK_REAL_VEC(fmin(x.v[0],y.v[0]),fmin(x.v[1],y.v[1])))
+DEFINE_FUNCTION_V_V(log,CCTK_REAL_VEC(log(x.v[0]),log(x.v[1])))
+DEFINE_FUNCTION_VR_V(pow,CCTK_REAL_VEC(pow(x.v[0],a),pow(x.v[1],a)))
+DEFINE_FUNCTION_V_V(sqrt,CCTK_REAL_VEC(sqrt(x.v[0]),sqrt(x.v[1])))
diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-undefine.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-undefine.hh
new file mode 100644
index 0000000..0d950c7
--- /dev/null
+++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-undefine.hh
@@ -0,0 +1,14 @@
+// Undefine all macros defined in "Vectors-define.hh", so that we
+// leave a clean namespace
+
+
+
+#undef DEFINE_FUNCTION_PR_V
+#undef DEFINE_FUNCTION_PRV
+#undef DEFINE_FUNCTION_V_V
+#undef DEFINE_FUNCTION_R_V
+#undef DEFINE_FUNCTION_VV_V
+#undef DEFINE_FUNCTION_VR_V
+#undef DEFINE_FUNCTION_RV_V
+#undef DEFINE_FUNCTION_RR_V
+#undef DEFINE_FUNCTION_VVV_V
diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors.hh
index 3fb77e1..d32afb2 100644
--- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors.hh
+++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors.hh
@@ -5,231 +5,47 @@
// Vectorisation
+#include <assert.h>
#include <math.h>
#include <stdlib.h>
-#include <cctk.h>
-
-
-
-// I: i,j: integer
-// R: a,b: real
-// V: x,y: vector (of real)
-// P: p,q: pointer (i.e. const reference) to something
-// L: l,m: L-value (i.e. non-const reference) to something
-
-#define DEFINE_FUNCTION_PR_V(name,expr) \
-inline \
-CCTK_REAL_VEC name (CCTK_REAL const& p) \
-{ \
- return expr; \
-}
-
-#define DEFINE_FUNCTION_PRV(name,expr) \
-inline \
-void name (CCTK_REAL& p, CCTK_REAL_VEC const& x) \
-{ \
- expr; \
-}
-
-#define DEFINE_FUNCTION_PVR(name,expr) \
-inline \
-void name (CCTK_REAL_VEC& p, CCTK_REAL const& a) \
-{ \
- expr; \
-}
-
-#define DEFINE_FUNCTION_V_V(name,expr) \
-inline \
-CCTK_REAL_VEC name (CCTK_REAL_VEC const& x) \
- CCTK_ATTRIBUTE_PURE \
-{ \
- return CCTK_REAL_VEC(expr); \
-}
-
-#define DEFINE_FUNCTION_V_R(name,expr) \
-inline \
-CCTK_REAL name (CCTK_REAL_VEC const& x) \
- CCTK_ATTRIBUTE_PURE \
-{ \
- return expr; \
-}
-
-#define DEFINE_FUNCTION_R_V(name,expr) \
-inline \
-CCTK_REAL_VEC name (CCTK_REAL const& a) \
- CCTK_ATTRIBUTE_PURE \
-{ \
- return expr; \
-}
-
-#define DEFINE_FUNCTION_VV_V(name,expr) \
-inline \
-CCTK_REAL_VEC name (CCTK_REAL_VEC const& x, CCTK_REAL_VEC const& y) \
- CCTK_ATTRIBUTE_PURE \
-{ \
- return expr; \
-}
-
-#define DEFINE_FUNCTION_VR_V(name,expr) \
-inline \
-CCTK_REAL_VEC name (CCTK_REAL_VEC const& x, CCTK_REAL const& a) \
- CCTK_ATTRIBUTE_PURE \
-{ \
- return expr; \
-}
-
-#define DEFINE_FUNCTION_RV_V(name,expr) \
-inline \
-CCTK_REAL_VEC name (CCTK_REAL const& a, CCTK_REAL_VEC const& x) \
- CCTK_ATTRIBUTE_PURE \
-{ \
- return expr; \
-}
-
-#define DEFINE_FUNCTION_RR_V(name,expr) \
-inline \
-CCTK_REAL_VEC name (CCTK_REAL const& a, CCTK_REAL const& b) \
- CCTK_ATTRIBUTE_PURE \
-{ \
- return expr; \
-}
-
-
-
-// Intel, double
-#if defined(KRANC_VECTORS) && defined(__SSE2__) && defined(CCTK_REAL_PRECISION_8)
-
-#include <emmintrin.h>
-
-// Vector type corresponding to CCTK_REAL
-struct CCTK_REAL_VEC {
- // The underlying scalar and vector types
- typedef double S;
- typedef __m128d V;
- static int const n = sizeof(V)/sizeof(S);
- V v;
-
- // Set a vector from scalars
- CCTK_REAL_VEC(S const& a, S const& b): v(_mm_set_pd(a,b)) { };
-
- // Get a scalar from the vector
- S elt0() const { return _mm_cvtsd_f64(v); /* this is a no-op */ }
- S elt1() const { return _mm_cvtsd_f64(_mm_shuffle_pd(v,v,_MM_SHUFFLE2(1,1))); }
-
- // Set a vector from a scalar, replicating the scalar
- CCTK_REAL_VEC(S const& a): v(_mm_set1_pd(a)) { };
-
- // Convert from and to the underlying vector type
- CCTK_REAL_VEC(V const& v_): v(v_) { };
- operator V const() const { return v; }
-
- CCTK_REAL_VEC() { };
-
- // Copy constructor
- CCTK_REAL_VEC(CCTK_REAL_VEC const& x): v(x) { };
-};
-
-// Load a vector from memory (aligned and unaligned); this loads from
-// a reference to a scalar
-DEFINE_FUNCTION_PR_V(vec_load,_mm_load_pd(&p));
-DEFINE_FUNCTION_PR_V(vec_loadu,_mm_loadu_pd(&p));
-
-// Store a vector to memory (aligned and non-temporal); this stores to
-// a reference to a scalar
-DEFINE_FUNCTION_PRV(vec_store,_mm_store_pd(&p,x))
-DEFINE_FUNCTION_PRV(vec_store_nta,_mm_stream_pd(&p,x))
-
-// Double-argument operators, both vectors
-DEFINE_FUNCTION_VV_V(operator+,_mm_add_pd(x,y))
-DEFINE_FUNCTION_VV_V(operator-,_mm_sub_pd(x,y))
-DEFINE_FUNCTION_VV_V(operator*,_mm_mul_pd(x,y))
-DEFINE_FUNCTION_VV_V(operator/,_mm_div_pd(x,y))
-
-// Double-argument operators, vector and scalar
-DEFINE_FUNCTION_VR_V(operator+,x+CCTK_REAL_VEC(a))
-DEFINE_FUNCTION_VR_V(operator-,x-CCTK_REAL_VEC(a))
-DEFINE_FUNCTION_VR_V(operator*,x*CCTK_REAL_VEC(a))
-DEFINE_FUNCTION_VR_V(operator/,x/CCTK_REAL_VEC(a))
-
-// Double-argument operators, scalar and vector
-DEFINE_FUNCTION_RV_V(operator+,CCTK_REAL_VEC(a)+x)
-DEFINE_FUNCTION_RV_V(operator-,CCTK_REAL_VEC(a)-x)
-DEFINE_FUNCTION_RV_V(operator*,CCTK_REAL_VEC(a)*x)
-DEFINE_FUNCTION_RV_V(operator/,CCTK_REAL_VEC(a)/x)
-
-// Single-argument operators
-DEFINE_FUNCTION_V_V(operator+,x)
-DEFINE_FUNCTION_V_V(operator-,0.0-x)
-
-// Cheap functions
-static union {
- unsigned long long const bits[2];
- CCTK_REAL_VEC::V v;
-} const fabs_mask =
- { { 0x7fffffffffffffffULL, 0x7fffffffffffffffULL } };
-DEFINE_FUNCTION_V_V(fabs,_mm_and_pd(x,fabs_mask.v))
-DEFINE_FUNCTION_VV_V(fmax,_mm_max_pd(x,y))
-DEFINE_FUNCTION_VV_V(fmin,_mm_min_pd(x,y))
-DEFINE_FUNCTION_V_V(sqrt,_mm_sqrt_pd(x))
-
-// Expensive functions
-DEFINE_FUNCTION_V_V(exp,CCTK_REAL_VEC(exp(x.elt0()),exp(x.elt1())))
-DEFINE_FUNCTION_V_V(log,CCTK_REAL_VEC(log(x.elt0()),log(x.elt1())))
-DEFINE_FUNCTION_VR_V(pow,CCTK_REAL_VEC(pow(x.elt0(),a),pow(x.elt1(),a)))
-
-// Un-implemented functions
-DEFINE_FUNCTION_V_R(signbit,0)
-
-
-
-#if 0
-// Intel, float
-#elif defined(KRANC_VECTORS) && defined(__SSE__) && defined(CCTK_REAL_PRECISION_4)
-
-#include <xmmintrin.h>
-
-// A vector type corresponding to CCTK_REAL
-typedef __m128 CCTK_REAL_VEC;
-#endif
-
+#include <cctk.h>
-// Fallback: no vectorisation
-#else
-
-// There is no vector type corresponding to CCTK_REAL
-typedef CCTK_REAL CCTK_REAL_VEC;
+#include "Vectors-define.hh"
-DEFINE_FUNCTION_PR_V(vec_load,p)
-DEFINE_FUNCTION_PR_V(vec_loadu,p)
+#if defined(KRANC_VECTORS)
+// Vectorise
-DEFINE_FUNCTION_PRV(vec_store,p=x)
-DEFINE_FUNCTION_PRV(vec_store_nta,p=x)
+# if ! defined(CCTK_REAL_PRECISION_8)
+# error "Vectorisation is currently only supported for double precision"
+# endif
-DEFINE_FUNCTION_V_R(signbit,x<0)
+# if defined(__SSE2__) // SSE2 (Intel)
+# if defined(KRANC_DIRECT)
+# include "Vectors-SSE2-direct.hh"
+# else
+# include "Vectors-SSE2.hh"
+# endif
+# elif defined(__ALTIVEC__) && defined(_ARCH_PWR7) // Altivec (Power)
+# if defined(KRANC_DIRECT)
+# include "Vectors-VSX-direct.hh"
+# else
+# include "Vectors-VSX.hh"
+# endif
+# else
+# include "Vectors-pseudo.hh"
+# endif
+#else
+// Don't vectorise
+# include "Vectors-default.hh"
#endif
-
-
-#undef DEFINE_FUNCTION_PR_V
-#undef DEFINE_FUNCTION_PRV
-#undef DEFINE_FUNCTION_V_V
-#undef DEFINE_FUNCTION_R_V
-#undef DEFINE_FUNCTION_VV_V
-#undef DEFINE_FUNCTION_VR_V
-#undef DEFINE_FUNCTION_RV_V
-#undef DEFINE_FUNCTION_RR_V
-
-
-
-// Number of vector elements in a CCTK_REAL_VEC
-static
-size_t const CCTK_REAL_VEC_SIZE = sizeof(CCTK_REAL_VEC) / sizeof(CCTK_REAL);
+#include "Vectors-undefine.hh"
diff --git a/Tools/CodeGen/CalculationFunction.m b/Tools/CodeGen/CalculationFunction.m
index 3d86503..df79553 100644
--- a/Tools/CodeGen/CalculationFunction.m
+++ b/Tools/CodeGen/CalculationFunction.m
@@ -178,7 +178,7 @@ localName[x_] :=
definePreDefinitions[pDefs_] :=
CommentedBlock["Initialize predefined quantities",
- Map[DeclareAssignVariable["CCTK_REAL", #[[1]], #[[2]]] &, pDefs]];
+ Map[DeclareAssignVariable["CCTK_REAL_VEC", #[[1]], #[[2]]] &, pDefs]];
(* --------------------------------------------------------------------------
Equations
@@ -326,7 +326,7 @@ Options[CreateCalculationFunction] = ThornOptions;
CreateCalculationFunction[calc_, debug_, useCSE_, opts:OptionsPattern[]] :=
Module[{gfs, allSymbols, knownSymbols,
- shorts, eqs, parameters,
+ shorts, eqs, parameters, parameterRules,
functionName, dsUsed, groups, pddefs, cleancalc, eqLoop, where,
addToStencilWidth, pDefs, haveCondTextuals, condTextuals},
@@ -366,10 +366,14 @@ CreateCalculationFunction[calc_, debug_, useCSE_, opts:OptionsPattern[]] :=
If[!lookupDefault[cleancalc, NoSimplify, False],
InfoMessage[InfoFull, "Simplifying equations", eqs];
- eqs = Simplify[eqs, {r>0}]]];
+ eqs = Simplify[eqs, {r>=0}]]];
InfoMessage[InfoFull, "Equations:"];
+ (* Wrap parameters with ToReal *)
+ parameterRules = Map[(#->ToReal[#])&, parameters];
+ eqs = eqs /. parameterRules;
+
Map[printEq, eqs];
(* Check all the function names *)
@@ -537,7 +541,7 @@ equationLoop[eqs_, cleancalc_, gfs_, shorts_, incs_, groups_, pddefs_,
GenericGridLoop[functionName,
{
- DeclareDerivatives[defsWithoutShorts, eqsOrdered],
+ (* DeclareDerivatives[defsWithoutShorts, eqsOrdered], *)
CommentedBlock["Assign local copies of grid functions",
Map[DeclareMaybeAssignVariableInLoop[
@@ -558,6 +562,22 @@ equationLoop[eqs_, cleancalc_, gfs_, shorts_, incs_, groups_, pddefs_,
Map[InfoVariable[#[[1]]] &, (eqs2 /. localMap)],
""],
+ CommentedBlock["If necessary, store only partial vectors after the first iteration",
+ ConditionalOnParameterTextual["CCTK_REAL_VEC_SIZE > 1 && i<lc_imin",
+ {
+ DeclareAssignVariable["int", "elt_count", "lc_imin-i"],
+ Map[StoreHighPartialVariableInLoop[GridName[#], localName[#], "elt_count"] &,
+ gfsInLHS],
+ "continue;\n"
+ }]],
+ CommentedBlock["If necessary, store only partial vectors after the last iteration",
+ ConditionalOnParameterTextual["CCTK_REAL_VEC_SIZE > 1 && i+CCTK_REAL_VEC_SIZE > lc_imax",
+ {
+ DeclareAssignVariable["int", "elt_count", "lc_imax-i"],
+ Map[StoreLowPartialVariableInLoop[GridName[#], localName[#], "elt_count"] &,
+ gfsInLHS],
+ "break;\n"
+ }]],
CommentedBlock["Copy local copies back to grid functions",
Map[StoreVariableInLoop[GridName[#], localName[#]] &,
gfsInLHS]],
diff --git a/Tools/CodeGen/CodeGen.m b/Tools/CodeGen/CodeGen.m
index de8dbd2..09e7fe1 100644
--- a/Tools/CodeGen/CodeGen.m
+++ b/Tools/CodeGen/CodeGen.m
@@ -62,14 +62,16 @@ AssignVariableInLoop::usage = "AssignVariableInLoop[dest_, src_] returns a block
"that assigns 'src' to 'dest'.";
StoreVariableInLoop::usage = "StoreVariableInLoop[dest_, src_] returns a block of code " <>
"that assigns 'src' to 'dest'.";
+StoreLowPartialVariableInLoop::usage = "StoreLowPartialVariableInLoop[dest_, src_, count_] returns a block of code " <>
+ "that assigns 'src' to 'dest'.";
+StoreHighPartialVariableInLoop::usage = "StoreHighPartialVariableInLoop[dest_, src_, count_] returns a block of code " <>
+ "that assigns 'src' to 'dest'.";
DeclareAssignVariableInLoop::usage = "DeclareAssignVariableInLoop[type_, dest_, src_] returns a block of code " <>
"that assigns 'src' to 'dest'.";
MaybeAssignVariableInLoop::usage = "MaybeAssignVariableInLoop[dest_, src_, cond_] returns a block of code " <>
"that assigns 'src' to 'dest'.";
DeclareMaybeAssignVariableInLoop::usage = "DeclareMaybeAssignVariableInLoop[type_, dest_, src_, cond_] returns a block of code " <>
"that assigns 'src' to 'dest'.";
-UNUSEDDeclareVariablesInLoopVectorised::usage = "";
-UNUSEDAssignVariablesInLoopVectorised::usage = "";
TestForNaN::usage = "TestForNaN[expr_] returns a block of code " <>
"that tests 'expr' for nan.";
CommentedBlock::usage = "CommentedBlock[comment, block] returns a block consisting " <>
@@ -285,62 +287,24 @@ AssignVariableInLoop[dest_, src_] :=
StoreVariableInLoop[dest_, src_] :=
{"vec_store_nta(", dest, ",", src, ")", EOL[]};
+StoreLowPartialVariableInLoop[dest_, src_, count_] :=
+ {"vec_store_nta_partial_lo(", dest, ",", src, ",", count, ")", EOL[]};
+
+StoreHighPartialVariableInLoop[dest_, src_, count_] :=
+ {"vec_store_nta_partial_hi(", dest, ",", src, ",", count, ")", EOL[]};
+
DeclareAssignVariableInLoop[type_, dest_, src_] :=
{type, " const ", dest, " = vec_load(", src, ")", EOL[]};
MaybeAssignVariableInLoop[dest_, src_, cond_] :=
If [cond,
- {dest, " = useMatter ? vec_load(", src, ") : 0.0", EOL[]},
+ {dest, " = useMatter ? vec_load(", src, ") : ToReal(0.0)", EOL[]},
{dest, " = vec_load(", src, ")", EOL[]}];
DeclareMaybeAssignVariableInLoop[type_, dest_, src_, mmaCond_, codeCond_] :=
If [mmaCond,
- {type, " ", dest, " = (", codeCond, ") ? vec_load(", src, ") : 0.0", EOL[]},
- {type, " ", dest, " = vec_load(", src, ")", EOL[]}];
-
-(* TODO: move these into OpenMP loop *)
-UNUSEDDeclareVariablesInLoopVectorised[dests_, temps_, srcs_] :=
- {
- {"#undef LC_PRELOOP_STATEMENTS", "\n"},
- {"#define LC_PRELOOP_STATEMENTS", " \\\n"},
- {"int const GFD_imin = lc_imin + ((lc_imin + cctk_lsh[0] * (j + cctk_lsh[1] * k)) & (CCTK_REAL_VEC_SIZE-1))", "; \\\n"},
- {"int const GFD_imax = lc_imax + ((lc_imax + cctk_lsh[0] * (j + cctk_lsh[1] * k)) & (CCTK_REAL_VEC_SIZE-1)) - CCTK_REAL_VEC_SIZE", "; \\\n"},
- Map[Function[x, Module[{dest, temp, src},
- {dest, temp, src} = x;
- {"CCTK_REAL_VEC ", temp, "; \\\n"}]],
- Transpose[{dests, temps, srcs}]],
- {"\n"}
- };
-
-UNUSEDAssignVariablesInLoopVectorised[dests_, temps_, srcs_] :=
- {
- {"{\n"},
- {" if (i < GFD_imin || i >= GFD_imax) {\n"},
- Map[Function[x, Module[{dest, temp, src},
- {dest, temp, src} = x;
- {" ", dest, "[index] = ", src, EOL[]}]],
- Transpose[{dests, temps, srcs}]],
- {" } else {\n"},
- {" size_t const index0 = index & (CCTK_REAL_VEC_SIZE-1)", EOL[]},
- Map[Function[x, Module[{dest, temp, src},
- {dest, temp, src} = x;
- {" ((CCTK_REAL*)&", temp, ")[index0] = ",
- src, EOL[]}]],
- Transpose[{dests, temps, srcs}]],
- {" if (index0 == CCTK_REAL_VEC_SIZE-1) {\n"},
- {" size_t const index1 = index - (CCTK_REAL_VEC_SIZE-1)", EOL[]},
- Map[Function[x, Module[{dest, temp, src},
- {dest, temp, src} = x;
- {" _mm_stream_pd (&", dest, "[index1], ",
- temp, ")", EOL[]}]],
- Transpose[{dests, temps, srcs}]],
- {" }\n"},
- {" }\n"},
- {"}\n"}
- };
-
-UNUSEDAssignVariableInLoopsVectorised[dest_, temp_, src_] :=
- {"GFD_save_and_store(", dest, ",", "index", ",", "&", temp, ",", src, ")", EOL[]};
+ {type, " ", dest, " = (", codeCond, ") ? vec_load(", src, ") : ToReal(0.0)", EOL[]},
+ {type, " ", dest, " = vec_load(", src, ")", EOL[]}];
TestForNaN[expr_] :=
{"if (isnan(", expr, ")) {\n",
@@ -463,13 +427,13 @@ DeclareFDVariables[] :=
InitialiseFDSpacingVariablesC[] :=
{
- DeclareAssignVariable["CCTK_REAL", "dx", "CCTK_DELTA_SPACE(0)"],
- DeclareAssignVariable["CCTK_REAL", "dy", "CCTK_DELTA_SPACE(1)"],
- DeclareAssignVariable["CCTK_REAL", "dz", "CCTK_DELTA_SPACE(2)"],
(* DeclareAssignVariable["int", "di", "CCTK_GFINDEX3D(cctkGH,1,0,0) - CCTK_GFINDEX3D(cctkGH,0,0,0)"], *)
DeclareAssignVariable["int", "di", "1"],
DeclareAssignVariable["int", "dj", "CCTK_GFINDEX3D(cctkGH,0,1,0) - CCTK_GFINDEX3D(cctkGH,0,0,0)"],
- DeclareAssignVariable["int", "dk", "CCTK_GFINDEX3D(cctkGH,0,0,1) - CCTK_GFINDEX3D(cctkGH,0,0,0)"]
+ DeclareAssignVariable["int", "dk", "CCTK_GFINDEX3D(cctkGH,0,0,1) - CCTK_GFINDEX3D(cctkGH,0,0,0)"],
+ DeclareAssignVariable["CCTK_REAL_VEC", "dx", "ToReal(CCTK_DELTA_SPACE(0))"],
+ DeclareAssignVariable["CCTK_REAL_VEC", "dy", "ToReal(CCTK_DELTA_SPACE(1))"],
+ DeclareAssignVariable["CCTK_REAL_VEC", "dz", "ToReal(CCTK_DELTA_SPACE(2))"]
};
InitialiseFDSpacingVariablesFortran[] :=
@@ -486,17 +450,17 @@ InitialiseFDVariables[] :=
InitialiseFDSpacingVariablesFortran[],
InitialiseFDSpacingVariablesC[]],
- DeclareAssignVariable["CCTK_REAL", "dxi", "1.0 / dx"],
- DeclareAssignVariable["CCTK_REAL", "dyi", "1.0 / dy"],
- DeclareAssignVariable["CCTK_REAL", "dzi", "1.0 / dz"],
- DeclareAssignVariable["CCTK_REAL", "khalf", "0.5"],
- DeclareAssignVariable["CCTK_REAL", "kthird", "1/3.0"],
- DeclareAssignVariable["CCTK_REAL", "ktwothird", "2.0/3.0"],
- DeclareAssignVariable["CCTK_REAL", "kfourthird", "4.0/3.0"],
- DeclareAssignVariable["CCTK_REAL", "keightthird", "8.0/3.0"],
- DeclareAssignVariable["CCTK_REAL", "hdxi", "0.5 * dxi"],
- DeclareAssignVariable["CCTK_REAL", "hdyi", "0.5 * dyi"],
- DeclareAssignVariable["CCTK_REAL", "hdzi", "0.5 * dzi"]}];
+ DeclareAssignVariable["CCTK_REAL_VEC", "dxi", "INV(dx)"],
+ DeclareAssignVariable["CCTK_REAL_VEC", "dyi", "INV(dy)"],
+ DeclareAssignVariable["CCTK_REAL_VEC", "dzi", "INV(dz)"],
+ DeclareAssignVariable["CCTK_REAL_VEC", "khalf", "ToReal(0.5)"],
+ DeclareAssignVariable["CCTK_REAL_VEC", "kthird", "ToReal(1.0/3.0)"],
+ DeclareAssignVariable["CCTK_REAL_VEC", "ktwothird", "ToReal(2.0/3.0)"],
+ DeclareAssignVariable["CCTK_REAL_VEC", "kfourthird", "ToReal(4.0/3.0)"],
+ DeclareAssignVariable["CCTK_REAL_VEC", "keightthird", "ToReal(8.0/3.0)"],
+ DeclareAssignVariable["CCTK_REAL_VEC", "hdxi", "fmul(ToReal(0.5), dxi)"],
+ DeclareAssignVariable["CCTK_REAL_VEC", "hdyi", "fmul(ToReal(0.5), dyi)"],
+ DeclareAssignVariable["CCTK_REAL_VEC", "hdzi", "fmul(ToReal(0.5), dzi)"]}];
GridName[x_] := If[SOURCELANGUAGE == "C",
ToExpression[ToString[x] <> "[index]"],
@@ -657,20 +621,21 @@ GenericGridLoopUsingLoopControl[functionName_, block_] :=
CommentedBlock["Loop over the grid points",
{
"#pragma omp parallel\n",
- "LC_LOOP3 (", functionName, ",\n",
- " i,j,k, min[0],min[1],min[2], max[0],max[1],max[2],\n",
- " cctk_lsh[0],cctk_lsh[1],cctk_lsh[2])\n",
+ "LC_LOOP3VEC (", functionName, ",\n",
+ " i,j,k, min[0],min[1],min[2], max[0],max[1],max[2],\n",
+ " cctk_lsh[0],cctk_lsh[1],cctk_lsh[2],\n",
+ " CCTK_REAL_VEC_SIZE)\n",
"{\n",
indentBlock[
{
- DeclareVariable["index", "// int"],
- DeclareAssignVariable["int", "index", "CCTK_GFINDEX3D(cctkGH,i,j,k)"],
+ (* DeclareVariable["index", "// int"], *)
+ (* DeclareAssignVariable["int", "index", "CCTK_GFINDEX3D(cctkGH,i,j,k)"], *)
+ DeclareAssignVariable["int", "index", "di*i + dj*j + dk*k"],
block
}
],
- "i += CCTK_REAL_VEC_SIZE-1;\n",
"}\n",
- "LC_ENDLOOP3 (", functionName, ");\n"
+ "LC_ENDLOOP3VEC (", functionName, ");\n"
}
],
""
@@ -797,45 +762,117 @@ insertFile[name_] :=
(* Take an expression x and replace occurrences of Powers with the C
macros SQR, CUB, QAD *)
-ReplacePowers[x_] :=
- Module[{rhs},
- rhs = x /. Power[xx_, -1] -> INV[xx];
+ReplacePowers[expr_] :=
+ Module[{rhs, fmaRules, arithRules},
+ rhs = expr /. Power[xx_, -1] -> INV[xx];
If[SOURCELANGUAGE == "C",
Module[{},
- rhs = rhs //. Power[xx_, 2] -> SQR[xx];
- rhs = rhs //. Power[xx_, 3] -> CUB[xx];
- rhs = rhs //. Power[xx_, 4] -> QAD[xx];
+ rhs = rhs /. Power[xx_, 2 ] -> SQR[xx];
+ rhs = rhs /. Power[xx_, 3 ] -> CUB[xx];
+ rhs = rhs /. Power[xx_, 4 ] -> QAD[xx];
+ rhs = rhs /. Power[xx_, -2 ] -> INV[SQR[xx]];
+ rhs = rhs /. Power[xx_, 1/2] -> sqrt[xx];
+ rhs = rhs /. Power[xx_, -1/2] -> INV[sqrt[xx]];
+ rhs = rhs /. Power[xx_, 0.5] -> sqrt[xx];
+ rhs = rhs /. Power[xx_, -0.5] -> INV[sqrt[xx]];
- rhs = rhs //. xx_/2 -> khalf xx;
- rhs = rhs //. (-1/2) -> -khalf;
+ (*
+ rhs = rhs /. 1/2 -> khalf
+ rhs = rhs /. -1/2 -> -khalf;
- rhs = rhs //. xx_/3 -> kthird xx;
- rhs = rhs //. (-1/3) -> -kthird;
+ rhs = rhs /. 1/3 -> kthird;
+ rhs = rhs /. -1/3 -> -kthird;
- rhs = rhs //. 2/3 -> ktwothird;
- rhs = rhs //. (-2/3) -> -ktwothird;
+ rhs = rhs /. 2/3 -> ktwothird;
+ rhs = rhs /. -2/3 -> -ktwothird;
- rhs = rhs //. 4/3 -> kfourthird;
- rhs = rhs //. (-4/3) -> -kfourthird;
+ rhs = rhs /. 4/3 -> kfourthird;
+ rhs = rhs /. -4/3 -> -kfourthird;
- rhs = rhs //. 8/3 -> keightthird;
- rhs = rhs //. (-8/3) -> -keightthird;
+ rhs = rhs /. 8/3 -> keightthird;
+ rhs = rhs /. -8/3 -> -keightthird;
+ *)
- rhs = rhs //. xx_ y_ + xx_ z_ -> xx(y+z);
+ (* Avoid rational numbers *)
+ rhs = rhs /. Rational[xx_,yy_] :> N[xx/yy, 30];
- rhs = rhs //. Power[E, power_] -> exp[power];
- rhs = rhs //. Power[xx_, 0.5] -> sqrt[xx];
+ rhs = rhs //. IfThen[cond1_,xx1_,yy1_] + IfThen[cond2_,xx2_,yy2_] /; cond1==cond2 :> IfThen[cond1, Simplify[ xx1 + xx2], Simplify[ yy1 + yy2]];
+ rhs = rhs //. ff1_ IfThen[cond1_,xx1_,yy1_] + IfThen[cond2_,xx2_,yy2_] /; cond1==cond2 :> IfThen[cond1, Simplify[ff1 xx1 + xx2], Simplify[ff1 yy1 + yy2]];
+ rhs = rhs //. IfThen[cond1_,xx1_,yy1_] + ff2_ IfThen[cond2_,xx2_,yy2_] /; cond1==cond2 :> IfThen[cond1, Simplify[ xx1 + ff2 xx2], Simplify[ yy1 + ff2 yy2]];
+ rhs = rhs //. ff1_ IfThen[cond1_,xx1_,yy1_] + ff2_ IfThen[cond2_,xx2_,yy2_] /; cond1==cond2 :> IfThen[cond1, Simplify[ff1 xx1 + ff2 xx2], Simplify[ff1 yy1 + ff2 yy2]];
+
+ (* Is this still a good idea when FMA instructions are used? *)
+ rhs = rhs //. xx_ yy_ + xx_ zz_ -> xx (yy+zz);
+ rhs = rhs //. xx_ yy_ - xx_ zz_ -> xx (yy-zz);
+
+ rhs = rhs /. Power[E, power_] -> exp[power];
(* there have been some problems doing the Max/Min
replacement via the preprocessor for C, so we do it
here *)
- rhs = rhs //. Max[xx_, yy_] -> fmax[xx, yy];
- rhs = rhs //. Min[xx_, yy_] -> fmin[xx, yy];
-
- rhs = rhs //. Power[xx_, power_] -> pow[xx, power]],
-
- rhs = rhs //. Power[xx_, power_] -> xx^power
+ rhs = rhs /. Max[xx_, yy_] -> fmax[xx, yy];
+ rhs = rhs /. Min[xx_, yy_] -> fmin[xx, yy];
+
+ rhs = rhs /. Power[xx_, power_] -> pow[xx, power];
+
+ (* FMA (fused multiply-add) instructions *)
+ (* Note that -x is represented as Times[-1, x] *)
+ isNotMinusOneQ[n_] := ! (IntegerQ[n] && n == -1);
+ isNotTimesMinusOneQ[n_] := ! MatchQ[n,- _];
+ fmaRules = {
+ + (xx_? isNotMinusOneQ) (yy_? isNotMinusOneQ) + (zz_? isNotTimesMinusOneQ) :> fmadd [xx,yy,zz],
+ + (xx_? isNotMinusOneQ) (yy_? isNotMinusOneQ) - (zz_? isNotTimesMinusOneQ) :> fmsub [xx,yy,zz],
+ - (xx_? isNotMinusOneQ) (yy_? isNotMinusOneQ) + (zz_? isNotTimesMinusOneQ) :> fnmadd[xx,yy,zz],
+ - (xx_? isNotMinusOneQ) (yy_? isNotMinusOneQ) - (zz_? isNotTimesMinusOneQ) :> fnmsub[xx,yy,zz],
+ + (xx_? isNotMinusOneQ) (yy_ + 1) -> fmadd [xx, yy, xx],
+ + (xx_? isNotMinusOneQ) (yy_ - 1) -> fmsub [xx, yy, xx],
+ - (xx_? isNotMinusOneQ) (yy_ + 1) -> fnmadd[xx, yy, xx],
+ - (xx_? isNotMinusOneQ) (yy_ - 1) -> fnmsub[xx, yy, xx],
+ fmadd[xx_, - yy_, zz_] -> fnmsub[xx,yy,zz],
+ fmsub[xx_, - yy_, zz_] -> fnmadd[xx,yy,zz]
+ };
+ rhs = rhs //. fmaRules;
+
+ (* Constants *)
+ rhs = rhs /. xx_Integer/; xx!=-1 :> ToReal[xx];
+ rhs = rhs /. xx_Real -> ToReal[xx];
+ rhs = rhs /. - ToReal[xx_] -> ToReal[- xx];
+ rhs = rhs /. ToReal[xx_] + ToReal[yy_] -> ToReal[xx + yy];
+ rhs = rhs /. ToReal[xx_] * ToReal[yy_] -> ToReal[xx * yy];
+ rhs = rhs /. pow[xx_, ToReal[power_]] -> pow[xx, power];
+ rhs = rhs /. IfThen[ToReal[xx_], yy_, zz_] -> IfThen[xx, yy, zz];
+
+ (* Replace all operators and functions *)
+ (* fadd, fsub, fmul, fdiv, fneg *)
+ isNotFneg[n_] := ! MatchQ[n,fneg[_]];
+ arithRules = {
+ - xx_ -> fneg[xx],
+ xx_ * yy_ -> fmul[xx,yy],
+ xx_ / yy_ -> fdiv[xx,yy],
+ xx_ + yy_ -> fadd[xx,yy],
+ xx_ - yy_ -> fsub[xx,yy],
+ fmul[-1,xx_] -> fneg[xx],
+ fadd[xx_,fneg[yy_]] -> fsub[xx,yy],
+ fadd[fneg[xx_],(yy_? isNotFneg)] :> fsub[yy,xx],
+ Abs[xx_] -> kfabs[xx],
+ Log[xx_] -> klog[xx],
+ fabs[xx_] -> kfabs[xx],
+ fmax[xx_,yy_] -> kfmax[xx,yy],
+ fmin[xx_,yy_] -> kfmin[xx,yy],
+ sqrt[xx_] -> ksqrt[xx],
+ exp[xx_] -> kexp[xx],
+ log[xx_] -> klog[xx],
+ pow[xx_,yy_] -> kpow[xx,yy]
+ };
+ rhs = rhs //. arithRules;
+ rhs = rhs /. IfThen[fmul[xx_, yy_], aa_, bb_] -> IfThen[xx*yy, aa, bb];
+ rhs = rhs /. ToReal[fneg[xx_]] -> ToReal[-xx];
+ rhs = rhs /. ToReal[fmul[xx_, yy_]] -> ToReal[xx*yy];
+ rhs = rhs /. kpow[xx_, fneg[power_]] -> kpow[xx, -power];
+ ],
+
+ rhs = rhs /. Power[xx_, power_] -> xx^power
];
(* Print[rhs//FullForm];*)
rhs
diff --git a/Tools/CodeGen/Differencing.m b/Tools/CodeGen/Differencing.m
index 609aa9d..1ac5ba8 100644
--- a/Tools/CodeGen/Differencing.m
+++ b/Tools/CodeGen/Differencing.m
@@ -168,8 +168,6 @@ CreateDifferencingHeader[derivOps_, zeroDims_] :=
pDefs = Union[Flatten[Map[First, mDefPairs]]];
expressions = Flatten[Map[#[[2]]&, mDefPairs]];
-(* expressions = Flatten[Map[ComponentDerivativeOperatorInlineDefinition, dupsRemoved]];*)
-
{pDefs,Map[{#, "\n"} &, expressions]}];
ordergfds[_[v1_,___], _[v2_,___]] :=
@@ -213,7 +211,10 @@ PrecomputeDerivative[d:pd_[gf_, inds___]] :=
evaluateDerivative[d:pd_[gf_, inds___]] :=
Module[{macroname},
macroName = ComponentDerivativeOperatorMacroName[pd[inds] -> expr];
- Return[ToString[macroName] <> "(" <> ToString[gf] <> ", i, j, k)"]];
+ (* Return[ToString[macroName] <> "(" <> ToString[gf] <> ", i, j, k)"] *)
+ (* Return[ToString[macroName] <> "(" <> ToString[gf] <> ")"] *)
+ Return[ToString[macroName] <> "(&" <> ToString[gf] <> "[index])"]
+ ];
DeclareDerivative[d:pd_[gf_, inds___]] :=
DeclareVariable[GridFunctionDerivativeName[d], "// CCTK_REAL_VEC"];
@@ -248,7 +249,7 @@ sbpMacroDefinition[macroName_, d_] :=
<> "(i,j,k,sbp_" <> l <> "min,sbp_" <> l <> "max,d" <> ds <> ",u,q" <> ds <> ",cctkGH))"}] ];
ComponentDerivativeOperatorMacroDefinition[componentDerivOp:(name_[inds___] -> expr_)] :=
- Module[{macroName, rhs, rhs2, i = "i", j = "j", k = "k", spacings, spacings2, pat, ss, num, den, newnum, signModifier, quotient, liName, rhs3, rhs4},
+ Module[{macroName, rhs, i = "i", j = "j", k = "k", spacings, spacings2, pat, ss, num, den, newnum, signModifier, quotient, liName},
macroName = ComponentDerivativeOperatorMacroName[componentDerivOp];
@@ -262,22 +263,23 @@ ComponentDerivativeOperatorMacroDefinition[componentDerivOp:(name_[inds___] -> e
Return[sbpMacroDefinition[macroName, 3]]];
rhs = DifferenceGF[expr, i, j, k];
+(* Print["rhs1 == ", FullForm[rhs]];*)
spacings = {spacing[1] -> 1/"dxi", spacing[2] -> 1/"dyi", spacing[3] -> 1/"dzi"};
spacings2 = {spacing[1] -> "dx", spacing[2] -> "dy", spacing[3] -> "dz"};
- rhs2 = FullSimplify[rhs];
+ rhs = FullSimplify[rhs];
-(* Print["rhs2 == ", FullForm[rhs2]];*)
+(* Print["rhs2 == ", FullForm[rhs]];*)
pat = Times[spInExpr:(Power[spacing[_],_]..), (Rational[x_,y_])..., rest__];
(* Print["pat == ", pat//FullForm];*)
- If[MatchQ[rhs2, pat],
+ If[MatchQ[rhs, pat],
(* Print["matches!"];*)
- ss = Times[rhs2 /. pat -> spInExpr];
+ ss = Times[rhs /. pat -> spInExpr];
(* Print["ss == ", ss];*)
- num = rhs2 /. pat -> x;
- den = rhs2 /. pat -> y;
+ num = rhs /. pat -> x;
+ den = rhs /. pat -> y;
(* Print["num == ", num];
Print["den == ", den];*)
If[{num, 1, 2} === {1, 2},(* Print["SEQ!"]; *) newnum = 1; den=1; signModifier = "",
@@ -303,39 +305,35 @@ ComponentDerivativeOperatorMacroDefinition[componentDerivOp:(name_[inds___] -> e
liName = "p" <> signModifier <> quotient <> ToString[Apply[SequenceForm,Simplify[1/(ss /. spacings2)],{0,Infinity}]];
(* Print["liName == ", liName];*)
- rhs3 = rhs2 /. pat -> Times[liName, rest],
+ (* rhs = rhs /. pat -> Times[liName, rest], *)
+ rhs = (rhs /. pat -> Times[liName, rest]) / liName,
(* Print["!!!!!!!!DOES NOT MATCH!!!!!!!!!"];*)
- rhs3 = rhs2];
+ rhs = rhs];
-(* Print["rhs3 == ", rhs3];*)
+(* Print["rhs3 == ", FullForm[rhs]];*)
pDefs = {{liName -> CFormHideStrings[ReplacePowers[num / den ss /. spacings2]]}};
-(* rhs4 = Factor[rhs3];*)
-
- rhs4 = rhs3 //. (x_ a_ + x_ b_) -> x(a+b);
- rhs5 = rhs4 //. (x_ a_ - x_ b_) -> x(a-b);
+(* rhs = Factor[rhs];*)
+ rhs = rhs //. (x_ a_ + x_ b_) -> x (a+b);
+ rhs = rhs //. (x_ a_ - x_ b_) -> x (a-b);
(* Print[componentDerivOp, ": "];
- Print[FullForm[rhs5]];
+ Print[FullForm[rhs]];
Print[""];*)
- rhs6 = CFormHideStrings[ReplacePowers[rhs5 /. spacings]];
- {pDefs, FlattenBlock[{"#define ", macroName, "(u,i,j,k) ", "(", rhs6, ")"}]}];
-
-ComponentDerivativeOperatorInlineDefinition[componentDerivOp:(name_[inds___] -> expr_)] :=
- Module[{inlineName, rhs, rhs2, i = "i", j = "j", k = "k", spacings},
-
- inlineName = ComponentDerivativeOperatorMacroName[componentDerivOp];
-
- rhs = DifferenceGF[expr, i, j, k];
-(* rhs = DifferenceGFInline[expr, i, j, k];*)
- spacings = {spacing[1] -> 1/"dxi", spacing[2] -> 1/"dyi", spacing[3] -> 1/"dzi"};
- rhs2 = CFormHideStrings[FullSimplify[ReplacePowers[rhs /. spacings]]];
-
- DefineFunction[inlineName, "static inline CCTK_REAL",
- "CCTK_REAL *u, int i, int j, int k",
- {"return ", rhs2, ";\n"}]];
+ rhs = CFormHideStrings[ReplacePowers[rhs /. spacings]];
+ (* {pDefs, FlattenBlock[{"#define ", macroName, "(u,i,j,k) ", "(", rhs, ")"}]} *)
+ {pDefs, FlattenBlock[{
+ "#ifndef KRANC_DIFF_FUNCTIONS\n",
+ (* default, differencing operators are macros *)
+ "# define ", macroName, "(u) ", "(fmul(", liName, ",", rhs, "))\n",
+ "#else\n",
+ (* new, differencing operators are static functions *)
+ "# define ", macroName, "(u) ", "(", liName, "*", macroName, "_impl((u),dj,dk))\n",
+ "static CCTK_REAL_VEC ", macroName, "_impl(CCTK_REAL const* restrict const u, int const dj, int const dk) ", "{ return ", rhs, "; }\n",
+ "#endif\n"
+ }]}];
ComponentDerivativeOperatorMacroName[componentDerivOp:(name_[inds___] -> expr_)] :=
Module[{stringName},
@@ -368,14 +366,6 @@ DifferenceGF[op_, i_, j_, k_] :=
Apply[Plus, Map[DifferenceGFTerm[#, i, j, k] &, expanded]],
DifferenceGFTerm[expanded, i, j, k]]];
-DifferenceGFInline[op_, i_, j_, k_] :=
- Module[{expanded},
- expanded = Expand[op];
-
- If[Head[expanded] === Plus,
- Apply[Plus, Map[DifferenceGFTermInline[#, i, j, k] &, expanded]],
- DifferenceGFTerm[expanded, i, j, k]]];
-
(* Return the fragment of a macro definition for defining a derivative
operator *)
@@ -404,10 +394,31 @@ DifferenceGFTerm[op_, i_, j_, k_] :=
"(int)(" <> ToString[CFormHideStrings[j+ny]] <> ")," <>
"(int)(" <> ToString[CFormHideStrings[k+nz]] <> "))]",
*)
- remaining "vec_loadu((u)[index" <>
- "+di*(" <> ToString[CFormHideStrings[nx]] <> ")" <>
+(*
+ remaining "vec_loadu_maybe(" <> ToString[CFormHideStrings[nx]] <> "," <>
+ "(u)[index" <>
+ "+di*(" <> ToString[CFormHideStrings[nx]] <> ")" <>
+ "+dj*(" <> ToString[CFormHideStrings[ny]] <> ")" <>
+ "+dk*(" <> ToString[CFormHideStrings[nz]] <> ")])",
+*)
+(*
+ remaining "vec_loadu_maybe(" <> ToString[CFormHideStrings[nx]] <> "," <>
+ "(u)[(" <> ToString[CFormHideStrings[nx]] <> ")" <>
"+dj*(" <> ToString[CFormHideStrings[ny]] <> ")" <>
"+dk*(" <> ToString[CFormHideStrings[nz]] <> ")])",
+*)
+ remaining "vec_loadu_maybe3" <>
+ "(" <> ToString[CFormHideStrings[nx /. {dir1->1, dir2->1, dir3->1}]] <> "," <>
+ ToString[CFormHideStrings[ny /. {dir1->1, dir2->1, dir3->1}]] <> "," <>
+ ToString[CFormHideStrings[nz /. {dir1->1, dir2->1, dir3->1}]] <> "," <>
+ "(u)[(" <> ToString[CFormHideStrings[nx]] <> ")" <>
+ "+dj*(" <> ToString[CFormHideStrings[ny]] <> ")" <>
+ "+dk*(" <> ToString[CFormHideStrings[nz]] <> ")])",
+(*
+ remaining "vec_loadu(u[(" <> ToString[CFormHideStrings[nx]] <> ")" <>
+ "+dj*(" <> ToString[CFormHideStrings[ny]] <> ")" <>
+ "+dk*(" <> ToString[CFormHideStrings[nz]] <> ")])",
+*)
(*
remaining "(u)[CCTK_GFINDEX3D(cctkGH,floor((" <>
ToString[CFormHideStrings[i+nx]] <> ")+0.5),floor((" <>
@@ -417,27 +428,6 @@ DifferenceGFTerm[op_, i_, j_, k_] :=
remaining "u(" <> ToString[FortranForm[i+nx]] <> "," <>
ToString[FortranForm[j+ny]] <> "," <> ToString[FortranForm[k+nz]] <> ")"] ];
-(* Return the fragment of a function definition for defining a derivative
- operator *)
-DifferenceGFTermInline[op_, i_, j_, k_] :=
- Module[{nx, ny, nz, remaining},
-
- If[op === 0,
- Return[0]];
-
- nx = Exponent[op, shift[1]];
- ny = Exponent[op, shift[2]];
- nz = Exponent[op, shift[3]];
-
- remaining = op / (shift[1]^nx) / (shift[2]^ny) / (shift[3]^nz);
-
- If[Cases[{remaining}, shift[_], Infinity] != {},
- ThrowError["Could not parse difference operator:", op]];
-
- remaining "(u)[CCTK_GFINDEX3D(cctkGH," <> ToString[CFormHideStrings[i+nx]] <> "," <>
- ToString[CFormHideStrings[j+ny]] <> "," <> ToString[CFormHideStrings[k+nz]] <> ")]"
- ];
-
DerivativeOperatorGFDs[gf_];
diff --git a/Tools/CodeGen/Kranc.m b/Tools/CodeGen/Kranc.m
index fd07c53..21acd21 100644
--- a/Tools/CodeGen/Kranc.m
+++ b/Tools/CodeGen/Kranc.m
@@ -22,7 +22,11 @@ BeginPackage["Kranc`"];
(* CodeGen.m *)
-{INV, SQR, CUB, QAD, exp, pow, fmax, fmin, dx, dy, dz, khalf, kthird, ktwothird, kfourthird, keightthird};
+{INV, SQR, CUB, QAD, IfThen, ToReal, sqrt, exp, pow, fmax, fmin,
+ fmadd, fmsub, fnmadd, fnmsub, fneg, fadd, fsub, fmul, fdiv,
+ kfabs, kfmax, kfmin, ksqrt, kexp, klog, kpow,
+ dir1, dir2, dir3, dx, dy, dz,
+ khalf, kthird, ktwothird, kfourthird, keightthird};
(* Helpers.m *)
diff --git a/Tools/CodeGen/Thorn.m b/Tools/CodeGen/Thorn.m
index 353be9c..4ada28a 100644
--- a/Tools/CodeGen/Thorn.m
+++ b/Tools/CodeGen/Thorn.m
@@ -476,11 +476,12 @@ CreateSchedule[globalStorageGroups_, scheduledGroups_, scheduledFunctions_] :=
calculationMacros[] :=
CommentedBlock["Define macros used in calculations",
Map[{"#define ", #, "\n"} &,
- {"INITVALUE (42)",
- "INV(x) ((1.0) / (x))" ,
- "SQR(x) ((x) * (x))" ,
- "CUB(x) ((x) * (x) * (x))" ,
- "QAD(x) ((x) * (x) * (x) * (x))"}]];
+ {"INITVALUE (42)",
+ "INV(x) (fdiv(ToReal(1.0),x))",
+ "SQR(x) (fmul(x,x))",
+ "CUB(x) (x*SQR(x))",
+ "QAD(x) (SQR(SQR(x)))"
+ }]];
(* Given a list of Calculation structures as defined above, create a
CodeGen representation of a source file that defines a function for
@@ -508,7 +509,7 @@ CreateSetterSource[calcs_, debug_, useCSE_, include_, imp_,
],
Map[IncludeFile, Join[{"cctk.h", "cctk_Arguments.h", "cctk_Parameters.h",
- (*"precomputations.h",*) "GenericFD.h", "Differencing.h", "Vectors.hh"}, include,
+ (*"precomputations.h",*) "GenericFD.h", "Vectors.hh", "Differencing.h"}, include,
If[OptionValue[UseLoopControl], {"loopcontrol.h"}, {}]]],
calculationMacros[],
@@ -738,10 +739,10 @@ CreateMoLBoundariesSource[spec_] :=
"if (CCTK_EQUALS(" <> boundpar <> ", \"none\" ) ||\n",
" CCTK_EQUALS(" <> boundpar <> ", \"static\") ||\n",
" CCTK_EQUALS(" <> boundpar <> ", \"flat\" ) ||\n",
- " CCTK_EQUALS(" <> boundpar <> ", \"zero\" ) ) \n",
+ " CCTK_EQUALS(" <> boundpar <> ", \"zero\" ) )\n",
"{\n",
- " ierr = Boundary_SelectGroupForBC(cctkGH, CCTK_ALL_FACES, 1, -1, \n",
+ " ierr = Boundary_SelectGroupForBC(cctkGH, CCTK_ALL_FACES, boundary_width, -1,\n",
" \"" <> fullgroupname <> "\", " <> boundpar <> ");\n",
" if (ierr < 0)\n",
@@ -760,10 +761,10 @@ CreateMoLBoundariesSource[spec_] :=
"if (CCTK_EQUALS(" <> boundpar <> ", \"none\" ) ||\n",
" CCTK_EQUALS(" <> boundpar <> ", \"static\") ||\n",
" CCTK_EQUALS(" <> boundpar <> ", \"flat\" ) ||\n",
- " CCTK_EQUALS(" <> boundpar <> ", \"zero\" ) ) \n",
+ " CCTK_EQUALS(" <> boundpar <> ", \"zero\" ) )\n",
"{\n",
- " ierr = Boundary_SelectVarForBC(cctkGH, CCTK_ALL_FACES, 1, -1, \n",
+ " ierr = Boundary_SelectVarForBC(cctkGH, CCTK_ALL_FACES, boundary_width, -1,\n",
" \"" <> fullgfname <> "\", " <> boundpar <> ");\n",
" if (ierr < 0)\n",
@@ -796,7 +797,7 @@ CreateMoLBoundariesSource[spec_] :=
" CCTK_WARN(0, \"could not set SPEED value in table!\");\n",
"\n",
- " ierr = Boundary_SelectGroupForBC(cctkGH, CCTK_ALL_FACES, 1, "<>myhandle<>", \n",
+ " ierr = Boundary_SelectGroupForBC(cctkGH, CCTK_ALL_FACES, boundary_width, "<>myhandle<>", \n",
" \"" <> fullgroupname <> "\", \"Radiation\");\n\n",
" if (ierr < 0)\n",
@@ -830,7 +831,7 @@ CreateMoLBoundariesSource[spec_] :=
" CCTK_WARN(0, \"could not set SPEED value in table!\");\n",
"\n",
- " ierr = Boundary_SelectVarForBC(cctkGH, CCTK_ALL_FACES, 1, "<>myhandle<>", \n",
+ " ierr = Boundary_SelectVarForBC(cctkGH, CCTK_ALL_FACES, boundary_width, "<>myhandle<>", \n",
" \"" <> fullgfname <> "\", \"Radiation\");\n\n",
" if (ierr < 0)\n",
@@ -859,7 +860,7 @@ CreateMoLBoundariesSource[spec_] :=
" CCTK_WARN(0, \"could not set SCALAR value in table!\");\n",
"\n",
- " ierr = Boundary_SelectGroupForBC(cctkGH, CCTK_ALL_FACES, 1, "<>myhandle<>", \n",
+ " ierr = Boundary_SelectGroupForBC(cctkGH, CCTK_ALL_FACES, boundary_width, "<>myhandle<>", \n",
" \"" <> fullgroupname <> "\", \"scalar\");\n\n",
" if (ierr < 0)\n",
@@ -889,7 +890,7 @@ CreateMoLBoundariesSource[spec_] :=
" CCTK_WARN(0, \"could not set SCALAR value in table!\");\n",
"\n",
- " ierr = Boundary_SelectVarForBC(cctkGH, CCTK_ALL_FACES, 1, "<>myhandle<>", \n",
+ " ierr = Boundary_SelectVarForBC(cctkGH, CCTK_ALL_FACES, boundary_width, "<>myhandle<>", \n",
" \"" <> fullgfname <> "\", \"scalar\");\n\n",
" if (ierr < 0)\n",