about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/make.code.defn 2
-rw-r--r-- src/test.c 83
-rw-r--r-- src/test.cc 184
-rw-r--r-- src/vectors-4-SSE.h 14
-rw-r--r-- src/vectors-4-default.h 12
-rw-r--r-- src/vectors-8-DoubleHummer.h 6
-rw-r--r-- src/vectors-8-SSE2.h 14
-rw-r--r-- src/vectors-8-default.h 12
-rw-r--r-- src/vectors.cc (renamed from src/vectors.c) 1
9 files changed, 221 insertions, 107 deletions
diff --git a/src/make.code.defn b/src/make.code.defn
index dea6494..5c61b31 100644
--- a/src/make.code.defn
+++ b/src/make.code.defn
@@ -1,7 +1,7 @@
# Main make.code.defn file for thorn Vectors
# Source files in this directory
-SRCS = vectors.c test.c
+SRCS = vectors.cc test.cc
# Subdirectories containing source files
SUBDIRS =
diff --git a/src/test.c b/src/test.c
deleted file mode 100644
index 680e775..0000000
--- a/src/test.c
+++ /dev/null
@@ -1,83 +0,0 @@
-#include "cctk.h"
-#include "cctk_Arguments.h"
-#include "cctk_Parameters.h"
-#include "vectors.h"
-#include <math.h>
-
-#define VECTEST(testname, vecexpr, scalarexpr) \
-{ \
- CCTK_REAL_VEC rv = vecexpr; \
- for(int i=0; i<CCTK_REAL_VEC_SIZE; i++) { \
- CCTK_REAL res = scalarexpr; \
- CCTK_REAL vecres = vec_elt(rv,i); \
- if(vecres == res) \
- passed++; \
- else \
- CCTK_VWarn(CCTK_WARN_ALERT, __LINE__, __FILE__, \
- CCTK_THORNSTRING, "Failed test %s", testname); \
- numtests++; \
- } \
-}
-
-void Vectors_Test(CCTK_ARGUMENTS)
-{
- DECLARE_CCTK_ARGUMENTS;
- DECLARE_CCTK_PARAMETERS;
-
- CCTK_INT passed = 0, numtests=0;
-
- CCTK_REAL a[CCTK_REAL_VEC_SIZE];
- CCTK_REAL b[CCTK_REAL_VEC_SIZE];
- CCTK_REAL c[CCTK_REAL_VEC_SIZE];
-
- for(int i=0; i<CCTK_REAL_VEC_SIZE; i++) {
- a[i] = (i+1)*1.23456789;
- b[i] = -(i+1)*9.87654321;
- c[i] = (i+1)*1.01010101;
- }
-
- CCTK_REAL_VEC av = vec_loadu(a[0]);
- CCTK_REAL_VEC bv = vec_loadu(b[0]);
- CCTK_REAL_VEC cv = vec_loadu(c[0]);
- CCTK_REAL_VEC rv = vec_loadu(c[0]);
-
- /* TODO: Add individual tests for vec_set1, vec_set, vec_elt0, vec_elt
- vec_load, vec_loadu, vec_loadu_maybe, vec_loadu_maybe3
- vec_store, vec_store_nta, vec_store_nta_partial_lo,
- vec_store_nta_partial_hi, vec_store_nta_partial_mid */
-
- VECTEST("kpos", kpos(av), +a[i] );
- VECTEST("kneg", kneg(av), -a[i] );
-
- VECTEST("kadd", kadd(av, bv), a[i] + b[i] );
- VECTEST("ksub", ksub(av, bv), a[i] - b[i] );
- VECTEST("kmul", kmul(av, bv), a[i] * b[i] );
- VECTEST("kdiv", kdiv(av, bv), a[i] / b[i] );
-
- VECTEST("kmadd", kmadd(av, bv, cv), a[i] * b[i] + c[i] );
- VECTEST("kmsub", kmsub(av, bv, cv), a[i] * b[i] - c[i] );
- VECTEST("knmadd", knmadd(av, bv, cv), -a[i] * b[i] - c[i] );
- VECTEST("knmsub", knmsub(av, bv, cv), -a[i] * b[i] + c[i] );
-
- VECTEST("kexp", kexp(av), exp(a[i]) );
- VECTEST("kfabs", kfabs(av), fabs(a[i]) );
- VECTEST("kfmax", kfmax(av, bv), fmax(a[i], b[i]) );
- VECTEST("kfmin", kfmin(av, bv), fmin(a[i], b[i]) );
- VECTEST("kfnabs", kfnabs(av), -fabs(a[i]) );
- VECTEST("klog", klog(av), log(a[i]) );
- VECTEST("kpow", kpow(av, 3.14159), pow(a[i], 3.14159) );
- VECTEST("ksqrt", ksqrt(av), sqrt(a[i]) );
-
- VECTEST("kifpos positive", kifpos(av, bv, cv), signbit(a[i]) ? c[i] : b[i]);
- VECTEST("kifpos negative", kifpos(bv, bv, cv), signbit(b[i]) ? c[i] : b[i]);
- VECTEST("kifpos 0", kifpos(ToReal(0.),bv,cv), signbit(0.)?c[i]:b[i]);
- VECTEST("kifpos -0", kifpos(ToReal(-0.),bv,cv), signbit(-0.)?c[i]:b[i]);
-
- if (passed != numtests)
- CCTK_VWarn(CCTK_WARN_ABORT, __LINE__, __FILE__, CCTK_THORNSTRING,
- "Failed %d correctness tests", numtests - passed);
- else
- CCTK_VInfo(CCTK_THORNSTRING, "%d/%d tests passed ", passed, numtests);
-
- return;
-}
diff --git a/src/test.cc b/src/test.cc
new file mode 100644
index 0000000..7e76362
--- /dev/null
+++ b/src/test.cc
@@ -0,0 +1,184 @@
+#include "cctk.h"
+#include "cctk_Arguments.h"
+#include "cctk_Parameters.h"
+#include "vectors.h"
+#include <math.h>
+#include <stdio.h>
+
+#define SCALARTEST(testname, vecexpr, scalarexpr) \
+do { \
+ if (verbose) \
+ CCTK_VInfo (CCTK_THORNSTRING, "Test %s...", testname); \
+ CCTK_REAL res = (scalarexpr); \
+ CCTK_REAL vecres = (vecexpr); \
+ if(vecres == res) \
+ passed++; \
+ else \
+ CCTK_VParamWarn(CCTK_THORNSTRING, "Failed test %s", (testname)); \
+ numtests++; \
+} while(0)
+
+#define VECTEST(testname, vecexpr, scalarexpr) \
+do { \
+ if (verbose) \
+ CCTK_VInfo (CCTK_THORNSTRING, "Test %s...", testname); \
+ CCTK_REAL_VEC rv = (vecexpr); \
+ for(int i=0; i<CCTK_REAL_VEC_SIZE; i++) { \
+ CCTK_REAL res = (scalarexpr); \
+ CCTK_REAL vecres = vec_elt(rv,i); \
+ if(vecres == res) \
+ passed++; \
+ else \
+ CCTK_VParamWarn(CCTK_THORNSTRING, "Failed test %s", (testname)); \
+ numtests++; \
+ } \
+} while(0)
+
+extern "C"
+void Vectors_Test(CCTK_ARGUMENTS)
+{
+ DECLARE_CCTK_ARGUMENTS;
+ DECLARE_CCTK_PARAMETERS;
+
+ CCTK_INFO ("Testing vectorisation... [errors may result in segfaults]");
+ fflush (stdout);
+
+ char testname[100];
+
+ int passed = 0, numtests = 0;
+
+ CCTK_REAL a[CCTK_REAL_VEC_SIZE];
+ CCTK_REAL b[CCTK_REAL_VEC_SIZE];
+ CCTK_REAL c[CCTK_REAL_VEC_SIZE];
+
+ for(int i=0; i<CCTK_REAL_VEC_SIZE; i++) {
+ a[i] = (i+1)*1.23456789;
+ b[i] = -(i+1)*9.87654321;
+ c[i] = (i+1)*1.01010101;
+ }
+
+ CCTK_REAL_VEC av = vec_loadu(a[0]);
+ CCTK_REAL_VEC bv = vec_loadu(b[0]);
+ CCTK_REAL_VEC cv = vec_loadu(c[0]);
+
+ /* l and lv are similar to a and av, except that they are larger and
+ guaranteed to be aligned */
+ CCTK_REAL_VEC lv[4];
+ lv[0] = vec_loadu(a[0]);
+ lv[1] = vec_loadu(b[0]);
+ lv[2] = vec_loadu(c[0]);
+ lv[3] = vec_loadu(a[0]);
+ CCTK_REAL *const l = (CCTK_REAL*)&lv[0];
+
+ /* s and sv are similar to a and av, but are aligned and not
+ initialised */
+ CCTK_REAL_VEC sv;
+ CCTK_REAL *const s = (CCTK_REAL*)&sv;
+
+ /* Individual tests for vec_set1, vec_set, vec_elt0, vec_elt,
+ vec_load, vec_loadu, vec_loadu_maybe, vec_loadu_maybe3,
+ vec_store, vec_store_nta, vec_store_nta_partial_lo,
+ vec_store_nta_partial_hi, and vec_store_nta_partial_mid follow */
+
+ VECTEST("vec_set1", vec_set1(a[0]), a[0]);
+#if CCTK_REAL_VEC_SIZE == 1
+ VECTEST("vec_set", vec_set(a[0]), a[i]);
+#elif CCTK_REAL_VEC_SIZE == 2
+ VECTEST("vec_set", vec_set(a[0],a[1]), a[i]);
+#elif CCTK_REAL_VEC_SIZE == 4
+ VECTEST("vec_set", vec_set(a[0],a[1],a[2],a[3]), a[i]);
+#else
+# error "Unsupported vector size"
+#endif
+ SCALARTEST("vec_elt0", vec_elt0(av), a[0]);
+ for (int d=0; d<CCTK_REAL_VEC_SIZE; ++d) {
+ snprintf (testname, sizeof testname, "vec_elt[%d]", d);
+ SCALARTEST(testname, vec_elt(av,d), a[d]);
+ }
+
+ /* These tests will probably fail with a segfault, if they fail */
+ VECTEST("vec_load", vec_load(*l), l[i]);
+ for (int d=0; d<CCTK_REAL_VEC_SIZE; ++d) {
+ snprintf (testname, sizeof testname, "vec_loadu[%d]", d);
+ VECTEST(testname, vec_loadu(l[d]), l[i+d]);
+ }
+ for (int d=0; d<CCTK_REAL_VEC_SIZE; ++d) {
+ snprintf (testname, sizeof testname, "vec_loadu_maybe[%d]", d);
+ VECTEST(testname, vec_loadu_maybe(d,l[d]), l[i+d]);
+ }
+ for (int d1=0; d1<CCTK_REAL_VEC_SIZE; ++d1) {
+ for (int d2=0; d2<CCTK_REAL_VEC_SIZE; ++d2) {
+ for (int d3=0; d3<CCTK_REAL_VEC_SIZE; ++d3) {
+ if (! VECTORISE_ALIGNED_ARRAYS || (d2==0 && d3==0)) {
+ snprintf (testname, sizeof testname,
+ "vec_loadu_maybe3[%d,%d,%d]", d1,d2,d3);
+ VECTEST(testname,
+ vec_loadu_maybe3(d1,d2,d3,l[d1+d2+d3]), l[i+d1+d2+d3]);
+ }
+ }
+ }
+ }
+
+ /* These tests may fail with a segfault, if they fail */
+ sv = av; vec_store(*s, bv);
+ VECTEST("vec_store", sv, b[i]);
+ sv = av; vec_store_nta(*s, bv);
+ VECTEST("vec_store_nta", sv, b[i]);
+ /* The partial stores are not implemented for d==0 and
+ d==CCTK_REAL_VEC_SIZE-1 (because these are trivial) */
+ for (int d=1; d<CCTK_REAL_VEC_SIZE-1; ++d) {
+ sv = av; vec_store_nta_partial_lo(*s, bv, d);
+ snprintf (testname, sizeof testname, "vec_store_nta_partial_lo[%d]", d);
+ VECTEST(testname, sv, i<d ? b[i] : a[i]);
+ }
+ for (int d=1; d<CCTK_REAL_VEC_SIZE-1; ++d) {
+ sv = av; vec_store_nta_partial_hi(*s, bv, d);
+ snprintf (testname, sizeof testname, "vec_store_nta_partial_hi[%d]", d);
+ VECTEST(testname, sv, i>=CCTK_REAL_VEC_SIZE-d ? b[i] : a[i]);
+ }
+ for (int dlo=1; dlo<CCTK_REAL_VEC_SIZE-1; ++dlo) {
+ for (int dhi=1; dhi<CCTK_REAL_VEC_SIZE-1; ++dhi) {
+ sv = av; vec_store_nta_partial_mid(*s, bv, dlo, dhi);
+ snprintf (testname, sizeof testname,
+ "vec_store_nta_partial_mid[%d,%d]", dlo, dhi);
+ VECTEST(testname, sv, i<dlo && i>=CCTK_REAL_VEC_SIZE-dhi ? b[i] : a[i]);
+ }
+ }
+
+ VECTEST("kpos", kpos(av), +a[i] );
+ VECTEST("kneg", kneg(av), -a[i] );
+
+ VECTEST("kadd", kadd(av, bv), a[i] + b[i] );
+ VECTEST("ksub", ksub(av, bv), a[i] - b[i] );
+ VECTEST("kmul", kmul(av, bv), a[i] * b[i] );
+ VECTEST("kdiv", kdiv(av, bv), a[i] / b[i] );
+
+ VECTEST("kmadd", kmadd(av, bv, cv), a[i] * b[i] + c[i] );
+ VECTEST("kmsub", kmsub(av, bv, cv), a[i] * b[i] - c[i] );
+ VECTEST("knmadd", knmadd(av, bv, cv), -a[i] * b[i] - c[i] );
+ VECTEST("knmsub", knmsub(av, bv, cv), -a[i] * b[i] + c[i] );
+
+ VECTEST("kexp", kexp(av), exp(a[i]) );
+ VECTEST("kfabs", kfabs(av), fabs(a[i]) );
+ VECTEST("kfmax", kfmax(av, bv), fmax(a[i], b[i]) );
+ VECTEST("kfmin", kfmin(av, bv), fmin(a[i], b[i]) );
+ VECTEST("kfnabs", kfnabs(av), -fabs(a[i]) );
+ VECTEST("klog", klog(av), log(a[i]) );
+ VECTEST("kpow", kpow(av, 3.14159), pow(a[i], 3.14159) );
+ VECTEST("ksqrt", ksqrt(av), sqrt(a[i]) );
+
+ VECTEST("kifpos positive",
+ kifpos(av, bv, cv), std::signbit(a[i]) ? c[i] : b[i]);
+ VECTEST("kifpos negative",
+ kifpos(bv, bv, cv), std::signbit(b[i]) ? c[i] : b[i]);
+ VECTEST("kifpos 0", kifpos(vec_set1(0.),bv,cv), b[i]);
+ VECTEST("kifpos -0", kifpos(vec_set1(-0.),bv,cv), c[i]);
+
+ if (passed != numtests)
+ CCTK_VWarn(CCTK_WARN_ALERT, __LINE__, __FILE__, CCTK_THORNSTRING,
+ "Failed %d correctness tests", numtests - passed);
+ else
+ CCTK_VInfo(CCTK_THORNSTRING, "%d/%d tests passed ", passed, numtests);
+
+ return;
+}
diff --git a/src/vectors-4-SSE.h b/src/vectors-4-SSE.h
index d04d0b7..49925c1 100644
--- a/src/vectors-4-SSE.h
+++ b/src/vectors-4-SSE.h
@@ -5,6 +5,9 @@
+#include <assert.h>
+#include <math.h>
+
#include <xmmintrin.h>
#ifdef __SSE4_1__
// Intel's SSE 4.1
@@ -309,12 +312,11 @@ static const union {
#ifdef __SSE4_1__
# define k4ifpos(x,y,z) (_mm_blendv_ps(y,z,x))
#else
-#include <math.h>
-#ifdef __cplusplus
-#define SGN(x) std::signbit(x)
-#else
-#define SGN(x) signbit(x)
-#endif
+# ifdef __cplusplus
+# define SGN(x) std::signbit(x)
+# else
+# define SGN(x) signbit(x)
+# endif
# define k4ifpos(x,y,z) \
({ \
CCTK_REAL4_VEC const xx=(x_); \
diff --git a/src/vectors-4-default.h b/src/vectors-4-default.h
index ac1f1a4..76e96b9 100644
--- a/src/vectors-4-default.h
+++ b/src/vectors-4-default.h
@@ -1,13 +1,16 @@
// Fallback vectorisation implementation: Do not vectorise
-
-
// We use macros here, so that we are not surprised by compilers which
// don't like to inline functions. This should also make debug builds
// (which may not inline) more efficient.
+#include <assert.h>
+#include <math.h>
+
+
+
#define vec4_architecture "scalar (no vectorisation, 32-bit precision)"
// Use CCTK_REAL4
@@ -81,11 +84,10 @@
#define k4pow(x,a) (powf(x,a))
#define k4sqrt(x) (sqrtf(x))
-#include <math.h>
#ifdef __cplusplus
-#define SGN(x) std::signbit(x)
+# define SGN(x) std::signbit(x)
#else
-#define SGN(x) signbit(x)
+# define SGN(x) signbit(x)
#endif
#define k4ifpos(x,y,z) (SGN(x)?(z):(y))
diff --git a/src/vectors-8-DoubleHummer.h b/src/vectors-8-DoubleHummer.h
index d5423bb..952f7fb 100644
--- a/src/vectors-8-DoubleHummer.h
+++ b/src/vectors-8-DoubleHummer.h
@@ -5,7 +5,11 @@
-#include <builtins.h>
+#include <assert.h>
+
+#ifdef __cplusplus
+# include <builtins.h>
+#endif
diff --git a/src/vectors-8-SSE2.h b/src/vectors-8-SSE2.h
index ed84f8b..426a8a5 100644
--- a/src/vectors-8-SSE2.h
+++ b/src/vectors-8-SSE2.h
@@ -5,6 +5,9 @@
+#include <assert.h>
+#include <math.h>
+
#include <emmintrin.h>
#ifdef __SSE4_1__
// Intel's SSE 4.1
@@ -231,12 +234,11 @@ static const union {
r; \
})
#else
-#include <math.h>
-#ifdef __cplusplus
-#define SGN(x) std::signbit(x)
-#else
-#define SGN(x) signbit(x)
-#endif
+# ifdef __cplusplus
+# define SGN(x) std::signbit(x)
+# else
+# define SGN(x) signbit(x)
+# endif
# define k8ifpos(x_,y_,z_) \
({ \
CCTK_REAL8_VEC const xx=(x_); \
diff --git a/src/vectors-8-default.h b/src/vectors-8-default.h
index 279cdb2..aaed7d9 100644
--- a/src/vectors-8-default.h
+++ b/src/vectors-8-default.h
@@ -1,13 +1,16 @@
// Fallback vectorisation implementation: Do not vectorise
-
-
// We use macros here, so that we are not surprised by compilers which
// don't like to inline functions. This should also make debug builds
// (which may not inline) more efficient.
+#include <assert.h>
+#include <math.h>
+
+
+
#define vec8_architecture "scalar (no vectorisation, 64-bit precision)"
// Use CCTK_REAL8
@@ -81,11 +84,10 @@
#define k8pow(x,a) (pow(x,a))
#define k8sqrt(x) (sqrt(x))
-#include <math.h>
#ifdef __cplusplus
-#define SGN(x) std::signbit(x)
+# define SGN(x) std::signbit(x)
#else
-#define SGN(x) signbit(x)
+# define SGN(x) signbit(x)
#endif
#define k8ifpos(x,y,z) (SGN(x)?(z):(y))
diff --git a/src/vectors.c b/src/vectors.cc
index 7588295..ffff4be 100644
--- a/src/vectors.c
+++ b/src/vectors.cc
@@ -4,6 +4,7 @@
#include "cctk_Parameters.h"
#include "vectors.h"
+extern "C"
int Vectors_Startup(void)
{
CCTK_VInfo(CCTK_THORNSTRING, "Using vector size %d for architecture %s",