about | summary | refs | log | tree | commit | diff
path: root/src/vectors-4-SSE.h
diff options
context:
space:
mode:
author: eschnett <eschnett@105869f7-3296-0410-a4ea-f4349344b45a> 2011-12-15 15:30:14 +0000
committer: eschnett <eschnett@105869f7-3296-0410-a4ea-f4349344b45a> 2011-12-15 15:30:14 +0000
commit: 9305624ffce90bca91be92db1a718e428a1cdf4c (patch)
tree: a7b2ae42dc1ee25981d74c257e16d8aa4002d1fd /src/vectors-4-SSE.h
parent: 52064a4e928175bfa1be3f7e8d262f584ca025e9 (diff)
Don't use <x86intrin.h>; this does not exist everywhere
git-svn-id: https://svn.cct.lsu.edu/repos/numrel/LSUThorns/Vectors/trunk@40 105869f7-3296-0410-a4ea-f4349344b45a
Diffstat (limited to 'src/vectors-4-SSE.h')
-rw-r--r-- src/vectors-4-SSE.h 28
1 files changed, 23 insertions, 5 deletions
diff --git a/src/vectors-4-SSE.h b/src/vectors-4-SSE.h
index 927d54e..8fea7ba 100644
--- a/src/vectors-4-SSE.h
+++ b/src/vectors-4-SSE.h
@@ -8,7 +8,18 @@
#include <assert.h>
#include <math.h>
-#include <x86intrin.h>
+#include <xmmintrin.h>
+#ifdef __SSE4_1__
+// Intel's SSE 4.1
+# include <smmintrin.h>
+#endif
+#ifdef __SSE4A__
+// AMD's SSE 4a
+# include <ammintrin.h>
+#endif
+#ifdef __FMA4__
+# include <fma4intrin.h>
+#endif
@@ -265,10 +276,17 @@ static const union {
#define k4div(x,y) (_mm_div_ps(x,y))
// Fused multiply-add, defined as [+-]x*y[+-]z
-#define k4madd(x,y,z) (k4add(k4mul(x,y),z))
-#define k4msub(x,y,z) (k4sub(k4mul(x,y),z))
-#define k4nmadd(x,y,z) (k4sub(k4neg(z),k4mul(x,y)))
-#define k4nmsub(x,y,z) (k4sub(z,k4mul(x,y)))
+#ifdef __FMA4__
+# define k4madd(x,y,z) (_mm_macc_ps(x,y,z))
+# define k4msub(x,y,z) (_mm_msub_ps(x,y,z))
+# define k4nmadd(x,y,z) (_mm_nmsub_ps(x,y,z))
+# define k4nmsub(x,y,z) (_mm_nmacc_ps(x,y,z))
+#else
+# define k4madd(x,y,z) (k4add(k4mul(x,y),z))
+# define k4msub(x,y,z) (k4sub(k4mul(x,y),z))
+# define k4nmadd(x,y,z) (k4sub(k4neg(z),k4mul(x,y)))
+# define k4nmsub(x,y,z) (k4sub(z,k4mul(x,y)))
+#endif
// Cheap functions
#define k4fabs(x) (_mm_andnot_ps(k4sign_mask,x))