aboutsummaryrefslogtreecommitdiff
path: root/src/vectors-8-SSE2.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/vectors-8-SSE2.h')
-rw-r--r-- src/vectors-8-SSE2.h 14
1 files changed, 13 insertions, 1 deletions
diff --git a/src/vectors-8-SSE2.h b/src/vectors-8-SSE2.h
index bce093c..46a855e 100644
--- a/src/vectors-8-SSE2.h
+++ b/src/vectors-8-SSE2.h
@@ -8,7 +8,14 @@
#include <assert.h>
#include <math.h>
-#include <x86intrin.h>
+#include <emmintrin.h>
+#ifdef __SSE4_1__
+// Intel's SSE 4.1
+# include <smmintrin.h>
+#endif
+#ifdef __SSE4A__
+// AMD's SSE 4a
+# include <ammintrin.h>
// Intel compilers don't support SSE 4a. Here is how we can implement
// these instructions in assembler instead:
@@ -19,6 +26,11 @@
// asm ("movntsd %[x],%[p]" : "=m" (*p) : [p] "m" (*p), [x] "x" (x));
// }
+#endif
+#ifdef __FMA4__
+# include <fma4intrin.h>
+#endif
+
#ifdef __SSE4_1__