diff options
author | eschnett <eschnett@105869f7-3296-0410-a4ea-f4349344b45a> | 2011-08-25 17:40:05 +0000 |
---|---|---|
committer | eschnett <eschnett@105869f7-3296-0410-a4ea-f4349344b45a> | 2011-08-25 17:40:05 +0000 |
commit | 802b82837b5b37b7c76ee807939bbffe76f17fdd (patch) | |
tree | cc18cef877eb8067bdbd202a696112cf2d6e0431 /src/vectors-8-SSE2.h | |
parent | b075a3dfcf5aaa72c086d7896ab0c975f42d04c2 (diff) |
Suggest asm statements to support SSE4a with Intel compilers.
Indent vector architecture definitions.
git-svn-id: https://svn.cct.lsu.edu/repos/numrel/LSUThorns/Vectors/trunk@30 105869f7-3296-0410-a4ea-f4349344b45a
Diffstat (limited to 'src/vectors-8-SSE2.h')
-rw-r--r-- | src/vectors-8-SSE2.h | 16 |
1 files changed, 13 insertions, 3 deletions
```diff
diff --git a/src/vectors-8-SSE2.h b/src/vectors-8-SSE2.h
index 56c614d..3b11990 100644
--- a/src/vectors-8-SSE2.h
+++ b/src/vectors-8-SSE2.h
@@ -16,16 +16,26 @@
 #ifdef __SSE4A__
 // AMD's SSE 4a
 # include <ammintrin.h>
+
+// Intel compilers don't support SSE 4a. Here is how we can implement
+// these instructions in assembler instead:
+
+// inline void __attribute__((__always_inline__))
+// _mm_stream_sd (double *p, __m128d x)
+// {
+//   asm ("movntsd %[x],%[p]" : "=m" (*p) : [p] "m" (*p), [x] "x" (x));
+// }
+
 #endif
 
 
 
 #ifdef __SSE4_1__
-#define vec8_architecture "SSE4.1 (64-bit precision)"
+# define vec8_architecture "SSE4.1 (64-bit precision)"
 #elif defined(__SSE4A__)
-#define vec8_architecture "SSE4A (64-bit precision)"
+# define vec8_architecture "SSE4A (64-bit precision)"
 #else
-#define vec8_architecture "SSE2 (64-bit precision)"
+# define vec8_architecture "SSE2 (64-bit precision)"
 #endif
 
 // Vector type corresponding to CCTK_REAL
```