diff options
author | eschnett <eschnett@105869f7-3296-0410-a4ea-f4349344b45a> | 2010-12-07 16:03:09 +0000 |
---|---|---|
committer | eschnett <eschnett@105869f7-3296-0410-a4ea-f4349344b45a> | 2010-12-07 16:03:09 +0000 |
commit | d728013d0b8c0eec323cee76522f77ff70ec8bab (patch) | |
tree | 6d79e2701fc56c2df3916780f21a0db78fd0ff19 | |
parent | 421103bf1df43f250a452df460553f72c824f8db (diff) |
Correct vectorised fabs() function for Intel
git-svn-id: https://svn.cct.lsu.edu/repos/numrel/LSUThorns/Vectors/trunk@5 105869f7-3296-0410-a4ea-f4349344b45a
-rw-r--r-- | src/vectors-intel-4.h | 7 | ||||
-rw-r--r-- | src/vectors-intel-8.h | 7 |
2 files changed, 12 insertions, 2 deletions
diff --git a/src/vectors-intel-4.h b/src/vectors-intel-4.h
index 4549a70..73c90be 100644
--- a/src/vectors-intel-4.h
+++ b/src/vectors-intel-4.h
@@ -110,6 +110,11 @@ static const union {
   __m128 v;
 } k4sign_mask_union = {{ 0x80000000U, 0x80000000U, 0x80000000U, 0x80000000U }};
 #define k4sign_mask (k4sign_mask_union.v)
+static const union {
+  unsigned i[4];
+  __m128 v;
+} k4abs_mask_union = {{ 0x7fffffffU, 0x7fffffffU, 0x7fffffffU, 0x7fffffffU }};
+#define k4abs_mask (k4abs_mask_union.v)
 
 // Operators
 #define k4pos(x) (x)
@@ -127,7 +132,7 @@ static const union {
 #define k4nmsub(x,y,z) (k4sub(z,k4mul(x,y)))
 
 // Cheap functions
-#define k4fabs(x) (_mm_andnot_ps(x,k4sign_mask))
+#define k4fabs(x) (_mm_and_ps(x,k4abs_mask))
 #define k4fmax(x,y) (_mm_max_ps(x,y))
 #define k4fmin(x,y) (_mm_min_ps(x,y))
 #define k4fnabs(x) (_mm_or_ps(x,k4sign_mask))
diff --git a/src/vectors-intel-8.h b/src/vectors-intel-8.h
index a9e4764..35dffa6 100644
--- a/src/vectors-intel-8.h
+++ b/src/vectors-intel-8.h
@@ -74,6 +74,11 @@ static const union {
   __m128d v;
 } k8sign_mask_union = {{ 0x8000000000000000ULL, 0x8000000000000000ULL }};
 #define k8sign_mask (k8sign_mask_union.v)
+static const union {
+  unsigned long long i[2];
+  __m128d v;
+} k8abs_mask_union = {{ 0x7fffffffffffffffULL, 0x7fffffffffffffffULL }};
+#define k8abs_mask (k8abs_mask_union.v)
 
 // Operators
 #define k8pos(x) (x)
@@ -91,7 +96,7 @@ static const union {
 #define k8nmsub(x,y,z) (k8sub(z,k8mul(x,y)))
 
 // Cheap functions
-#define k8fabs(x) (_mm_andnot_pd(x,k8sign_mask))
+#define k8fabs(x) (_mm_and_pd(x,k8abs_mask))
 #define k8fmax(x,y) (_mm_max_pd(x,y))
 #define k8fmin(x,y) (_mm_min_pd(x,y))
 #define k8fnabs(x) (_mm_or_pd(x,k8sign_mask))