From 9305624ffce90bca91be92db1a718e428a1cdf4c Mon Sep 17 00:00:00 2001 From: eschnett Date: Thu, 15 Dec 2011 15:30:14 +0000 Subject: Don't use <x86intrin.h>; this does not exist everywhere git-svn-id: https://svn.cct.lsu.edu/repos/numrel/LSUThorns/Vectors/trunk@40 105869f7-3296-0410-a4ea-f4349344b45a --- src/vectors-4-SSE.h | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) (limited to 'src/vectors-4-SSE.h') diff --git a/src/vectors-4-SSE.h b/src/vectors-4-SSE.h index 927d54e..8fea7ba 100644 --- a/src/vectors-4-SSE.h +++ b/src/vectors-4-SSE.h @@ -8,7 +8,18 @@ #include <assert.h> #include <math.h> -#include <x86intrin.h> +#include <xmmintrin.h> +#ifdef __SSE4_1__ +// Intel's SSE 4.1 +# include <smmintrin.h> +#endif +#ifdef __SSE4A__ +// AMD's SSE 4a +# include <ammintrin.h> +#endif +#ifdef __FMA4__ +# include <fma4intrin.h> +#endif @@ -265,10 +276,17 @@ static const union { #define k4div(x,y) (_mm_div_ps(x,y)) // Fused multiply-add, defined as [+-]x*y[+-]z -#define k4madd(x,y,z) (k4add(k4mul(x,y),z)) -#define k4msub(x,y,z) (k4sub(k4mul(x,y),z)) -#define k4nmadd(x,y,z) (k4sub(k4neg(z),k4mul(x,y))) -#define k4nmsub(x,y,z) (k4sub(z,k4mul(x,y))) +#ifdef __FMA4__ +# define k4madd(x,y,z) (_mm_macc_ps(x,y,z)) +# define k4msub(x,y,z) (_mm_msub_ps(x,y,z)) +# define k4nmadd(x,y,z) (_mm_nmsub_ps(x,y,z)) +# define k4nmsub(x,y,z) (_mm_nmacc_ps(x,y,z)) +#else +# define k4madd(x,y,z) (k4add(k4mul(x,y),z)) +# define k4msub(x,y,z) (k4sub(k4mul(x,y),z)) +# define k4nmadd(x,y,z) (k4sub(k4neg(z),k4mul(x,y))) +# define k4nmsub(x,y,z) (k4sub(z,k4mul(x,y))) +#endif // Cheap functions #define k4fabs(x) (_mm_andnot_ps(k4sign_mask,x)) -- cgit v1.2.3