From 49084a03a0685df85894e22821a7ef63b2d8cf1c Mon Sep 17 00:00:00 2001 From: eschnett Date: Fri, 24 Dec 2010 00:43:09 +0000 Subject: Make vectorisation work with PGI compilers git-svn-id: https://svn.cct.lsu.edu/repos/numrel/LSUThorns/Vectors/trunk@6 105869f7-3296-0410-a4ea-f4349344b45a --- src/vectors-intel-4.h | 15 ++++++++++++++- src/vectors-intel-8.h | 35 +++++++++++++++++++++++++++++++---- 2 files changed, 45 insertions(+), 5 deletions(-) diff --git a/src/vectors-intel-4.h b/src/vectors-intel-4.h index 73c90be..bc50e68 100644 --- a/src/vectors-intel-4.h +++ b/src/vectors-intel-4.h @@ -48,7 +48,19 @@ CCTK_REAL4_VEC const xelt3=(x); \ vec4_elt0(_mm_shuffle_ps(xelt3,xelt3,_MM_SHUFFLE(3,2,1,0))); \ }) -#define vec4_elt(x,d) \ +#if defined(__PGI) && defined (__amd64__) +# define vec4_elt(x,d) \ +({ \ + CCTK_REAL4_VEC const xelt=(x); \ + CCTK_REAL4 aelt; \ + if (d==0) aelt=vec4_elt0(xelt); \ + else if (d==1) aelt=vec4_elt1(xelt); \ + else if (d==2) aelt=vec4_elt2(xelt); \ + else if (d==3) aelt=vec4_elt3(xelt); \ + aelt; \ +}) +#else +# define vec4_elt(x,d) \ ({ \ CCTK_REAL4_VEC const xelt=(x); \ CCTK_REAL4 aelt; \ @@ -60,6 +72,7 @@ } \ aelt; \ }) +#endif diff --git a/src/vectors-intel-8.h b/src/vectors-intel-8.h index 35dffa6..34aa24f 100644 --- a/src/vectors-intel-8.h +++ b/src/vectors-intel-8.h @@ -22,13 +22,33 @@ #define vec8_set1(a) (_mm_set1_pd(a)) #define vec8_set(a,b) (_mm_set_pd(b,a)) // note reversed arguments -#define vec8_elt0(x) (_mm_cvtsd_f64(x)) // this is a no-op +#if defined(__PGI) && defined (__amd64__) +// _mm_cvtsd_f64 does not exist on PGI 9 compilers +# define vec8_elt0(x) \ +({ \ + CCTK_REAL8 aelt0; \ + asm ("" : "=x" (aelt0) : "0" (x)); \ + aelt0; \ +}) +#else +# define vec8_elt0(x) (_mm_cvtsd_f64(x)) // this is a no-op +#endif #define vec8_elt1(x) \ ({ \ CCTK_REAL8_VEC const xelt1=(x); \ vec8_elt0(_mm_unpackhi_pd(xelt1,xelt1)); \ }) -#define vec8_elt(x,d) \ +#if defined(__PGI) && defined (__amd64__) +# define vec8_elt(x,d) \ +({ \ + CCTK_REAL8_VEC const xelt=(x); \ + CCTK_REAL8 aelt; \ + if (d==0) aelt=vec8_elt0(xelt); \ + else if (d==1) aelt=vec8_elt1(xelt); \ + aelt; \ +}) +#else +# define vec8_elt(x,d) \ ({ \ CCTK_REAL8_VEC const xelt=(x); \ CCTK_REAL8 aelt; \ @@ -38,6 +58,7 @@ } \ aelt; \ }) +#endif @@ -62,8 +83,14 @@ // Store a lower or higher partial vector (aligned and non-temporal); // the non-temporal hint is probably ignored -#define vec8_store_nta_partial_lo(p,x,n) (_mm_storel_pd(&(p),x)) -#define vec8_store_nta_partial_hi(p,x,n) (_mm_storeh_pd(&(p)+1,x)) +#if 1 +# define vec8_store_nta_partial_lo(p,x,n) (_mm_storel_pd(&(p),x)) +# define vec8_store_nta_partial_hi(p,x,n) (_mm_storeh_pd(&(p)+1,x)) +#else +// This is slower; we would need a non-temporal read +# define vec8_store_nta_partial_lo(p,x,n) (vec8_store_nta(p,_mm_loadh_pd(x,&(p)+1))) +# define vec8_store_nta_partial_hi(p,x,n) (vec8_store_nta(p,_mm_loadl_pd(x,&(p)))) +#endif -- cgit v1.2.3