From 21f90caa6e9bdfdb9f6243f7c32d1e429977f6bd Mon Sep 17 00:00:00 2001 From: eschnett Date: Mon, 20 Jun 2011 02:18:30 +0000 Subject: Add new API elements "kifthen" and "vec_architecture" git-svn-id: https://svn.cct.lsu.edu/repos/numrel/LSUThorns/Vectors/trunk@12 105869f7-3296-0410-a4ea-f4349344b45a --- src/vectors-4-Altivec.h | 5 +++ src/vectors-4-SSE.h | 26 +++++++++++++++ src/vectors-4-default.h | 4 +++ src/vectors-8-AVX.h | 4 +++ src/vectors-8-DoubleHummer.h | 4 +++ src/vectors-8-SSE2.h | 76 +++++++++++++++++++++++--------------------- src/vectors-8-VSX.h | 5 +++ src/vectors-8-default.h | 4 +++ src/vectors.h | 8 +++++ 9 files changed, 99 insertions(+), 37 deletions(-) diff --git a/src/vectors-4-Altivec.h b/src/vectors-4-Altivec.h index b44b492..1544504 100644 --- a/src/vectors-4-Altivec.h +++ b/src/vectors-4-Altivec.h @@ -9,6 +9,8 @@ +#define vec4_architecture "Altivec" + // Vector type corresponding to CCTK_REAL #define CCTK_REAL4_VEC vector float @@ -158,3 +160,6 @@ #define k4log(x) K4REPL(log,x) #define k4pow(x,a) K4REPL2(pow,x,a) #define k4sqrt(x) K4REPL(sqrt,x) + +#define k4ifthen(x,y,z) \ + vec_sel((y), (z), vec_sra(vec_convert((x), &(vector int*)0), 31)) diff --git a/src/vectors-4-SSE.h b/src/vectors-4-SSE.h index e6dc735..8319c49 100644 --- a/src/vectors-4-SSE.h +++ b/src/vectors-4-SSE.h @@ -6,6 +6,10 @@ #include <xmmintrin.h> +#ifdef __SSE4_1__ +// Intel's SSE 4.1 +# include <smmintrin.h> +#endif #ifdef __SSE4A__ // AMD's SSE 4a # include <ammintrin.h> @@ -13,6 +17,8 @@ +#define vec4_architecture "SSE" + // Vector type corresponding to CCTK_REAL #define CCTK_REAL4_VEC __m128 @@ -292,3 +298,23 @@ static const union { #define k4exp(x) K4REPL(exp,x) #define k4log(x) K4REPL(log,x) #define k4pow(x,a) K4REPL2(pow,x,a) + +// Choice [sign(x)>0 ? 
y : z] +#ifdef __SSE4_1__ +# define k4ifthen(x,y,z) (_mm_blendv_ps(y,z,x)) +#else +# define k4ifthen(x_,y_,z_) \ + ({ \ + CCTK_REAL4_VEC const xx=(x_); \ + CCTK_REAL4_VEC const x=xx; \ + CCTK_REAL4_VEC const yy=(y_); \ + CCTK_REAL4_VEC const y=yy; \ + CCTK_REAL4_VEC const zz=(z_); \ + CCTK_REAL4_VEC const z=zz; \ + int const m = _mm_movemask_ps(x); \ + vec4_set(vec4_elt0((m & 1) ? z : y), \ + vec4_elt1((m & 2) ? z : y), \ + vec4_elt2((m & 4) ? z : y), \ + vec4_elt3((m & 8) ? z : y)); \ + }) +#endif diff --git a/src/vectors-4-default.h b/src/vectors-4-default.h index 1277040..b59db7c 100644 --- a/src/vectors-4-default.h +++ b/src/vectors-4-default.h @@ -8,6 +8,8 @@ +#define vec4_architecture "scalar (no vectorisation)" + // Use CCTK_REAL4 #define CCTK_REAL4_VEC CCTK_REAL4 @@ -78,3 +80,5 @@ #define k4log(x) (logf(x)) #define k4pow(x,a) (powf(x,a)) #define k4sqrt(x) (sqrtf(x)) + +#define k4ifthen(x,y,z) ((x)>=0?(y):(z)) diff --git a/src/vectors-8-AVX.h b/src/vectors-8-AVX.h index 0f08096..ae7a4d5 100644 --- a/src/vectors-8-AVX.h +++ b/src/vectors-8-AVX.h @@ -13,6 +13,8 @@ +#define vec8_architecture "AVX" + // Vector type corresponding to CCTK_REAL #define CCTK_REAL8_VEC __m256d @@ -194,3 +196,5 @@ static const k8const_t k8abs_mask_union = #define k8exp(x) K8REPL(exp,x) #define k8log(x) K8REPL(log,x) #define k8pow(x,a) K8REPL2(pow,x,a) + +#define k8ifthen(x,y,z) (_mm256_blendv_pd(y,z,x)) diff --git a/src/vectors-8-DoubleHummer.h b/src/vectors-8-DoubleHummer.h index 16ed8a0..62b5f93 100644 --- a/src/vectors-8-DoubleHummer.h +++ b/src/vectors-8-DoubleHummer.h @@ -9,6 +9,8 @@ +#define vec8_architecture "Double Hummer" + // Vector type corresponding to CCTK_REAL #define CCTK_REAL8_VEC double _Complex @@ -204,3 +206,5 @@ #define k8log(x) K8REPL(log,x) #define k8pow(x,a) K8REPL2(pow,x,a) #define k8sqrt(x) K8REPL(sqrt,x) + +#define k8ifthen(x,y,z) (__fpsel(x,z,y)) diff --git a/src/vectors-8-SSE2.h b/src/vectors-8-SSE2.h index 
4a3f4e2..a559cf8 100644 --- a/src/vectors-8-SSE2.h +++ b/src/vectors-8-SSE2.h @@ -17,6 +17,8 @@ +#define vec8_architecture "SSE2" + // Vector type corresponding to CCTK_REAL #define CCTK_REAL8_VEC __m128d @@ -156,43 +158,6 @@ static const union { } k8abs_mask_union = {{ 0x7fffffffffffffffULL, 0x7fffffffffffffffULL }}; #define k8abs_mask (k8sign_mask_union.v) -// Choice [sign(x)>0 ? y : z] -#ifdef __SSE4_1__ -# define k8ifthen(x,y,z) (_mm_blendv_pd(y,z,x)) -#elif 0 -# define k8ifthen(x,y,z) \ - ({ \ - CCTK_REAL8_VEC const xx=(x_); \ - CCTK_REAL8_VEC const x=xx; \ - CCTK_REAL8_VEC const yy=(y_); \ - CCTK_REAL8_VEC const y=yy; \ - CCTK_REAL8_VEC const zz=(z_); \ - CCTK_REAL8_VEC const z=zz; \ - int const m = _mm_movemask_pd(x); \ - CCTK_REAL8_VEC r; \ - switch (m) { \ - case 0: r = y; break; \ - case 1: r = _mm_move_sd(y,z); break; \ - case 2: r = _mm_move_sd(z,y); break; \ - case 3: r = z; break; \ - } \ - r; \ - }) -#else -# define k8ifthen(x,y,z) \ - ({ \ - CCTK_REAL8_VEC const xx=(x_); \ - CCTK_REAL8_VEC const x=xx; \ - CCTK_REAL8_VEC const yy=(y_); \ - CCTK_REAL8_VEC const y=yy; \ - CCTK_REAL8_VEC const zz=(z_); \ - CCTK_REAL8_VEC const z=zz; \ - CCTK_REAL8_VEC const c = _mm_and_pd(x,k8sign_mask); \ - vec8_set(not vec8_elt0(c) ? vec8_elt0(y) : vec8_elt0(z), \ - not vec8_elt1(c) ? vec8_elt1(y) : vec8_elt1(z)); \ - }) -#endif - // Operators #define k8pos(x) (x) #define k8neg(x) (_mm_xor_pd(x,k8sign_mask)) @@ -236,3 +201,40 @@ static const union { #define k8exp(x) K8REPL(exp,x) #define k8log(x) K8REPL(log,x) #define k8pow(x,a) K8REPL2(pow,x,a) + +// Choice [sign(x)>0 ? 
y : z] +#ifdef __SSE4_1__ +# define k8ifthen(x,y,z) (_mm_blendv_pd(y,z,x)) +#elif 0 +# define k8ifthen(x_,y_,z_) \ + ({ \ + CCTK_REAL8_VEC const xx=(x_); \ + CCTK_REAL8_VEC const x=xx; \ + CCTK_REAL8_VEC const yy=(y_); \ + CCTK_REAL8_VEC const y=yy; \ + CCTK_REAL8_VEC const zz=(z_); \ + CCTK_REAL8_VEC const z=zz; \ + int const m = _mm_movemask_pd(x); \ + CCTK_REAL8_VEC r; \ + switch (m) { \ + case 0: r = y; break; \ + case 1: r = _mm_move_sd(y,z); break; \ + case 2: r = _mm_move_sd(z,y); break; \ + case 3: r = z; break; \ + } \ + r; \ + }) +#else +# define k8ifthen(x_,y_,z_) \ + ({ \ + CCTK_REAL8_VEC const xx=(x_); \ + CCTK_REAL8_VEC const x=xx; \ + CCTK_REAL8_VEC const yy=(y_); \ + CCTK_REAL8_VEC const y=yy; \ + CCTK_REAL8_VEC const zz=(z_); \ + CCTK_REAL8_VEC const z=zz; \ + int const m = _mm_movemask_pd(x); \ + vec8_set(vec8_elt0((m & 1) ? z : y), \ + vec8_elt1((m & 2) ? z : y)); \ + }) +#endif diff --git a/src/vectors-8-VSX.h b/src/vectors-8-VSX.h index f47d1df..d01c22a 100644 --- a/src/vectors-8-VSX.h +++ b/src/vectors-8-VSX.h @@ -9,6 +9,8 @@ +#define vec8_architecture "VSX" + // Vector type corresponding to CCTK_REAL #define CCTK_REAL8_VEC vector double @@ -106,3 +108,6 @@ #define k8log(x) K8REPL(log,x) #define k8pow(x,a) K8REPL2(pow,x,a) #define k8sqrt(x) K8REPL(sqrt,x) + +#define k8ifthen(x,y,z) \ + vec_sel((y), (z), vec_sra(vec_convert((x), &(vector long long*)0), 63)) diff --git a/src/vectors-8-default.h b/src/vectors-8-default.h index bbe0150..6f81b91 100644 --- a/src/vectors-8-default.h +++ b/src/vectors-8-default.h @@ -8,6 +8,8 @@ +#define vec8_architecture "scalar (no vectorisation)" + // Use CCTK_REAL8 #define CCTK_REAL8_VEC CCTK_REAL8 @@ -78,3 +80,5 @@ #define k8log(x) (log(x)) #define k8pow(x,a) (pow(x,a)) #define k8sqrt(x) (sqrt(x)) + +#define k8ifthen(x,y,z) ((x)>=0?(y):(z)) diff --git a/src/vectors.h b/src/vectors.h index 03296e7..2a4b04e 100644 --- a/src/vectors.h +++ b/src/vectors.h @@ -44,6 +44,8 @@ #if 
defined(CCTK_REAL_PRECISION_4) +# define vec_architecture vec4_architecture + # define CCTK_REAL_VEC CCTK_REAL4_VEC # define CCTK_REAL_VEC_SIZE CCTK_REAL4_VEC_SIZE @@ -85,8 +87,12 @@ # define kpow k4pow # define ksqrt k4sqrt +# define kifthen k4ifthen + #elif defined(CCTK_REAL_PRECISION_8) +# define vec_architecture vec8_architecture + # define CCTK_REAL_VEC CCTK_REAL8_VEC # define CCTK_REAL_VEC_SIZE CCTK_REAL8_VEC_SIZE @@ -128,6 +134,8 @@ # define kpow k8pow # define ksqrt k8sqrt +# define kifthen k8ifthen + #else # error "Unknown CCTK_REAL_PRECISION" -- cgit v1.2.3