about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author eschnett <eschnett@105869f7-3296-0410-a4ea-f4349344b45a> 2012-11-14 19:00:17 +0000
committer eschnett <eschnett@105869f7-3296-0410-a4ea-f4349344b45a> 2012-11-14 19:00:17 +0000
commit daf2693619c35d5775091aa8fd2911c4cb7f57d5 (patch)
tree 06994bbf9188775a0f5168b23c75eec330e78a47
parent 2a3929883d01ba26feb60c22b18ec910bc12d1e0 (diff)
Allow const QPX vectors
git-svn-id: https://svn.cct.lsu.edu/repos/numrel/LSUThorns/Vectors/trunk@73 105869f7-3296-0410-a4ea-f4349344b45a
-rw-r--r-- src/vectors-8-QPX.h 62
1 files changed, 31 insertions, 31 deletions
diff --git a/src/vectors-8-QPX.h b/src/vectors-8-QPX.h
index 93a7707..4d4991f 100644
--- a/src/vectors-8-QPX.h
+++ b/src/vectors-8-QPX.h
@@ -22,7 +22,14 @@
// Vector type corresponding to CCTK_REAL
// TODO: Use a typedef to avoid the "const" issue? Or use a struct?
-#define CCTK_REAL8_VEC vector4double
+// #define CCTK_REAL8_VEC vector4double
+struct CCTK_REAL8_VEC {
+ vector4double v;
+ CCTK_REAL8_VEC() {}
+ CCTK_REAL8_VEC(CCTK_REAL8_VEC const& x): v(x.v) {}
+ CCTK_REAL8_VEC(vector4double v_): v(v_) {}
+ operator vector4double() const { return v; }
+};
// Number of vector elements in a CCTK_REAL_VEC
#define CCTK_REAL8_VEC_SIZE 4
@@ -35,13 +42,6 @@
-union k8const_t {
- CCTK_REAL8 f[CCTK_REAL8_VEC_SIZE];
- CCTK_REAL8_VEC vf;
-};
-
-
-
// Create vectors, extract vector elements
#define vec8_set1(a) (vec_splats(a))
@@ -64,7 +64,7 @@ union k8const_t {
({ \
CCTK_REAL8 const& p__=(p_); \
CCTK_REAL8& p = *(CCTK_REAL8*)&p__; \
- CCTK_REAL8_VEC v1, v2, vp; \
+ vector4double v1, v2, vp; \
v1 = vec_ld(0,&p); /* load the left part of the vector */ \
v2 = vec_ld(32,&p); /* load the right part of the vector */ \
vp = vec_lvsl(0,&p); /* generate control value */ \
@@ -105,28 +105,28 @@ union k8const_t {
// Store a vector to memory (aligned and non-temporal); this stores to
// a reference to a scalar
#define vec8_store(p,x) (vec_sta(x,0,&(p)))
-#define vec8_storeu(p_,x_) \
- ({ \
- CCTK_REAL8 & p__=(p_); \
- CCTK_REAL8_VEC const x__=(x_); \
- CCTK_REAL8 & p=p__; \
- CCTK_REAL8_VEC const x=x__; \
- CCTK_REAL8_VEC v1, v2, v3, vp, m1, m2, m3; \
- /* generate insert masks */ \
- vp = vec_lvsr(0,&p); \
- m1 = k8lfalse; \
- m2 = k8ltrue; \
- m3 = vec_perm(m1,m2,vp); \
- /* get existing data */ \
- v1 = vec_ld(0,&p); \
- v2 = vec_ld(32,&p); \
- /* permute and insert */ \
- v3 = vec_perm(x,x,vp); \
- v1 = vec_sel(v1,v3,m3); \
- v2 = vec_sel(v3,v2,m3); \
- /* store data back */ \
- vec_st(0,&p,v1); \
- vec_st(32,&p,v2); \
+#define vec8_storeu(p_,x_) \
+ ({ \
+ CCTK_REAL8& p__=(p_); \
+ CCTK_REAL8_VEC x__=(x_); \
+ CCTK_REAL8& p=p__; \
+ CCTK_REAL8_VEC x=x__; \
+ CCTK_REAL8_VEC v1, v2, v3, vp, m1, m2, m3; \
+ /* generate insert masks */ \
+ vp = vec_lvsr(0,&p); \
+ m1 = k8lfalse; \
+ m2 = k8ltrue; \
+ m3 = vec_perm(m1,m2,vp); \
+ /* get existing data */ \
+ v1 = vec_ld(0,&p); \
+ v2 = vec_ld(32,&p); \
+ /* permute and insert */ \
+ v3 = vec_perm(x,x,vp); \
+ v1 = vec_sel(v1,v3,m3); \
+ v2 = vec_sel(v3,v2,m3); \
+ /* store data back */ \
+ vec_st(0,&p,v1); \
+ vec_st(32,&p,v2); \
})
#define vec8_store_nta(p,x) (vec_sta(x,0,&(p))) // this doesn't avoid the cache