about | summary | refs | log | tree | commit | diff
path: root/ML_BSSN
diff options
context:
space:
mode:
author: Barry Wardell <barry.wardell@gmail.com> 2011-08-18 00:12:46 +0200
committer: Barry Wardell <barry.wardell@gmail.com> 2011-08-25 21:07:02 +0200
commit: 16c7cc3c66cab59041cfb2203953eaa21cd7812d (patch)
tree: c17f1b4606986e34a48d67e726ff2b8f3b132cdb /ML_BSSN
parent: 2888d221261da25e7802b76c46ee3e10ca578418 (diff)
Regenerate code with Vectorisation enabled.
Note that this also applies the changes from commit 3ba8a55ae2578cb6dc06f0ec8b81f86b3a2654ac to ML_BSSN_MP, ML_BSSN_MP_Helper, ML_BSSN_MP_O8, ML_BSSN_MP_O8_Helper, ML_BSSN_O2, ML_BSSN_O2_Helper, ML_BSSN_O8, ML_BSSN_O8_Helper, ML_BSSN_UPW and ML_BSSN_UPW_Helper, which were accidentally missed in that commit.
Diffstat (limited to 'ML_BSSN')
-rw-r--r-- ML_BSSN/configuration.ccl | 1
-rw-r--r-- ML_BSSN/interface.ccl | 1
-rw-r--r-- ML_BSSN/src/Differencing.h | 249
-rw-r--r-- ML_BSSN/src/ML_BSSN_Advect.cc | 841
-rw-r--r-- ML_BSSN/src/ML_BSSN_Dissipation.cc | 625
-rw-r--r-- ML_BSSN/src/ML_BSSN_InitGamma.cc | 126
-rw-r--r-- ML_BSSN/src/ML_BSSN_InitRHS.cc | 280
-rw-r--r-- ML_BSSN/src/ML_BSSN_Minkowski.cc | 280
-rw-r--r-- ML_BSSN/src/ML_BSSN_RHS1.cc | 829
-rw-r--r-- ML_BSSN/src/ML_BSSN_RHS2.cc | 951
-rw-r--r-- ML_BSSN/src/ML_BSSN_RHSStaticBoundary.cc | 280
-rw-r--r-- ML_BSSN/src/ML_BSSN_boundary.cc | 280
-rw-r--r-- ML_BSSN/src/ML_BSSN_constraints1.cc | 800
-rw-r--r-- ML_BSSN/src/ML_BSSN_constraints2.cc | 511
-rw-r--r-- ML_BSSN/src/ML_BSSN_convertFromADMBase.cc | 315
-rw-r--r-- ML_BSSN/src/ML_BSSN_convertFromADMBaseGamma.cc | 418
-rw-r--r-- ML_BSSN/src/ML_BSSN_convertToADMBase.cc | 280
-rw-r--r-- ML_BSSN/src/ML_BSSN_convertToADMBaseDtLapseShift.cc | 254
-rw-r--r-- ML_BSSN/src/ML_BSSN_convertToADMBaseDtLapseShiftBoundary.cc | 181
-rw-r--r-- ML_BSSN/src/ML_BSSN_convertToADMBaseFakeDtLapseShift.cc | 181
-rw-r--r-- ML_BSSN/src/ML_BSSN_enforce.cc | 207
21 files changed, 4431 insertions, 3459 deletions
diff --git a/ML_BSSN/configuration.ccl b/ML_BSSN/configuration.ccl
index 8e2c3c5..bdbc1bd 100644
--- a/ML_BSSN/configuration.ccl
+++ b/ML_BSSN/configuration.ccl
@@ -2,3 +2,4 @@
REQUIRES GenericFD
REQUIRES LoopControl
+REQUIRES Vectors
diff --git a/ML_BSSN/interface.ccl b/ML_BSSN/interface.ccl
index 91b6696..cf4a371 100644
--- a/ML_BSSN/interface.ccl
+++ b/ML_BSSN/interface.ccl
@@ -11,6 +11,7 @@ USES INCLUDE: Symmetry.h
USES INCLUDE: sbp_calc_coeffs.h
USES INCLUDE: Boundary.h
USES INCLUDE: loopcontrol.h
+USES INCLUDE: vectors.h
CCTK_INT FUNCTION MoLRegisterEvolved(CCTK_INT IN EvolvedIndex, CCTK_INT IN RHSIndex)
USES FUNCTION MoLRegisterEvolved
diff --git a/ML_BSSN/src/Differencing.h b/ML_BSSN/src/Differencing.h
index 23a98cd..3fab658 100644
--- a/ML_BSSN/src/Differencing.h
+++ b/ML_BSSN/src/Differencing.h
@@ -1,324 +1,309 @@
+#include <assert.h>
+#include "vectors.h"
+
#ifndef KRANC_DIFF_FUNCTIONS
-# define PDstandardNth1(u) ((-8*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(0)]) + 8*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(0)]) - (*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(0)]))*p1o12dx)
+# define PDstandardNth1(u) (kmul(p1o12dx,kadd(vec_loadu_maybe3(-2,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(0)]),kmadd(vec_loadu_maybe3(-1,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(0)]),ToReal(-8),kmsub(vec_loadu_maybe3(1,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(0)]),ToReal(8),vec_loadu_maybe3(2,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(0)]))))))
#else
# define PDstandardNth1(u) (PDstandardNth1_impl(u,p1o12dx,cdj,cdk))
-static CCTK_REAL PDstandardNth1_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o12dx, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
-static CCTK_REAL PDstandardNth1_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o12dx, ptrdiff_t const cdj, ptrdiff_t const cdk)
+static CCTK_REAL_VEC PDstandardNth1_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o12dx, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
+static CCTK_REAL_VEC PDstandardNth1_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o12dx, ptrdiff_t const cdj, ptrdiff_t const cdk)
{
ptrdiff_t const cdi=sizeof(CCTK_REAL);
- return (-8*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(0)]) + 8*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(0)]) - (*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(0)]))*p1o12dx;
+ return kmul(p1o12dx,kadd(vec_loadu_maybe3(-2,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(0)]),kmadd(vec_loadu_maybe3(-1,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(0)]),ToReal(-8),kmsub(vec_loadu_maybe3(1,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(0)]),ToReal(8),vec_loadu_maybe3(2,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(0)])))));
}
#endif
#ifndef KRANC_DIFF_FUNCTIONS
-# define PDstandardNth2(u) ((-8*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(0)]) + 8*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(0)]) - (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(0)]))*p1o12dy)
+# define PDstandardNth2(u) (kmul(p1o12dy,kadd(vec_loadu_maybe3(0,-2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(0)]),kmadd(vec_loadu_maybe3(0,-1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(0)]),ToReal(-8),kmsub(vec_loadu_maybe3(0,1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(0)]),ToReal(8),vec_loadu_maybe3(0,2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(0)]))))))
#else
# define PDstandardNth2(u) (PDstandardNth2_impl(u,p1o12dy,cdj,cdk))
-static CCTK_REAL PDstandardNth2_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o12dy, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
-static CCTK_REAL PDstandardNth2_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o12dy, ptrdiff_t const cdj, ptrdiff_t const cdk)
+static CCTK_REAL_VEC PDstandardNth2_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o12dy, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
+static CCTK_REAL_VEC PDstandardNth2_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o12dy, ptrdiff_t const cdj, ptrdiff_t const cdk)
{
ptrdiff_t const cdi=sizeof(CCTK_REAL);
- return (-8*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(0)]) + 8*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(0)]) - (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(0)]))*p1o12dy;
+ return kmul(p1o12dy,kadd(vec_loadu_maybe3(0,-2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(0)]),kmadd(vec_loadu_maybe3(0,-1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(0)]),ToReal(-8),kmsub(vec_loadu_maybe3(0,1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(0)]),ToReal(8),vec_loadu_maybe3(0,2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(0)])))));
}
#endif
#ifndef KRANC_DIFF_FUNCTIONS
-# define PDstandardNth3(u) ((-8*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-1)]) + 8*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(1)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-2)]) - (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(2)]))*p1o12dz)
+# define PDstandardNth3(u) (kmul(p1o12dz,kadd(vec_loadu_maybe3(0,0,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-2)]),kmadd(vec_loadu_maybe3(0,0,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-1)]),ToReal(-8),kmsub(vec_loadu_maybe3(0,0,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(1)]),ToReal(8),vec_loadu_maybe3(0,0,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(2)]))))))
#else
# define PDstandardNth3(u) (PDstandardNth3_impl(u,p1o12dz,cdj,cdk))
-static CCTK_REAL PDstandardNth3_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o12dz, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
-static CCTK_REAL PDstandardNth3_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o12dz, ptrdiff_t const cdj, ptrdiff_t const cdk)
+static CCTK_REAL_VEC PDstandardNth3_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o12dz, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
+static CCTK_REAL_VEC PDstandardNth3_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o12dz, ptrdiff_t const cdj, ptrdiff_t const cdk)
{
ptrdiff_t const cdi=sizeof(CCTK_REAL);
- return (-8*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-1)]) + 8*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(1)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-2)]) - (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(2)]))*p1o12dz;
+ return kmul(p1o12dz,kadd(vec_loadu_maybe3(0,0,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-2)]),kmadd(vec_loadu_maybe3(0,0,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-1)]),ToReal(-8),kmsub(vec_loadu_maybe3(0,0,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(1)]),ToReal(8),vec_loadu_maybe3(0,0,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(2)])))));
}
#endif
#ifndef KRANC_DIFF_FUNCTIONS
-# define PDstandardNth11(u) ((30*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]) - 16*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(0)])) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(0)]))*pm1o12dx2)
+# define PDstandardNth11(u) (kmul(pm1o12dx2,kadd(vec_loadu_maybe3(-2,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(0)]),kadd(vec_loadu_maybe3(2,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(0)]),kmadd(kadd(vec_loadu_maybe3(-1,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(0)]),vec_loadu_maybe3(1,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(0)])),ToReal(-16),kmul(vec_loadu_maybe3(0,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]),ToReal(30)))))))
#else
# define PDstandardNth11(u) (PDstandardNth11_impl(u,pm1o12dx2,cdj,cdk))
-static CCTK_REAL PDstandardNth11_impl(CCTK_REAL const* restrict const u, CCTK_REAL const pm1o12dx2, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
-static CCTK_REAL PDstandardNth11_impl(CCTK_REAL const* restrict const u, CCTK_REAL const pm1o12dx2, ptrdiff_t const cdj, ptrdiff_t const cdk)
+static CCTK_REAL_VEC PDstandardNth11_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const pm1o12dx2, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
+static CCTK_REAL_VEC PDstandardNth11_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const pm1o12dx2, ptrdiff_t const cdj, ptrdiff_t const cdk)
{
ptrdiff_t const cdi=sizeof(CCTK_REAL);
- return (30*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]) - 16*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(0)])) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(0)]))*pm1o12dx2;
+ return kmul(pm1o12dx2,kadd(vec_loadu_maybe3(-2,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(0)]),kadd(vec_loadu_maybe3(2,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(0)]),kmadd(kadd(vec_loadu_maybe3(-1,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(0)]),vec_loadu_maybe3(1,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(0)])),ToReal(-16),kmul(vec_loadu_maybe3(0,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]),ToReal(30))))));
}
#endif
#ifndef KRANC_DIFF_FUNCTIONS
-# define PDstandardNth22(u) ((30*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]) - 16*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(0)])) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(0)]))*pm1o12dy2)
+# define PDstandardNth22(u) (kmul(pm1o12dy2,kadd(vec_loadu_maybe3(0,-2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(0)]),kadd(vec_loadu_maybe3(0,2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(0)]),kmadd(kadd(vec_loadu_maybe3(0,-1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(0)]),vec_loadu_maybe3(0,1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(0)])),ToReal(-16),kmul(vec_loadu_maybe3(0,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]),ToReal(30)))))))
#else
# define PDstandardNth22(u) (PDstandardNth22_impl(u,pm1o12dy2,cdj,cdk))
-static CCTK_REAL PDstandardNth22_impl(CCTK_REAL const* restrict const u, CCTK_REAL const pm1o12dy2, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
-static CCTK_REAL PDstandardNth22_impl(CCTK_REAL const* restrict const u, CCTK_REAL const pm1o12dy2, ptrdiff_t const cdj, ptrdiff_t const cdk)
+static CCTK_REAL_VEC PDstandardNth22_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const pm1o12dy2, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
+static CCTK_REAL_VEC PDstandardNth22_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const pm1o12dy2, ptrdiff_t const cdj, ptrdiff_t const cdk)
{
ptrdiff_t const cdi=sizeof(CCTK_REAL);
- return (30*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]) - 16*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(0)])) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(0)]))*pm1o12dy2;
+ return kmul(pm1o12dy2,kadd(vec_loadu_maybe3(0,-2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(0)]),kadd(vec_loadu_maybe3(0,2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(0)]),kmadd(kadd(vec_loadu_maybe3(0,-1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(0)]),vec_loadu_maybe3(0,1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(0)])),ToReal(-16),kmul(vec_loadu_maybe3(0,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]),ToReal(30))))));
}
#endif
#ifndef KRANC_DIFF_FUNCTIONS
-# define PDstandardNth33(u) ((30*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]) - 16*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-1)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(1)])) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(2)]))*pm1o12dz2)
+# define PDstandardNth33(u) (kmul(pm1o12dz2,kadd(vec_loadu_maybe3(0,0,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-2)]),kadd(vec_loadu_maybe3(0,0,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(2)]),kmadd(kadd(vec_loadu_maybe3(0,0,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-1)]),vec_loadu_maybe3(0,0,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(1)])),ToReal(-16),kmul(vec_loadu_maybe3(0,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]),ToReal(30)))))))
#else
# define PDstandardNth33(u) (PDstandardNth33_impl(u,pm1o12dz2,cdj,cdk))
-static CCTK_REAL PDstandardNth33_impl(CCTK_REAL const* restrict const u, CCTK_REAL const pm1o12dz2, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
-static CCTK_REAL PDstandardNth33_impl(CCTK_REAL const* restrict const u, CCTK_REAL const pm1o12dz2, ptrdiff_t const cdj, ptrdiff_t const cdk)
+static CCTK_REAL_VEC PDstandardNth33_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const pm1o12dz2, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
+static CCTK_REAL_VEC PDstandardNth33_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const pm1o12dz2, ptrdiff_t const cdj, ptrdiff_t const cdk)
{
ptrdiff_t const cdi=sizeof(CCTK_REAL);
- return (30*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]) - 16*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-1)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(1)])) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(2)]))*pm1o12dz2;
+ return kmul(pm1o12dz2,kadd(vec_loadu_maybe3(0,0,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-2)]),kadd(vec_loadu_maybe3(0,0,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(2)]),kmadd(kadd(vec_loadu_maybe3(0,0,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-1)]),vec_loadu_maybe3(0,0,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(1)])),ToReal(-16),kmul(vec_loadu_maybe3(0,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]),ToReal(30))))));
}
#endif
#ifndef KRANC_DIFF_FUNCTIONS
-# define PDstandardNth12(u) ((-64*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(1)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(-1)+cdk*(0)])) + 64*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(-1)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(1)+cdk*(0)])) + 8*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(2)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(-2)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(1)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(-1)+cdk*(0)])) - 8*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(-2)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(2)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(-1)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(1)+cdk*(0)])) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(-2)+cdk*(0)]) - (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(2)+cdk*(0)]) - (*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(-2)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(2)+cdk*(0)]))*p1o144dxdy)
+# define PDstandardNth12(u) (kmul(p1o144dxdy,kadd(vec_loadu_maybe3(-2,-2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(-2)+cdk*(0)]),kadd(vec_loadu_maybe3(2,2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(2)+cdk*(0)]),kmadd(kadd(vec_loadu_maybe3(-1,1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(1)+cdk*(0)]),vec_loadu_maybe3(1,-1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(-1)+cdk*(0)])),ToReal(-64),kmadd(kadd(vec_loadu_maybe3(-1,-2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(-2)+cdk*(0)]),kadd(vec_loadu_maybe3(1,2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(2)+cdk*(0)]),kadd(vec_loadu_maybe3(-2,-1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(-1)+cdk*(0)]),vec_loadu_maybe3(2,1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(1)+cdk*(0)])))),ToReal(-8),ksub(ksub(kmadd(kadd(vec_loadu_maybe3(-1,2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(2)+cdk*(0)]),kadd(vec_loadu_maybe3(1,-2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(-2)+cdk*(0)]),kadd(vec_loadu_maybe3(-2,1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(1)+cdk*(0)]),vec_loadu_maybe3(2,-1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(-1)+cdk*(0)])))),ToReal(8),kmul(kadd(vec_loadu_maybe3(-1,-1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(-1)+cdk*(0)]),vec_loadu_maybe3(1,1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(1)+cdk*(0)])),ToReal(64))),vec_loadu_maybe3(2,-2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(-2)+cdk*(0)])),vec_loadu_maybe3(-2,2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(2)+cdk*(0)]))))))))
#else
# define PDstandardNth12(u) (PDstandardNth12_impl(u,p1o144dxdy,cdj,cdk))
-static CCTK_REAL PDstandardNth12_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o144dxdy, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
-static CCTK_REAL PDstandardNth12_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o144dxdy, ptrdiff_t const cdj, ptrdiff_t const cdk)
+static CCTK_REAL_VEC PDstandardNth12_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o144dxdy, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
+static CCTK_REAL_VEC PDstandardNth12_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o144dxdy, ptrdiff_t const cdj, ptrdiff_t const cdk)
{
ptrdiff_t const cdi=sizeof(CCTK_REAL);
- return (-64*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(1)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(-1)+cdk*(0)])) + 64*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(-1)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(1)+cdk*(0)])) + 8*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(2)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(-2)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(1)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(-1)+cdk*(0)])) - 8*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(-2)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(2)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(-1)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(1)+cdk*(0)])) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(-2)+cdk*(0)]) - (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(2)+cdk*(0)]) - (*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(-2)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(2)+cdk*(0)]))*p1o144dxdy;
+ return kmul(p1o144dxdy,kadd(vec_loadu_maybe3(-2,-2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(-2)+cdk*(0)]),kadd(vec_loadu_maybe3(2,2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(2)+cdk*(0)]),kmadd(kadd(vec_loadu_maybe3(-1,1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(1)+cdk*(0)]),vec_loadu_maybe3(1,-1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(-1)+cdk*(0)])),ToReal(-64),kmadd(kadd(vec_loadu_maybe3(-1,-2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(-2)+cdk*(0)]),kadd(vec_loadu_maybe3(1,2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(2)+cdk*(0)]),kadd(vec_loadu_maybe3(-2,-1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(-1)+cdk*(0)]),vec_loadu_maybe3(2,1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(1)+cdk*(0)])))),ToReal(-8),ksub(ksub(kmadd(kadd(vec_loadu_maybe3(-1,2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(2)+cdk*(0)]),kadd(vec_loadu_maybe3(1,-2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(-2)+cdk*(0)]),kadd(vec_loadu_maybe3(-2,1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(1)+cdk*(0)]),vec_loadu_maybe3(2,-1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(-1)+cdk*(0)])))),ToReal(8),kmul(kadd(vec_loadu_maybe3(-1,-1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(-1)+cdk*(0)]),vec_loadu_maybe3(1,1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(1)+cdk*(0)])),ToReal(64))),vec_loadu_maybe3(2,-2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(-2)+cdk*(0)])),vec_loadu_maybe3(-2,2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(2)+cdk*(0)])))))));
}
#endif
#ifndef KRANC_DIFF_FUNCTIONS
-# define PDstandardNth13(u) ((-64*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(1)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(-1)])) + 64*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(-1)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(1)])) + 8*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(-2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(1)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(-1)])) - 8*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(-2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(-1)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(1)])) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(-2)]) - (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(2)]) - (*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(-2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(2)]))*p1o144dxdz)
+# define PDstandardNth13(u) (kmul(p1o144dxdz,kadd(vec_loadu_maybe3(-2,0,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(-2)]),kadd(vec_loadu_maybe3(2,0,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(2)]),kmadd(kadd(vec_loadu_maybe3(-1,0,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(1)]),vec_loadu_maybe3(1,0,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(-1)])),ToReal(-64),kmadd(kadd(vec_loadu_maybe3(-1,0,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(-2)]),kadd(vec_loadu_maybe3(1,0,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(2)]),kadd(vec_loadu_maybe3(-2,0,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(-1)]),vec_loadu_maybe3(2,0,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(1)])))),ToReal(-8),ksub(ksub(kmadd(kadd(vec_loadu_maybe3(-1,0,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(2)]),kadd(vec_loadu_maybe3(1,0,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(-2)]),kadd(vec_loadu_maybe3(-2,0,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(1)]),vec_loadu_maybe3(2,0,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(-1)])))),ToReal(8),kmul(kadd(vec_loadu_maybe3(-1,0,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(-1)]),vec_loadu_maybe3(1,0,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(1)])),ToReal(64))),vec_loadu_maybe3(2,0,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(-2)])),vec_loadu_maybe3(-2,0,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(2)]))))))))
#else
# define PDstandardNth13(u) (PDstandardNth13_impl(u,p1o144dxdz,cdj,cdk))
-static CCTK_REAL PDstandardNth13_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o144dxdz, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
-static CCTK_REAL PDstandardNth13_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o144dxdz, ptrdiff_t const cdj, ptrdiff_t const cdk)
+static CCTK_REAL_VEC PDstandardNth13_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o144dxdz, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
+static CCTK_REAL_VEC PDstandardNth13_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o144dxdz, ptrdiff_t const cdj, ptrdiff_t const cdk)
{
ptrdiff_t const cdi=sizeof(CCTK_REAL);
- return (-64*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(1)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(-1)])) + 64*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(-1)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(1)])) + 8*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(-2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(1)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(-1)])) - 8*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(-2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(-1)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(1)])) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(-2)]) - (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(2)]) - (*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(-2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(2)]))*p1o144dxdz;
+ return kmul(p1o144dxdz,kadd(vec_loadu_maybe3(-2,0,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(-2)]),kadd(vec_loadu_maybe3(2,0,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(2)]),kmadd(kadd(vec_loadu_maybe3(-1,0,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(1)]),vec_loadu_maybe3(1,0,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(-1)])),ToReal(-64),kmadd(kadd(vec_loadu_maybe3(-1,0,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(-2)]),kadd(vec_loadu_maybe3(1,0,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(2)]),kadd(vec_loadu_maybe3(-2,0,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(-1)]),vec_loadu_maybe3(2,0,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(1)])))),ToReal(-8),ksub(ksub(kmadd(kadd(vec_loadu_maybe3(-1,0,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(2)]),kadd(vec_loadu_maybe3(1,0,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(-2)]),kadd(vec_loadu_maybe3(-2,0,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(1)]),vec_loadu_maybe3(2,0,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(-1)])))),ToReal(8),kmul(kadd(vec_loadu_maybe3(-1,0,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(-1)]),vec_loadu_maybe3(1,0,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(1)])),ToReal(64))),vec_loadu_maybe3(2,0,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(-2)])),vec_loadu_maybe3(-2,0,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(2)])))))));
}
#endif
#ifndef KRANC_DIFF_FUNCTIONS
-# define PDstandardNth21(u) ((-64*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(1)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(-1)+cdk*(0)])) + 64*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(-1)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(1)+cdk*(0)])) + 8*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(2)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(-2)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(1)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(-1)+cdk*(0)])) - 8*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(-2)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(2)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(-1)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(1)+cdk*(0)])) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(-2)+cdk*(0)]) - (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(2)+cdk*(0)]) - (*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(-2)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(2)+cdk*(0)]))*p1o144dxdy)
+# define PDstandardNth21(u) (kmul(p1o144dxdy,kadd(vec_loadu_maybe3(-2,-2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(-2)+cdk*(0)]),kadd(vec_loadu_maybe3(2,2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(2)+cdk*(0)]),kmadd(kadd(vec_loadu_maybe3(-1,1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(1)+cdk*(0)]),vec_loadu_maybe3(1,-1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(-1)+cdk*(0)])),ToReal(-64),kmadd(kadd(vec_loadu_maybe3(-1,-2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(-2)+cdk*(0)]),kadd(vec_loadu_maybe3(1,2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(2)+cdk*(0)]),kadd(vec_loadu_maybe3(-2,-1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(-1)+cdk*(0)]),vec_loadu_maybe3(2,1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(1)+cdk*(0)])))),ToReal(-8),ksub(ksub(kmadd(kadd(vec_loadu_maybe3(-1,2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(2)+cdk*(0)]),kadd(vec_loadu_maybe3(1,-2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(-2)+cdk*(0)]),kadd(vec_loadu_maybe3(-2,1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(1)+cdk*(0)]),vec_loadu_maybe3(2,-1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(-1)+cdk*(0)])))),ToReal(8),kmul(kadd(vec_loadu_maybe3(-1,-1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(-1)+cdk*(0)]),vec_loadu_maybe3(1,1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(1)+cdk*(0)])),ToReal(64))),vec_loadu_maybe3(2,-2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(-2)+cdk*(0)])),vec_loadu_maybe3(-2,2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(2)+cdk*(0)]))))))))
#else
# define PDstandardNth21(u) (PDstandardNth21_impl(u,p1o144dxdy,cdj,cdk))
-static CCTK_REAL PDstandardNth21_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o144dxdy, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
-static CCTK_REAL PDstandardNth21_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o144dxdy, ptrdiff_t const cdj, ptrdiff_t const cdk)
+static CCTK_REAL_VEC PDstandardNth21_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o144dxdy, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
+static CCTK_REAL_VEC PDstandardNth21_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o144dxdy, ptrdiff_t const cdj, ptrdiff_t const cdk)
{
ptrdiff_t const cdi=sizeof(CCTK_REAL);
- return (-64*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(1)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(-1)+cdk*(0)])) + 64*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(-1)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(1)+cdk*(0)])) + 8*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(2)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(-2)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(1)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(-1)+cdk*(0)])) - 8*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(-2)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(2)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(-1)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(1)+cdk*(0)])) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(-2)+cdk*(0)]) - (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(2)+cdk*(0)]) - (*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(-2)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(2)+cdk*(0)]))*p1o144dxdy;
+ return kmul(p1o144dxdy,kadd(vec_loadu_maybe3(-2,-2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(-2)+cdk*(0)]),kadd(vec_loadu_maybe3(2,2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(2)+cdk*(0)]),kmadd(kadd(vec_loadu_maybe3(-1,1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(1)+cdk*(0)]),vec_loadu_maybe3(1,-1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(-1)+cdk*(0)])),ToReal(-64),kmadd(kadd(vec_loadu_maybe3(-1,-2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(-2)+cdk*(0)]),kadd(vec_loadu_maybe3(1,2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(2)+cdk*(0)]),kadd(vec_loadu_maybe3(-2,-1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(-1)+cdk*(0)]),vec_loadu_maybe3(2,1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(1)+cdk*(0)])))),ToReal(-8),ksub(ksub(kmadd(kadd(vec_loadu_maybe3(-1,2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(2)+cdk*(0)]),kadd(vec_loadu_maybe3(1,-2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(-2)+cdk*(0)]),kadd(vec_loadu_maybe3(-2,1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(1)+cdk*(0)]),vec_loadu_maybe3(2,-1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(-1)+cdk*(0)])))),ToReal(8),kmul(kadd(vec_loadu_maybe3(-1,-1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(-1)+cdk*(0)]),vec_loadu_maybe3(1,1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(1)+cdk*(0)])),ToReal(64))),vec_loadu_maybe3(2,-2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(-2)+cdk*(0)])),vec_loadu_maybe3(-2,2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(2)+cdk*(0)])))))));
}
#endif
#ifndef KRANC_DIFF_FUNCTIONS
-# define PDstandardNth23(u) ((-64*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(1)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(-1)])) + 64*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(-1)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(1)])) + 8*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(-2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(1)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(-1)])) - 8*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(-2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(-1)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(1)])) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(-2)]) - (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(2)]) - (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(-2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(2)]))*p1o144dydz)
+# define PDstandardNth23(u) (kmul(p1o144dydz,kadd(vec_loadu_maybe3(0,-2,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(-2)]),kadd(vec_loadu_maybe3(0,2,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(2)]),kmadd(kadd(vec_loadu_maybe3(0,-1,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(1)]),vec_loadu_maybe3(0,1,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(-1)])),ToReal(-64),kmadd(kadd(vec_loadu_maybe3(0,-1,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(-2)]),kadd(vec_loadu_maybe3(0,1,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(2)]),kadd(vec_loadu_maybe3(0,-2,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(-1)]),vec_loadu_maybe3(0,2,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(1)])))),ToReal(-8),ksub(ksub(kmadd(kadd(vec_loadu_maybe3(0,-1,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(2)]),kadd(vec_loadu_maybe3(0,1,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(-2)]),kadd(vec_loadu_maybe3(0,-2,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(1)]),vec_loadu_maybe3(0,2,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(-1)])))),ToReal(8),kmul(kadd(vec_loadu_maybe3(0,-1,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(-1)]),vec_loadu_maybe3(0,1,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(1)])),ToReal(64))),vec_loadu_maybe3(0,2,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(-2)])),vec_loadu_maybe3(0,-2,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(2)]))))))))
#else
# define PDstandardNth23(u) (PDstandardNth23_impl(u,p1o144dydz,cdj,cdk))
-static CCTK_REAL PDstandardNth23_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o144dydz, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
-static CCTK_REAL PDstandardNth23_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o144dydz, ptrdiff_t const cdj, ptrdiff_t const cdk)
+static CCTK_REAL_VEC PDstandardNth23_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o144dydz, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
+static CCTK_REAL_VEC PDstandardNth23_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o144dydz, ptrdiff_t const cdj, ptrdiff_t const cdk)
{
ptrdiff_t const cdi=sizeof(CCTK_REAL);
- return (-64*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(1)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(-1)])) + 64*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(-1)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(1)])) + 8*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(-2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(1)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(-1)])) - 8*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(-2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(-1)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(1)])) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(-2)]) - (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(2)]) - (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(-2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(2)]))*p1o144dydz;
+ return kmul(p1o144dydz,kadd(vec_loadu_maybe3(0,-2,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(-2)]),kadd(vec_loadu_maybe3(0,2,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(2)]),kmadd(kadd(vec_loadu_maybe3(0,-1,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(1)]),vec_loadu_maybe3(0,1,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(-1)])),ToReal(-64),kmadd(kadd(vec_loadu_maybe3(0,-1,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(-2)]),kadd(vec_loadu_maybe3(0,1,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(2)]),kadd(vec_loadu_maybe3(0,-2,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(-1)]),vec_loadu_maybe3(0,2,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(1)])))),ToReal(-8),ksub(ksub(kmadd(kadd(vec_loadu_maybe3(0,-1,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(2)]),kadd(vec_loadu_maybe3(0,1,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(-2)]),kadd(vec_loadu_maybe3(0,-2,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(1)]),vec_loadu_maybe3(0,2,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(-1)])))),ToReal(8),kmul(kadd(vec_loadu_maybe3(0,-1,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(-1)]),vec_loadu_maybe3(0,1,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(1)])),ToReal(64))),vec_loadu_maybe3(0,2,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(-2)])),vec_loadu_maybe3(0,-2,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(2)])))))));
}
#endif
#ifndef KRANC_DIFF_FUNCTIONS
-# define PDstandardNth31(u) ((-64*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(1)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(-1)])) + 64*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(-1)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(1)])) + 8*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(-2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(1)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(-1)])) - 8*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(-2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(-1)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(1)])) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(-2)]) - (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(2)]) - (*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(-2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(2)]))*p1o144dxdz)
+# define PDstandardNth31(u) (kmul(p1o144dxdz,kadd(vec_loadu_maybe3(-2,0,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(-2)]),kadd(vec_loadu_maybe3(2,0,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(2)]),kmadd(kadd(vec_loadu_maybe3(-1,0,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(1)]),vec_loadu_maybe3(1,0,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(-1)])),ToReal(-64),kmadd(kadd(vec_loadu_maybe3(-1,0,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(-2)]),kadd(vec_loadu_maybe3(1,0,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(2)]),kadd(vec_loadu_maybe3(-2,0,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(-1)]),vec_loadu_maybe3(2,0,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(1)])))),ToReal(-8),ksub(ksub(kmadd(kadd(vec_loadu_maybe3(-1,0,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(2)]),kadd(vec_loadu_maybe3(1,0,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(-2)]),kadd(vec_loadu_maybe3(-2,0,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(1)]),vec_loadu_maybe3(2,0,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(-1)])))),ToReal(8),kmul(kadd(vec_loadu_maybe3(-1,0,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(-1)]),vec_loadu_maybe3(1,0,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(1)])),ToReal(64))),vec_loadu_maybe3(2,0,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(-2)])),vec_loadu_maybe3(-2,0,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(2)]))))))))
#else
# define PDstandardNth31(u) (PDstandardNth31_impl(u,p1o144dxdz,cdj,cdk))
-static CCTK_REAL PDstandardNth31_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o144dxdz, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
-static CCTK_REAL PDstandardNth31_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o144dxdz, ptrdiff_t const cdj, ptrdiff_t const cdk)
+static CCTK_REAL_VEC PDstandardNth31_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o144dxdz, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
+static CCTK_REAL_VEC PDstandardNth31_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o144dxdz, ptrdiff_t const cdj, ptrdiff_t const cdk)
{
ptrdiff_t const cdi=sizeof(CCTK_REAL);
- return (-64*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(1)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(-1)])) + 64*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(-1)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(1)])) + 8*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(-2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(1)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(-1)])) - 8*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(-2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(-1)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(1)])) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(-2)]) - (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(2)]) - (*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(-2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(2)]))*p1o144dxdz;
+ return kmul(p1o144dxdz,kadd(vec_loadu_maybe3(-2,0,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(-2)]),kadd(vec_loadu_maybe3(2,0,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(2)]),kmadd(kadd(vec_loadu_maybe3(-1,0,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(1)]),vec_loadu_maybe3(1,0,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(-1)])),ToReal(-64),kmadd(kadd(vec_loadu_maybe3(-1,0,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(-2)]),kadd(vec_loadu_maybe3(1,0,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(2)]),kadd(vec_loadu_maybe3(-2,0,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(-1)]),vec_loadu_maybe3(2,0,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(1)])))),ToReal(-8),ksub(ksub(kmadd(kadd(vec_loadu_maybe3(-1,0,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(2)]),kadd(vec_loadu_maybe3(1,0,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(-2)]),kadd(vec_loadu_maybe3(-2,0,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(1)]),vec_loadu_maybe3(2,0,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(-1)])))),ToReal(8),kmul(kadd(vec_loadu_maybe3(-1,0,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(-1)]),vec_loadu_maybe3(1,0,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(1)])),ToReal(64))),vec_loadu_maybe3(2,0,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(-2)])),vec_loadu_maybe3(-2,0,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(2)])))))));
}
#endif
#ifndef KRANC_DIFF_FUNCTIONS
-# define PDstandardNth32(u) ((-64*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(1)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(-1)])) + 64*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(-1)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(1)])) + 8*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(-2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(1)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(-1)])) - 8*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(-2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(-1)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(1)])) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(-2)]) - (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(2)]) - (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(-2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(2)]))*p1o144dydz)
+# define PDstandardNth32(u) (kmul(p1o144dydz,kadd(vec_loadu_maybe3(0,-2,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(-2)]),kadd(vec_loadu_maybe3(0,2,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(2)]),kmadd(kadd(vec_loadu_maybe3(0,-1,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(1)]),vec_loadu_maybe3(0,1,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(-1)])),ToReal(-64),kmadd(kadd(vec_loadu_maybe3(0,-1,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(-2)]),kadd(vec_loadu_maybe3(0,1,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(2)]),kadd(vec_loadu_maybe3(0,-2,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(-1)]),vec_loadu_maybe3(0,2,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(1)])))),ToReal(-8),ksub(ksub(kmadd(kadd(vec_loadu_maybe3(0,-1,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(2)]),kadd(vec_loadu_maybe3(0,1,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(-2)]),kadd(vec_loadu_maybe3(0,-2,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(1)]),vec_loadu_maybe3(0,2,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(-1)])))),ToReal(8),kmul(kadd(vec_loadu_maybe3(0,-1,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(-1)]),vec_loadu_maybe3(0,1,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(1)])),ToReal(64))),vec_loadu_maybe3(0,2,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(-2)])),vec_loadu_maybe3(0,-2,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(2)]))))))))
#else
# define PDstandardNth32(u) (PDstandardNth32_impl(u,p1o144dydz,cdj,cdk))
-static CCTK_REAL PDstandardNth32_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o144dydz, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
-static CCTK_REAL PDstandardNth32_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o144dydz, ptrdiff_t const cdj, ptrdiff_t const cdk)
+static CCTK_REAL_VEC PDstandardNth32_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o144dydz, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
+static CCTK_REAL_VEC PDstandardNth32_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o144dydz, ptrdiff_t const cdj, ptrdiff_t const cdk)
{
ptrdiff_t const cdi=sizeof(CCTK_REAL);
- return (-64*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(1)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(-1)])) + 64*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(-1)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(1)])) + 8*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(-2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(1)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(-1)])) - 8*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(-2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(-1)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(1)])) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(-2)]) - (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(2)]) - (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(-2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(2)]))*p1o144dydz;
+ return kmul(p1o144dydz,kadd(vec_loadu_maybe3(0,-2,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(-2)]),kadd(vec_loadu_maybe3(0,2,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(2)]),kmadd(kadd(vec_loadu_maybe3(0,-1,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(1)]),vec_loadu_maybe3(0,1,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(-1)])),ToReal(-64),kmadd(kadd(vec_loadu_maybe3(0,-1,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(-2)]),kadd(vec_loadu_maybe3(0,1,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(2)]),kadd(vec_loadu_maybe3(0,-2,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(-1)]),vec_loadu_maybe3(0,2,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(1)])))),ToReal(-8),ksub(ksub(kmadd(kadd(vec_loadu_maybe3(0,-1,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(2)]),kadd(vec_loadu_maybe3(0,1,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(-2)]),kadd(vec_loadu_maybe3(0,-2,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(1)]),vec_loadu_maybe3(0,2,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(-1)])))),ToReal(8),kmul(kadd(vec_loadu_maybe3(0,-1,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(-1)]),vec_loadu_maybe3(0,1,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(1)])),ToReal(64))),vec_loadu_maybe3(0,2,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(-2)])),vec_loadu_maybe3(0,-2,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(2)])))))));
}
#endif
#ifndef KRANC_DIFF_FUNCTIONS
-# define PDdissipationNth1(u) ((-20*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]) + 15*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(0)])) - 6*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(0)])) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-3)+cdj*(0)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(3)+cdj*(0)+cdk*(0)]))*p1o64dx)
+# define PDdissipationNth1(u) (kmul(p1o64dx,kadd(vec_loadu_maybe3(-3,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-3)+cdj*(0)+cdk*(0)]),kadd(vec_loadu_maybe3(3,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(3)+cdj*(0)+cdk*(0)]),kmadd(vec_loadu_maybe3(0,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]),ToReal(-20),kmadd(kadd(vec_loadu_maybe3(-2,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(0)]),vec_loadu_maybe3(2,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(0)])),ToReal(-6),kmul(kadd(vec_loadu_maybe3(-1,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(0)]),vec_loadu_maybe3(1,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(0)])),ToReal(15))))))))
#else
# define PDdissipationNth1(u) (PDdissipationNth1_impl(u,p1o64dx,cdj,cdk))
-static CCTK_REAL PDdissipationNth1_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o64dx, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
-static CCTK_REAL PDdissipationNth1_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o64dx, ptrdiff_t const cdj, ptrdiff_t const cdk)
+static CCTK_REAL_VEC PDdissipationNth1_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o64dx, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
+static CCTK_REAL_VEC PDdissipationNth1_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o64dx, ptrdiff_t const cdj, ptrdiff_t const cdk)
{
ptrdiff_t const cdi=sizeof(CCTK_REAL);
- return (-20*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]) + 15*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(0)])) - 6*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(0)])) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-3)+cdj*(0)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(3)+cdj*(0)+cdk*(0)]))*p1o64dx;
+ return kmul(p1o64dx,kadd(vec_loadu_maybe3(-3,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-3)+cdj*(0)+cdk*(0)]),kadd(vec_loadu_maybe3(3,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(3)+cdj*(0)+cdk*(0)]),kmadd(vec_loadu_maybe3(0,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]),ToReal(-20),kmadd(kadd(vec_loadu_maybe3(-2,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(0)]),vec_loadu_maybe3(2,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(0)])),ToReal(-6),kmul(kadd(vec_loadu_maybe3(-1,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(0)]),vec_loadu_maybe3(1,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(0)])),ToReal(15)))))));
}
#endif
#ifndef KRANC_DIFF_FUNCTIONS
-# define PDdissipationNth2(u) ((-20*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]) + 15*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(0)])) - 6*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(0)])) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-3)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(3)+cdk*(0)]))*p1o64dy)
+# define PDdissipationNth2(u) (kmul(p1o64dy,kadd(vec_loadu_maybe3(0,-3,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-3)+cdk*(0)]),kadd(vec_loadu_maybe3(0,3,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(3)+cdk*(0)]),kmadd(vec_loadu_maybe3(0,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]),ToReal(-20),kmadd(kadd(vec_loadu_maybe3(0,-2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(0)]),vec_loadu_maybe3(0,2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(0)])),ToReal(-6),kmul(kadd(vec_loadu_maybe3(0,-1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(0)]),vec_loadu_maybe3(0,1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(0)])),ToReal(15))))))))
#else
# define PDdissipationNth2(u) (PDdissipationNth2_impl(u,p1o64dy,cdj,cdk))
-static CCTK_REAL PDdissipationNth2_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o64dy, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
-static CCTK_REAL PDdissipationNth2_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o64dy, ptrdiff_t const cdj, ptrdiff_t const cdk)
+static CCTK_REAL_VEC PDdissipationNth2_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o64dy, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
+static CCTK_REAL_VEC PDdissipationNth2_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o64dy, ptrdiff_t const cdj, ptrdiff_t const cdk)
{
ptrdiff_t const cdi=sizeof(CCTK_REAL);
- return (-20*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]) + 15*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(0)])) - 6*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(0)])) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-3)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(3)+cdk*(0)]))*p1o64dy;
+ return kmul(p1o64dy,kadd(vec_loadu_maybe3(0,-3,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-3)+cdk*(0)]),kadd(vec_loadu_maybe3(0,3,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(3)+cdk*(0)]),kmadd(vec_loadu_maybe3(0,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]),ToReal(-20),kmadd(kadd(vec_loadu_maybe3(0,-2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(0)]),vec_loadu_maybe3(0,2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(0)])),ToReal(-6),kmul(kadd(vec_loadu_maybe3(0,-1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(0)]),vec_loadu_maybe3(0,1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(0)])),ToReal(15)))))));
}
#endif
#ifndef KRANC_DIFF_FUNCTIONS
-# define PDdissipationNth3(u) ((-20*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]) + 15*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-1)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(1)])) - 6*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(2)])) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-3)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(3)]))*p1o64dz)
+# define PDdissipationNth3(u) (kmul(p1o64dz,kadd(vec_loadu_maybe3(0,0,-3,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-3)]),kadd(vec_loadu_maybe3(0,0,3,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(3)]),kmadd(vec_loadu_maybe3(0,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]),ToReal(-20),kmadd(kadd(vec_loadu_maybe3(0,0,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-2)]),vec_loadu_maybe3(0,0,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(2)])),ToReal(-6),kmul(kadd(vec_loadu_maybe3(0,0,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-1)]),vec_loadu_maybe3(0,0,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(1)])),ToReal(15))))))))
#else
# define PDdissipationNth3(u) (PDdissipationNth3_impl(u,p1o64dz,cdj,cdk))
-static CCTK_REAL PDdissipationNth3_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o64dz, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
-static CCTK_REAL PDdissipationNth3_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o64dz, ptrdiff_t const cdj, ptrdiff_t const cdk)
+static CCTK_REAL_VEC PDdissipationNth3_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o64dz, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
+static CCTK_REAL_VEC PDdissipationNth3_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o64dz, ptrdiff_t const cdj, ptrdiff_t const cdk)
{
ptrdiff_t const cdi=sizeof(CCTK_REAL);
- return (-20*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]) + 15*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-1)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(1)])) - 6*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(2)])) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-3)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(3)]))*p1o64dz;
+ return kmul(p1o64dz,kadd(vec_loadu_maybe3(0,0,-3,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-3)]),kadd(vec_loadu_maybe3(0,0,3,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(3)]),kmadd(vec_loadu_maybe3(0,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]),ToReal(-20),kmadd(kadd(vec_loadu_maybe3(0,0,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-2)]),vec_loadu_maybe3(0,0,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(2)])),ToReal(-6),kmul(kadd(vec_loadu_maybe3(0,0,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-1)]),vec_loadu_maybe3(0,0,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(1)])),ToReal(15)))))));
}
#endif
#ifndef KRANC_DIFF_FUNCTIONS
-# define PDupwindNth1(u) ((-10*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]) - 6*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(2*dir1)+cdj*(0)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(3*dir1)+cdj*(0)+cdk*(0)]) - 3*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(-dir1)+cdj*(0)+cdk*(0)]) + 18*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(dir1)+cdj*(0)+cdk*(0)]))*p1o12dx*dir1)
+# define PDupwindNth1(u) (kmul(p1o12dx,kmul(dir1,kadd(vec_loadu_maybe3(3,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(3*dir1)+cdj*(0)+cdk*(0)]),kmadd(vec_loadu_maybe3(0,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]),ToReal(-10),kmadd(vec_loadu_maybe3(2,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2*dir1)+cdj*(0)+cdk*(0)]),ToReal(-6),kmadd(vec_loadu_maybe3(-1,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-dir1)+cdj*(0)+cdk*(0)]),ToReal(-3),kmul(vec_loadu_maybe3(1,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(dir1)+cdj*(0)+cdk*(0)]),ToReal(18)))))))))
#else
-# define PDupwindNth1(u) (PDupwindNth1_impl(u,p1o12dx,cdj,cdk,dir1,dir2,dir3))
-static CCTK_REAL PDupwindNth1_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o12dx, ptrdiff_t const cdj, ptrdiff_t const cdk, ptrdiff_t const dir1, ptrdiff_t const dir2, ptrdiff_t const dir3) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
-static CCTK_REAL PDupwindNth1_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o12dx, ptrdiff_t const cdj, ptrdiff_t const cdk, ptrdiff_t const dir1, ptrdiff_t const dir2, ptrdiff_t const dir3)
-{
- ptrdiff_t const cdi=sizeof(CCTK_REAL);
- return (-10*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]) - 6*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(2*dir1)+cdj*(0)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(3*dir1)+cdj*(0)+cdk*(0)]) - 3*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(-dir1)+cdj*(0)+cdk*(0)]) + 18*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(dir1)+cdj*(0)+cdk*(0)]))*p1o12dx*dir1;
-}
+# define PDupwindNth1(u) (PDupwindNth1_impl(u,p1o12dx,cdj,cdk))
+static CCTK_REAL_VEC PDupwindNth1_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o12dx, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
+static CCTK_REAL_VEC PDupwindNth1_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o12dx, ptrdiff_t const cdj, ptrdiff_t const cdk)
+{ assert(0); return ToReal(1e30); /* ERROR */ }
#endif
#ifndef KRANC_DIFF_FUNCTIONS
-# define PDupwindNth2(u) ((-10*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]) - 6*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2*dir2)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(3*dir2)+cdk*(0)]) - 3*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-dir2)+cdk*(0)]) + 18*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(dir2)+cdk*(0)]))*p1o12dy*dir2)
+# define PDupwindNth2(u) (kmul(p1o12dy,kmul(dir2,kadd(vec_loadu_maybe3(0,3,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(3*dir2)+cdk*(0)]),kmadd(vec_loadu_maybe3(0,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]),ToReal(-10),kmadd(vec_loadu_maybe3(0,2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2*dir2)+cdk*(0)]),ToReal(-6),kmadd(vec_loadu_maybe3(0,-1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-dir2)+cdk*(0)]),ToReal(-3),kmul(vec_loadu_maybe3(0,1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(dir2)+cdk*(0)]),ToReal(18)))))))))
#else
-# define PDupwindNth2(u) (PDupwindNth2_impl(u,p1o12dy,cdj,cdk,dir1,dir2,dir3))
-static CCTK_REAL PDupwindNth2_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o12dy, ptrdiff_t const cdj, ptrdiff_t const cdk, ptrdiff_t const dir1, ptrdiff_t const dir2, ptrdiff_t const dir3) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
-static CCTK_REAL PDupwindNth2_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o12dy, ptrdiff_t const cdj, ptrdiff_t const cdk, ptrdiff_t const dir1, ptrdiff_t const dir2, ptrdiff_t const dir3)
-{
- ptrdiff_t const cdi=sizeof(CCTK_REAL);
- return (-10*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]) - 6*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2*dir2)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(3*dir2)+cdk*(0)]) - 3*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-dir2)+cdk*(0)]) + 18*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(dir2)+cdk*(0)]))*p1o12dy*dir2;
-}
+# define PDupwindNth2(u) (PDupwindNth2_impl(u,p1o12dy,cdj,cdk))
+static CCTK_REAL_VEC PDupwindNth2_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o12dy, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
+static CCTK_REAL_VEC PDupwindNth2_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o12dy, ptrdiff_t const cdj, ptrdiff_t const cdk)
+{ assert(0); return ToReal(1e30); /* ERROR */ }
#endif
#ifndef KRANC_DIFF_FUNCTIONS
-# define PDupwindNth3(u) ((-10*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]) - 6*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(2*dir3)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(3*dir3)]) - 3*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-dir3)]) + 18*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(dir3)]))*p1o12dz*dir3)
+# define PDupwindNth3(u) (kmul(p1o12dz,kmul(dir3,kadd(vec_loadu_maybe3(0,0,3,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(3*dir3)]),kmadd(vec_loadu_maybe3(0,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]),ToReal(-10),kmadd(vec_loadu_maybe3(0,0,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(2*dir3)]),ToReal(-6),kmadd(vec_loadu_maybe3(0,0,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-dir3)]),ToReal(-3),kmul(vec_loadu_maybe3(0,0,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(dir3)]),ToReal(18)))))))))
#else
-# define PDupwindNth3(u) (PDupwindNth3_impl(u,p1o12dz,cdj,cdk,dir1,dir2,dir3))
-static CCTK_REAL PDupwindNth3_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o12dz, ptrdiff_t const cdj, ptrdiff_t const cdk, ptrdiff_t const dir1, ptrdiff_t const dir2, ptrdiff_t const dir3) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
-static CCTK_REAL PDupwindNth3_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o12dz, ptrdiff_t const cdj, ptrdiff_t const cdk, ptrdiff_t const dir1, ptrdiff_t const dir2, ptrdiff_t const dir3)
-{
- ptrdiff_t const cdi=sizeof(CCTK_REAL);
- return (-10*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]) - 6*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(2*dir3)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(3*dir3)]) - 3*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-dir3)]) + 18*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(dir3)]))*p1o12dz*dir3;
-}
+# define PDupwindNth3(u) (PDupwindNth3_impl(u,p1o12dz,cdj,cdk))
+static CCTK_REAL_VEC PDupwindNth3_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o12dz, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
+static CCTK_REAL_VEC PDupwindNth3_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o12dz, ptrdiff_t const cdj, ptrdiff_t const cdk)
+{ assert(0); return ToReal(1e30); /* ERROR */ }
#endif
#ifndef KRANC_DIFF_FUNCTIONS
-# define PDonesided1(u) ((-(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(dir1)+cdj*(0)+cdk*(0)]))*p1odx*dir1)
+# define PDonesided1(u) (kmul(p1odx,kmul(dir1,ksub(vec_loadu_maybe3(1,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(dir1)+cdj*(0)+cdk*(0)]),vec_loadu_maybe3(0,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)])))))
#else
-# define PDonesided1(u) (PDonesided1_impl(u,p1odx,cdj,cdk,dir1,dir2,dir3))
-static CCTK_REAL PDonesided1_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1odx, ptrdiff_t const cdj, ptrdiff_t const cdk, ptrdiff_t const dir1, ptrdiff_t const dir2, ptrdiff_t const dir3) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
-static CCTK_REAL PDonesided1_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1odx, ptrdiff_t const cdj, ptrdiff_t const cdk, ptrdiff_t const dir1, ptrdiff_t const dir2, ptrdiff_t const dir3)
-{
- ptrdiff_t const cdi=sizeof(CCTK_REAL);
- return (-(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(dir1)+cdj*(0)+cdk*(0)]))*p1odx*dir1;
-}
+# define PDonesided1(u) (PDonesided1_impl(u,p1odx,cdj,cdk))
+static CCTK_REAL_VEC PDonesided1_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1odx, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
+static CCTK_REAL_VEC PDonesided1_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1odx, ptrdiff_t const cdj, ptrdiff_t const cdk)
+{ assert(0); return ToReal(1e30); /* ERROR */ }
#endif
#ifndef KRANC_DIFF_FUNCTIONS
-# define PDonesided2(u) ((-(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(dir2)+cdk*(0)]))*p1ody*dir2)
+# define PDonesided2(u) (kmul(p1ody,kmul(dir2,ksub(vec_loadu_maybe3(0,1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(dir2)+cdk*(0)]),vec_loadu_maybe3(0,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)])))))
#else
-# define PDonesided2(u) (PDonesided2_impl(u,p1ody,cdj,cdk,dir1,dir2,dir3))
-static CCTK_REAL PDonesided2_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1ody, ptrdiff_t const cdj, ptrdiff_t const cdk, ptrdiff_t const dir1, ptrdiff_t const dir2, ptrdiff_t const dir3) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
-static CCTK_REAL PDonesided2_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1ody, ptrdiff_t const cdj, ptrdiff_t const cdk, ptrdiff_t const dir1, ptrdiff_t const dir2, ptrdiff_t const dir3)
-{
- ptrdiff_t const cdi=sizeof(CCTK_REAL);
- return (-(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(dir2)+cdk*(0)]))*p1ody*dir2;
-}
+# define PDonesided2(u) (PDonesided2_impl(u,p1ody,cdj,cdk))
+static CCTK_REAL_VEC PDonesided2_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1ody, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
+static CCTK_REAL_VEC PDonesided2_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1ody, ptrdiff_t const cdj, ptrdiff_t const cdk)
+{ assert(0); return ToReal(1e30); /* ERROR */ }
#endif
#ifndef KRANC_DIFF_FUNCTIONS
-# define PDonesided3(u) ((-(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(dir3)]))*p1odz*dir3)
+# define PDonesided3(u) (kmul(p1odz,kmul(dir3,ksub(vec_loadu_maybe3(0,0,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(dir3)]),vec_loadu_maybe3(0,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)])))))
#else
-# define PDonesided3(u) (PDonesided3_impl(u,p1odz,cdj,cdk,dir1,dir2,dir3))
-static CCTK_REAL PDonesided3_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1odz, ptrdiff_t const cdj, ptrdiff_t const cdk, ptrdiff_t const dir1, ptrdiff_t const dir2, ptrdiff_t const dir3) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
-static CCTK_REAL PDonesided3_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1odz, ptrdiff_t const cdj, ptrdiff_t const cdk, ptrdiff_t const dir1, ptrdiff_t const dir2, ptrdiff_t const dir3)
-{
- ptrdiff_t const cdi=sizeof(CCTK_REAL);
- return (-(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(dir3)]))*p1odz*dir3;
-}
+# define PDonesided3(u) (PDonesided3_impl(u,p1odz,cdj,cdk))
+static CCTK_REAL_VEC PDonesided3_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1odz, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
+static CCTK_REAL_VEC PDonesided3_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1odz, ptrdiff_t const cdj, ptrdiff_t const cdk)
+{ assert(0); return ToReal(1e30); /* ERROR */ }
#endif
#ifndef KRANC_DIFF_FUNCTIONS
-# define PDupwindNthAnti1(u) ((-21*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(0)]) + 21*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(0)]) + 6*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(0)]) - 6*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(0)]) - (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-3)+cdj*(0)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(3)+cdj*(0)+cdk*(0)]))*p1o24dx)
+# define PDupwindNthAnti1(u) (kmul(p1o24dx,kadd(vec_loadu_maybe3(3,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(3)+cdj*(0)+cdk*(0)]),kmadd(vec_loadu_maybe3(-1,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(0)]),ToReal(-21),kmadd(vec_loadu_maybe3(2,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(0)]),ToReal(-6),ksub(kmadd(vec_loadu_maybe3(-2,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(0)]),ToReal(6),kmul(vec_loadu_maybe3(1,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(0)]),ToReal(21))),vec_loadu_maybe3(-3,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-3)+cdj*(0)+cdk*(0)])))))))
#else
# define PDupwindNthAnti1(u) (PDupwindNthAnti1_impl(u,p1o24dx,cdj,cdk))
-static CCTK_REAL PDupwindNthAnti1_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o24dx, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
-static CCTK_REAL PDupwindNthAnti1_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o24dx, ptrdiff_t const cdj, ptrdiff_t const cdk)
+static CCTK_REAL_VEC PDupwindNthAnti1_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o24dx, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
+static CCTK_REAL_VEC PDupwindNthAnti1_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o24dx, ptrdiff_t const cdj, ptrdiff_t const cdk)
{
ptrdiff_t const cdi=sizeof(CCTK_REAL);
- return (-21*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(0)]) + 21*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(0)]) + 6*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(0)]) - 6*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(0)]) - (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-3)+cdj*(0)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(3)+cdj*(0)+cdk*(0)]))*p1o24dx;
+ return kmul(p1o24dx,kadd(vec_loadu_maybe3(3,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(3)+cdj*(0)+cdk*(0)]),kmadd(vec_loadu_maybe3(-1,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(0)]),ToReal(-21),kmadd(vec_loadu_maybe3(2,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(0)]),ToReal(-6),ksub(kmadd(vec_loadu_maybe3(-2,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(0)]),ToReal(6),kmul(vec_loadu_maybe3(1,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(0)]),ToReal(21))),vec_loadu_maybe3(-3,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-3)+cdj*(0)+cdk*(0)]))))));
}
#endif
#ifndef KRANC_DIFF_FUNCTIONS
-# define PDupwindNthSymm1(u) ((-20*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]) + 15*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(0)])) - 6*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(0)])) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-3)+cdj*(0)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(3)+cdj*(0)+cdk*(0)]))*p1o24dx)
+# define PDupwindNthSymm1(u) (kmul(p1o24dx,kadd(vec_loadu_maybe3(-3,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-3)+cdj*(0)+cdk*(0)]),kadd(vec_loadu_maybe3(3,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(3)+cdj*(0)+cdk*(0)]),kmadd(vec_loadu_maybe3(0,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]),ToReal(-20),kmadd(kadd(vec_loadu_maybe3(-2,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(0)]),vec_loadu_maybe3(2,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(0)])),ToReal(-6),kmul(kadd(vec_loadu_maybe3(-1,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(0)]),vec_loadu_maybe3(1,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(0)])),ToReal(15))))))))
#else
# define PDupwindNthSymm1(u) (PDupwindNthSymm1_impl(u,p1o24dx,cdj,cdk))
-static CCTK_REAL PDupwindNthSymm1_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o24dx, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
-static CCTK_REAL PDupwindNthSymm1_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o24dx, ptrdiff_t const cdj, ptrdiff_t const cdk)
+static CCTK_REAL_VEC PDupwindNthSymm1_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o24dx, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
+static CCTK_REAL_VEC PDupwindNthSymm1_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o24dx, ptrdiff_t const cdj, ptrdiff_t const cdk)
{
ptrdiff_t const cdi=sizeof(CCTK_REAL);
- return (-20*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]) + 15*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(0)])) - 6*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(0)])) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(-3)+cdj*(0)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(3)+cdj*(0)+cdk*(0)]))*p1o24dx;
+ return kmul(p1o24dx,kadd(vec_loadu_maybe3(-3,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-3)+cdj*(0)+cdk*(0)]),kadd(vec_loadu_maybe3(3,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(3)+cdj*(0)+cdk*(0)]),kmadd(vec_loadu_maybe3(0,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]),ToReal(-20),kmadd(kadd(vec_loadu_maybe3(-2,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-2)+cdj*(0)+cdk*(0)]),vec_loadu_maybe3(2,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(2)+cdj*(0)+cdk*(0)])),ToReal(-6),kmul(kadd(vec_loadu_maybe3(-1,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(-1)+cdj*(0)+cdk*(0)]),vec_loadu_maybe3(1,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(1)+cdj*(0)+cdk*(0)])),ToReal(15)))))));
}
#endif
#ifndef KRANC_DIFF_FUNCTIONS
-# define PDupwindNthAnti2(u) ((-21*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(0)]) + 21*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(0)]) + 6*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(0)]) - 6*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(0)]) - (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-3)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(3)+cdk*(0)]))*p1o24dy)
+# define PDupwindNthAnti2(u) (kmul(p1o24dy,kadd(vec_loadu_maybe3(0,3,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(3)+cdk*(0)]),kmadd(vec_loadu_maybe3(0,-1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(0)]),ToReal(-21),kmadd(vec_loadu_maybe3(0,2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(0)]),ToReal(-6),ksub(kmadd(vec_loadu_maybe3(0,-2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(0)]),ToReal(6),kmul(vec_loadu_maybe3(0,1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(0)]),ToReal(21))),vec_loadu_maybe3(0,-3,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-3)+cdk*(0)])))))))
#else
# define PDupwindNthAnti2(u) (PDupwindNthAnti2_impl(u,p1o24dy,cdj,cdk))
-static CCTK_REAL PDupwindNthAnti2_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o24dy, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
-static CCTK_REAL PDupwindNthAnti2_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o24dy, ptrdiff_t const cdj, ptrdiff_t const cdk)
+static CCTK_REAL_VEC PDupwindNthAnti2_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o24dy, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
+static CCTK_REAL_VEC PDupwindNthAnti2_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o24dy, ptrdiff_t const cdj, ptrdiff_t const cdk)
{
ptrdiff_t const cdi=sizeof(CCTK_REAL);
- return (-21*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(0)]) + 21*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(0)]) + 6*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(0)]) - 6*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(0)]) - (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-3)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(3)+cdk*(0)]))*p1o24dy;
+ return kmul(p1o24dy,kadd(vec_loadu_maybe3(0,3,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(3)+cdk*(0)]),kmadd(vec_loadu_maybe3(0,-1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(0)]),ToReal(-21),kmadd(vec_loadu_maybe3(0,2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(0)]),ToReal(-6),ksub(kmadd(vec_loadu_maybe3(0,-2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(0)]),ToReal(6),kmul(vec_loadu_maybe3(0,1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(0)]),ToReal(21))),vec_loadu_maybe3(0,-3,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-3)+cdk*(0)]))))));
}
#endif
#ifndef KRANC_DIFF_FUNCTIONS
-# define PDupwindNthSymm2(u) ((-20*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]) + 15*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(0)])) - 6*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(0)])) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-3)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(3)+cdk*(0)]))*p1o24dy)
+# define PDupwindNthSymm2(u) (kmul(p1o24dy,kadd(vec_loadu_maybe3(0,-3,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-3)+cdk*(0)]),kadd(vec_loadu_maybe3(0,3,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(3)+cdk*(0)]),kmadd(vec_loadu_maybe3(0,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]),ToReal(-20),kmadd(kadd(vec_loadu_maybe3(0,-2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(0)]),vec_loadu_maybe3(0,2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(0)])),ToReal(-6),kmul(kadd(vec_loadu_maybe3(0,-1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(0)]),vec_loadu_maybe3(0,1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(0)])),ToReal(15))))))))
#else
# define PDupwindNthSymm2(u) (PDupwindNthSymm2_impl(u,p1o24dy,cdj,cdk))
-static CCTK_REAL PDupwindNthSymm2_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o24dy, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
-static CCTK_REAL PDupwindNthSymm2_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o24dy, ptrdiff_t const cdj, ptrdiff_t const cdk)
+static CCTK_REAL_VEC PDupwindNthSymm2_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o24dy, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
+static CCTK_REAL_VEC PDupwindNthSymm2_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o24dy, ptrdiff_t const cdj, ptrdiff_t const cdk)
{
ptrdiff_t const cdi=sizeof(CCTK_REAL);
- return (-20*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]) + 15*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(0)])) - 6*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(0)])) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-3)+cdk*(0)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(3)+cdk*(0)]))*p1o24dy;
+ return kmul(p1o24dy,kadd(vec_loadu_maybe3(0,-3,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-3)+cdk*(0)]),kadd(vec_loadu_maybe3(0,3,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(3)+cdk*(0)]),kmadd(vec_loadu_maybe3(0,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]),ToReal(-20),kmadd(kadd(vec_loadu_maybe3(0,-2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-2)+cdk*(0)]),vec_loadu_maybe3(0,2,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(2)+cdk*(0)])),ToReal(-6),kmul(kadd(vec_loadu_maybe3(0,-1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(-1)+cdk*(0)]),vec_loadu_maybe3(0,1,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(1)+cdk*(0)])),ToReal(15)))))));
}
#endif
#ifndef KRANC_DIFF_FUNCTIONS
-# define PDupwindNthAnti3(u) ((-21*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-1)]) + 21*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(1)]) + 6*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-2)]) - 6*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(2)]) - (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-3)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(3)]))*p1o24dz)
+# define PDupwindNthAnti3(u) (kmul(p1o24dz,kadd(vec_loadu_maybe3(0,0,3,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(3)]),kmadd(vec_loadu_maybe3(0,0,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-1)]),ToReal(-21),kmadd(vec_loadu_maybe3(0,0,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(2)]),ToReal(-6),ksub(kmadd(vec_loadu_maybe3(0,0,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-2)]),ToReal(6),kmul(vec_loadu_maybe3(0,0,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(1)]),ToReal(21))),vec_loadu_maybe3(0,0,-3,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-3)])))))))
#else
# define PDupwindNthAnti3(u) (PDupwindNthAnti3_impl(u,p1o24dz,cdj,cdk))
-static CCTK_REAL PDupwindNthAnti3_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o24dz, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
-static CCTK_REAL PDupwindNthAnti3_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o24dz, ptrdiff_t const cdj, ptrdiff_t const cdk)
+static CCTK_REAL_VEC PDupwindNthAnti3_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o24dz, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
+static CCTK_REAL_VEC PDupwindNthAnti3_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o24dz, ptrdiff_t const cdj, ptrdiff_t const cdk)
{
ptrdiff_t const cdi=sizeof(CCTK_REAL);
- return (-21*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-1)]) + 21*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(1)]) + 6*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-2)]) - 6*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(2)]) - (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-3)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(3)]))*p1o24dz;
+ return kmul(p1o24dz,kadd(vec_loadu_maybe3(0,0,3,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(3)]),kmadd(vec_loadu_maybe3(0,0,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-1)]),ToReal(-21),kmadd(vec_loadu_maybe3(0,0,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(2)]),ToReal(-6),ksub(kmadd(vec_loadu_maybe3(0,0,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-2)]),ToReal(6),kmul(vec_loadu_maybe3(0,0,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(1)]),ToReal(21))),vec_loadu_maybe3(0,0,-3,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-3)]))))));
}
#endif
#ifndef KRANC_DIFF_FUNCTIONS
-# define PDupwindNthSymm3(u) ((-20*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]) + 15*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-1)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(1)])) - 6*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(2)])) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-3)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(3)]))*p1o24dz)
+# define PDupwindNthSymm3(u) (kmul(p1o24dz,kadd(vec_loadu_maybe3(0,0,-3,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-3)]),kadd(vec_loadu_maybe3(0,0,3,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(3)]),kmadd(vec_loadu_maybe3(0,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]),ToReal(-20),kmadd(kadd(vec_loadu_maybe3(0,0,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-2)]),vec_loadu_maybe3(0,0,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(2)])),ToReal(-6),kmul(kadd(vec_loadu_maybe3(0,0,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-1)]),vec_loadu_maybe3(0,0,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(1)])),ToReal(15))))))))
#else
# define PDupwindNthSymm3(u) (PDupwindNthSymm3_impl(u,p1o24dz,cdj,cdk))
-static CCTK_REAL PDupwindNthSymm3_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o24dz, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
-static CCTK_REAL PDupwindNthSymm3_impl(CCTK_REAL const* restrict const u, CCTK_REAL const p1o24dz, ptrdiff_t const cdj, ptrdiff_t const cdk)
+static CCTK_REAL_VEC PDupwindNthSymm3_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o24dz, ptrdiff_t const cdj, ptrdiff_t const cdk) CCTK_ATTRIBUTE_NOINLINE CCTK_ATTRIBUTE_UNUSED;
+static CCTK_REAL_VEC PDupwindNthSymm3_impl(CCTK_REAL const* restrict const u, CCTK_REAL_VEC const p1o24dz, ptrdiff_t const cdj, ptrdiff_t const cdk)
{
ptrdiff_t const cdi=sizeof(CCTK_REAL);
- return (-20*(*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]) + 15*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-1)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(1)])) - 6*((*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-2)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(2)])) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-3)]) + (*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(3)]))*p1o24dz;
+ return kmul(p1o24dz,kadd(vec_loadu_maybe3(0,0,-3,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-3)]),kadd(vec_loadu_maybe3(0,0,3,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(3)]),kmadd(vec_loadu_maybe3(0,0,0,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(0)]),ToReal(-20),kmadd(kadd(vec_loadu_maybe3(0,0,-2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-2)]),vec_loadu_maybe3(0,0,2,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(2)])),ToReal(-6),kmul(kadd(vec_loadu_maybe3(0,0,-1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(-1)]),vec_loadu_maybe3(0,0,1,*(CCTK_REAL const*)&((char const*)(u))[cdi*(0)+cdj*(0)+cdk*(1)])),ToReal(15)))))));
}
#endif
diff --git a/ML_BSSN/src/ML_BSSN_Advect.cc b/ML_BSSN/src/ML_BSSN_Advect.cc
index fef62d8..3b09d04 100644
--- a/ML_BSSN/src/ML_BSSN_Advect.cc
+++ b/ML_BSSN/src/ML_BSSN_Advect.cc
@@ -13,13 +13,14 @@
#include "GenericFD.h"
#include "Differencing.h"
#include "loopcontrol.h"
+#include "vectors.h"
/* Define macros used in calculations */
#define INITVALUE (42)
#define QAD(x) (SQR(SQR(x)))
-#define INV(x) ((1.0) / (x))
-#define SQR(x) ((x) * (x))
-#define CUB(x) ((x) * (x) * (x))
+#define INV(x) (kdiv(ToReal(1.0),x))
+#define SQR(x) (kmul(x,x))
+#define CUB(x) (kmul(x,SQR(x)))
extern "C" void ML_BSSN_Advect_SelectBCs(CCTK_ARGUMENTS)
{
@@ -89,257 +90,258 @@ static void ML_BSSN_Advect_Body(cGH const * restrict const cctkGH, int const dir
ptrdiff_t const cdi = sizeof(CCTK_REAL) * di;
ptrdiff_t const cdj = sizeof(CCTK_REAL) * dj;
ptrdiff_t const cdk = sizeof(CCTK_REAL) * dk;
- CCTK_REAL const dx = ToReal(CCTK_DELTA_SPACE(0));
- CCTK_REAL const dy = ToReal(CCTK_DELTA_SPACE(1));
- CCTK_REAL const dz = ToReal(CCTK_DELTA_SPACE(2));
- CCTK_REAL const dt = ToReal(CCTK_DELTA_TIME);
- CCTK_REAL const dxi = INV(dx);
- CCTK_REAL const dyi = INV(dy);
- CCTK_REAL const dzi = INV(dz);
- CCTK_REAL const khalf = 0.5;
- CCTK_REAL const kthird = 1/3.0;
- CCTK_REAL const ktwothird = 2.0/3.0;
- CCTK_REAL const kfourthird = 4.0/3.0;
- CCTK_REAL const keightthird = 8.0/3.0;
- CCTK_REAL const hdxi = 0.5 * dxi;
- CCTK_REAL const hdyi = 0.5 * dyi;
- CCTK_REAL const hdzi = 0.5 * dzi;
+ CCTK_REAL_VEC const dx = ToReal(CCTK_DELTA_SPACE(0));
+ CCTK_REAL_VEC const dy = ToReal(CCTK_DELTA_SPACE(1));
+ CCTK_REAL_VEC const dz = ToReal(CCTK_DELTA_SPACE(2));
+ CCTK_REAL_VEC const dt = ToReal(CCTK_DELTA_TIME);
+ CCTK_REAL_VEC const dxi = INV(dx);
+ CCTK_REAL_VEC const dyi = INV(dy);
+ CCTK_REAL_VEC const dzi = INV(dz);
+ CCTK_REAL_VEC const khalf = ToReal(0.5);
+ CCTK_REAL_VEC const kthird = ToReal(1.0/3.0);
+ CCTK_REAL_VEC const ktwothird = ToReal(2.0/3.0);
+ CCTK_REAL_VEC const kfourthird = ToReal(4.0/3.0);
+ CCTK_REAL_VEC const keightthird = ToReal(8.0/3.0);
+ CCTK_REAL_VEC const hdxi = kmul(ToReal(0.5), dxi);
+ CCTK_REAL_VEC const hdyi = kmul(ToReal(0.5), dyi);
+ CCTK_REAL_VEC const hdzi = kmul(ToReal(0.5), dzi);
/* Initialize predefined quantities */
- CCTK_REAL const p1o12dx = 0.0833333333333333333333333333333*INV(dx);
- CCTK_REAL const p1o12dy = 0.0833333333333333333333333333333*INV(dy);
- CCTK_REAL const p1o12dz = 0.0833333333333333333333333333333*INV(dz);
- CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx)*INV(dy);
- CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx)*INV(dz);
- CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy)*INV(dz);
- CCTK_REAL const p1o24dx = 0.0416666666666666666666666666667*INV(dx);
- CCTK_REAL const p1o24dy = 0.0416666666666666666666666666667*INV(dy);
- CCTK_REAL const p1o24dz = 0.0416666666666666666666666666667*INV(dz);
- CCTK_REAL const p1o64dx = 0.015625*INV(dx);
- CCTK_REAL const p1o64dy = 0.015625*INV(dy);
- CCTK_REAL const p1o64dz = 0.015625*INV(dz);
- CCTK_REAL const p1odx = INV(dx);
- CCTK_REAL const p1ody = INV(dy);
- CCTK_REAL const p1odz = INV(dz);
- CCTK_REAL const pm1o12dx2 = -0.0833333333333333333333333333333*INV(SQR(dx));
- CCTK_REAL const pm1o12dy2 = -0.0833333333333333333333333333333*INV(SQR(dy));
- CCTK_REAL const pm1o12dz2 = -0.0833333333333333333333333333333*INV(SQR(dz));
+ CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o24dx = kmul(INV(dx),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o24dy = kmul(INV(dy),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o24dz = kmul(INV(dz),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625));
+ CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625));
+ CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625));
+ CCTK_REAL_VEC const p1odx = INV(dx);
+ CCTK_REAL_VEC const p1ody = INV(dy);
+ CCTK_REAL_VEC const p1odz = INV(dz);
+ CCTK_REAL_VEC const pm1o12dx2 = kmul(INV(SQR(dx)),ToReal(-0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const pm1o12dy2 = kmul(INV(SQR(dy)),ToReal(-0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const pm1o12dz2 = kmul(INV(SQR(dz)),ToReal(-0.0833333333333333333333333333333));
/* Loop over the grid points */
#pragma omp parallel
- LC_LOOP3 (ML_BSSN_Advect,
+ LC_LOOP3VEC (ML_BSSN_Advect,
i,j,k, min[0],min[1],min[2], max[0],max[1],max[2],
- cctk_lsh[0],cctk_lsh[1],cctk_lsh[2])
+ cctk_lsh[0],cctk_lsh[1],cctk_lsh[2],
+ CCTK_REAL_VEC_SIZE)
{
ptrdiff_t const index = di*i + dj*j + dk*k;
/* Assign local copies of grid functions */
- CCTK_REAL AL = A[index];
- CCTK_REAL alphaL = alpha[index];
- CCTK_REAL alpharhsL = alpharhs[index];
- CCTK_REAL ArhsL = Arhs[index];
- CCTK_REAL At11L = At11[index];
- CCTK_REAL At11rhsL = At11rhs[index];
- CCTK_REAL At12L = At12[index];
- CCTK_REAL At12rhsL = At12rhs[index];
- CCTK_REAL At13L = At13[index];
- CCTK_REAL At13rhsL = At13rhs[index];
- CCTK_REAL At22L = At22[index];
- CCTK_REAL At22rhsL = At22rhs[index];
- CCTK_REAL At23L = At23[index];
- CCTK_REAL At23rhsL = At23rhs[index];
- CCTK_REAL At33L = At33[index];
- CCTK_REAL At33rhsL = At33rhs[index];
- CCTK_REAL B1L = B1[index];
- CCTK_REAL B1rhsL = B1rhs[index];
- CCTK_REAL B2L = B2[index];
- CCTK_REAL B2rhsL = B2rhs[index];
- CCTK_REAL B3L = B3[index];
- CCTK_REAL B3rhsL = B3rhs[index];
- CCTK_REAL beta1L = beta1[index];
- CCTK_REAL beta1rhsL = beta1rhs[index];
- CCTK_REAL beta2L = beta2[index];
- CCTK_REAL beta2rhsL = beta2rhs[index];
- CCTK_REAL beta3L = beta3[index];
- CCTK_REAL beta3rhsL = beta3rhs[index];
- CCTK_REAL gt11L = gt11[index];
- CCTK_REAL gt11rhsL = gt11rhs[index];
- CCTK_REAL gt12L = gt12[index];
- CCTK_REAL gt12rhsL = gt12rhs[index];
- CCTK_REAL gt13L = gt13[index];
- CCTK_REAL gt13rhsL = gt13rhs[index];
- CCTK_REAL gt22L = gt22[index];
- CCTK_REAL gt22rhsL = gt22rhs[index];
- CCTK_REAL gt23L = gt23[index];
- CCTK_REAL gt23rhsL = gt23rhs[index];
- CCTK_REAL gt33L = gt33[index];
- CCTK_REAL gt33rhsL = gt33rhs[index];
- CCTK_REAL phiL = phi[index];
- CCTK_REAL phirhsL = phirhs[index];
- CCTK_REAL trKL = trK[index];
- CCTK_REAL trKrhsL = trKrhs[index];
- CCTK_REAL Xt1L = Xt1[index];
- CCTK_REAL Xt1rhsL = Xt1rhs[index];
- CCTK_REAL Xt2L = Xt2[index];
- CCTK_REAL Xt2rhsL = Xt2rhs[index];
- CCTK_REAL Xt3L = Xt3[index];
- CCTK_REAL Xt3rhsL = Xt3rhs[index];
+ CCTK_REAL_VEC AL = vec_load(A[index]);
+ CCTK_REAL_VEC alphaL = vec_load(alpha[index]);
+ CCTK_REAL_VEC alpharhsL = vec_load(alpharhs[index]);
+ CCTK_REAL_VEC ArhsL = vec_load(Arhs[index]);
+ CCTK_REAL_VEC At11L = vec_load(At11[index]);
+ CCTK_REAL_VEC At11rhsL = vec_load(At11rhs[index]);
+ CCTK_REAL_VEC At12L = vec_load(At12[index]);
+ CCTK_REAL_VEC At12rhsL = vec_load(At12rhs[index]);
+ CCTK_REAL_VEC At13L = vec_load(At13[index]);
+ CCTK_REAL_VEC At13rhsL = vec_load(At13rhs[index]);
+ CCTK_REAL_VEC At22L = vec_load(At22[index]);
+ CCTK_REAL_VEC At22rhsL = vec_load(At22rhs[index]);
+ CCTK_REAL_VEC At23L = vec_load(At23[index]);
+ CCTK_REAL_VEC At23rhsL = vec_load(At23rhs[index]);
+ CCTK_REAL_VEC At33L = vec_load(At33[index]);
+ CCTK_REAL_VEC At33rhsL = vec_load(At33rhs[index]);
+ CCTK_REAL_VEC B1L = vec_load(B1[index]);
+ CCTK_REAL_VEC B1rhsL = vec_load(B1rhs[index]);
+ CCTK_REAL_VEC B2L = vec_load(B2[index]);
+ CCTK_REAL_VEC B2rhsL = vec_load(B2rhs[index]);
+ CCTK_REAL_VEC B3L = vec_load(B3[index]);
+ CCTK_REAL_VEC B3rhsL = vec_load(B3rhs[index]);
+ CCTK_REAL_VEC beta1L = vec_load(beta1[index]);
+ CCTK_REAL_VEC beta1rhsL = vec_load(beta1rhs[index]);
+ CCTK_REAL_VEC beta2L = vec_load(beta2[index]);
+ CCTK_REAL_VEC beta2rhsL = vec_load(beta2rhs[index]);
+ CCTK_REAL_VEC beta3L = vec_load(beta3[index]);
+ CCTK_REAL_VEC beta3rhsL = vec_load(beta3rhs[index]);
+ CCTK_REAL_VEC gt11L = vec_load(gt11[index]);
+ CCTK_REAL_VEC gt11rhsL = vec_load(gt11rhs[index]);
+ CCTK_REAL_VEC gt12L = vec_load(gt12[index]);
+ CCTK_REAL_VEC gt12rhsL = vec_load(gt12rhs[index]);
+ CCTK_REAL_VEC gt13L = vec_load(gt13[index]);
+ CCTK_REAL_VEC gt13rhsL = vec_load(gt13rhs[index]);
+ CCTK_REAL_VEC gt22L = vec_load(gt22[index]);
+ CCTK_REAL_VEC gt22rhsL = vec_load(gt22rhs[index]);
+ CCTK_REAL_VEC gt23L = vec_load(gt23[index]);
+ CCTK_REAL_VEC gt23rhsL = vec_load(gt23rhs[index]);
+ CCTK_REAL_VEC gt33L = vec_load(gt33[index]);
+ CCTK_REAL_VEC gt33rhsL = vec_load(gt33rhs[index]);
+ CCTK_REAL_VEC phiL = vec_load(phi[index]);
+ CCTK_REAL_VEC phirhsL = vec_load(phirhs[index]);
+ CCTK_REAL_VEC trKL = vec_load(trK[index]);
+ CCTK_REAL_VEC trKrhsL = vec_load(trKrhs[index]);
+ CCTK_REAL_VEC Xt1L = vec_load(Xt1[index]);
+ CCTK_REAL_VEC Xt1rhsL = vec_load(Xt1rhs[index]);
+ CCTK_REAL_VEC Xt2L = vec_load(Xt2[index]);
+ CCTK_REAL_VEC Xt2rhsL = vec_load(Xt2rhs[index]);
+ CCTK_REAL_VEC Xt3L = vec_load(Xt3[index]);
+ CCTK_REAL_VEC Xt3rhsL = vec_load(Xt3rhs[index]);
/* Include user supplied include files */
/* Precompute derivatives */
- CCTK_REAL const PDupwindNthAnti1A = PDupwindNthAnti1(&A[index]);
- CCTK_REAL const PDupwindNthSymm1A = PDupwindNthSymm1(&A[index]);
- CCTK_REAL const PDupwindNthAnti2A = PDupwindNthAnti2(&A[index]);
- CCTK_REAL const PDupwindNthSymm2A = PDupwindNthSymm2(&A[index]);
- CCTK_REAL const PDupwindNthAnti3A = PDupwindNthAnti3(&A[index]);
- CCTK_REAL const PDupwindNthSymm3A = PDupwindNthSymm3(&A[index]);
- CCTK_REAL const PDupwindNthAnti1alpha = PDupwindNthAnti1(&alpha[index]);
- CCTK_REAL const PDupwindNthSymm1alpha = PDupwindNthSymm1(&alpha[index]);
- CCTK_REAL const PDupwindNthAnti2alpha = PDupwindNthAnti2(&alpha[index]);
- CCTK_REAL const PDupwindNthSymm2alpha = PDupwindNthSymm2(&alpha[index]);
- CCTK_REAL const PDupwindNthAnti3alpha = PDupwindNthAnti3(&alpha[index]);
- CCTK_REAL const PDupwindNthSymm3alpha = PDupwindNthSymm3(&alpha[index]);
- CCTK_REAL const PDupwindNthAnti1At11 = PDupwindNthAnti1(&At11[index]);
- CCTK_REAL const PDupwindNthSymm1At11 = PDupwindNthSymm1(&At11[index]);
- CCTK_REAL const PDupwindNthAnti2At11 = PDupwindNthAnti2(&At11[index]);
- CCTK_REAL const PDupwindNthSymm2At11 = PDupwindNthSymm2(&At11[index]);
- CCTK_REAL const PDupwindNthAnti3At11 = PDupwindNthAnti3(&At11[index]);
- CCTK_REAL const PDupwindNthSymm3At11 = PDupwindNthSymm3(&At11[index]);
- CCTK_REAL const PDupwindNthAnti1At12 = PDupwindNthAnti1(&At12[index]);
- CCTK_REAL const PDupwindNthSymm1At12 = PDupwindNthSymm1(&At12[index]);
- CCTK_REAL const PDupwindNthAnti2At12 = PDupwindNthAnti2(&At12[index]);
- CCTK_REAL const PDupwindNthSymm2At12 = PDupwindNthSymm2(&At12[index]);
- CCTK_REAL const PDupwindNthAnti3At12 = PDupwindNthAnti3(&At12[index]);
- CCTK_REAL const PDupwindNthSymm3At12 = PDupwindNthSymm3(&At12[index]);
- CCTK_REAL const PDupwindNthAnti1At13 = PDupwindNthAnti1(&At13[index]);
- CCTK_REAL const PDupwindNthSymm1At13 = PDupwindNthSymm1(&At13[index]);
- CCTK_REAL const PDupwindNthAnti2At13 = PDupwindNthAnti2(&At13[index]);
- CCTK_REAL const PDupwindNthSymm2At13 = PDupwindNthSymm2(&At13[index]);
- CCTK_REAL const PDupwindNthAnti3At13 = PDupwindNthAnti3(&At13[index]);
- CCTK_REAL const PDupwindNthSymm3At13 = PDupwindNthSymm3(&At13[index]);
- CCTK_REAL const PDupwindNthAnti1At22 = PDupwindNthAnti1(&At22[index]);
- CCTK_REAL const PDupwindNthSymm1At22 = PDupwindNthSymm1(&At22[index]);
- CCTK_REAL const PDupwindNthAnti2At22 = PDupwindNthAnti2(&At22[index]);
- CCTK_REAL const PDupwindNthSymm2At22 = PDupwindNthSymm2(&At22[index]);
- CCTK_REAL const PDupwindNthAnti3At22 = PDupwindNthAnti3(&At22[index]);
- CCTK_REAL const PDupwindNthSymm3At22 = PDupwindNthSymm3(&At22[index]);
- CCTK_REAL const PDupwindNthAnti1At23 = PDupwindNthAnti1(&At23[index]);
- CCTK_REAL const PDupwindNthSymm1At23 = PDupwindNthSymm1(&At23[index]);
- CCTK_REAL const PDupwindNthAnti2At23 = PDupwindNthAnti2(&At23[index]);
- CCTK_REAL const PDupwindNthSymm2At23 = PDupwindNthSymm2(&At23[index]);
- CCTK_REAL const PDupwindNthAnti3At23 = PDupwindNthAnti3(&At23[index]);
- CCTK_REAL const PDupwindNthSymm3At23 = PDupwindNthSymm3(&At23[index]);
- CCTK_REAL const PDupwindNthAnti1At33 = PDupwindNthAnti1(&At33[index]);
- CCTK_REAL const PDupwindNthSymm1At33 = PDupwindNthSymm1(&At33[index]);
- CCTK_REAL const PDupwindNthAnti2At33 = PDupwindNthAnti2(&At33[index]);
- CCTK_REAL const PDupwindNthSymm2At33 = PDupwindNthSymm2(&At33[index]);
- CCTK_REAL const PDupwindNthAnti3At33 = PDupwindNthAnti3(&At33[index]);
- CCTK_REAL const PDupwindNthSymm3At33 = PDupwindNthSymm3(&At33[index]);
- CCTK_REAL const PDupwindNthAnti1B1 = PDupwindNthAnti1(&B1[index]);
- CCTK_REAL const PDupwindNthSymm1B1 = PDupwindNthSymm1(&B1[index]);
- CCTK_REAL const PDupwindNthAnti2B1 = PDupwindNthAnti2(&B1[index]);
- CCTK_REAL const PDupwindNthSymm2B1 = PDupwindNthSymm2(&B1[index]);
- CCTK_REAL const PDupwindNthAnti3B1 = PDupwindNthAnti3(&B1[index]);
- CCTK_REAL const PDupwindNthSymm3B1 = PDupwindNthSymm3(&B1[index]);
- CCTK_REAL const PDupwindNthAnti1B2 = PDupwindNthAnti1(&B2[index]);
- CCTK_REAL const PDupwindNthSymm1B2 = PDupwindNthSymm1(&B2[index]);
- CCTK_REAL const PDupwindNthAnti2B2 = PDupwindNthAnti2(&B2[index]);
- CCTK_REAL const PDupwindNthSymm2B2 = PDupwindNthSymm2(&B2[index]);
- CCTK_REAL const PDupwindNthAnti3B2 = PDupwindNthAnti3(&B2[index]);
- CCTK_REAL const PDupwindNthSymm3B2 = PDupwindNthSymm3(&B2[index]);
- CCTK_REAL const PDupwindNthAnti1B3 = PDupwindNthAnti1(&B3[index]);
- CCTK_REAL const PDupwindNthSymm1B3 = PDupwindNthSymm1(&B3[index]);
- CCTK_REAL const PDupwindNthAnti2B3 = PDupwindNthAnti2(&B3[index]);
- CCTK_REAL const PDupwindNthSymm2B3 = PDupwindNthSymm2(&B3[index]);
- CCTK_REAL const PDupwindNthAnti3B3 = PDupwindNthAnti3(&B3[index]);
- CCTK_REAL const PDupwindNthSymm3B3 = PDupwindNthSymm3(&B3[index]);
- CCTK_REAL const PDupwindNthAnti1beta1 = PDupwindNthAnti1(&beta1[index]);
- CCTK_REAL const PDupwindNthSymm1beta1 = PDupwindNthSymm1(&beta1[index]);
- CCTK_REAL const PDupwindNthAnti2beta1 = PDupwindNthAnti2(&beta1[index]);
- CCTK_REAL const PDupwindNthSymm2beta1 = PDupwindNthSymm2(&beta1[index]);
- CCTK_REAL const PDupwindNthAnti3beta1 = PDupwindNthAnti3(&beta1[index]);
- CCTK_REAL const PDupwindNthSymm3beta1 = PDupwindNthSymm3(&beta1[index]);
- CCTK_REAL const PDupwindNthAnti1beta2 = PDupwindNthAnti1(&beta2[index]);
- CCTK_REAL const PDupwindNthSymm1beta2 = PDupwindNthSymm1(&beta2[index]);
- CCTK_REAL const PDupwindNthAnti2beta2 = PDupwindNthAnti2(&beta2[index]);
- CCTK_REAL const PDupwindNthSymm2beta2 = PDupwindNthSymm2(&beta2[index]);
- CCTK_REAL const PDupwindNthAnti3beta2 = PDupwindNthAnti3(&beta2[index]);
- CCTK_REAL const PDupwindNthSymm3beta2 = PDupwindNthSymm3(&beta2[index]);
- CCTK_REAL const PDupwindNthAnti1beta3 = PDupwindNthAnti1(&beta3[index]);
- CCTK_REAL const PDupwindNthSymm1beta3 = PDupwindNthSymm1(&beta3[index]);
- CCTK_REAL const PDupwindNthAnti2beta3 = PDupwindNthAnti2(&beta3[index]);
- CCTK_REAL const PDupwindNthSymm2beta3 = PDupwindNthSymm2(&beta3[index]);
- CCTK_REAL const PDupwindNthAnti3beta3 = PDupwindNthAnti3(&beta3[index]);
- CCTK_REAL const PDupwindNthSymm3beta3 = PDupwindNthSymm3(&beta3[index]);
- CCTK_REAL const PDupwindNthAnti1gt11 = PDupwindNthAnti1(&gt11[index]);
- CCTK_REAL const PDupwindNthSymm1gt11 = PDupwindNthSymm1(&gt11[index]);
- CCTK_REAL const PDupwindNthAnti2gt11 = PDupwindNthAnti2(&gt11[index]);
- CCTK_REAL const PDupwindNthSymm2gt11 = PDupwindNthSymm2(&gt11[index]);
- CCTK_REAL const PDupwindNthAnti3gt11 = PDupwindNthAnti3(&gt11[index]);
- CCTK_REAL const PDupwindNthSymm3gt11 = PDupwindNthSymm3(&gt11[index]);
- CCTK_REAL const PDupwindNthAnti1gt12 = PDupwindNthAnti1(&gt12[index]);
- CCTK_REAL const PDupwindNthSymm1gt12 = PDupwindNthSymm1(&gt12[index]);
- CCTK_REAL const PDupwindNthAnti2gt12 = PDupwindNthAnti2(&gt12[index]);
- CCTK_REAL const PDupwindNthSymm2gt12 = PDupwindNthSymm2(&gt12[index]);
- CCTK_REAL const PDupwindNthAnti3gt12 = PDupwindNthAnti3(&gt12[index]);
- CCTK_REAL const PDupwindNthSymm3gt12 = PDupwindNthSymm3(&gt12[index]);
- CCTK_REAL const PDupwindNthAnti1gt13 = PDupwindNthAnti1(&gt13[index]);
- CCTK_REAL const PDupwindNthSymm1gt13 = PDupwindNthSymm1(&gt13[index]);
- CCTK_REAL const PDupwindNthAnti2gt13 = PDupwindNthAnti2(&gt13[index]);
- CCTK_REAL const PDupwindNthSymm2gt13 = PDupwindNthSymm2(&gt13[index]);
- CCTK_REAL const PDupwindNthAnti3gt13 = PDupwindNthAnti3(&gt13[index]);
- CCTK_REAL const PDupwindNthSymm3gt13 = PDupwindNthSymm3(&gt13[index]);
- CCTK_REAL const PDupwindNthAnti1gt22 = PDupwindNthAnti1(&gt22[index]);
- CCTK_REAL const PDupwindNthSymm1gt22 = PDupwindNthSymm1(&gt22[index]);
- CCTK_REAL const PDupwindNthAnti2gt22 = PDupwindNthAnti2(&gt22[index]);
- CCTK_REAL const PDupwindNthSymm2gt22 = PDupwindNthSymm2(&gt22[index]);
- CCTK_REAL const PDupwindNthAnti3gt22 = PDupwindNthAnti3(&gt22[index]);
- CCTK_REAL const PDupwindNthSymm3gt22 = PDupwindNthSymm3(&gt22[index]);
- CCTK_REAL const PDupwindNthAnti1gt23 = PDupwindNthAnti1(&gt23[index]);
- CCTK_REAL const PDupwindNthSymm1gt23 = PDupwindNthSymm1(&gt23[index]);
- CCTK_REAL const PDupwindNthAnti2gt23 = PDupwindNthAnti2(&gt23[index]);
- CCTK_REAL const PDupwindNthSymm2gt23 = PDupwindNthSymm2(&gt23[index]);
- CCTK_REAL const PDupwindNthAnti3gt23 = PDupwindNthAnti3(&gt23[index]);
- CCTK_REAL const PDupwindNthSymm3gt23 = PDupwindNthSymm3(&gt23[index]);
- CCTK_REAL const PDupwindNthAnti1gt33 = PDupwindNthAnti1(&gt33[index]);
- CCTK_REAL const PDupwindNthSymm1gt33 = PDupwindNthSymm1(&gt33[index]);
- CCTK_REAL const PDupwindNthAnti2gt33 = PDupwindNthAnti2(&gt33[index]);
- CCTK_REAL const PDupwindNthSymm2gt33 = PDupwindNthSymm2(&gt33[index]);
- CCTK_REAL const PDupwindNthAnti3gt33 = PDupwindNthAnti3(&gt33[index]);
- CCTK_REAL const PDupwindNthSymm3gt33 = PDupwindNthSymm3(&gt33[index]);
- CCTK_REAL const PDupwindNthAnti1phi = PDupwindNthAnti1(&phi[index]);
- CCTK_REAL const PDupwindNthSymm1phi = PDupwindNthSymm1(&phi[index]);
- CCTK_REAL const PDupwindNthAnti2phi = PDupwindNthAnti2(&phi[index]);
- CCTK_REAL const PDupwindNthSymm2phi = PDupwindNthSymm2(&phi[index]);
- CCTK_REAL const PDupwindNthAnti3phi = PDupwindNthAnti3(&phi[index]);
- CCTK_REAL const PDupwindNthSymm3phi = PDupwindNthSymm3(&phi[index]);
- CCTK_REAL const PDupwindNthAnti1trK = PDupwindNthAnti1(&trK[index]);
- CCTK_REAL const PDupwindNthSymm1trK = PDupwindNthSymm1(&trK[index]);
- CCTK_REAL const PDupwindNthAnti2trK = PDupwindNthAnti2(&trK[index]);
- CCTK_REAL const PDupwindNthSymm2trK = PDupwindNthSymm2(&trK[index]);
- CCTK_REAL const PDupwindNthAnti3trK = PDupwindNthAnti3(&trK[index]);
- CCTK_REAL const PDupwindNthSymm3trK = PDupwindNthSymm3(&trK[index]);
- CCTK_REAL const PDupwindNthAnti1Xt1 = PDupwindNthAnti1(&Xt1[index]);
- CCTK_REAL const PDupwindNthSymm1Xt1 = PDupwindNthSymm1(&Xt1[index]);
- CCTK_REAL const PDupwindNthAnti2Xt1 = PDupwindNthAnti2(&Xt1[index]);
- CCTK_REAL const PDupwindNthSymm2Xt1 = PDupwindNthSymm2(&Xt1[index]);
- CCTK_REAL const PDupwindNthAnti3Xt1 = PDupwindNthAnti3(&Xt1[index]);
- CCTK_REAL const PDupwindNthSymm3Xt1 = PDupwindNthSymm3(&Xt1[index]);
- CCTK_REAL const PDupwindNthAnti1Xt2 = PDupwindNthAnti1(&Xt2[index]);
- CCTK_REAL const PDupwindNthSymm1Xt2 = PDupwindNthSymm1(&Xt2[index]);
- CCTK_REAL const PDupwindNthAnti2Xt2 = PDupwindNthAnti2(&Xt2[index]);
- CCTK_REAL const PDupwindNthSymm2Xt2 = PDupwindNthSymm2(&Xt2[index]);
- CCTK_REAL const PDupwindNthAnti3Xt2 = PDupwindNthAnti3(&Xt2[index]);
- CCTK_REAL const PDupwindNthSymm3Xt2 = PDupwindNthSymm3(&Xt2[index]);
- CCTK_REAL const PDupwindNthAnti1Xt3 = PDupwindNthAnti1(&Xt3[index]);
- CCTK_REAL const PDupwindNthSymm1Xt3 = PDupwindNthSymm1(&Xt3[index]);
- CCTK_REAL const PDupwindNthAnti2Xt3 = PDupwindNthAnti2(&Xt3[index]);
- CCTK_REAL const PDupwindNthSymm2Xt3 = PDupwindNthSymm2(&Xt3[index]);
- CCTK_REAL const PDupwindNthAnti3Xt3 = PDupwindNthAnti3(&Xt3[index]);
- CCTK_REAL const PDupwindNthSymm3Xt3 = PDupwindNthSymm3(&Xt3[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti1A = PDupwindNthAnti1(&A[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm1A = PDupwindNthSymm1(&A[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti2A = PDupwindNthAnti2(&A[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm2A = PDupwindNthSymm2(&A[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti3A = PDupwindNthAnti3(&A[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm3A = PDupwindNthSymm3(&A[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti1alpha = PDupwindNthAnti1(&alpha[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm1alpha = PDupwindNthSymm1(&alpha[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti2alpha = PDupwindNthAnti2(&alpha[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm2alpha = PDupwindNthSymm2(&alpha[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti3alpha = PDupwindNthAnti3(&alpha[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm3alpha = PDupwindNthSymm3(&alpha[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti1At11 = PDupwindNthAnti1(&At11[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm1At11 = PDupwindNthSymm1(&At11[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti2At11 = PDupwindNthAnti2(&At11[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm2At11 = PDupwindNthSymm2(&At11[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti3At11 = PDupwindNthAnti3(&At11[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm3At11 = PDupwindNthSymm3(&At11[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti1At12 = PDupwindNthAnti1(&At12[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm1At12 = PDupwindNthSymm1(&At12[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti2At12 = PDupwindNthAnti2(&At12[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm2At12 = PDupwindNthSymm2(&At12[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti3At12 = PDupwindNthAnti3(&At12[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm3At12 = PDupwindNthSymm3(&At12[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti1At13 = PDupwindNthAnti1(&At13[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm1At13 = PDupwindNthSymm1(&At13[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti2At13 = PDupwindNthAnti2(&At13[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm2At13 = PDupwindNthSymm2(&At13[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti3At13 = PDupwindNthAnti3(&At13[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm3At13 = PDupwindNthSymm3(&At13[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti1At22 = PDupwindNthAnti1(&At22[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm1At22 = PDupwindNthSymm1(&At22[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti2At22 = PDupwindNthAnti2(&At22[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm2At22 = PDupwindNthSymm2(&At22[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti3At22 = PDupwindNthAnti3(&At22[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm3At22 = PDupwindNthSymm3(&At22[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti1At23 = PDupwindNthAnti1(&At23[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm1At23 = PDupwindNthSymm1(&At23[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti2At23 = PDupwindNthAnti2(&At23[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm2At23 = PDupwindNthSymm2(&At23[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti3At23 = PDupwindNthAnti3(&At23[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm3At23 = PDupwindNthSymm3(&At23[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti1At33 = PDupwindNthAnti1(&At33[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm1At33 = PDupwindNthSymm1(&At33[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti2At33 = PDupwindNthAnti2(&At33[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm2At33 = PDupwindNthSymm2(&At33[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti3At33 = PDupwindNthAnti3(&At33[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm3At33 = PDupwindNthSymm3(&At33[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti1B1 = PDupwindNthAnti1(&B1[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm1B1 = PDupwindNthSymm1(&B1[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti2B1 = PDupwindNthAnti2(&B1[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm2B1 = PDupwindNthSymm2(&B1[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti3B1 = PDupwindNthAnti3(&B1[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm3B1 = PDupwindNthSymm3(&B1[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti1B2 = PDupwindNthAnti1(&B2[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm1B2 = PDupwindNthSymm1(&B2[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti2B2 = PDupwindNthAnti2(&B2[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm2B2 = PDupwindNthSymm2(&B2[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti3B2 = PDupwindNthAnti3(&B2[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm3B2 = PDupwindNthSymm3(&B2[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti1B3 = PDupwindNthAnti1(&B3[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm1B3 = PDupwindNthSymm1(&B3[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti2B3 = PDupwindNthAnti2(&B3[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm2B3 = PDupwindNthSymm2(&B3[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti3B3 = PDupwindNthAnti3(&B3[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm3B3 = PDupwindNthSymm3(&B3[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti1beta1 = PDupwindNthAnti1(&beta1[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm1beta1 = PDupwindNthSymm1(&beta1[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti2beta1 = PDupwindNthAnti2(&beta1[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm2beta1 = PDupwindNthSymm2(&beta1[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti3beta1 = PDupwindNthAnti3(&beta1[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm3beta1 = PDupwindNthSymm3(&beta1[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti1beta2 = PDupwindNthAnti1(&beta2[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm1beta2 = PDupwindNthSymm1(&beta2[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti2beta2 = PDupwindNthAnti2(&beta2[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm2beta2 = PDupwindNthSymm2(&beta2[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti3beta2 = PDupwindNthAnti3(&beta2[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm3beta2 = PDupwindNthSymm3(&beta2[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti1beta3 = PDupwindNthAnti1(&beta3[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm1beta3 = PDupwindNthSymm1(&beta3[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti2beta3 = PDupwindNthAnti2(&beta3[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm2beta3 = PDupwindNthSymm2(&beta3[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti3beta3 = PDupwindNthAnti3(&beta3[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm3beta3 = PDupwindNthSymm3(&beta3[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti1gt11 = PDupwindNthAnti1(&gt11[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm1gt11 = PDupwindNthSymm1(&gt11[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti2gt11 = PDupwindNthAnti2(&gt11[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm2gt11 = PDupwindNthSymm2(&gt11[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti3gt11 = PDupwindNthAnti3(&gt11[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm3gt11 = PDupwindNthSymm3(&gt11[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti1gt12 = PDupwindNthAnti1(&gt12[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm1gt12 = PDupwindNthSymm1(&gt12[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti2gt12 = PDupwindNthAnti2(&gt12[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm2gt12 = PDupwindNthSymm2(&gt12[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti3gt12 = PDupwindNthAnti3(&gt12[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm3gt12 = PDupwindNthSymm3(&gt12[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti1gt13 = PDupwindNthAnti1(&gt13[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm1gt13 = PDupwindNthSymm1(&gt13[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti2gt13 = PDupwindNthAnti2(&gt13[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm2gt13 = PDupwindNthSymm2(&gt13[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti3gt13 = PDupwindNthAnti3(&gt13[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm3gt13 = PDupwindNthSymm3(&gt13[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti1gt22 = PDupwindNthAnti1(&gt22[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm1gt22 = PDupwindNthSymm1(&gt22[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti2gt22 = PDupwindNthAnti2(&gt22[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm2gt22 = PDupwindNthSymm2(&gt22[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti3gt22 = PDupwindNthAnti3(&gt22[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm3gt22 = PDupwindNthSymm3(&gt22[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti1gt23 = PDupwindNthAnti1(&gt23[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm1gt23 = PDupwindNthSymm1(&gt23[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti2gt23 = PDupwindNthAnti2(&gt23[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm2gt23 = PDupwindNthSymm2(&gt23[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti3gt23 = PDupwindNthAnti3(&gt23[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm3gt23 = PDupwindNthSymm3(&gt23[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti1gt33 = PDupwindNthAnti1(&gt33[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm1gt33 = PDupwindNthSymm1(&gt33[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti2gt33 = PDupwindNthAnti2(&gt33[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm2gt33 = PDupwindNthSymm2(&gt33[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti3gt33 = PDupwindNthAnti3(&gt33[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm3gt33 = PDupwindNthSymm3(&gt33[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti1phi = PDupwindNthAnti1(&phi[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm1phi = PDupwindNthSymm1(&phi[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti2phi = PDupwindNthAnti2(&phi[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm2phi = PDupwindNthSymm2(&phi[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti3phi = PDupwindNthAnti3(&phi[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm3phi = PDupwindNthSymm3(&phi[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti1trK = PDupwindNthAnti1(&trK[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm1trK = PDupwindNthSymm1(&trK[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti2trK = PDupwindNthAnti2(&trK[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm2trK = PDupwindNthSymm2(&trK[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti3trK = PDupwindNthAnti3(&trK[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm3trK = PDupwindNthSymm3(&trK[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti1Xt1 = PDupwindNthAnti1(&Xt1[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm1Xt1 = PDupwindNthSymm1(&Xt1[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti2Xt1 = PDupwindNthAnti2(&Xt1[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm2Xt1 = PDupwindNthSymm2(&Xt1[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti3Xt1 = PDupwindNthAnti3(&Xt1[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm3Xt1 = PDupwindNthSymm3(&Xt1[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti1Xt2 = PDupwindNthAnti1(&Xt2[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm1Xt2 = PDupwindNthSymm1(&Xt2[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti2Xt2 = PDupwindNthAnti2(&Xt2[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm2Xt2 = PDupwindNthSymm2(&Xt2[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti3Xt2 = PDupwindNthAnti3(&Xt2[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm3Xt2 = PDupwindNthSymm3(&Xt2[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti1Xt3 = PDupwindNthAnti1(&Xt3[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm1Xt3 = PDupwindNthSymm1(&Xt3[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti2Xt3 = PDupwindNthAnti2(&Xt3[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm2Xt3 = PDupwindNthSymm2(&Xt3[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti3Xt3 = PDupwindNthAnti3(&Xt3[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm3Xt3 = PDupwindNthSymm3(&Xt3[index]);
/* Calculate temporaries and grid functions */
ptrdiff_t dir1 = Sign(beta1L);
@@ -348,172 +350,209 @@ static void ML_BSSN_Advect_Body(cGH const * restrict const cctkGH, int const dir
ptrdiff_t dir3 = Sign(beta3L);
- phirhsL = beta1L*PDupwindNthAnti1phi + beta2L*PDupwindNthAnti2phi +
- beta3L*PDupwindNthAnti3phi + phirhsL + PDupwindNthSymm1phi*Abs(beta1L)
- + PDupwindNthSymm2phi*Abs(beta2L) + PDupwindNthSymm3phi*Abs(beta3L);
-
- gt11rhsL = gt11rhsL + beta1L*PDupwindNthAnti1gt11 +
- beta2L*PDupwindNthAnti2gt11 + beta3L*PDupwindNthAnti3gt11 +
- PDupwindNthSymm1gt11*Abs(beta1L) + PDupwindNthSymm2gt11*Abs(beta2L) +
- PDupwindNthSymm3gt11*Abs(beta3L);
-
- gt12rhsL = gt12rhsL + beta1L*PDupwindNthAnti1gt12 +
- beta2L*PDupwindNthAnti2gt12 + beta3L*PDupwindNthAnti3gt12 +
- PDupwindNthSymm1gt12*Abs(beta1L) + PDupwindNthSymm2gt12*Abs(beta2L) +
- PDupwindNthSymm3gt12*Abs(beta3L);
-
- gt13rhsL = gt13rhsL + beta1L*PDupwindNthAnti1gt13 +
- beta2L*PDupwindNthAnti2gt13 + beta3L*PDupwindNthAnti3gt13 +
- PDupwindNthSymm1gt13*Abs(beta1L) + PDupwindNthSymm2gt13*Abs(beta2L) +
- PDupwindNthSymm3gt13*Abs(beta3L);
-
- gt22rhsL = gt22rhsL + beta1L*PDupwindNthAnti1gt22 +
- beta2L*PDupwindNthAnti2gt22 + beta3L*PDupwindNthAnti3gt22 +
- PDupwindNthSymm1gt22*Abs(beta1L) + PDupwindNthSymm2gt22*Abs(beta2L) +
- PDupwindNthSymm3gt22*Abs(beta3L);
-
- gt23rhsL = gt23rhsL + beta1L*PDupwindNthAnti1gt23 +
- beta2L*PDupwindNthAnti2gt23 + beta3L*PDupwindNthAnti3gt23 +
- PDupwindNthSymm1gt23*Abs(beta1L) + PDupwindNthSymm2gt23*Abs(beta2L) +
- PDupwindNthSymm3gt23*Abs(beta3L);
-
- gt33rhsL = gt33rhsL + beta1L*PDupwindNthAnti1gt33 +
- beta2L*PDupwindNthAnti2gt33 + beta3L*PDupwindNthAnti3gt33 +
- PDupwindNthSymm1gt33*Abs(beta1L) + PDupwindNthSymm2gt33*Abs(beta2L) +
- PDupwindNthSymm3gt33*Abs(beta3L);
-
- Xt1rhsL = beta1L*PDupwindNthAnti1Xt1 + beta2L*PDupwindNthAnti2Xt1 +
- beta3L*PDupwindNthAnti3Xt1 + Xt1rhsL + PDupwindNthSymm1Xt1*Abs(beta1L)
- + PDupwindNthSymm2Xt1*Abs(beta2L) + PDupwindNthSymm3Xt1*Abs(beta3L);
-
- Xt2rhsL = beta1L*PDupwindNthAnti1Xt2 + beta2L*PDupwindNthAnti2Xt2 +
- beta3L*PDupwindNthAnti3Xt2 + Xt2rhsL + PDupwindNthSymm1Xt2*Abs(beta1L)
- + PDupwindNthSymm2Xt2*Abs(beta2L) + PDupwindNthSymm3Xt2*Abs(beta3L);
-
- Xt3rhsL = beta1L*PDupwindNthAnti1Xt3 + beta2L*PDupwindNthAnti2Xt3 +
- beta3L*PDupwindNthAnti3Xt3 + Xt3rhsL + PDupwindNthSymm1Xt3*Abs(beta1L)
- + PDupwindNthSymm2Xt3*Abs(beta2L) + PDupwindNthSymm3Xt3*Abs(beta3L);
-
- trKrhsL = beta1L*PDupwindNthAnti1trK + beta2L*PDupwindNthAnti2trK +
- beta3L*PDupwindNthAnti3trK + trKrhsL + PDupwindNthSymm1trK*Abs(beta1L)
- + PDupwindNthSymm2trK*Abs(beta2L) + PDupwindNthSymm3trK*Abs(beta3L);
-
- At11rhsL = At11rhsL + beta1L*PDupwindNthAnti1At11 +
- beta2L*PDupwindNthAnti2At11 + beta3L*PDupwindNthAnti3At11 +
- PDupwindNthSymm1At11*Abs(beta1L) + PDupwindNthSymm2At11*Abs(beta2L) +
- PDupwindNthSymm3At11*Abs(beta3L);
-
- At12rhsL = At12rhsL + beta1L*PDupwindNthAnti1At12 +
- beta2L*PDupwindNthAnti2At12 + beta3L*PDupwindNthAnti3At12 +
- PDupwindNthSymm1At12*Abs(beta1L) + PDupwindNthSymm2At12*Abs(beta2L) +
- PDupwindNthSymm3At12*Abs(beta3L);
-
- At13rhsL = At13rhsL + beta1L*PDupwindNthAnti1At13 +
- beta2L*PDupwindNthAnti2At13 + beta3L*PDupwindNthAnti3At13 +
- PDupwindNthSymm1At13*Abs(beta1L) + PDupwindNthSymm2At13*Abs(beta2L) +
- PDupwindNthSymm3At13*Abs(beta3L);
-
- At22rhsL = At22rhsL + beta1L*PDupwindNthAnti1At22 +
- beta2L*PDupwindNthAnti2At22 + beta3L*PDupwindNthAnti3At22 +
- PDupwindNthSymm1At22*Abs(beta1L) + PDupwindNthSymm2At22*Abs(beta2L) +
- PDupwindNthSymm3At22*Abs(beta3L);
-
- At23rhsL = At23rhsL + beta1L*PDupwindNthAnti1At23 +
- beta2L*PDupwindNthAnti2At23 + beta3L*PDupwindNthAnti3At23 +
- PDupwindNthSymm1At23*Abs(beta1L) + PDupwindNthSymm2At23*Abs(beta2L) +
- PDupwindNthSymm3At23*Abs(beta3L);
-
- At33rhsL = At33rhsL + beta1L*PDupwindNthAnti1At33 +
- beta2L*PDupwindNthAnti2At33 + beta3L*PDupwindNthAnti3At33 +
- PDupwindNthSymm1At33*Abs(beta1L) + PDupwindNthSymm2At33*Abs(beta2L) +
- PDupwindNthSymm3At33*Abs(beta3L);
-
- alpharhsL = alpharhsL + (beta1L*PDupwindNthAnti1alpha +
- beta2L*PDupwindNthAnti2alpha + beta3L*PDupwindNthAnti3alpha +
- PDupwindNthSymm1alpha*Abs(beta1L) + PDupwindNthSymm2alpha*Abs(beta2L) +
- PDupwindNthSymm3alpha*Abs(beta3L))*ToReal(LapseAdvectionCoeff);
-
- ArhsL = ArhsL + (beta1L*PDupwindNthAnti1A + beta2L*PDupwindNthAnti2A +
- beta3L*PDupwindNthAnti3A + PDupwindNthSymm1A*Abs(beta1L) +
- PDupwindNthSymm2A*Abs(beta2L) +
- PDupwindNthSymm3A*Abs(beta3L))*ToReal(LapseAdvectionCoeff);
-
- beta1rhsL = beta1rhsL + (beta1L*PDupwindNthAnti1beta1 +
- beta2L*PDupwindNthAnti2beta1 + beta3L*PDupwindNthAnti3beta1 +
- PDupwindNthSymm1beta1*Abs(beta1L) + PDupwindNthSymm2beta1*Abs(beta2L) +
- PDupwindNthSymm3beta1*Abs(beta3L))*ToReal(ShiftAdvectionCoeff);
-
- beta2rhsL = beta2rhsL + (beta1L*PDupwindNthAnti1beta2 +
- beta2L*PDupwindNthAnti2beta2 + beta3L*PDupwindNthAnti3beta2 +
- PDupwindNthSymm1beta2*Abs(beta1L) + PDupwindNthSymm2beta2*Abs(beta2L) +
- PDupwindNthSymm3beta2*Abs(beta3L))*ToReal(ShiftAdvectionCoeff);
-
- beta3rhsL = beta3rhsL + (beta1L*PDupwindNthAnti1beta3 +
- beta2L*PDupwindNthAnti2beta3 + beta3L*PDupwindNthAnti3beta3 +
- PDupwindNthSymm1beta3*Abs(beta1L) + PDupwindNthSymm2beta3*Abs(beta2L) +
- PDupwindNthSymm3beta3*Abs(beta3L))*ToReal(ShiftAdvectionCoeff);
-
- B1rhsL = B1rhsL + (beta1L*(PDupwindNthAnti1B1 - PDupwindNthAnti1Xt1) +
- beta2L*(PDupwindNthAnti2B1 - PDupwindNthAnti2Xt1) +
- beta3L*(PDupwindNthAnti3B1 - PDupwindNthAnti3Xt1) + (PDupwindNthSymm1B1
- - PDupwindNthSymm1Xt1)*Abs(beta1L) + (PDupwindNthSymm2B1 -
- PDupwindNthSymm2Xt1)*Abs(beta2L) + (PDupwindNthSymm3B1 -
- PDupwindNthSymm3Xt1)*Abs(beta3L))*ToReal(ShiftAdvectionCoeff) +
- (beta1L*PDupwindNthAnti1Xt1 + beta2L*PDupwindNthAnti2Xt1 +
- beta3L*PDupwindNthAnti3Xt1 + PDupwindNthSymm1Xt1*Abs(beta1L) +
- PDupwindNthSymm2Xt1*Abs(beta2L) +
- PDupwindNthSymm3Xt1*Abs(beta3L))*ToReal(ShiftBCoeff);
-
- B2rhsL = B2rhsL + (beta1L*(PDupwindNthAnti1B2 - PDupwindNthAnti1Xt2) +
- beta2L*(PDupwindNthAnti2B2 - PDupwindNthAnti2Xt2) +
- beta3L*(PDupwindNthAnti3B2 - PDupwindNthAnti3Xt2) + (PDupwindNthSymm1B2
- - PDupwindNthSymm1Xt2)*Abs(beta1L) + (PDupwindNthSymm2B2 -
- PDupwindNthSymm2Xt2)*Abs(beta2L) + (PDupwindNthSymm3B2 -
- PDupwindNthSymm3Xt2)*Abs(beta3L))*ToReal(ShiftAdvectionCoeff) +
- (beta1L*PDupwindNthAnti1Xt2 + beta2L*PDupwindNthAnti2Xt2 +
- beta3L*PDupwindNthAnti3Xt2 + PDupwindNthSymm1Xt2*Abs(beta1L) +
- PDupwindNthSymm2Xt2*Abs(beta2L) +
- PDupwindNthSymm3Xt2*Abs(beta3L))*ToReal(ShiftBCoeff);
-
- B3rhsL = B3rhsL + (beta1L*(PDupwindNthAnti1B3 - PDupwindNthAnti1Xt3) +
- beta2L*(PDupwindNthAnti2B3 - PDupwindNthAnti2Xt3) +
- beta3L*(PDupwindNthAnti3B3 - PDupwindNthAnti3Xt3) + (PDupwindNthSymm1B3
- - PDupwindNthSymm1Xt3)*Abs(beta1L) + (PDupwindNthSymm2B3 -
- PDupwindNthSymm2Xt3)*Abs(beta2L) + (PDupwindNthSymm3B3 -
- PDupwindNthSymm3Xt3)*Abs(beta3L))*ToReal(ShiftAdvectionCoeff) +
- (beta1L*PDupwindNthAnti1Xt3 + beta2L*PDupwindNthAnti2Xt3 +
- beta3L*PDupwindNthAnti3Xt3 + PDupwindNthSymm1Xt3*Abs(beta1L) +
- PDupwindNthSymm2Xt3*Abs(beta2L) +
- PDupwindNthSymm3Xt3*Abs(beta3L))*ToReal(ShiftBCoeff);
+ phirhsL =
+ kmadd(beta1L,PDupwindNthAnti1phi,kmadd(beta2L,PDupwindNthAnti2phi,kmadd(beta3L,PDupwindNthAnti3phi,kadd(phirhsL,kmadd(PDupwindNthSymm1phi,kfabs(beta1L),kmadd(PDupwindNthSymm2phi,kfabs(beta2L),kmul(PDupwindNthSymm3phi,kfabs(beta3L))))))));
+
+ gt11rhsL =
+ kadd(gt11rhsL,kmadd(beta1L,PDupwindNthAnti1gt11,kmadd(beta2L,PDupwindNthAnti2gt11,kmadd(beta3L,PDupwindNthAnti3gt11,kmadd(PDupwindNthSymm1gt11,kfabs(beta1L),kmadd(PDupwindNthSymm2gt11,kfabs(beta2L),kmul(PDupwindNthSymm3gt11,kfabs(beta3L))))))));
+
+ gt12rhsL =
+ kadd(gt12rhsL,kmadd(beta1L,PDupwindNthAnti1gt12,kmadd(beta2L,PDupwindNthAnti2gt12,kmadd(beta3L,PDupwindNthAnti3gt12,kmadd(PDupwindNthSymm1gt12,kfabs(beta1L),kmadd(PDupwindNthSymm2gt12,kfabs(beta2L),kmul(PDupwindNthSymm3gt12,kfabs(beta3L))))))));
+
+ gt13rhsL =
+ kadd(gt13rhsL,kmadd(beta1L,PDupwindNthAnti1gt13,kmadd(beta2L,PDupwindNthAnti2gt13,kmadd(beta3L,PDupwindNthAnti3gt13,kmadd(PDupwindNthSymm1gt13,kfabs(beta1L),kmadd(PDupwindNthSymm2gt13,kfabs(beta2L),kmul(PDupwindNthSymm3gt13,kfabs(beta3L))))))));
+
+ gt22rhsL =
+ kadd(gt22rhsL,kmadd(beta1L,PDupwindNthAnti1gt22,kmadd(beta2L,PDupwindNthAnti2gt22,kmadd(beta3L,PDupwindNthAnti3gt22,kmadd(PDupwindNthSymm1gt22,kfabs(beta1L),kmadd(PDupwindNthSymm2gt22,kfabs(beta2L),kmul(PDupwindNthSymm3gt22,kfabs(beta3L))))))));
+
+ gt23rhsL =
+ kadd(gt23rhsL,kmadd(beta1L,PDupwindNthAnti1gt23,kmadd(beta2L,PDupwindNthAnti2gt23,kmadd(beta3L,PDupwindNthAnti3gt23,kmadd(PDupwindNthSymm1gt23,kfabs(beta1L),kmadd(PDupwindNthSymm2gt23,kfabs(beta2L),kmul(PDupwindNthSymm3gt23,kfabs(beta3L))))))));
+
+ gt33rhsL =
+ kadd(gt33rhsL,kmadd(beta1L,PDupwindNthAnti1gt33,kmadd(beta2L,PDupwindNthAnti2gt33,kmadd(beta3L,PDupwindNthAnti3gt33,kmadd(PDupwindNthSymm1gt33,kfabs(beta1L),kmadd(PDupwindNthSymm2gt33,kfabs(beta2L),kmul(PDupwindNthSymm3gt33,kfabs(beta3L))))))));
+
+ Xt1rhsL =
+ kmadd(beta1L,PDupwindNthAnti1Xt1,kmadd(beta2L,PDupwindNthAnti2Xt1,kmadd(beta3L,PDupwindNthAnti3Xt1,kadd(Xt1rhsL,kmadd(PDupwindNthSymm1Xt1,kfabs(beta1L),kmadd(PDupwindNthSymm2Xt1,kfabs(beta2L),kmul(PDupwindNthSymm3Xt1,kfabs(beta3L))))))));
+
+ Xt2rhsL =
+ kmadd(beta1L,PDupwindNthAnti1Xt2,kmadd(beta2L,PDupwindNthAnti2Xt2,kmadd(beta3L,PDupwindNthAnti3Xt2,kadd(Xt2rhsL,kmadd(PDupwindNthSymm1Xt2,kfabs(beta1L),kmadd(PDupwindNthSymm2Xt2,kfabs(beta2L),kmul(PDupwindNthSymm3Xt2,kfabs(beta3L))))))));
+
+ Xt3rhsL =
+ kmadd(beta1L,PDupwindNthAnti1Xt3,kmadd(beta2L,PDupwindNthAnti2Xt3,kmadd(beta3L,PDupwindNthAnti3Xt3,kadd(Xt3rhsL,kmadd(PDupwindNthSymm1Xt3,kfabs(beta1L),kmadd(PDupwindNthSymm2Xt3,kfabs(beta2L),kmul(PDupwindNthSymm3Xt3,kfabs(beta3L))))))));
+
+ trKrhsL =
+ kmadd(beta1L,PDupwindNthAnti1trK,kmadd(beta2L,PDupwindNthAnti2trK,kmadd(beta3L,PDupwindNthAnti3trK,kadd(trKrhsL,kmadd(PDupwindNthSymm1trK,kfabs(beta1L),kmadd(PDupwindNthSymm2trK,kfabs(beta2L),kmul(PDupwindNthSymm3trK,kfabs(beta3L))))))));
+
+ At11rhsL =
+ kadd(At11rhsL,kmadd(beta1L,PDupwindNthAnti1At11,kmadd(beta2L,PDupwindNthAnti2At11,kmadd(beta3L,PDupwindNthAnti3At11,kmadd(PDupwindNthSymm1At11,kfabs(beta1L),kmadd(PDupwindNthSymm2At11,kfabs(beta2L),kmul(PDupwindNthSymm3At11,kfabs(beta3L))))))));
+
+ At12rhsL =
+ kadd(At12rhsL,kmadd(beta1L,PDupwindNthAnti1At12,kmadd(beta2L,PDupwindNthAnti2At12,kmadd(beta3L,PDupwindNthAnti3At12,kmadd(PDupwindNthSymm1At12,kfabs(beta1L),kmadd(PDupwindNthSymm2At12,kfabs(beta2L),kmul(PDupwindNthSymm3At12,kfabs(beta3L))))))));
+
+ At13rhsL =
+ kadd(At13rhsL,kmadd(beta1L,PDupwindNthAnti1At13,kmadd(beta2L,PDupwindNthAnti2At13,kmadd(beta3L,PDupwindNthAnti3At13,kmadd(PDupwindNthSymm1At13,kfabs(beta1L),kmadd(PDupwindNthSymm2At13,kfabs(beta2L),kmul(PDupwindNthSymm3At13,kfabs(beta3L))))))));
+
+ At22rhsL =
+ kadd(At22rhsL,kmadd(beta1L,PDupwindNthAnti1At22,kmadd(beta2L,PDupwindNthAnti2At22,kmadd(beta3L,PDupwindNthAnti3At22,kmadd(PDupwindNthSymm1At22,kfabs(beta1L),kmadd(PDupwindNthSymm2At22,kfabs(beta2L),kmul(PDupwindNthSymm3At22,kfabs(beta3L))))))));
+
+ At23rhsL =
+ kadd(At23rhsL,kmadd(beta1L,PDupwindNthAnti1At23,kmadd(beta2L,PDupwindNthAnti2At23,kmadd(beta3L,PDupwindNthAnti3At23,kmadd(PDupwindNthSymm1At23,kfabs(beta1L),kmadd(PDupwindNthSymm2At23,kfabs(beta2L),kmul(PDupwindNthSymm3At23,kfabs(beta3L))))))));
+
+ At33rhsL =
+ kadd(At33rhsL,kmadd(beta1L,PDupwindNthAnti1At33,kmadd(beta2L,PDupwindNthAnti2At33,kmadd(beta3L,PDupwindNthAnti3At33,kmadd(PDupwindNthSymm1At33,kfabs(beta1L),kmadd(PDupwindNthSymm2At33,kfabs(beta2L),kmul(PDupwindNthSymm3At33,kfabs(beta3L))))))));
+
+ alpharhsL =
+ kmadd(kmadd(beta1L,PDupwindNthAnti1alpha,kmadd(beta2L,PDupwindNthAnti2alpha,kmadd(beta3L,PDupwindNthAnti3alpha,kmadd(PDupwindNthSymm1alpha,kfabs(beta1L),kmadd(PDupwindNthSymm2alpha,kfabs(beta2L),kmul(PDupwindNthSymm3alpha,kfabs(beta3L))))))),ToReal(LapseAdvectionCoeff),alpharhsL);
+
+ ArhsL =
+ kmadd(kmadd(beta1L,PDupwindNthAnti1A,kmadd(beta2L,PDupwindNthAnti2A,kmadd(beta3L,PDupwindNthAnti3A,kmadd(PDupwindNthSymm1A,kfabs(beta1L),kmadd(PDupwindNthSymm2A,kfabs(beta2L),kmul(PDupwindNthSymm3A,kfabs(beta3L))))))),ToReal(LapseAdvectionCoeff),ArhsL);
+
+ beta1rhsL =
+ kmadd(kmadd(beta1L,PDupwindNthAnti1beta1,kmadd(beta2L,PDupwindNthAnti2beta1,kmadd(beta3L,PDupwindNthAnti3beta1,kmadd(PDupwindNthSymm1beta1,kfabs(beta1L),kmadd(PDupwindNthSymm2beta1,kfabs(beta2L),kmul(PDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),beta1rhsL);
+
+ beta2rhsL =
+ kmadd(kmadd(beta1L,PDupwindNthAnti1beta2,kmadd(beta2L,PDupwindNthAnti2beta2,kmadd(beta3L,PDupwindNthAnti3beta2,kmadd(PDupwindNthSymm1beta2,kfabs(beta1L),kmadd(PDupwindNthSymm2beta2,kfabs(beta2L),kmul(PDupwindNthSymm3beta2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),beta2rhsL);
+
+ beta3rhsL =
+ kmadd(kmadd(beta1L,PDupwindNthAnti1beta3,kmadd(beta2L,PDupwindNthAnti2beta3,kmadd(beta3L,PDupwindNthAnti3beta3,kmadd(PDupwindNthSymm1beta3,kfabs(beta1L),kmadd(PDupwindNthSymm2beta3,kfabs(beta2L),kmul(PDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),beta3rhsL);
+
+ B1rhsL =
+ kadd(B1rhsL,kmadd(kmadd(beta1L,ksub(PDupwindNthAnti1B1,PDupwindNthAnti1Xt1),kmadd(beta2L,ksub(PDupwindNthAnti2B1,PDupwindNthAnti2Xt1),kmadd(beta3L,ksub(PDupwindNthAnti3B1,PDupwindNthAnti3Xt1),kmadd(kfabs(beta1L),ksub(PDupwindNthSymm1B1,PDupwindNthSymm1Xt1),kmadd(kfabs(beta2L),ksub(PDupwindNthSymm2B1,PDupwindNthSymm2Xt1),kmul(kfabs(beta3L),ksub(PDupwindNthSymm3B1,PDupwindNthSymm3Xt1))))))),ToReal(ShiftAdvectionCoeff),kmul(kmadd(beta1L,PDupwindNthAnti1Xt1,kmadd(beta2L,PDupwindNthAnti2Xt1,kmadd(beta3L,PDupwindNthAnti3Xt1,kmadd(PDupwindNthSymm1Xt1,kfabs(beta1L),kmadd(PDupwindNthSymm2Xt1,kfabs(beta2L),kmul(PDupwindNthSymm3Xt1,kfabs(beta3L))))))),ToReal(ShiftBCoeff))));
+
+ B2rhsL =
+ kadd(B2rhsL,kmadd(kmadd(beta1L,ksub(PDupwindNthAnti1B2,PDupwindNthAnti1Xt2),kmadd(beta2L,ksub(PDupwindNthAnti2B2,PDupwindNthAnti2Xt2),kmadd(beta3L,ksub(PDupwindNthAnti3B2,PDupwindNthAnti3Xt2),kmadd(kfabs(beta1L),ksub(PDupwindNthSymm1B2,PDupwindNthSymm1Xt2),kmadd(kfabs(beta2L),ksub(PDupwindNthSymm2B2,PDupwindNthSymm2Xt2),kmul(kfabs(beta3L),ksub(PDupwindNthSymm3B2,PDupwindNthSymm3Xt2))))))),ToReal(ShiftAdvectionCoeff),kmul(kmadd(beta1L,PDupwindNthAnti1Xt2,kmadd(beta2L,PDupwindNthAnti2Xt2,kmadd(beta3L,PDupwindNthAnti3Xt2,kmadd(PDupwindNthSymm1Xt2,kfabs(beta1L),kmadd(PDupwindNthSymm2Xt2,kfabs(beta2L),kmul(PDupwindNthSymm3Xt2,kfabs(beta3L))))))),ToReal(ShiftBCoeff))));
+
+ B3rhsL =
+ kadd(B3rhsL,kmadd(kmadd(beta1L,ksub(PDupwindNthAnti1B3,PDupwindNthAnti1Xt3),kmadd(beta2L,ksub(PDupwindNthAnti2B3,PDupwindNthAnti2Xt3),kmadd(beta3L,ksub(PDupwindNthAnti3B3,PDupwindNthAnti3Xt3),kmadd(kfabs(beta1L),ksub(PDupwindNthSymm1B3,PDupwindNthSymm1Xt3),kmadd(kfabs(beta2L),ksub(PDupwindNthSymm2B3,PDupwindNthSymm2Xt3),kmul(kfabs(beta3L),ksub(PDupwindNthSymm3B3,PDupwindNthSymm3Xt3))))))),ToReal(ShiftAdvectionCoeff),kmul(kmadd(beta1L,PDupwindNthAnti1Xt3,kmadd(beta2L,PDupwindNthAnti2Xt3,kmadd(beta3L,PDupwindNthAnti3Xt3,kmadd(PDupwindNthSymm1Xt3,kfabs(beta1L),kmadd(PDupwindNthSymm2Xt3,kfabs(beta2L),kmul(PDupwindNthSymm3Xt3,kfabs(beta3L))))))),ToReal(ShiftBCoeff))));
+
+ /* If necessary, store only the middle part of the vector when it overlaps both the lower and the upper loop bound */
+
+ if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0))
+ {
+ ptrdiff_t const elt_count_lo = lc_imin-i;
+ ptrdiff_t const elt_count_hi = lc_imax-i;
+ vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi);
+ break;
+ }
+
+ /* If necessary, store only partial vectors after the first iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0))
+ {
+ ptrdiff_t const elt_count = lc_imin-i;
+ vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count);
+ vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count);
+ vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count);
+ vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count);
+ vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count);
+ vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count);
+ vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count);
+ vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count);
+ vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count);
+ vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count);
+ vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count);
+ vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count);
+ vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count);
+ vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count);
+ vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count);
+ vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count);
+ vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count);
+ vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count);
+ vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count);
+ vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count);
+ vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count);
+ vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count);
+ vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count);
+ vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count);
+ vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count);
+ continue;
+ }
+
+ /* If necessary, store only partial vectors after the last iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0))
+ {
+ ptrdiff_t const elt_count = lc_imax-i;
+ vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count);
+ vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count);
+ vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count);
+ vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count);
+ vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count);
+ vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count);
+ vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count);
+ vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count);
+ vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count);
+ vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count);
+ vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count);
+ vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count);
+ vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count);
+ vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count);
+ vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count);
+ vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count);
+ vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count);
+ vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count);
+ vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count);
+ vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count);
+ vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count);
+ vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count);
+ vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count);
+ vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count);
+ vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count);
+ break;
+ }
/* Copy local copies back to grid functions */
- alpharhs[index] = alpharhsL;
- Arhs[index] = ArhsL;
- At11rhs[index] = At11rhsL;
- At12rhs[index] = At12rhsL;
- At13rhs[index] = At13rhsL;
- At22rhs[index] = At22rhsL;
- At23rhs[index] = At23rhsL;
- At33rhs[index] = At33rhsL;
- B1rhs[index] = B1rhsL;
- B2rhs[index] = B2rhsL;
- B3rhs[index] = B3rhsL;
- beta1rhs[index] = beta1rhsL;
- beta2rhs[index] = beta2rhsL;
- beta3rhs[index] = beta3rhsL;
- gt11rhs[index] = gt11rhsL;
- gt12rhs[index] = gt12rhsL;
- gt13rhs[index] = gt13rhsL;
- gt22rhs[index] = gt22rhsL;
- gt23rhs[index] = gt23rhsL;
- gt33rhs[index] = gt33rhsL;
- phirhs[index] = phirhsL;
- trKrhs[index] = trKrhsL;
- Xt1rhs[index] = Xt1rhsL;
- Xt2rhs[index] = Xt2rhsL;
- Xt3rhs[index] = Xt3rhsL;
+ vec_store_nta(alpharhs[index],alpharhsL);
+ vec_store_nta(Arhs[index],ArhsL);
+ vec_store_nta(At11rhs[index],At11rhsL);
+ vec_store_nta(At12rhs[index],At12rhsL);
+ vec_store_nta(At13rhs[index],At13rhsL);
+ vec_store_nta(At22rhs[index],At22rhsL);
+ vec_store_nta(At23rhs[index],At23rhsL);
+ vec_store_nta(At33rhs[index],At33rhsL);
+ vec_store_nta(B1rhs[index],B1rhsL);
+ vec_store_nta(B2rhs[index],B2rhsL);
+ vec_store_nta(B3rhs[index],B3rhsL);
+ vec_store_nta(beta1rhs[index],beta1rhsL);
+ vec_store_nta(beta2rhs[index],beta2rhsL);
+ vec_store_nta(beta3rhs[index],beta3rhsL);
+ vec_store_nta(gt11rhs[index],gt11rhsL);
+ vec_store_nta(gt12rhs[index],gt12rhsL);
+ vec_store_nta(gt13rhs[index],gt13rhsL);
+ vec_store_nta(gt22rhs[index],gt22rhsL);
+ vec_store_nta(gt23rhs[index],gt23rhsL);
+ vec_store_nta(gt33rhs[index],gt33rhsL);
+ vec_store_nta(phirhs[index],phirhsL);
+ vec_store_nta(trKrhs[index],trKrhsL);
+ vec_store_nta(Xt1rhs[index],Xt1rhsL);
+ vec_store_nta(Xt2rhs[index],Xt2rhsL);
+ vec_store_nta(Xt3rhs[index],Xt3rhsL);
}
- LC_ENDLOOP3 (ML_BSSN_Advect);
+ LC_ENDLOOP3VEC (ML_BSSN_Advect);
}
extern "C" void ML_BSSN_Advect(CCTK_ARGUMENTS)
diff --git a/ML_BSSN/src/ML_BSSN_Dissipation.cc b/ML_BSSN/src/ML_BSSN_Dissipation.cc
index 110c5bb..524b6d8 100644
--- a/ML_BSSN/src/ML_BSSN_Dissipation.cc
+++ b/ML_BSSN/src/ML_BSSN_Dissipation.cc
@@ -13,13 +13,14 @@
#include "GenericFD.h"
#include "Differencing.h"
#include "loopcontrol.h"
+#include "vectors.h"
/* Define macros used in calculations */
#define INITVALUE (42)
#define QAD(x) (SQR(SQR(x)))
-#define INV(x) ((1.0) / (x))
-#define SQR(x) ((x) * (x))
-#define CUB(x) ((x) * (x) * (x))
+#define INV(x) (kdiv(ToReal(1.0),x))
+#define SQR(x) (kmul(x,x))
+#define CUB(x) (kmul(x,SQR(x)))
extern "C" void ML_BSSN_Dissipation_SelectBCs(CCTK_ARGUMENTS)
{
@@ -89,298 +90,394 @@ static void ML_BSSN_Dissipation_Body(cGH const * restrict const cctkGH, int cons
ptrdiff_t const cdi = sizeof(CCTK_REAL) * di;
ptrdiff_t const cdj = sizeof(CCTK_REAL) * dj;
ptrdiff_t const cdk = sizeof(CCTK_REAL) * dk;
- CCTK_REAL const dx = ToReal(CCTK_DELTA_SPACE(0));
- CCTK_REAL const dy = ToReal(CCTK_DELTA_SPACE(1));
- CCTK_REAL const dz = ToReal(CCTK_DELTA_SPACE(2));
- CCTK_REAL const dt = ToReal(CCTK_DELTA_TIME);
- CCTK_REAL const dxi = INV(dx);
- CCTK_REAL const dyi = INV(dy);
- CCTK_REAL const dzi = INV(dz);
- CCTK_REAL const khalf = 0.5;
- CCTK_REAL const kthird = 1/3.0;
- CCTK_REAL const ktwothird = 2.0/3.0;
- CCTK_REAL const kfourthird = 4.0/3.0;
- CCTK_REAL const keightthird = 8.0/3.0;
- CCTK_REAL const hdxi = 0.5 * dxi;
- CCTK_REAL const hdyi = 0.5 * dyi;
- CCTK_REAL const hdzi = 0.5 * dzi;
+ CCTK_REAL_VEC const dx = ToReal(CCTK_DELTA_SPACE(0));
+ CCTK_REAL_VEC const dy = ToReal(CCTK_DELTA_SPACE(1));
+ CCTK_REAL_VEC const dz = ToReal(CCTK_DELTA_SPACE(2));
+ CCTK_REAL_VEC const dt = ToReal(CCTK_DELTA_TIME);
+ CCTK_REAL_VEC const dxi = INV(dx);
+ CCTK_REAL_VEC const dyi = INV(dy);
+ CCTK_REAL_VEC const dzi = INV(dz);
+ CCTK_REAL_VEC const khalf = ToReal(0.5);
+ CCTK_REAL_VEC const kthird = ToReal(1.0/3.0);
+ CCTK_REAL_VEC const ktwothird = ToReal(2.0/3.0);
+ CCTK_REAL_VEC const kfourthird = ToReal(4.0/3.0);
+ CCTK_REAL_VEC const keightthird = ToReal(8.0/3.0);
+ CCTK_REAL_VEC const hdxi = kmul(ToReal(0.5), dxi);
+ CCTK_REAL_VEC const hdyi = kmul(ToReal(0.5), dyi);
+ CCTK_REAL_VEC const hdzi = kmul(ToReal(0.5), dzi);
/* Initialize predefined quantities */
- CCTK_REAL const p1o12dx = 0.0833333333333333333333333333333*INV(dx);
- CCTK_REAL const p1o12dy = 0.0833333333333333333333333333333*INV(dy);
- CCTK_REAL const p1o12dz = 0.0833333333333333333333333333333*INV(dz);
- CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx)*INV(dy);
- CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx)*INV(dz);
- CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy)*INV(dz);
- CCTK_REAL const p1o24dx = 0.0416666666666666666666666666667*INV(dx);
- CCTK_REAL const p1o24dy = 0.0416666666666666666666666666667*INV(dy);
- CCTK_REAL const p1o24dz = 0.0416666666666666666666666666667*INV(dz);
- CCTK_REAL const p1o64dx = 0.015625*INV(dx);
- CCTK_REAL const p1o64dy = 0.015625*INV(dy);
- CCTK_REAL const p1o64dz = 0.015625*INV(dz);
- CCTK_REAL const p1odx = INV(dx);
- CCTK_REAL const p1ody = INV(dy);
- CCTK_REAL const p1odz = INV(dz);
- CCTK_REAL const pm1o12dx2 = -0.0833333333333333333333333333333*INV(SQR(dx));
- CCTK_REAL const pm1o12dy2 = -0.0833333333333333333333333333333*INV(SQR(dy));
- CCTK_REAL const pm1o12dz2 = -0.0833333333333333333333333333333*INV(SQR(dz));
+ CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o24dx = kmul(INV(dx),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o24dy = kmul(INV(dy),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o24dz = kmul(INV(dz),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625));
+ CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625));
+ CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625));
+ CCTK_REAL_VEC const p1odx = INV(dx);
+ CCTK_REAL_VEC const p1ody = INV(dy);
+ CCTK_REAL_VEC const p1odz = INV(dz);
+ CCTK_REAL_VEC const pm1o12dx2 = kmul(INV(SQR(dx)),ToReal(-0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const pm1o12dy2 = kmul(INV(SQR(dy)),ToReal(-0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const pm1o12dz2 = kmul(INV(SQR(dz)),ToReal(-0.0833333333333333333333333333333));
/* Loop over the grid points */
#pragma omp parallel
- LC_LOOP3 (ML_BSSN_Dissipation,
+ LC_LOOP3VEC (ML_BSSN_Dissipation,
i,j,k, min[0],min[1],min[2], max[0],max[1],max[2],
- cctk_lsh[0],cctk_lsh[1],cctk_lsh[2])
+ cctk_lsh[0],cctk_lsh[1],cctk_lsh[2],
+ CCTK_REAL_VEC_SIZE)
{
ptrdiff_t const index = di*i + dj*j + dk*k;
/* Assign local copies of grid functions */
- CCTK_REAL AL = A[index];
- CCTK_REAL alphaL = alpha[index];
- CCTK_REAL alpharhsL = alpharhs[index];
- CCTK_REAL ArhsL = Arhs[index];
- CCTK_REAL At11L = At11[index];
- CCTK_REAL At11rhsL = At11rhs[index];
- CCTK_REAL At12L = At12[index];
- CCTK_REAL At12rhsL = At12rhs[index];
- CCTK_REAL At13L = At13[index];
- CCTK_REAL At13rhsL = At13rhs[index];
- CCTK_REAL At22L = At22[index];
- CCTK_REAL At22rhsL = At22rhs[index];
- CCTK_REAL At23L = At23[index];
- CCTK_REAL At23rhsL = At23rhs[index];
- CCTK_REAL At33L = At33[index];
- CCTK_REAL At33rhsL = At33rhs[index];
- CCTK_REAL B1L = B1[index];
- CCTK_REAL B1rhsL = B1rhs[index];
- CCTK_REAL B2L = B2[index];
- CCTK_REAL B2rhsL = B2rhs[index];
- CCTK_REAL B3L = B3[index];
- CCTK_REAL B3rhsL = B3rhs[index];
- CCTK_REAL beta1L = beta1[index];
- CCTK_REAL beta1rhsL = beta1rhs[index];
- CCTK_REAL beta2L = beta2[index];
- CCTK_REAL beta2rhsL = beta2rhs[index];
- CCTK_REAL beta3L = beta3[index];
- CCTK_REAL beta3rhsL = beta3rhs[index];
- CCTK_REAL gt11L = gt11[index];
- CCTK_REAL gt11rhsL = gt11rhs[index];
- CCTK_REAL gt12L = gt12[index];
- CCTK_REAL gt12rhsL = gt12rhs[index];
- CCTK_REAL gt13L = gt13[index];
- CCTK_REAL gt13rhsL = gt13rhs[index];
- CCTK_REAL gt22L = gt22[index];
- CCTK_REAL gt22rhsL = gt22rhs[index];
- CCTK_REAL gt23L = gt23[index];
- CCTK_REAL gt23rhsL = gt23rhs[index];
- CCTK_REAL gt33L = gt33[index];
- CCTK_REAL gt33rhsL = gt33rhs[index];
- CCTK_REAL phiL = phi[index];
- CCTK_REAL phirhsL = phirhs[index];
- CCTK_REAL trKL = trK[index];
- CCTK_REAL trKrhsL = trKrhs[index];
- CCTK_REAL Xt1L = Xt1[index];
- CCTK_REAL Xt1rhsL = Xt1rhs[index];
- CCTK_REAL Xt2L = Xt2[index];
- CCTK_REAL Xt2rhsL = Xt2rhs[index];
- CCTK_REAL Xt3L = Xt3[index];
- CCTK_REAL Xt3rhsL = Xt3rhs[index];
+ CCTK_REAL_VEC AL = vec_load(A[index]);
+ CCTK_REAL_VEC alphaL = vec_load(alpha[index]);
+ CCTK_REAL_VEC alpharhsL = vec_load(alpharhs[index]);
+ CCTK_REAL_VEC ArhsL = vec_load(Arhs[index]);
+ CCTK_REAL_VEC At11L = vec_load(At11[index]);
+ CCTK_REAL_VEC At11rhsL = vec_load(At11rhs[index]);
+ CCTK_REAL_VEC At12L = vec_load(At12[index]);
+ CCTK_REAL_VEC At12rhsL = vec_load(At12rhs[index]);
+ CCTK_REAL_VEC At13L = vec_load(At13[index]);
+ CCTK_REAL_VEC At13rhsL = vec_load(At13rhs[index]);
+ CCTK_REAL_VEC At22L = vec_load(At22[index]);
+ CCTK_REAL_VEC At22rhsL = vec_load(At22rhs[index]);
+ CCTK_REAL_VEC At23L = vec_load(At23[index]);
+ CCTK_REAL_VEC At23rhsL = vec_load(At23rhs[index]);
+ CCTK_REAL_VEC At33L = vec_load(At33[index]);
+ CCTK_REAL_VEC At33rhsL = vec_load(At33rhs[index]);
+ CCTK_REAL_VEC B1L = vec_load(B1[index]);
+ CCTK_REAL_VEC B1rhsL = vec_load(B1rhs[index]);
+ CCTK_REAL_VEC B2L = vec_load(B2[index]);
+ CCTK_REAL_VEC B2rhsL = vec_load(B2rhs[index]);
+ CCTK_REAL_VEC B3L = vec_load(B3[index]);
+ CCTK_REAL_VEC B3rhsL = vec_load(B3rhs[index]);
+ CCTK_REAL_VEC beta1L = vec_load(beta1[index]);
+ CCTK_REAL_VEC beta1rhsL = vec_load(beta1rhs[index]);
+ CCTK_REAL_VEC beta2L = vec_load(beta2[index]);
+ CCTK_REAL_VEC beta2rhsL = vec_load(beta2rhs[index]);
+ CCTK_REAL_VEC beta3L = vec_load(beta3[index]);
+ CCTK_REAL_VEC beta3rhsL = vec_load(beta3rhs[index]);
+ CCTK_REAL_VEC gt11L = vec_load(gt11[index]);
+ CCTK_REAL_VEC gt11rhsL = vec_load(gt11rhs[index]);
+ CCTK_REAL_VEC gt12L = vec_load(gt12[index]);
+ CCTK_REAL_VEC gt12rhsL = vec_load(gt12rhs[index]);
+ CCTK_REAL_VEC gt13L = vec_load(gt13[index]);
+ CCTK_REAL_VEC gt13rhsL = vec_load(gt13rhs[index]);
+ CCTK_REAL_VEC gt22L = vec_load(gt22[index]);
+ CCTK_REAL_VEC gt22rhsL = vec_load(gt22rhs[index]);
+ CCTK_REAL_VEC gt23L = vec_load(gt23[index]);
+ CCTK_REAL_VEC gt23rhsL = vec_load(gt23rhs[index]);
+ CCTK_REAL_VEC gt33L = vec_load(gt33[index]);
+ CCTK_REAL_VEC gt33rhsL = vec_load(gt33rhs[index]);
+ CCTK_REAL_VEC phiL = vec_load(phi[index]);
+ CCTK_REAL_VEC phirhsL = vec_load(phirhs[index]);
+ CCTK_REAL_VEC trKL = vec_load(trK[index]);
+ CCTK_REAL_VEC trKrhsL = vec_load(trKrhs[index]);
+ CCTK_REAL_VEC Xt1L = vec_load(Xt1[index]);
+ CCTK_REAL_VEC Xt1rhsL = vec_load(Xt1rhs[index]);
+ CCTK_REAL_VEC Xt2L = vec_load(Xt2[index]);
+ CCTK_REAL_VEC Xt2rhsL = vec_load(Xt2rhs[index]);
+ CCTK_REAL_VEC Xt3L = vec_load(Xt3[index]);
+ CCTK_REAL_VEC Xt3rhsL = vec_load(Xt3rhs[index]);
/* Include user supplied include files */
/* Precompute derivatives */
- CCTK_REAL const PDdissipationNth1A = PDdissipationNth1(&A[index]);
- CCTK_REAL const PDdissipationNth2A = PDdissipationNth2(&A[index]);
- CCTK_REAL const PDdissipationNth3A = PDdissipationNth3(&A[index]);
- CCTK_REAL const PDdissipationNth1alpha = PDdissipationNth1(&alpha[index]);
- CCTK_REAL const PDdissipationNth2alpha = PDdissipationNth2(&alpha[index]);
- CCTK_REAL const PDdissipationNth3alpha = PDdissipationNth3(&alpha[index]);
- CCTK_REAL const PDdissipationNth1At11 = PDdissipationNth1(&At11[index]);
- CCTK_REAL const PDdissipationNth2At11 = PDdissipationNth2(&At11[index]);
- CCTK_REAL const PDdissipationNth3At11 = PDdissipationNth3(&At11[index]);
- CCTK_REAL const PDdissipationNth1At12 = PDdissipationNth1(&At12[index]);
- CCTK_REAL const PDdissipationNth2At12 = PDdissipationNth2(&At12[index]);
- CCTK_REAL const PDdissipationNth3At12 = PDdissipationNth3(&At12[index]);
- CCTK_REAL const PDdissipationNth1At13 = PDdissipationNth1(&At13[index]);
- CCTK_REAL const PDdissipationNth2At13 = PDdissipationNth2(&At13[index]);
- CCTK_REAL const PDdissipationNth3At13 = PDdissipationNth3(&At13[index]);
- CCTK_REAL const PDdissipationNth1At22 = PDdissipationNth1(&At22[index]);
- CCTK_REAL const PDdissipationNth2At22 = PDdissipationNth2(&At22[index]);
- CCTK_REAL const PDdissipationNth3At22 = PDdissipationNth3(&At22[index]);
- CCTK_REAL const PDdissipationNth1At23 = PDdissipationNth1(&At23[index]);
- CCTK_REAL const PDdissipationNth2At23 = PDdissipationNth2(&At23[index]);
- CCTK_REAL const PDdissipationNth3At23 = PDdissipationNth3(&At23[index]);
- CCTK_REAL const PDdissipationNth1At33 = PDdissipationNth1(&At33[index]);
- CCTK_REAL const PDdissipationNth2At33 = PDdissipationNth2(&At33[index]);
- CCTK_REAL const PDdissipationNth3At33 = PDdissipationNth3(&At33[index]);
- CCTK_REAL const PDdissipationNth1B1 = PDdissipationNth1(&B1[index]);
- CCTK_REAL const PDdissipationNth2B1 = PDdissipationNth2(&B1[index]);
- CCTK_REAL const PDdissipationNth3B1 = PDdissipationNth3(&B1[index]);
- CCTK_REAL const PDdissipationNth1B2 = PDdissipationNth1(&B2[index]);
- CCTK_REAL const PDdissipationNth2B2 = PDdissipationNth2(&B2[index]);
- CCTK_REAL const PDdissipationNth3B2 = PDdissipationNth3(&B2[index]);
- CCTK_REAL const PDdissipationNth1B3 = PDdissipationNth1(&B3[index]);
- CCTK_REAL const PDdissipationNth2B3 = PDdissipationNth2(&B3[index]);
- CCTK_REAL const PDdissipationNth3B3 = PDdissipationNth3(&B3[index]);
- CCTK_REAL const PDdissipationNth1beta1 = PDdissipationNth1(&beta1[index]);
- CCTK_REAL const PDdissipationNth2beta1 = PDdissipationNth2(&beta1[index]);
- CCTK_REAL const PDdissipationNth3beta1 = PDdissipationNth3(&beta1[index]);
- CCTK_REAL const PDdissipationNth1beta2 = PDdissipationNth1(&beta2[index]);
- CCTK_REAL const PDdissipationNth2beta2 = PDdissipationNth2(&beta2[index]);
- CCTK_REAL const PDdissipationNth3beta2 = PDdissipationNth3(&beta2[index]);
- CCTK_REAL const PDdissipationNth1beta3 = PDdissipationNth1(&beta3[index]);
- CCTK_REAL const PDdissipationNth2beta3 = PDdissipationNth2(&beta3[index]);
- CCTK_REAL const PDdissipationNth3beta3 = PDdissipationNth3(&beta3[index]);
- CCTK_REAL const PDdissipationNth1gt11 = PDdissipationNth1(&gt11[index]);
- CCTK_REAL const PDdissipationNth2gt11 = PDdissipationNth2(&gt11[index]);
- CCTK_REAL const PDdissipationNth3gt11 = PDdissipationNth3(&gt11[index]);
- CCTK_REAL const PDdissipationNth1gt12 = PDdissipationNth1(&gt12[index]);
- CCTK_REAL const PDdissipationNth2gt12 = PDdissipationNth2(&gt12[index]);
- CCTK_REAL const PDdissipationNth3gt12 = PDdissipationNth3(&gt12[index]);
- CCTK_REAL const PDdissipationNth1gt13 = PDdissipationNth1(&gt13[index]);
- CCTK_REAL const PDdissipationNth2gt13 = PDdissipationNth2(&gt13[index]);
- CCTK_REAL const PDdissipationNth3gt13 = PDdissipationNth3(&gt13[index]);
- CCTK_REAL const PDdissipationNth1gt22 = PDdissipationNth1(&gt22[index]);
- CCTK_REAL const PDdissipationNth2gt22 = PDdissipationNth2(&gt22[index]);
- CCTK_REAL const PDdissipationNth3gt22 = PDdissipationNth3(&gt22[index]);
- CCTK_REAL const PDdissipationNth1gt23 = PDdissipationNth1(&gt23[index]);
- CCTK_REAL const PDdissipationNth2gt23 = PDdissipationNth2(&gt23[index]);
- CCTK_REAL const PDdissipationNth3gt23 = PDdissipationNth3(&gt23[index]);
- CCTK_REAL const PDdissipationNth1gt33 = PDdissipationNth1(&gt33[index]);
- CCTK_REAL const PDdissipationNth2gt33 = PDdissipationNth2(&gt33[index]);
- CCTK_REAL const PDdissipationNth3gt33 = PDdissipationNth3(&gt33[index]);
- CCTK_REAL const PDdissipationNth1phi = PDdissipationNth1(&phi[index]);
- CCTK_REAL const PDdissipationNth2phi = PDdissipationNth2(&phi[index]);
- CCTK_REAL const PDdissipationNth3phi = PDdissipationNth3(&phi[index]);
- CCTK_REAL const PDdissipationNth1trK = PDdissipationNth1(&trK[index]);
- CCTK_REAL const PDdissipationNth2trK = PDdissipationNth2(&trK[index]);
- CCTK_REAL const PDdissipationNth3trK = PDdissipationNth3(&trK[index]);
- CCTK_REAL const PDdissipationNth1Xt1 = PDdissipationNth1(&Xt1[index]);
- CCTK_REAL const PDdissipationNth2Xt1 = PDdissipationNth2(&Xt1[index]);
- CCTK_REAL const PDdissipationNth3Xt1 = PDdissipationNth3(&Xt1[index]);
- CCTK_REAL const PDdissipationNth1Xt2 = PDdissipationNth1(&Xt2[index]);
- CCTK_REAL const PDdissipationNth2Xt2 = PDdissipationNth2(&Xt2[index]);
- CCTK_REAL const PDdissipationNth3Xt2 = PDdissipationNth3(&Xt2[index]);
- CCTK_REAL const PDdissipationNth1Xt3 = PDdissipationNth1(&Xt3[index]);
- CCTK_REAL const PDdissipationNth2Xt3 = PDdissipationNth2(&Xt3[index]);
- CCTK_REAL const PDdissipationNth3Xt3 = PDdissipationNth3(&Xt3[index]);
+ CCTK_REAL_VEC const PDdissipationNth1A = PDdissipationNth1(&A[index]);
+ CCTK_REAL_VEC const PDdissipationNth2A = PDdissipationNth2(&A[index]);
+ CCTK_REAL_VEC const PDdissipationNth3A = PDdissipationNth3(&A[index]);
+ CCTK_REAL_VEC const PDdissipationNth1alpha = PDdissipationNth1(&alpha[index]);
+ CCTK_REAL_VEC const PDdissipationNth2alpha = PDdissipationNth2(&alpha[index]);
+ CCTK_REAL_VEC const PDdissipationNth3alpha = PDdissipationNth3(&alpha[index]);
+ CCTK_REAL_VEC const PDdissipationNth1At11 = PDdissipationNth1(&At11[index]);
+ CCTK_REAL_VEC const PDdissipationNth2At11 = PDdissipationNth2(&At11[index]);
+ CCTK_REAL_VEC const PDdissipationNth3At11 = PDdissipationNth3(&At11[index]);
+ CCTK_REAL_VEC const PDdissipationNth1At12 = PDdissipationNth1(&At12[index]);
+ CCTK_REAL_VEC const PDdissipationNth2At12 = PDdissipationNth2(&At12[index]);
+ CCTK_REAL_VEC const PDdissipationNth3At12 = PDdissipationNth3(&At12[index]);
+ CCTK_REAL_VEC const PDdissipationNth1At13 = PDdissipationNth1(&At13[index]);
+ CCTK_REAL_VEC const PDdissipationNth2At13 = PDdissipationNth2(&At13[index]);
+ CCTK_REAL_VEC const PDdissipationNth3At13 = PDdissipationNth3(&At13[index]);
+ CCTK_REAL_VEC const PDdissipationNth1At22 = PDdissipationNth1(&At22[index]);
+ CCTK_REAL_VEC const PDdissipationNth2At22 = PDdissipationNth2(&At22[index]);
+ CCTK_REAL_VEC const PDdissipationNth3At22 = PDdissipationNth3(&At22[index]);
+ CCTK_REAL_VEC const PDdissipationNth1At23 = PDdissipationNth1(&At23[index]);
+ CCTK_REAL_VEC const PDdissipationNth2At23 = PDdissipationNth2(&At23[index]);
+ CCTK_REAL_VEC const PDdissipationNth3At23 = PDdissipationNth3(&At23[index]);
+ CCTK_REAL_VEC const PDdissipationNth1At33 = PDdissipationNth1(&At33[index]);
+ CCTK_REAL_VEC const PDdissipationNth2At33 = PDdissipationNth2(&At33[index]);
+ CCTK_REAL_VEC const PDdissipationNth3At33 = PDdissipationNth3(&At33[index]);
+ CCTK_REAL_VEC const PDdissipationNth1B1 = PDdissipationNth1(&B1[index]);
+ CCTK_REAL_VEC const PDdissipationNth2B1 = PDdissipationNth2(&B1[index]);
+ CCTK_REAL_VEC const PDdissipationNth3B1 = PDdissipationNth3(&B1[index]);
+ CCTK_REAL_VEC const PDdissipationNth1B2 = PDdissipationNth1(&B2[index]);
+ CCTK_REAL_VEC const PDdissipationNth2B2 = PDdissipationNth2(&B2[index]);
+ CCTK_REAL_VEC const PDdissipationNth3B2 = PDdissipationNth3(&B2[index]);
+ CCTK_REAL_VEC const PDdissipationNth1B3 = PDdissipationNth1(&B3[index]);
+ CCTK_REAL_VEC const PDdissipationNth2B3 = PDdissipationNth2(&B3[index]);
+ CCTK_REAL_VEC const PDdissipationNth3B3 = PDdissipationNth3(&B3[index]);
+ CCTK_REAL_VEC const PDdissipationNth1beta1 = PDdissipationNth1(&beta1[index]);
+ CCTK_REAL_VEC const PDdissipationNth2beta1 = PDdissipationNth2(&beta1[index]);
+ CCTK_REAL_VEC const PDdissipationNth3beta1 = PDdissipationNth3(&beta1[index]);
+ CCTK_REAL_VEC const PDdissipationNth1beta2 = PDdissipationNth1(&beta2[index]);
+ CCTK_REAL_VEC const PDdissipationNth2beta2 = PDdissipationNth2(&beta2[index]);
+ CCTK_REAL_VEC const PDdissipationNth3beta2 = PDdissipationNth3(&beta2[index]);
+ CCTK_REAL_VEC const PDdissipationNth1beta3 = PDdissipationNth1(&beta3[index]);
+ CCTK_REAL_VEC const PDdissipationNth2beta3 = PDdissipationNth2(&beta3[index]);
+ CCTK_REAL_VEC const PDdissipationNth3beta3 = PDdissipationNth3(&beta3[index]);
+ CCTK_REAL_VEC const PDdissipationNth1gt11 = PDdissipationNth1(&gt11[index]);
+ CCTK_REAL_VEC const PDdissipationNth2gt11 = PDdissipationNth2(&gt11[index]);
+ CCTK_REAL_VEC const PDdissipationNth3gt11 = PDdissipationNth3(&gt11[index]);
+ CCTK_REAL_VEC const PDdissipationNth1gt12 = PDdissipationNth1(&gt12[index]);
+ CCTK_REAL_VEC const PDdissipationNth2gt12 = PDdissipationNth2(&gt12[index]);
+ CCTK_REAL_VEC const PDdissipationNth3gt12 = PDdissipationNth3(&gt12[index]);
+ CCTK_REAL_VEC const PDdissipationNth1gt13 = PDdissipationNth1(&gt13[index]);
+ CCTK_REAL_VEC const PDdissipationNth2gt13 = PDdissipationNth2(&gt13[index]);
+ CCTK_REAL_VEC const PDdissipationNth3gt13 = PDdissipationNth3(&gt13[index]);
+ CCTK_REAL_VEC const PDdissipationNth1gt22 = PDdissipationNth1(&gt22[index]);
+ CCTK_REAL_VEC const PDdissipationNth2gt22 = PDdissipationNth2(&gt22[index]);
+ CCTK_REAL_VEC const PDdissipationNth3gt22 = PDdissipationNth3(&gt22[index]);
+ CCTK_REAL_VEC const PDdissipationNth1gt23 = PDdissipationNth1(&gt23[index]);
+ CCTK_REAL_VEC const PDdissipationNth2gt23 = PDdissipationNth2(&gt23[index]);
+ CCTK_REAL_VEC const PDdissipationNth3gt23 = PDdissipationNth3(&gt23[index]);
+ CCTK_REAL_VEC const PDdissipationNth1gt33 = PDdissipationNth1(&gt33[index]);
+ CCTK_REAL_VEC const PDdissipationNth2gt33 = PDdissipationNth2(&gt33[index]);
+ CCTK_REAL_VEC const PDdissipationNth3gt33 = PDdissipationNth3(&gt33[index]);
+ CCTK_REAL_VEC const PDdissipationNth1phi = PDdissipationNth1(&phi[index]);
+ CCTK_REAL_VEC const PDdissipationNth2phi = PDdissipationNth2(&phi[index]);
+ CCTK_REAL_VEC const PDdissipationNth3phi = PDdissipationNth3(&phi[index]);
+ CCTK_REAL_VEC const PDdissipationNth1trK = PDdissipationNth1(&trK[index]);
+ CCTK_REAL_VEC const PDdissipationNth2trK = PDdissipationNth2(&trK[index]);
+ CCTK_REAL_VEC const PDdissipationNth3trK = PDdissipationNth3(&trK[index]);
+ CCTK_REAL_VEC const PDdissipationNth1Xt1 = PDdissipationNth1(&Xt1[index]);
+ CCTK_REAL_VEC const PDdissipationNth2Xt1 = PDdissipationNth2(&Xt1[index]);
+ CCTK_REAL_VEC const PDdissipationNth3Xt1 = PDdissipationNth3(&Xt1[index]);
+ CCTK_REAL_VEC const PDdissipationNth1Xt2 = PDdissipationNth1(&Xt2[index]);
+ CCTK_REAL_VEC const PDdissipationNth2Xt2 = PDdissipationNth2(&Xt2[index]);
+ CCTK_REAL_VEC const PDdissipationNth3Xt2 = PDdissipationNth3(&Xt2[index]);
+ CCTK_REAL_VEC const PDdissipationNth1Xt3 = PDdissipationNth1(&Xt3[index]);
+ CCTK_REAL_VEC const PDdissipationNth2Xt3 = PDdissipationNth2(&Xt3[index]);
+ CCTK_REAL_VEC const PDdissipationNth3Xt3 = PDdissipationNth3(&Xt3[index]);
/* Calculate temporaries and grid functions */
- CCTK_REAL epsdiss1 = ToReal(EpsDiss);
+ CCTK_REAL_VEC epsdiss1 = ToReal(EpsDiss);
- CCTK_REAL epsdiss2 = ToReal(EpsDiss);
+ CCTK_REAL_VEC epsdiss2 = ToReal(EpsDiss);
- CCTK_REAL epsdiss3 = ToReal(EpsDiss);
+ CCTK_REAL_VEC epsdiss3 = ToReal(EpsDiss);
- phirhsL = epsdiss1*PDdissipationNth1phi +
- epsdiss2*PDdissipationNth2phi + epsdiss3*PDdissipationNth3phi +
- phirhsL;
+ phirhsL =
+ kmadd(epsdiss1,PDdissipationNth1phi,kmadd(epsdiss2,PDdissipationNth2phi,kmadd(epsdiss3,PDdissipationNth3phi,phirhsL)));
- gt11rhsL = gt11rhsL + epsdiss1*PDdissipationNth1gt11 +
- epsdiss2*PDdissipationNth2gt11 + epsdiss3*PDdissipationNth3gt11;
+ gt11rhsL =
+ kadd(gt11rhsL,kmadd(epsdiss1,PDdissipationNth1gt11,kmadd(epsdiss2,PDdissipationNth2gt11,kmul(epsdiss3,PDdissipationNth3gt11))));
- gt12rhsL = gt12rhsL + epsdiss1*PDdissipationNth1gt12 +
- epsdiss2*PDdissipationNth2gt12 + epsdiss3*PDdissipationNth3gt12;
+ gt12rhsL =
+ kadd(gt12rhsL,kmadd(epsdiss1,PDdissipationNth1gt12,kmadd(epsdiss2,PDdissipationNth2gt12,kmul(epsdiss3,PDdissipationNth3gt12))));
- gt13rhsL = gt13rhsL + epsdiss1*PDdissipationNth1gt13 +
- epsdiss2*PDdissipationNth2gt13 + epsdiss3*PDdissipationNth3gt13;
+ gt13rhsL =
+ kadd(gt13rhsL,kmadd(epsdiss1,PDdissipationNth1gt13,kmadd(epsdiss2,PDdissipationNth2gt13,kmul(epsdiss3,PDdissipationNth3gt13))));
- gt22rhsL = gt22rhsL + epsdiss1*PDdissipationNth1gt22 +
- epsdiss2*PDdissipationNth2gt22 + epsdiss3*PDdissipationNth3gt22;
+ gt22rhsL =
+ kadd(gt22rhsL,kmadd(epsdiss1,PDdissipationNth1gt22,kmadd(epsdiss2,PDdissipationNth2gt22,kmul(epsdiss3,PDdissipationNth3gt22))));
- gt23rhsL = gt23rhsL + epsdiss1*PDdissipationNth1gt23 +
- epsdiss2*PDdissipationNth2gt23 + epsdiss3*PDdissipationNth3gt23;
+ gt23rhsL =
+ kadd(gt23rhsL,kmadd(epsdiss1,PDdissipationNth1gt23,kmadd(epsdiss2,PDdissipationNth2gt23,kmul(epsdiss3,PDdissipationNth3gt23))));
- gt33rhsL = gt33rhsL + epsdiss1*PDdissipationNth1gt33 +
- epsdiss2*PDdissipationNth2gt33 + epsdiss3*PDdissipationNth3gt33;
+ gt33rhsL =
+ kadd(gt33rhsL,kmadd(epsdiss1,PDdissipationNth1gt33,kmadd(epsdiss2,PDdissipationNth2gt33,kmul(epsdiss3,PDdissipationNth3gt33))));
- Xt1rhsL = epsdiss1*PDdissipationNth1Xt1 +
- epsdiss2*PDdissipationNth2Xt1 + epsdiss3*PDdissipationNth3Xt1 +
- Xt1rhsL;
-
- Xt2rhsL = epsdiss1*PDdissipationNth1Xt2 +
- epsdiss2*PDdissipationNth2Xt2 + epsdiss3*PDdissipationNth3Xt2 +
- Xt2rhsL;
-
- Xt3rhsL = epsdiss1*PDdissipationNth1Xt3 +
- epsdiss2*PDdissipationNth2Xt3 + epsdiss3*PDdissipationNth3Xt3 +
- Xt3rhsL;
-
- trKrhsL = epsdiss1*PDdissipationNth1trK +
- epsdiss2*PDdissipationNth2trK + epsdiss3*PDdissipationNth3trK +
- trKrhsL;
-
- At11rhsL = At11rhsL + epsdiss1*PDdissipationNth1At11 +
- epsdiss2*PDdissipationNth2At11 + epsdiss3*PDdissipationNth3At11;
-
- At12rhsL = At12rhsL + epsdiss1*PDdissipationNth1At12 +
- epsdiss2*PDdissipationNth2At12 + epsdiss3*PDdissipationNth3At12;
-
- At13rhsL = At13rhsL + epsdiss1*PDdissipationNth1At13 +
- epsdiss2*PDdissipationNth2At13 + epsdiss3*PDdissipationNth3At13;
-
- At22rhsL = At22rhsL + epsdiss1*PDdissipationNth1At22 +
- epsdiss2*PDdissipationNth2At22 + epsdiss3*PDdissipationNth3At22;
-
- At23rhsL = At23rhsL + epsdiss1*PDdissipationNth1At23 +
- epsdiss2*PDdissipationNth2At23 + epsdiss3*PDdissipationNth3At23;
-
- At33rhsL = At33rhsL + epsdiss1*PDdissipationNth1At33 +
- epsdiss2*PDdissipationNth2At33 + epsdiss3*PDdissipationNth3At33;
-
- alpharhsL = alpharhsL + epsdiss1*PDdissipationNth1alpha +
- epsdiss2*PDdissipationNth2alpha + epsdiss3*PDdissipationNth3alpha;
-
- ArhsL = ArhsL + epsdiss1*PDdissipationNth1A +
- epsdiss2*PDdissipationNth2A + epsdiss3*PDdissipationNth3A;
-
- beta1rhsL = beta1rhsL + epsdiss1*PDdissipationNth1beta1 +
- epsdiss2*PDdissipationNth2beta1 + epsdiss3*PDdissipationNth3beta1;
-
- beta2rhsL = beta2rhsL + epsdiss1*PDdissipationNth1beta2 +
- epsdiss2*PDdissipationNth2beta2 + epsdiss3*PDdissipationNth3beta2;
-
- beta3rhsL = beta3rhsL + epsdiss1*PDdissipationNth1beta3 +
- epsdiss2*PDdissipationNth2beta3 + epsdiss3*PDdissipationNth3beta3;
-
- B1rhsL = B1rhsL + epsdiss1*PDdissipationNth1B1 +
- epsdiss2*PDdissipationNth2B1 + epsdiss3*PDdissipationNth3B1;
-
- B2rhsL = B2rhsL + epsdiss1*PDdissipationNth1B2 +
- epsdiss2*PDdissipationNth2B2 + epsdiss3*PDdissipationNth3B2;
-
- B3rhsL = B3rhsL + epsdiss1*PDdissipationNth1B3 +
- epsdiss2*PDdissipationNth2B3 + epsdiss3*PDdissipationNth3B3;
+ Xt1rhsL =
+ kmadd(epsdiss1,PDdissipationNth1Xt1,kmadd(epsdiss2,PDdissipationNth2Xt1,kmadd(epsdiss3,PDdissipationNth3Xt1,Xt1rhsL)));
+
+ Xt2rhsL =
+ kmadd(epsdiss1,PDdissipationNth1Xt2,kmadd(epsdiss2,PDdissipationNth2Xt2,kmadd(epsdiss3,PDdissipationNth3Xt2,Xt2rhsL)));
+
+ Xt3rhsL =
+ kmadd(epsdiss1,PDdissipationNth1Xt3,kmadd(epsdiss2,PDdissipationNth2Xt3,kmadd(epsdiss3,PDdissipationNth3Xt3,Xt3rhsL)));
+
+ trKrhsL =
+ kmadd(epsdiss1,PDdissipationNth1trK,kmadd(epsdiss2,PDdissipationNth2trK,kmadd(epsdiss3,PDdissipationNth3trK,trKrhsL)));
+
+ At11rhsL =
+ kadd(At11rhsL,kmadd(epsdiss1,PDdissipationNth1At11,kmadd(epsdiss2,PDdissipationNth2At11,kmul(epsdiss3,PDdissipationNth3At11))));
+
+ At12rhsL =
+ kadd(At12rhsL,kmadd(epsdiss1,PDdissipationNth1At12,kmadd(epsdiss2,PDdissipationNth2At12,kmul(epsdiss3,PDdissipationNth3At12))));
+
+ At13rhsL =
+ kadd(At13rhsL,kmadd(epsdiss1,PDdissipationNth1At13,kmadd(epsdiss2,PDdissipationNth2At13,kmul(epsdiss3,PDdissipationNth3At13))));
+
+ At22rhsL =
+ kadd(At22rhsL,kmadd(epsdiss1,PDdissipationNth1At22,kmadd(epsdiss2,PDdissipationNth2At22,kmul(epsdiss3,PDdissipationNth3At22))));
+
+ At23rhsL =
+ kadd(At23rhsL,kmadd(epsdiss1,PDdissipationNth1At23,kmadd(epsdiss2,PDdissipationNth2At23,kmul(epsdiss3,PDdissipationNth3At23))));
+
+ At33rhsL =
+ kadd(At33rhsL,kmadd(epsdiss1,PDdissipationNth1At33,kmadd(epsdiss2,PDdissipationNth2At33,kmul(epsdiss3,PDdissipationNth3At33))));
+
+ alpharhsL =
+ kadd(alpharhsL,kmadd(epsdiss1,PDdissipationNth1alpha,kmadd(epsdiss2,PDdissipationNth2alpha,kmul(epsdiss3,PDdissipationNth3alpha))));
+
+ ArhsL =
+ kadd(ArhsL,kmadd(epsdiss1,PDdissipationNth1A,kmadd(epsdiss2,PDdissipationNth2A,kmul(epsdiss3,PDdissipationNth3A))));
+
+ beta1rhsL =
+ kadd(beta1rhsL,kmadd(epsdiss1,PDdissipationNth1beta1,kmadd(epsdiss2,PDdissipationNth2beta1,kmul(epsdiss3,PDdissipationNth3beta1))));
+
+ beta2rhsL =
+ kadd(beta2rhsL,kmadd(epsdiss1,PDdissipationNth1beta2,kmadd(epsdiss2,PDdissipationNth2beta2,kmul(epsdiss3,PDdissipationNth3beta2))));
+
+ beta3rhsL =
+ kadd(beta3rhsL,kmadd(epsdiss1,PDdissipationNth1beta3,kmadd(epsdiss2,PDdissipationNth2beta3,kmul(epsdiss3,PDdissipationNth3beta3))));
+
+ B1rhsL =
+ kadd(B1rhsL,kmadd(epsdiss1,PDdissipationNth1B1,kmadd(epsdiss2,PDdissipationNth2B1,kmul(epsdiss3,PDdissipationNth3B1))));
+
+ B2rhsL =
+ kadd(B2rhsL,kmadd(epsdiss1,PDdissipationNth1B2,kmadd(epsdiss2,PDdissipationNth2B2,kmul(epsdiss3,PDdissipationNth3B2))));
+
+ B3rhsL =
+ kadd(B3rhsL,kmadd(epsdiss1,PDdissipationNth1B3,kmadd(epsdiss2,PDdissipationNth2B3,kmul(epsdiss3,PDdissipationNth3B3))));
+
+ /* If necessary, store only partial vectors when the first iteration is also the last (the vector overlaps both loop bounds) */
+
+ if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0))
+ {
+ ptrdiff_t const elt_count_lo = lc_imin-i;
+ ptrdiff_t const elt_count_hi = lc_imax-i;
+ vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi);
+ break;
+ }
+
+ /* If necessary, store only partial vectors after the first iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0))
+ {
+ ptrdiff_t const elt_count = lc_imin-i;
+ vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count);
+ vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count);
+ vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count);
+ vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count);
+ vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count);
+ vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count);
+ vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count);
+ vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count);
+ vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count);
+ vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count);
+ vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count);
+ vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count);
+ vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count);
+ vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count);
+ vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count);
+ vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count);
+ vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count);
+ vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count);
+ vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count);
+ vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count);
+ vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count);
+ vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count);
+ vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count);
+ vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count);
+ vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count);
+ continue;
+ }
+
+ /* If necessary, store only partial vectors after the last iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0))
+ {
+ ptrdiff_t const elt_count = lc_imax-i;
+ vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count);
+ vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count);
+ vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count);
+ vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count);
+ vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count);
+ vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count);
+ vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count);
+ vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count);
+ vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count);
+ vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count);
+ vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count);
+ vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count);
+ vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count);
+ vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count);
+ vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count);
+ vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count);
+ vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count);
+ vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count);
+ vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count);
+ vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count);
+ vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count);
+ vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count);
+ vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count);
+ vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count);
+ vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count);
+ break;
+ }
/* Copy local copies back to grid functions */
- alpharhs[index] = alpharhsL;
- Arhs[index] = ArhsL;
- At11rhs[index] = At11rhsL;
- At12rhs[index] = At12rhsL;
- At13rhs[index] = At13rhsL;
- At22rhs[index] = At22rhsL;
- At23rhs[index] = At23rhsL;
- At33rhs[index] = At33rhsL;
- B1rhs[index] = B1rhsL;
- B2rhs[index] = B2rhsL;
- B3rhs[index] = B3rhsL;
- beta1rhs[index] = beta1rhsL;
- beta2rhs[index] = beta2rhsL;
- beta3rhs[index] = beta3rhsL;
- gt11rhs[index] = gt11rhsL;
- gt12rhs[index] = gt12rhsL;
- gt13rhs[index] = gt13rhsL;
- gt22rhs[index] = gt22rhsL;
- gt23rhs[index] = gt23rhsL;
- gt33rhs[index] = gt33rhsL;
- phirhs[index] = phirhsL;
- trKrhs[index] = trKrhsL;
- Xt1rhs[index] = Xt1rhsL;
- Xt2rhs[index] = Xt2rhsL;
- Xt3rhs[index] = Xt3rhsL;
+ vec_store_nta(alpharhs[index],alpharhsL);
+ vec_store_nta(Arhs[index],ArhsL);
+ vec_store_nta(At11rhs[index],At11rhsL);
+ vec_store_nta(At12rhs[index],At12rhsL);
+ vec_store_nta(At13rhs[index],At13rhsL);
+ vec_store_nta(At22rhs[index],At22rhsL);
+ vec_store_nta(At23rhs[index],At23rhsL);
+ vec_store_nta(At33rhs[index],At33rhsL);
+ vec_store_nta(B1rhs[index],B1rhsL);
+ vec_store_nta(B2rhs[index],B2rhsL);
+ vec_store_nta(B3rhs[index],B3rhsL);
+ vec_store_nta(beta1rhs[index],beta1rhsL);
+ vec_store_nta(beta2rhs[index],beta2rhsL);
+ vec_store_nta(beta3rhs[index],beta3rhsL);
+ vec_store_nta(gt11rhs[index],gt11rhsL);
+ vec_store_nta(gt12rhs[index],gt12rhsL);
+ vec_store_nta(gt13rhs[index],gt13rhsL);
+ vec_store_nta(gt22rhs[index],gt22rhsL);
+ vec_store_nta(gt23rhs[index],gt23rhsL);
+ vec_store_nta(gt33rhs[index],gt33rhsL);
+ vec_store_nta(phirhs[index],phirhsL);
+ vec_store_nta(trKrhs[index],trKrhsL);
+ vec_store_nta(Xt1rhs[index],Xt1rhsL);
+ vec_store_nta(Xt2rhs[index],Xt2rhsL);
+ vec_store_nta(Xt3rhs[index],Xt3rhsL);
}
- LC_ENDLOOP3 (ML_BSSN_Dissipation);
+ LC_ENDLOOP3VEC (ML_BSSN_Dissipation);
}
extern "C" void ML_BSSN_Dissipation(CCTK_ARGUMENTS)
diff --git a/ML_BSSN/src/ML_BSSN_InitGamma.cc b/ML_BSSN/src/ML_BSSN_InitGamma.cc
index 18a9b97..ab44fd1 100644
--- a/ML_BSSN/src/ML_BSSN_InitGamma.cc
+++ b/ML_BSSN/src/ML_BSSN_InitGamma.cc
@@ -13,13 +13,14 @@
#include "GenericFD.h"
#include "Differencing.h"
#include "loopcontrol.h"
+#include "vectors.h"
/* Define macros used in calculations */
#define INITVALUE (42)
#define QAD(x) (SQR(SQR(x)))
-#define INV(x) ((1.0) / (x))
-#define SQR(x) ((x) * (x))
-#define CUB(x) ((x) * (x) * (x))
+#define INV(x) (kdiv(ToReal(1.0),x))
+#define SQR(x) (kmul(x,x))
+#define CUB(x) (kmul(x,SQR(x)))
static void ML_BSSN_InitGamma_Body(cGH const * restrict const cctkGH, int const dir, int const face, CCTK_REAL const normal[3], CCTK_REAL const tangentA[3], CCTK_REAL const tangentB[3], int const min[3], int const max[3], int const n_subblock_gfs, CCTK_REAL * restrict const subblock_gfs[])
{
@@ -52,47 +53,48 @@ static void ML_BSSN_InitGamma_Body(cGH const * restrict const cctkGH, int const
ptrdiff_t const cdi = sizeof(CCTK_REAL) * di;
ptrdiff_t const cdj = sizeof(CCTK_REAL) * dj;
ptrdiff_t const cdk = sizeof(CCTK_REAL) * dk;
- CCTK_REAL const dx = ToReal(CCTK_DELTA_SPACE(0));
- CCTK_REAL const dy = ToReal(CCTK_DELTA_SPACE(1));
- CCTK_REAL const dz = ToReal(CCTK_DELTA_SPACE(2));
- CCTK_REAL const dt = ToReal(CCTK_DELTA_TIME);
- CCTK_REAL const dxi = INV(dx);
- CCTK_REAL const dyi = INV(dy);
- CCTK_REAL const dzi = INV(dz);
- CCTK_REAL const khalf = 0.5;
- CCTK_REAL const kthird = 1/3.0;
- CCTK_REAL const ktwothird = 2.0/3.0;
- CCTK_REAL const kfourthird = 4.0/3.0;
- CCTK_REAL const keightthird = 8.0/3.0;
- CCTK_REAL const hdxi = 0.5 * dxi;
- CCTK_REAL const hdyi = 0.5 * dyi;
- CCTK_REAL const hdzi = 0.5 * dzi;
+ CCTK_REAL_VEC const dx = ToReal(CCTK_DELTA_SPACE(0));
+ CCTK_REAL_VEC const dy = ToReal(CCTK_DELTA_SPACE(1));
+ CCTK_REAL_VEC const dz = ToReal(CCTK_DELTA_SPACE(2));
+ CCTK_REAL_VEC const dt = ToReal(CCTK_DELTA_TIME);
+ CCTK_REAL_VEC const dxi = INV(dx);
+ CCTK_REAL_VEC const dyi = INV(dy);
+ CCTK_REAL_VEC const dzi = INV(dz);
+ CCTK_REAL_VEC const khalf = ToReal(0.5);
+ CCTK_REAL_VEC const kthird = ToReal(1.0/3.0);
+ CCTK_REAL_VEC const ktwothird = ToReal(2.0/3.0);
+ CCTK_REAL_VEC const kfourthird = ToReal(4.0/3.0);
+ CCTK_REAL_VEC const keightthird = ToReal(8.0/3.0);
+ CCTK_REAL_VEC const hdxi = kmul(ToReal(0.5), dxi);
+ CCTK_REAL_VEC const hdyi = kmul(ToReal(0.5), dyi);
+ CCTK_REAL_VEC const hdzi = kmul(ToReal(0.5), dzi);
/* Initialize predefined quantities */
- CCTK_REAL const p1o12dx = 0.0833333333333333333333333333333*INV(dx);
- CCTK_REAL const p1o12dy = 0.0833333333333333333333333333333*INV(dy);
- CCTK_REAL const p1o12dz = 0.0833333333333333333333333333333*INV(dz);
- CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx)*INV(dy);
- CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx)*INV(dz);
- CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy)*INV(dz);
- CCTK_REAL const p1o24dx = 0.0416666666666666666666666666667*INV(dx);
- CCTK_REAL const p1o24dy = 0.0416666666666666666666666666667*INV(dy);
- CCTK_REAL const p1o24dz = 0.0416666666666666666666666666667*INV(dz);
- CCTK_REAL const p1o64dx = 0.015625*INV(dx);
- CCTK_REAL const p1o64dy = 0.015625*INV(dy);
- CCTK_REAL const p1o64dz = 0.015625*INV(dz);
- CCTK_REAL const p1odx = INV(dx);
- CCTK_REAL const p1ody = INV(dy);
- CCTK_REAL const p1odz = INV(dz);
- CCTK_REAL const pm1o12dx2 = -0.0833333333333333333333333333333*INV(SQR(dx));
- CCTK_REAL const pm1o12dy2 = -0.0833333333333333333333333333333*INV(SQR(dy));
- CCTK_REAL const pm1o12dz2 = -0.0833333333333333333333333333333*INV(SQR(dz));
+ CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o24dx = kmul(INV(dx),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o24dy = kmul(INV(dy),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o24dz = kmul(INV(dz),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625));
+ CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625));
+ CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625));
+ CCTK_REAL_VEC const p1odx = INV(dx);
+ CCTK_REAL_VEC const p1ody = INV(dy);
+ CCTK_REAL_VEC const p1odz = INV(dz);
+ CCTK_REAL_VEC const pm1o12dx2 = kmul(INV(SQR(dx)),ToReal(-0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const pm1o12dy2 = kmul(INV(SQR(dy)),ToReal(-0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const pm1o12dz2 = kmul(INV(SQR(dz)),ToReal(-0.0833333333333333333333333333333));
/* Loop over the grid points */
#pragma omp parallel
- LC_LOOP3 (ML_BSSN_InitGamma,
+ LC_LOOP3VEC (ML_BSSN_InitGamma,
i,j,k, min[0],min[1],min[2], max[0],max[1],max[2],
- cctk_lsh[0],cctk_lsh[1],cctk_lsh[2])
+ cctk_lsh[0],cctk_lsh[1],cctk_lsh[2],
+ CCTK_REAL_VEC_SIZE)
{
ptrdiff_t const index = di*i + dj*j + dk*k;
@@ -105,18 +107,52 @@ static void ML_BSSN_InitGamma_Body(cGH const * restrict const cctkGH, int const
/* Precompute derivatives */
/* Calculate temporaries and grid functions */
- CCTK_REAL Xt1L = 0;
+ CCTK_REAL_VEC Xt1L = ToReal(0);
- CCTK_REAL Xt2L = 0;
+ CCTK_REAL_VEC Xt2L = ToReal(0);
- CCTK_REAL Xt3L = 0;
+ CCTK_REAL_VEC Xt3L = ToReal(0);
+
+ /* If necessary, store only partial vectors after the first iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0))
+ {
+ ptrdiff_t const elt_count_lo = lc_imin-i;
+ ptrdiff_t const elt_count_hi = lc_imax-i;
+ vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi);
+ break;
+ }
+
+ /* If necessary, store only partial vectors after the first iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0))
+ {
+ ptrdiff_t const elt_count = lc_imin-i;
+ vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count);
+ vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count);
+ vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count);
+ continue;
+ }
+
+ /* If necessary, store only partial vectors after the last iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0))
+ {
+ ptrdiff_t const elt_count = lc_imax-i;
+ vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count);
+ vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count);
+ vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count);
+ break;
+ }
/* Copy local copies back to grid functions */
- Xt1[index] = Xt1L;
- Xt2[index] = Xt2L;
- Xt3[index] = Xt3L;
+ vec_store_nta(Xt1[index],Xt1L);
+ vec_store_nta(Xt2[index],Xt2L);
+ vec_store_nta(Xt3[index],Xt3L);
}
- LC_ENDLOOP3 (ML_BSSN_InitGamma);
+ LC_ENDLOOP3VEC (ML_BSSN_InitGamma);
}
extern "C" void ML_BSSN_InitGamma(CCTK_ARGUMENTS)
diff --git a/ML_BSSN/src/ML_BSSN_InitRHS.cc b/ML_BSSN/src/ML_BSSN_InitRHS.cc
index 24f5ae8..140c059 100644
--- a/ML_BSSN/src/ML_BSSN_InitRHS.cc
+++ b/ML_BSSN/src/ML_BSSN_InitRHS.cc
@@ -13,13 +13,14 @@
#include "GenericFD.h"
#include "Differencing.h"
#include "loopcontrol.h"
+#include "vectors.h"
/* Define macros used in calculations */
#define INITVALUE (42)
#define QAD(x) (SQR(SQR(x)))
-#define INV(x) ((1.0) / (x))
-#define SQR(x) ((x) * (x))
-#define CUB(x) ((x) * (x) * (x))
+#define INV(x) (kdiv(ToReal(1.0),x))
+#define SQR(x) (kmul(x,x))
+#define CUB(x) (kmul(x,SQR(x)))
static void ML_BSSN_InitRHS_Body(cGH const * restrict const cctkGH, int const dir, int const face, CCTK_REAL const normal[3], CCTK_REAL const tangentA[3], CCTK_REAL const tangentB[3], int const min[3], int const max[3], int const n_subblock_gfs, CCTK_REAL * restrict const subblock_gfs[])
{
@@ -52,47 +53,48 @@ static void ML_BSSN_InitRHS_Body(cGH const * restrict const cctkGH, int const di
ptrdiff_t const cdi = sizeof(CCTK_REAL) * di;
ptrdiff_t const cdj = sizeof(CCTK_REAL) * dj;
ptrdiff_t const cdk = sizeof(CCTK_REAL) * dk;
- CCTK_REAL const dx = ToReal(CCTK_DELTA_SPACE(0));
- CCTK_REAL const dy = ToReal(CCTK_DELTA_SPACE(1));
- CCTK_REAL const dz = ToReal(CCTK_DELTA_SPACE(2));
- CCTK_REAL const dt = ToReal(CCTK_DELTA_TIME);
- CCTK_REAL const dxi = INV(dx);
- CCTK_REAL const dyi = INV(dy);
- CCTK_REAL const dzi = INV(dz);
- CCTK_REAL const khalf = 0.5;
- CCTK_REAL const kthird = 1/3.0;
- CCTK_REAL const ktwothird = 2.0/3.0;
- CCTK_REAL const kfourthird = 4.0/3.0;
- CCTK_REAL const keightthird = 8.0/3.0;
- CCTK_REAL const hdxi = 0.5 * dxi;
- CCTK_REAL const hdyi = 0.5 * dyi;
- CCTK_REAL const hdzi = 0.5 * dzi;
+ CCTK_REAL_VEC const dx = ToReal(CCTK_DELTA_SPACE(0));
+ CCTK_REAL_VEC const dy = ToReal(CCTK_DELTA_SPACE(1));
+ CCTK_REAL_VEC const dz = ToReal(CCTK_DELTA_SPACE(2));
+ CCTK_REAL_VEC const dt = ToReal(CCTK_DELTA_TIME);
+ CCTK_REAL_VEC const dxi = INV(dx);
+ CCTK_REAL_VEC const dyi = INV(dy);
+ CCTK_REAL_VEC const dzi = INV(dz);
+ CCTK_REAL_VEC const khalf = ToReal(0.5);
+ CCTK_REAL_VEC const kthird = ToReal(1.0/3.0);
+ CCTK_REAL_VEC const ktwothird = ToReal(2.0/3.0);
+ CCTK_REAL_VEC const kfourthird = ToReal(4.0/3.0);
+ CCTK_REAL_VEC const keightthird = ToReal(8.0/3.0);
+ CCTK_REAL_VEC const hdxi = kmul(ToReal(0.5), dxi);
+ CCTK_REAL_VEC const hdyi = kmul(ToReal(0.5), dyi);
+ CCTK_REAL_VEC const hdzi = kmul(ToReal(0.5), dzi);
/* Initialize predefined quantities */
- CCTK_REAL const p1o12dx = 0.0833333333333333333333333333333*INV(dx);
- CCTK_REAL const p1o12dy = 0.0833333333333333333333333333333*INV(dy);
- CCTK_REAL const p1o12dz = 0.0833333333333333333333333333333*INV(dz);
- CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx)*INV(dy);
- CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx)*INV(dz);
- CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy)*INV(dz);
- CCTK_REAL const p1o24dx = 0.0416666666666666666666666666667*INV(dx);
- CCTK_REAL const p1o24dy = 0.0416666666666666666666666666667*INV(dy);
- CCTK_REAL const p1o24dz = 0.0416666666666666666666666666667*INV(dz);
- CCTK_REAL const p1o64dx = 0.015625*INV(dx);
- CCTK_REAL const p1o64dy = 0.015625*INV(dy);
- CCTK_REAL const p1o64dz = 0.015625*INV(dz);
- CCTK_REAL const p1odx = INV(dx);
- CCTK_REAL const p1ody = INV(dy);
- CCTK_REAL const p1odz = INV(dz);
- CCTK_REAL const pm1o12dx2 = -0.0833333333333333333333333333333*INV(SQR(dx));
- CCTK_REAL const pm1o12dy2 = -0.0833333333333333333333333333333*INV(SQR(dy));
- CCTK_REAL const pm1o12dz2 = -0.0833333333333333333333333333333*INV(SQR(dz));
+ CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o24dx = kmul(INV(dx),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o24dy = kmul(INV(dy),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o24dz = kmul(INV(dz),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625));
+ CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625));
+ CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625));
+ CCTK_REAL_VEC const p1odx = INV(dx);
+ CCTK_REAL_VEC const p1ody = INV(dy);
+ CCTK_REAL_VEC const p1odz = INV(dz);
+ CCTK_REAL_VEC const pm1o12dx2 = kmul(INV(SQR(dx)),ToReal(-0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const pm1o12dy2 = kmul(INV(SQR(dy)),ToReal(-0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const pm1o12dz2 = kmul(INV(SQR(dz)),ToReal(-0.0833333333333333333333333333333));
/* Loop over the grid points */
#pragma omp parallel
- LC_LOOP3 (ML_BSSN_InitRHS,
+ LC_LOOP3VEC (ML_BSSN_InitRHS,
i,j,k, min[0],min[1],min[2], max[0],max[1],max[2],
- cctk_lsh[0],cctk_lsh[1],cctk_lsh[2])
+ cctk_lsh[0],cctk_lsh[1],cctk_lsh[2],
+ CCTK_REAL_VEC_SIZE)
{
ptrdiff_t const index = di*i + dj*j + dk*k;
@@ -105,84 +107,184 @@ static void ML_BSSN_InitRHS_Body(cGH const * restrict const cctkGH, int const di
/* Precompute derivatives */
/* Calculate temporaries and grid functions */
- CCTK_REAL phirhsL = 0;
+ CCTK_REAL_VEC phirhsL = ToReal(0);
- CCTK_REAL gt11rhsL = 0;
+ CCTK_REAL_VEC gt11rhsL = ToReal(0);
- CCTK_REAL gt12rhsL = 0;
+ CCTK_REAL_VEC gt12rhsL = ToReal(0);
- CCTK_REAL gt13rhsL = 0;
+ CCTK_REAL_VEC gt13rhsL = ToReal(0);
- CCTK_REAL gt22rhsL = 0;
+ CCTK_REAL_VEC gt22rhsL = ToReal(0);
- CCTK_REAL gt23rhsL = 0;
+ CCTK_REAL_VEC gt23rhsL = ToReal(0);
- CCTK_REAL gt33rhsL = 0;
+ CCTK_REAL_VEC gt33rhsL = ToReal(0);
- CCTK_REAL trKrhsL = 0;
+ CCTK_REAL_VEC trKrhsL = ToReal(0);
- CCTK_REAL At11rhsL = 0;
+ CCTK_REAL_VEC At11rhsL = ToReal(0);
- CCTK_REAL At12rhsL = 0;
+ CCTK_REAL_VEC At12rhsL = ToReal(0);
- CCTK_REAL At13rhsL = 0;
+ CCTK_REAL_VEC At13rhsL = ToReal(0);
- CCTK_REAL At22rhsL = 0;
+ CCTK_REAL_VEC At22rhsL = ToReal(0);
- CCTK_REAL At23rhsL = 0;
+ CCTK_REAL_VEC At23rhsL = ToReal(0);
- CCTK_REAL At33rhsL = 0;
+ CCTK_REAL_VEC At33rhsL = ToReal(0);
- CCTK_REAL Xt1rhsL = 0;
+ CCTK_REAL_VEC Xt1rhsL = ToReal(0);
- CCTK_REAL Xt2rhsL = 0;
+ CCTK_REAL_VEC Xt2rhsL = ToReal(0);
- CCTK_REAL Xt3rhsL = 0;
+ CCTK_REAL_VEC Xt3rhsL = ToReal(0);
- CCTK_REAL alpharhsL = 0;
+ CCTK_REAL_VEC alpharhsL = ToReal(0);
- CCTK_REAL ArhsL = 0;
+ CCTK_REAL_VEC ArhsL = ToReal(0);
- CCTK_REAL beta1rhsL = 0;
+ CCTK_REAL_VEC beta1rhsL = ToReal(0);
- CCTK_REAL beta2rhsL = 0;
+ CCTK_REAL_VEC beta2rhsL = ToReal(0);
- CCTK_REAL beta3rhsL = 0;
+ CCTK_REAL_VEC beta3rhsL = ToReal(0);
- CCTK_REAL B1rhsL = 0;
+ CCTK_REAL_VEC B1rhsL = ToReal(0);
- CCTK_REAL B2rhsL = 0;
+ CCTK_REAL_VEC B2rhsL = ToReal(0);
- CCTK_REAL B3rhsL = 0;
+ CCTK_REAL_VEC B3rhsL = ToReal(0);
+
+ /* If necessary, store only partial vectors after the first iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0))
+ {
+ ptrdiff_t const elt_count_lo = lc_imin-i;
+ ptrdiff_t const elt_count_hi = lc_imax-i;
+ vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi);
+ break;
+ }
+
+ /* If necessary, store only partial vectors after the first iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0))
+ {
+ ptrdiff_t const elt_count = lc_imin-i;
+ vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count);
+ vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count);
+ vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count);
+ vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count);
+ vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count);
+ vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count);
+ vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count);
+ vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count);
+ vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count);
+ vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count);
+ vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count);
+ vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count);
+ vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count);
+ vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count);
+ vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count);
+ vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count);
+ vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count);
+ vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count);
+ vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count);
+ vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count);
+ vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count);
+ vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count);
+ vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count);
+ vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count);
+ vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count);
+ continue;
+ }
+
+ /* If necessary, store only partial vectors after the last iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0))
+ {
+ ptrdiff_t const elt_count = lc_imax-i;
+ vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count);
+ vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count);
+ vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count);
+ vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count);
+ vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count);
+ vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count);
+ vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count);
+ vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count);
+ vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count);
+ vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count);
+ vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count);
+ vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count);
+ vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count);
+ vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count);
+ vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count);
+ vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count);
+ vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count);
+ vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count);
+ vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count);
+ vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count);
+ vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count);
+ vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count);
+ vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count);
+ vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count);
+ vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count);
+ break;
+ }
/* Copy local copies back to grid functions */
- alpharhs[index] = alpharhsL;
- Arhs[index] = ArhsL;
- At11rhs[index] = At11rhsL;
- At12rhs[index] = At12rhsL;
- At13rhs[index] = At13rhsL;
- At22rhs[index] = At22rhsL;
- At23rhs[index] = At23rhsL;
- At33rhs[index] = At33rhsL;
- B1rhs[index] = B1rhsL;
- B2rhs[index] = B2rhsL;
- B3rhs[index] = B3rhsL;
- beta1rhs[index] = beta1rhsL;
- beta2rhs[index] = beta2rhsL;
- beta3rhs[index] = beta3rhsL;
- gt11rhs[index] = gt11rhsL;
- gt12rhs[index] = gt12rhsL;
- gt13rhs[index] = gt13rhsL;
- gt22rhs[index] = gt22rhsL;
- gt23rhs[index] = gt23rhsL;
- gt33rhs[index] = gt33rhsL;
- phirhs[index] = phirhsL;
- trKrhs[index] = trKrhsL;
- Xt1rhs[index] = Xt1rhsL;
- Xt2rhs[index] = Xt2rhsL;
- Xt3rhs[index] = Xt3rhsL;
+ vec_store_nta(alpharhs[index],alpharhsL);
+ vec_store_nta(Arhs[index],ArhsL);
+ vec_store_nta(At11rhs[index],At11rhsL);
+ vec_store_nta(At12rhs[index],At12rhsL);
+ vec_store_nta(At13rhs[index],At13rhsL);
+ vec_store_nta(At22rhs[index],At22rhsL);
+ vec_store_nta(At23rhs[index],At23rhsL);
+ vec_store_nta(At33rhs[index],At33rhsL);
+ vec_store_nta(B1rhs[index],B1rhsL);
+ vec_store_nta(B2rhs[index],B2rhsL);
+ vec_store_nta(B3rhs[index],B3rhsL);
+ vec_store_nta(beta1rhs[index],beta1rhsL);
+ vec_store_nta(beta2rhs[index],beta2rhsL);
+ vec_store_nta(beta3rhs[index],beta3rhsL);
+ vec_store_nta(gt11rhs[index],gt11rhsL);
+ vec_store_nta(gt12rhs[index],gt12rhsL);
+ vec_store_nta(gt13rhs[index],gt13rhsL);
+ vec_store_nta(gt22rhs[index],gt22rhsL);
+ vec_store_nta(gt23rhs[index],gt23rhsL);
+ vec_store_nta(gt33rhs[index],gt33rhsL);
+ vec_store_nta(phirhs[index],phirhsL);
+ vec_store_nta(trKrhs[index],trKrhsL);
+ vec_store_nta(Xt1rhs[index],Xt1rhsL);
+ vec_store_nta(Xt2rhs[index],Xt2rhsL);
+ vec_store_nta(Xt3rhs[index],Xt3rhsL);
}
- LC_ENDLOOP3 (ML_BSSN_InitRHS);
+ LC_ENDLOOP3VEC (ML_BSSN_InitRHS);
}
extern "C" void ML_BSSN_InitRHS(CCTK_ARGUMENTS)
diff --git a/ML_BSSN/src/ML_BSSN_Minkowski.cc b/ML_BSSN/src/ML_BSSN_Minkowski.cc
index 77b7e2d..7efbe04 100644
--- a/ML_BSSN/src/ML_BSSN_Minkowski.cc
+++ b/ML_BSSN/src/ML_BSSN_Minkowski.cc
@@ -13,13 +13,14 @@
#include "GenericFD.h"
#include "Differencing.h"
#include "loopcontrol.h"
+#include "vectors.h"
/* Define macros used in calculations */
#define INITVALUE (42)
#define QAD(x) (SQR(SQR(x)))
-#define INV(x) ((1.0) / (x))
-#define SQR(x) ((x) * (x))
-#define CUB(x) ((x) * (x) * (x))
+#define INV(x) (kdiv(ToReal(1.0),x))
+#define SQR(x) (kmul(x,x))
+#define CUB(x) (kmul(x,SQR(x)))
static void ML_BSSN_Minkowski_Body(cGH const * restrict const cctkGH, int const dir, int const face, CCTK_REAL const normal[3], CCTK_REAL const tangentA[3], CCTK_REAL const tangentB[3], int const min[3], int const max[3], int const n_subblock_gfs, CCTK_REAL * restrict const subblock_gfs[])
{
@@ -52,47 +53,48 @@ static void ML_BSSN_Minkowski_Body(cGH const * restrict const cctkGH, int const
ptrdiff_t const cdi = sizeof(CCTK_REAL) * di;
ptrdiff_t const cdj = sizeof(CCTK_REAL) * dj;
ptrdiff_t const cdk = sizeof(CCTK_REAL) * dk;
- CCTK_REAL const dx = ToReal(CCTK_DELTA_SPACE(0));
- CCTK_REAL const dy = ToReal(CCTK_DELTA_SPACE(1));
- CCTK_REAL const dz = ToReal(CCTK_DELTA_SPACE(2));
- CCTK_REAL const dt = ToReal(CCTK_DELTA_TIME);
- CCTK_REAL const dxi = INV(dx);
- CCTK_REAL const dyi = INV(dy);
- CCTK_REAL const dzi = INV(dz);
- CCTK_REAL const khalf = 0.5;
- CCTK_REAL const kthird = 1/3.0;
- CCTK_REAL const ktwothird = 2.0/3.0;
- CCTK_REAL const kfourthird = 4.0/3.0;
- CCTK_REAL const keightthird = 8.0/3.0;
- CCTK_REAL const hdxi = 0.5 * dxi;
- CCTK_REAL const hdyi = 0.5 * dyi;
- CCTK_REAL const hdzi = 0.5 * dzi;
+ CCTK_REAL_VEC const dx = ToReal(CCTK_DELTA_SPACE(0));
+ CCTK_REAL_VEC const dy = ToReal(CCTK_DELTA_SPACE(1));
+ CCTK_REAL_VEC const dz = ToReal(CCTK_DELTA_SPACE(2));
+ CCTK_REAL_VEC const dt = ToReal(CCTK_DELTA_TIME);
+ CCTK_REAL_VEC const dxi = INV(dx);
+ CCTK_REAL_VEC const dyi = INV(dy);
+ CCTK_REAL_VEC const dzi = INV(dz);
+ CCTK_REAL_VEC const khalf = ToReal(0.5);
+ CCTK_REAL_VEC const kthird = ToReal(1.0/3.0);
+ CCTK_REAL_VEC const ktwothird = ToReal(2.0/3.0);
+ CCTK_REAL_VEC const kfourthird = ToReal(4.0/3.0);
+ CCTK_REAL_VEC const keightthird = ToReal(8.0/3.0);
+ CCTK_REAL_VEC const hdxi = kmul(ToReal(0.5), dxi);
+ CCTK_REAL_VEC const hdyi = kmul(ToReal(0.5), dyi);
+ CCTK_REAL_VEC const hdzi = kmul(ToReal(0.5), dzi);
/* Initialize predefined quantities */
- CCTK_REAL const p1o12dx = 0.0833333333333333333333333333333*INV(dx);
- CCTK_REAL const p1o12dy = 0.0833333333333333333333333333333*INV(dy);
- CCTK_REAL const p1o12dz = 0.0833333333333333333333333333333*INV(dz);
- CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx)*INV(dy);
- CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx)*INV(dz);
- CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy)*INV(dz);
- CCTK_REAL const p1o24dx = 0.0416666666666666666666666666667*INV(dx);
- CCTK_REAL const p1o24dy = 0.0416666666666666666666666666667*INV(dy);
- CCTK_REAL const p1o24dz = 0.0416666666666666666666666666667*INV(dz);
- CCTK_REAL const p1o64dx = 0.015625*INV(dx);
- CCTK_REAL const p1o64dy = 0.015625*INV(dy);
- CCTK_REAL const p1o64dz = 0.015625*INV(dz);
- CCTK_REAL const p1odx = INV(dx);
- CCTK_REAL const p1ody = INV(dy);
- CCTK_REAL const p1odz = INV(dz);
- CCTK_REAL const pm1o12dx2 = -0.0833333333333333333333333333333*INV(SQR(dx));
- CCTK_REAL const pm1o12dy2 = -0.0833333333333333333333333333333*INV(SQR(dy));
- CCTK_REAL const pm1o12dz2 = -0.0833333333333333333333333333333*INV(SQR(dz));
+ CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o24dx = kmul(INV(dx),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o24dy = kmul(INV(dy),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o24dz = kmul(INV(dz),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625));
+ CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625));
+ CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625));
+ CCTK_REAL_VEC const p1odx = INV(dx);
+ CCTK_REAL_VEC const p1ody = INV(dy);
+ CCTK_REAL_VEC const p1odz = INV(dz);
+ CCTK_REAL_VEC const pm1o12dx2 = kmul(INV(SQR(dx)),ToReal(-0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const pm1o12dy2 = kmul(INV(SQR(dy)),ToReal(-0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const pm1o12dz2 = kmul(INV(SQR(dz)),ToReal(-0.0833333333333333333333333333333));
/* Loop over the grid points */
#pragma omp parallel
- LC_LOOP3 (ML_BSSN_Minkowski,
+ LC_LOOP3VEC (ML_BSSN_Minkowski,
i,j,k, min[0],min[1],min[2], max[0],max[1],max[2],
- cctk_lsh[0],cctk_lsh[1],cctk_lsh[2])
+ cctk_lsh[0],cctk_lsh[1],cctk_lsh[2],
+ CCTK_REAL_VEC_SIZE)
{
ptrdiff_t const index = di*i + dj*j + dk*k;
@@ -105,84 +107,184 @@ static void ML_BSSN_Minkowski_Body(cGH const * restrict const cctkGH, int const
/* Precompute derivatives */
/* Calculate temporaries and grid functions */
- CCTK_REAL phiL = IfThen(conformalMethod,1,0);
+ CCTK_REAL_VEC phiL = IfThen(conformalMethod,ToReal(1),ToReal(0));
- CCTK_REAL gt11L = 1;
+ CCTK_REAL_VEC gt11L = ToReal(1);
- CCTK_REAL gt12L = 0;
+ CCTK_REAL_VEC gt12L = ToReal(0);
- CCTK_REAL gt13L = 0;
+ CCTK_REAL_VEC gt13L = ToReal(0);
- CCTK_REAL gt22L = 1;
+ CCTK_REAL_VEC gt22L = ToReal(1);
- CCTK_REAL gt23L = 0;
+ CCTK_REAL_VEC gt23L = ToReal(0);
- CCTK_REAL gt33L = 1;
+ CCTK_REAL_VEC gt33L = ToReal(1);
- CCTK_REAL trKL = 0;
+ CCTK_REAL_VEC trKL = ToReal(0);
- CCTK_REAL At11L = 0;
+ CCTK_REAL_VEC At11L = ToReal(0);
- CCTK_REAL At12L = 0;
+ CCTK_REAL_VEC At12L = ToReal(0);
- CCTK_REAL At13L = 0;
+ CCTK_REAL_VEC At13L = ToReal(0);
- CCTK_REAL At22L = 0;
+ CCTK_REAL_VEC At22L = ToReal(0);
- CCTK_REAL At23L = 0;
+ CCTK_REAL_VEC At23L = ToReal(0);
- CCTK_REAL At33L = 0;
+ CCTK_REAL_VEC At33L = ToReal(0);
- CCTK_REAL Xt1L = 0;
+ CCTK_REAL_VEC Xt1L = ToReal(0);
- CCTK_REAL Xt2L = 0;
+ CCTK_REAL_VEC Xt2L = ToReal(0);
- CCTK_REAL Xt3L = 0;
+ CCTK_REAL_VEC Xt3L = ToReal(0);
- CCTK_REAL alphaL = 1;
+ CCTK_REAL_VEC alphaL = ToReal(1);
- CCTK_REAL AL = 0;
+ CCTK_REAL_VEC AL = ToReal(0);
- CCTK_REAL beta1L = 0;
+ CCTK_REAL_VEC beta1L = ToReal(0);
- CCTK_REAL beta2L = 0;
+ CCTK_REAL_VEC beta2L = ToReal(0);
- CCTK_REAL beta3L = 0;
+ CCTK_REAL_VEC beta3L = ToReal(0);
- CCTK_REAL B1L = 0;
+ CCTK_REAL_VEC B1L = ToReal(0);
- CCTK_REAL B2L = 0;
+ CCTK_REAL_VEC B2L = ToReal(0);
- CCTK_REAL B3L = 0;
+ CCTK_REAL_VEC B3L = ToReal(0);
+
+ /* If necessary, store only partial vectors after the first iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0))
+ {
+ ptrdiff_t const elt_count_lo = lc_imin-i;
+ ptrdiff_t const elt_count_hi = lc_imax-i;
+ vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi);
+ break;
+ }
+
+ /* If necessary, store only partial vectors after the first iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0))
+ {
+ ptrdiff_t const elt_count = lc_imin-i;
+ vec_store_nta_partial_hi(A[index],AL,elt_count);
+ vec_store_nta_partial_hi(alpha[index],alphaL,elt_count);
+ vec_store_nta_partial_hi(At11[index],At11L,elt_count);
+ vec_store_nta_partial_hi(At12[index],At12L,elt_count);
+ vec_store_nta_partial_hi(At13[index],At13L,elt_count);
+ vec_store_nta_partial_hi(At22[index],At22L,elt_count);
+ vec_store_nta_partial_hi(At23[index],At23L,elt_count);
+ vec_store_nta_partial_hi(At33[index],At33L,elt_count);
+ vec_store_nta_partial_hi(B1[index],B1L,elt_count);
+ vec_store_nta_partial_hi(B2[index],B2L,elt_count);
+ vec_store_nta_partial_hi(B3[index],B3L,elt_count);
+ vec_store_nta_partial_hi(beta1[index],beta1L,elt_count);
+ vec_store_nta_partial_hi(beta2[index],beta2L,elt_count);
+ vec_store_nta_partial_hi(beta3[index],beta3L,elt_count);
+ vec_store_nta_partial_hi(gt11[index],gt11L,elt_count);
+ vec_store_nta_partial_hi(gt12[index],gt12L,elt_count);
+ vec_store_nta_partial_hi(gt13[index],gt13L,elt_count);
+ vec_store_nta_partial_hi(gt22[index],gt22L,elt_count);
+ vec_store_nta_partial_hi(gt23[index],gt23L,elt_count);
+ vec_store_nta_partial_hi(gt33[index],gt33L,elt_count);
+ vec_store_nta_partial_hi(phi[index],phiL,elt_count);
+ vec_store_nta_partial_hi(trK[index],trKL,elt_count);
+ vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count);
+ vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count);
+ vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count);
+ continue;
+ }
+
+ /* If necessary, store only partial vectors after the last iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0))
+ {
+ ptrdiff_t const elt_count = lc_imax-i;
+ vec_store_nta_partial_lo(A[index],AL,elt_count);
+ vec_store_nta_partial_lo(alpha[index],alphaL,elt_count);
+ vec_store_nta_partial_lo(At11[index],At11L,elt_count);
+ vec_store_nta_partial_lo(At12[index],At12L,elt_count);
+ vec_store_nta_partial_lo(At13[index],At13L,elt_count);
+ vec_store_nta_partial_lo(At22[index],At22L,elt_count);
+ vec_store_nta_partial_lo(At23[index],At23L,elt_count);
+ vec_store_nta_partial_lo(At33[index],At33L,elt_count);
+ vec_store_nta_partial_lo(B1[index],B1L,elt_count);
+ vec_store_nta_partial_lo(B2[index],B2L,elt_count);
+ vec_store_nta_partial_lo(B3[index],B3L,elt_count);
+ vec_store_nta_partial_lo(beta1[index],beta1L,elt_count);
+ vec_store_nta_partial_lo(beta2[index],beta2L,elt_count);
+ vec_store_nta_partial_lo(beta3[index],beta3L,elt_count);
+ vec_store_nta_partial_lo(gt11[index],gt11L,elt_count);
+ vec_store_nta_partial_lo(gt12[index],gt12L,elt_count);
+ vec_store_nta_partial_lo(gt13[index],gt13L,elt_count);
+ vec_store_nta_partial_lo(gt22[index],gt22L,elt_count);
+ vec_store_nta_partial_lo(gt23[index],gt23L,elt_count);
+ vec_store_nta_partial_lo(gt33[index],gt33L,elt_count);
+ vec_store_nta_partial_lo(phi[index],phiL,elt_count);
+ vec_store_nta_partial_lo(trK[index],trKL,elt_count);
+ vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count);
+ vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count);
+ vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count);
+ break;
+ }
/* Copy local copies back to grid functions */
- A[index] = AL;
- alpha[index] = alphaL;
- At11[index] = At11L;
- At12[index] = At12L;
- At13[index] = At13L;
- At22[index] = At22L;
- At23[index] = At23L;
- At33[index] = At33L;
- B1[index] = B1L;
- B2[index] = B2L;
- B3[index] = B3L;
- beta1[index] = beta1L;
- beta2[index] = beta2L;
- beta3[index] = beta3L;
- gt11[index] = gt11L;
- gt12[index] = gt12L;
- gt13[index] = gt13L;
- gt22[index] = gt22L;
- gt23[index] = gt23L;
- gt33[index] = gt33L;
- phi[index] = phiL;
- trK[index] = trKL;
- Xt1[index] = Xt1L;
- Xt2[index] = Xt2L;
- Xt3[index] = Xt3L;
+ vec_store_nta(A[index],AL);
+ vec_store_nta(alpha[index],alphaL);
+ vec_store_nta(At11[index],At11L);
+ vec_store_nta(At12[index],At12L);
+ vec_store_nta(At13[index],At13L);
+ vec_store_nta(At22[index],At22L);
+ vec_store_nta(At23[index],At23L);
+ vec_store_nta(At33[index],At33L);
+ vec_store_nta(B1[index],B1L);
+ vec_store_nta(B2[index],B2L);
+ vec_store_nta(B3[index],B3L);
+ vec_store_nta(beta1[index],beta1L);
+ vec_store_nta(beta2[index],beta2L);
+ vec_store_nta(beta3[index],beta3L);
+ vec_store_nta(gt11[index],gt11L);
+ vec_store_nta(gt12[index],gt12L);
+ vec_store_nta(gt13[index],gt13L);
+ vec_store_nta(gt22[index],gt22L);
+ vec_store_nta(gt23[index],gt23L);
+ vec_store_nta(gt33[index],gt33L);
+ vec_store_nta(phi[index],phiL);
+ vec_store_nta(trK[index],trKL);
+ vec_store_nta(Xt1[index],Xt1L);
+ vec_store_nta(Xt2[index],Xt2L);
+ vec_store_nta(Xt3[index],Xt3L);
}
- LC_ENDLOOP3 (ML_BSSN_Minkowski);
+ LC_ENDLOOP3VEC (ML_BSSN_Minkowski);
}
extern "C" void ML_BSSN_Minkowski(CCTK_ARGUMENTS)
diff --git a/ML_BSSN/src/ML_BSSN_RHS1.cc b/ML_BSSN/src/ML_BSSN_RHS1.cc
index 1c2e051..41593a4 100644
--- a/ML_BSSN/src/ML_BSSN_RHS1.cc
+++ b/ML_BSSN/src/ML_BSSN_RHS1.cc
@@ -13,13 +13,14 @@
#include "GenericFD.h"
#include "Differencing.h"
#include "loopcontrol.h"
+#include "vectors.h"
/* Define macros used in calculations */
#define INITVALUE (42)
#define QAD(x) (SQR(SQR(x)))
-#define INV(x) ((1.0) / (x))
-#define SQR(x) ((x) * (x))
-#define CUB(x) ((x) * (x) * (x))
+#define INV(x) (kdiv(ToReal(1.0),x))
+#define SQR(x) (kmul(x,x))
+#define CUB(x) (kmul(x,SQR(x)))
extern "C" void ML_BSSN_RHS1_SelectBCs(CCTK_ARGUMENTS)
{
@@ -86,93 +87,94 @@ static void ML_BSSN_RHS1_Body(cGH const * restrict const cctkGH, int const dir,
ptrdiff_t const cdi = sizeof(CCTK_REAL) * di;
ptrdiff_t const cdj = sizeof(CCTK_REAL) * dj;
ptrdiff_t const cdk = sizeof(CCTK_REAL) * dk;
- CCTK_REAL const dx = ToReal(CCTK_DELTA_SPACE(0));
- CCTK_REAL const dy = ToReal(CCTK_DELTA_SPACE(1));
- CCTK_REAL const dz = ToReal(CCTK_DELTA_SPACE(2));
- CCTK_REAL const dt = ToReal(CCTK_DELTA_TIME);
- CCTK_REAL const dxi = INV(dx);
- CCTK_REAL const dyi = INV(dy);
- CCTK_REAL const dzi = INV(dz);
- CCTK_REAL const khalf = 0.5;
- CCTK_REAL const kthird = 1/3.0;
- CCTK_REAL const ktwothird = 2.0/3.0;
- CCTK_REAL const kfourthird = 4.0/3.0;
- CCTK_REAL const keightthird = 8.0/3.0;
- CCTK_REAL const hdxi = 0.5 * dxi;
- CCTK_REAL const hdyi = 0.5 * dyi;
- CCTK_REAL const hdzi = 0.5 * dzi;
+ CCTK_REAL_VEC const dx = ToReal(CCTK_DELTA_SPACE(0));
+ CCTK_REAL_VEC const dy = ToReal(CCTK_DELTA_SPACE(1));
+ CCTK_REAL_VEC const dz = ToReal(CCTK_DELTA_SPACE(2));
+ CCTK_REAL_VEC const dt = ToReal(CCTK_DELTA_TIME);
+ CCTK_REAL_VEC const dxi = INV(dx);
+ CCTK_REAL_VEC const dyi = INV(dy);
+ CCTK_REAL_VEC const dzi = INV(dz);
+ CCTK_REAL_VEC const khalf = ToReal(0.5);
+ CCTK_REAL_VEC const kthird = ToReal(1.0/3.0);
+ CCTK_REAL_VEC const ktwothird = ToReal(2.0/3.0);
+ CCTK_REAL_VEC const kfourthird = ToReal(4.0/3.0);
+ CCTK_REAL_VEC const keightthird = ToReal(8.0/3.0);
+ CCTK_REAL_VEC const hdxi = kmul(ToReal(0.5), dxi);
+ CCTK_REAL_VEC const hdyi = kmul(ToReal(0.5), dyi);
+ CCTK_REAL_VEC const hdzi = kmul(ToReal(0.5), dzi);
/* Initialize predefined quantities */
- CCTK_REAL const p1o12dx = 0.0833333333333333333333333333333*INV(dx);
- CCTK_REAL const p1o12dy = 0.0833333333333333333333333333333*INV(dy);
- CCTK_REAL const p1o12dz = 0.0833333333333333333333333333333*INV(dz);
- CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx)*INV(dy);
- CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx)*INV(dz);
- CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy)*INV(dz);
- CCTK_REAL const p1o24dx = 0.0416666666666666666666666666667*INV(dx);
- CCTK_REAL const p1o24dy = 0.0416666666666666666666666666667*INV(dy);
- CCTK_REAL const p1o24dz = 0.0416666666666666666666666666667*INV(dz);
- CCTK_REAL const p1o64dx = 0.015625*INV(dx);
- CCTK_REAL const p1o64dy = 0.015625*INV(dy);
- CCTK_REAL const p1o64dz = 0.015625*INV(dz);
- CCTK_REAL const p1odx = INV(dx);
- CCTK_REAL const p1ody = INV(dy);
- CCTK_REAL const p1odz = INV(dz);
- CCTK_REAL const pm1o12dx2 = -0.0833333333333333333333333333333*INV(SQR(dx));
- CCTK_REAL const pm1o12dy2 = -0.0833333333333333333333333333333*INV(SQR(dy));
- CCTK_REAL const pm1o12dz2 = -0.0833333333333333333333333333333*INV(SQR(dz));
+ CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o24dx = kmul(INV(dx),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o24dy = kmul(INV(dy),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o24dz = kmul(INV(dz),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625));
+ CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625));
+ CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625));
+ CCTK_REAL_VEC const p1odx = INV(dx);
+ CCTK_REAL_VEC const p1ody = INV(dy);
+ CCTK_REAL_VEC const p1odz = INV(dz);
+ CCTK_REAL_VEC const pm1o12dx2 = kmul(INV(SQR(dx)),ToReal(-0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const pm1o12dy2 = kmul(INV(SQR(dy)),ToReal(-0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const pm1o12dz2 = kmul(INV(SQR(dz)),ToReal(-0.0833333333333333333333333333333));
/* Loop over the grid points */
#pragma omp parallel
- LC_LOOP3 (ML_BSSN_RHS1,
+ LC_LOOP3VEC (ML_BSSN_RHS1,
i,j,k, min[0],min[1],min[2], max[0],max[1],max[2],
- cctk_lsh[0],cctk_lsh[1],cctk_lsh[2])
+ cctk_lsh[0],cctk_lsh[1],cctk_lsh[2],
+ CCTK_REAL_VEC_SIZE)
{
ptrdiff_t const index = di*i + dj*j + dk*k;
/* Assign local copies of grid functions */
- CCTK_REAL AL = A[index];
- CCTK_REAL alphaL = alpha[index];
- CCTK_REAL At11L = At11[index];
- CCTK_REAL At12L = At12[index];
- CCTK_REAL At13L = At13[index];
- CCTK_REAL At22L = At22[index];
- CCTK_REAL At23L = At23[index];
- CCTK_REAL At33L = At33[index];
- CCTK_REAL B1L = B1[index];
- CCTK_REAL B2L = B2[index];
- CCTK_REAL B3L = B3[index];
- CCTK_REAL beta1L = beta1[index];
- CCTK_REAL beta2L = beta2[index];
- CCTK_REAL beta3L = beta3[index];
- CCTK_REAL gt11L = gt11[index];
- CCTK_REAL gt12L = gt12[index];
- CCTK_REAL gt13L = gt13[index];
- CCTK_REAL gt22L = gt22[index];
- CCTK_REAL gt23L = gt23[index];
- CCTK_REAL gt33L = gt33[index];
- CCTK_REAL phiL = phi[index];
- CCTK_REAL rL = r[index];
- CCTK_REAL trKL = trK[index];
- CCTK_REAL Xt1L = Xt1[index];
- CCTK_REAL Xt2L = Xt2[index];
- CCTK_REAL Xt3L = Xt3[index];
-
- CCTK_REAL eTttL, eTtxL, eTtyL, eTtzL, eTxxL, eTxyL, eTxzL, eTyyL, eTyzL, eTzzL;
+ CCTK_REAL_VEC AL = vec_load(A[index]);
+ CCTK_REAL_VEC alphaL = vec_load(alpha[index]);
+ CCTK_REAL_VEC At11L = vec_load(At11[index]);
+ CCTK_REAL_VEC At12L = vec_load(At12[index]);
+ CCTK_REAL_VEC At13L = vec_load(At13[index]);
+ CCTK_REAL_VEC At22L = vec_load(At22[index]);
+ CCTK_REAL_VEC At23L = vec_load(At23[index]);
+ CCTK_REAL_VEC At33L = vec_load(At33[index]);
+ CCTK_REAL_VEC B1L = vec_load(B1[index]);
+ CCTK_REAL_VEC B2L = vec_load(B2[index]);
+ CCTK_REAL_VEC B3L = vec_load(B3[index]);
+ CCTK_REAL_VEC beta1L = vec_load(beta1[index]);
+ CCTK_REAL_VEC beta2L = vec_load(beta2[index]);
+ CCTK_REAL_VEC beta3L = vec_load(beta3[index]);
+ CCTK_REAL_VEC gt11L = vec_load(gt11[index]);
+ CCTK_REAL_VEC gt12L = vec_load(gt12[index]);
+ CCTK_REAL_VEC gt13L = vec_load(gt13[index]);
+ CCTK_REAL_VEC gt22L = vec_load(gt22[index]);
+ CCTK_REAL_VEC gt23L = vec_load(gt23[index]);
+ CCTK_REAL_VEC gt33L = vec_load(gt33[index]);
+ CCTK_REAL_VEC phiL = vec_load(phi[index]);
+ CCTK_REAL_VEC rL = vec_load(r[index]);
+ CCTK_REAL_VEC trKL = vec_load(trK[index]);
+ CCTK_REAL_VEC Xt1L = vec_load(Xt1[index]);
+ CCTK_REAL_VEC Xt2L = vec_load(Xt2[index]);
+ CCTK_REAL_VEC Xt3L = vec_load(Xt3[index]);
+
+ CCTK_REAL_VEC eTttL, eTtxL, eTtyL, eTtzL, eTxxL, eTxyL, eTxzL, eTyyL, eTyzL, eTzzL;
if (*stress_energy_state)
{
- eTttL = eTtt[index];
- eTtxL = eTtx[index];
- eTtyL = eTty[index];
- eTtzL = eTtz[index];
- eTxxL = eTxx[index];
- eTxyL = eTxy[index];
- eTxzL = eTxz[index];
- eTyyL = eTyy[index];
- eTyzL = eTyz[index];
- eTzzL = eTzz[index];
+ eTttL = vec_load(eTtt[index]);
+ eTtxL = vec_load(eTtx[index]);
+ eTtyL = vec_load(eTty[index]);
+ eTtzL = vec_load(eTtz[index]);
+ eTxxL = vec_load(eTxx[index]);
+ eTxyL = vec_load(eTxy[index]);
+ eTxzL = vec_load(eTxz[index]);
+ eTyyL = vec_load(eTyy[index]);
+ eTyzL = vec_load(eTyz[index]);
+ eTzzL = vec_load(eTzz[index]);
}
else
{
@@ -191,66 +193,66 @@ static void ML_BSSN_RHS1_Body(cGH const * restrict const cctkGH, int const dir,
/* Include user supplied include files */
/* Precompute derivatives */
- CCTK_REAL const PDstandardNth1alpha = PDstandardNth1(&alpha[index]);
- CCTK_REAL const PDstandardNth2alpha = PDstandardNth2(&alpha[index]);
- CCTK_REAL const PDstandardNth3alpha = PDstandardNth3(&alpha[index]);
- CCTK_REAL const PDstandardNth11alpha = PDstandardNth11(&alpha[index]);
- CCTK_REAL const PDstandardNth22alpha = PDstandardNth22(&alpha[index]);
- CCTK_REAL const PDstandardNth33alpha = PDstandardNth33(&alpha[index]);
- CCTK_REAL const PDstandardNth12alpha = PDstandardNth12(&alpha[index]);
- CCTK_REAL const PDstandardNth13alpha = PDstandardNth13(&alpha[index]);
- CCTK_REAL const PDstandardNth23alpha = PDstandardNth23(&alpha[index]);
- CCTK_REAL const PDstandardNth1beta1 = PDstandardNth1(&beta1[index]);
- CCTK_REAL const PDstandardNth2beta1 = PDstandardNth2(&beta1[index]);
- CCTK_REAL const PDstandardNth3beta1 = PDstandardNth3(&beta1[index]);
- CCTK_REAL const PDstandardNth11beta1 = PDstandardNth11(&beta1[index]);
- CCTK_REAL const PDstandardNth22beta1 = PDstandardNth22(&beta1[index]);
- CCTK_REAL const PDstandardNth33beta1 = PDstandardNth33(&beta1[index]);
- CCTK_REAL const PDstandardNth12beta1 = PDstandardNth12(&beta1[index]);
- CCTK_REAL const PDstandardNth13beta1 = PDstandardNth13(&beta1[index]);
- CCTK_REAL const PDstandardNth23beta1 = PDstandardNth23(&beta1[index]);
- CCTK_REAL const PDstandardNth1beta2 = PDstandardNth1(&beta2[index]);
- CCTK_REAL const PDstandardNth2beta2 = PDstandardNth2(&beta2[index]);
- CCTK_REAL const PDstandardNth3beta2 = PDstandardNth3(&beta2[index]);
- CCTK_REAL const PDstandardNth11beta2 = PDstandardNth11(&beta2[index]);
- CCTK_REAL const PDstandardNth22beta2 = PDstandardNth22(&beta2[index]);
- CCTK_REAL const PDstandardNth33beta2 = PDstandardNth33(&beta2[index]);
- CCTK_REAL const PDstandardNth12beta2 = PDstandardNth12(&beta2[index]);
- CCTK_REAL const PDstandardNth13beta2 = PDstandardNth13(&beta2[index]);
- CCTK_REAL const PDstandardNth23beta2 = PDstandardNth23(&beta2[index]);
- CCTK_REAL const PDstandardNth1beta3 = PDstandardNth1(&beta3[index]);
- CCTK_REAL const PDstandardNth2beta3 = PDstandardNth2(&beta3[index]);
- CCTK_REAL const PDstandardNth3beta3 = PDstandardNth3(&beta3[index]);
- CCTK_REAL const PDstandardNth11beta3 = PDstandardNth11(&beta3[index]);
- CCTK_REAL const PDstandardNth22beta3 = PDstandardNth22(&beta3[index]);
- CCTK_REAL const PDstandardNth33beta3 = PDstandardNth33(&beta3[index]);
- CCTK_REAL const PDstandardNth12beta3 = PDstandardNth12(&beta3[index]);
- CCTK_REAL const PDstandardNth13beta3 = PDstandardNth13(&beta3[index]);
- CCTK_REAL const PDstandardNth23beta3 = PDstandardNth23(&beta3[index]);
- CCTK_REAL const PDstandardNth1gt11 = PDstandardNth1(&gt11[index]);
- CCTK_REAL const PDstandardNth2gt11 = PDstandardNth2(&gt11[index]);
- CCTK_REAL const PDstandardNth3gt11 = PDstandardNth3(&gt11[index]);
- CCTK_REAL const PDstandardNth1gt12 = PDstandardNth1(&gt12[index]);
- CCTK_REAL const PDstandardNth2gt12 = PDstandardNth2(&gt12[index]);
- CCTK_REAL const PDstandardNth3gt12 = PDstandardNth3(&gt12[index]);
- CCTK_REAL const PDstandardNth1gt13 = PDstandardNth1(&gt13[index]);
- CCTK_REAL const PDstandardNth2gt13 = PDstandardNth2(&gt13[index]);
- CCTK_REAL const PDstandardNth3gt13 = PDstandardNth3(&gt13[index]);
- CCTK_REAL const PDstandardNth1gt22 = PDstandardNth1(&gt22[index]);
- CCTK_REAL const PDstandardNth2gt22 = PDstandardNth2(&gt22[index]);
- CCTK_REAL const PDstandardNth3gt22 = PDstandardNth3(&gt22[index]);
- CCTK_REAL const PDstandardNth1gt23 = PDstandardNth1(&gt23[index]);
- CCTK_REAL const PDstandardNth2gt23 = PDstandardNth2(&gt23[index]);
- CCTK_REAL const PDstandardNth3gt23 = PDstandardNth3(&gt23[index]);
- CCTK_REAL const PDstandardNth1gt33 = PDstandardNth1(&gt33[index]);
- CCTK_REAL const PDstandardNth2gt33 = PDstandardNth2(&gt33[index]);
- CCTK_REAL const PDstandardNth3gt33 = PDstandardNth3(&gt33[index]);
- CCTK_REAL const PDstandardNth1phi = PDstandardNth1(&phi[index]);
- CCTK_REAL const PDstandardNth2phi = PDstandardNth2(&phi[index]);
- CCTK_REAL const PDstandardNth3phi = PDstandardNth3(&phi[index]);
- CCTK_REAL const PDstandardNth1trK = PDstandardNth1(&trK[index]);
- CCTK_REAL const PDstandardNth2trK = PDstandardNth2(&trK[index]);
- CCTK_REAL const PDstandardNth3trK = PDstandardNth3(&trK[index]);
+ CCTK_REAL_VEC const PDstandardNth1alpha = PDstandardNth1(&alpha[index]);
+ CCTK_REAL_VEC const PDstandardNth2alpha = PDstandardNth2(&alpha[index]);
+ CCTK_REAL_VEC const PDstandardNth3alpha = PDstandardNth3(&alpha[index]);
+ CCTK_REAL_VEC const PDstandardNth11alpha = PDstandardNth11(&alpha[index]);
+ CCTK_REAL_VEC const PDstandardNth22alpha = PDstandardNth22(&alpha[index]);
+ CCTK_REAL_VEC const PDstandardNth33alpha = PDstandardNth33(&alpha[index]);
+ CCTK_REAL_VEC const PDstandardNth12alpha = PDstandardNth12(&alpha[index]);
+ CCTK_REAL_VEC const PDstandardNth13alpha = PDstandardNth13(&alpha[index]);
+ CCTK_REAL_VEC const PDstandardNth23alpha = PDstandardNth23(&alpha[index]);
+ CCTK_REAL_VEC const PDstandardNth1beta1 = PDstandardNth1(&beta1[index]);
+ CCTK_REAL_VEC const PDstandardNth2beta1 = PDstandardNth2(&beta1[index]);
+ CCTK_REAL_VEC const PDstandardNth3beta1 = PDstandardNth3(&beta1[index]);
+ CCTK_REAL_VEC const PDstandardNth11beta1 = PDstandardNth11(&beta1[index]);
+ CCTK_REAL_VEC const PDstandardNth22beta1 = PDstandardNth22(&beta1[index]);
+ CCTK_REAL_VEC const PDstandardNth33beta1 = PDstandardNth33(&beta1[index]);
+ CCTK_REAL_VEC const PDstandardNth12beta1 = PDstandardNth12(&beta1[index]);
+ CCTK_REAL_VEC const PDstandardNth13beta1 = PDstandardNth13(&beta1[index]);
+ CCTK_REAL_VEC const PDstandardNth23beta1 = PDstandardNth23(&beta1[index]);
+ CCTK_REAL_VEC const PDstandardNth1beta2 = PDstandardNth1(&beta2[index]);
+ CCTK_REAL_VEC const PDstandardNth2beta2 = PDstandardNth2(&beta2[index]);
+ CCTK_REAL_VEC const PDstandardNth3beta2 = PDstandardNth3(&beta2[index]);
+ CCTK_REAL_VEC const PDstandardNth11beta2 = PDstandardNth11(&beta2[index]);
+ CCTK_REAL_VEC const PDstandardNth22beta2 = PDstandardNth22(&beta2[index]);
+ CCTK_REAL_VEC const PDstandardNth33beta2 = PDstandardNth33(&beta2[index]);
+ CCTK_REAL_VEC const PDstandardNth12beta2 = PDstandardNth12(&beta2[index]);
+ CCTK_REAL_VEC const PDstandardNth13beta2 = PDstandardNth13(&beta2[index]);
+ CCTK_REAL_VEC const PDstandardNth23beta2 = PDstandardNth23(&beta2[index]);
+ CCTK_REAL_VEC const PDstandardNth1beta3 = PDstandardNth1(&beta3[index]);
+ CCTK_REAL_VEC const PDstandardNth2beta3 = PDstandardNth2(&beta3[index]);
+ CCTK_REAL_VEC const PDstandardNth3beta3 = PDstandardNth3(&beta3[index]);
+ CCTK_REAL_VEC const PDstandardNth11beta3 = PDstandardNth11(&beta3[index]);
+ CCTK_REAL_VEC const PDstandardNth22beta3 = PDstandardNth22(&beta3[index]);
+ CCTK_REAL_VEC const PDstandardNth33beta3 = PDstandardNth33(&beta3[index]);
+ CCTK_REAL_VEC const PDstandardNth12beta3 = PDstandardNth12(&beta3[index]);
+ CCTK_REAL_VEC const PDstandardNth13beta3 = PDstandardNth13(&beta3[index]);
+ CCTK_REAL_VEC const PDstandardNth23beta3 = PDstandardNth23(&beta3[index]);
+ CCTK_REAL_VEC const PDstandardNth1gt11 = PDstandardNth1(&gt11[index]);
+ CCTK_REAL_VEC const PDstandardNth2gt11 = PDstandardNth2(&gt11[index]);
+ CCTK_REAL_VEC const PDstandardNth3gt11 = PDstandardNth3(&gt11[index]);
+ CCTK_REAL_VEC const PDstandardNth1gt12 = PDstandardNth1(&gt12[index]);
+ CCTK_REAL_VEC const PDstandardNth2gt12 = PDstandardNth2(&gt12[index]);
+ CCTK_REAL_VEC const PDstandardNth3gt12 = PDstandardNth3(&gt12[index]);
+ CCTK_REAL_VEC const PDstandardNth1gt13 = PDstandardNth1(&gt13[index]);
+ CCTK_REAL_VEC const PDstandardNth2gt13 = PDstandardNth2(&gt13[index]);
+ CCTK_REAL_VEC const PDstandardNth3gt13 = PDstandardNth3(&gt13[index]);
+ CCTK_REAL_VEC const PDstandardNth1gt22 = PDstandardNth1(&gt22[index]);
+ CCTK_REAL_VEC const PDstandardNth2gt22 = PDstandardNth2(&gt22[index]);
+ CCTK_REAL_VEC const PDstandardNth3gt22 = PDstandardNth3(&gt22[index]);
+ CCTK_REAL_VEC const PDstandardNth1gt23 = PDstandardNth1(&gt23[index]);
+ CCTK_REAL_VEC const PDstandardNth2gt23 = PDstandardNth2(&gt23[index]);
+ CCTK_REAL_VEC const PDstandardNth3gt23 = PDstandardNth3(&gt23[index]);
+ CCTK_REAL_VEC const PDstandardNth1gt33 = PDstandardNth1(&gt33[index]);
+ CCTK_REAL_VEC const PDstandardNth2gt33 = PDstandardNth2(&gt33[index]);
+ CCTK_REAL_VEC const PDstandardNth3gt33 = PDstandardNth3(&gt33[index]);
+ CCTK_REAL_VEC const PDstandardNth1phi = PDstandardNth1(&phi[index]);
+ CCTK_REAL_VEC const PDstandardNth2phi = PDstandardNth2(&phi[index]);
+ CCTK_REAL_VEC const PDstandardNth3phi = PDstandardNth3(&phi[index]);
+ CCTK_REAL_VEC const PDstandardNth1trK = PDstandardNth1(&trK[index]);
+ CCTK_REAL_VEC const PDstandardNth2trK = PDstandardNth2(&trK[index]);
+ CCTK_REAL_VEC const PDstandardNth3trK = PDstandardNth3(&trK[index]);
/* Calculate temporaries and grid functions */
ptrdiff_t dir1 = Sign(beta1L);
@@ -259,340 +261,383 @@ static void ML_BSSN_RHS1_Body(cGH const * restrict const cctkGH, int const dir,
ptrdiff_t dir3 = Sign(beta3L);
- CCTK_REAL detgt = 1;
+ CCTK_REAL_VEC detgt = ToReal(1);
- CCTK_REAL gtu11 = INV(detgt)*(gt22L*gt33L - SQR(gt23L));
+ CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L)));
- CCTK_REAL gtu12 = (gt13L*gt23L - gt12L*gt33L)*INV(detgt);
+ CCTK_REAL_VEC gtu12 =
+ kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L)));
- CCTK_REAL gtu13 = (-(gt13L*gt22L) + gt12L*gt23L)*INV(detgt);
+ CCTK_REAL_VEC gtu13 =
+ kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L)));
- CCTK_REAL gtu22 = INV(detgt)*(gt11L*gt33L - SQR(gt13L));
+ CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L)));
- CCTK_REAL gtu23 = (gt12L*gt13L - gt11L*gt23L)*INV(detgt);
+ CCTK_REAL_VEC gtu23 =
+ kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L)));
- CCTK_REAL gtu33 = INV(detgt)*(gt11L*gt22L - SQR(gt12L));
+ CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L)));
- CCTK_REAL Gtl111 = 0.5*PDstandardNth1gt11;
+ CCTK_REAL_VEC Gtl111 = kmul(PDstandardNth1gt11,ToReal(0.5));
- CCTK_REAL Gtl112 = 0.5*PDstandardNth2gt11;
+ CCTK_REAL_VEC Gtl112 = kmul(PDstandardNth2gt11,ToReal(0.5));
- CCTK_REAL Gtl113 = 0.5*PDstandardNth3gt11;
+ CCTK_REAL_VEC Gtl113 = kmul(PDstandardNth3gt11,ToReal(0.5));
- CCTK_REAL Gtl122 = -0.5*PDstandardNth1gt22 + PDstandardNth2gt12;
+ CCTK_REAL_VEC Gtl122 =
+ kmadd(PDstandardNth1gt22,ToReal(-0.5),PDstandardNth2gt12);
- CCTK_REAL Gtl123 = 0.5*(-PDstandardNth1gt23 + PDstandardNth2gt13 +
- PDstandardNth3gt12);
+ CCTK_REAL_VEC Gtl123 =
+ kmul(kadd(PDstandardNth2gt13,ksub(PDstandardNth3gt12,PDstandardNth1gt23)),ToReal(0.5));
- CCTK_REAL Gtl133 = -0.5*PDstandardNth1gt33 + PDstandardNth3gt13;
+ CCTK_REAL_VEC Gtl133 =
+ kmadd(PDstandardNth1gt33,ToReal(-0.5),PDstandardNth3gt13);
- CCTK_REAL Gtl211 = PDstandardNth1gt12 - 0.5*PDstandardNth2gt11;
+ CCTK_REAL_VEC Gtl211 =
+ kmadd(PDstandardNth2gt11,ToReal(-0.5),PDstandardNth1gt12);
- CCTK_REAL Gtl212 = 0.5*PDstandardNth1gt22;
+ CCTK_REAL_VEC Gtl212 = kmul(PDstandardNth1gt22,ToReal(0.5));
- CCTK_REAL Gtl213 = 0.5*(PDstandardNth1gt23 - PDstandardNth2gt13 +
- PDstandardNth3gt12);
+ CCTK_REAL_VEC Gtl213 =
+ kmul(kadd(PDstandardNth1gt23,ksub(PDstandardNth3gt12,PDstandardNth2gt13)),ToReal(0.5));
- CCTK_REAL Gtl222 = 0.5*PDstandardNth2gt22;
+ CCTK_REAL_VEC Gtl222 = kmul(PDstandardNth2gt22,ToReal(0.5));
- CCTK_REAL Gtl223 = 0.5*PDstandardNth3gt22;
+ CCTK_REAL_VEC Gtl223 = kmul(PDstandardNth3gt22,ToReal(0.5));
- CCTK_REAL Gtl233 = -0.5*PDstandardNth2gt33 + PDstandardNth3gt23;
+ CCTK_REAL_VEC Gtl233 =
+ kmadd(PDstandardNth2gt33,ToReal(-0.5),PDstandardNth3gt23);
- CCTK_REAL Gtl311 = PDstandardNth1gt13 - 0.5*PDstandardNth3gt11;
+ CCTK_REAL_VEC Gtl311 =
+ kmadd(PDstandardNth3gt11,ToReal(-0.5),PDstandardNth1gt13);
- CCTK_REAL Gtl312 = 0.5*(PDstandardNth1gt23 + PDstandardNth2gt13 -
- PDstandardNth3gt12);
+ CCTK_REAL_VEC Gtl312 =
+ kmul(kadd(PDstandardNth1gt23,ksub(PDstandardNth2gt13,PDstandardNth3gt12)),ToReal(0.5));
- CCTK_REAL Gtl313 = 0.5*PDstandardNth1gt33;
+ CCTK_REAL_VEC Gtl313 = kmul(PDstandardNth1gt33,ToReal(0.5));
- CCTK_REAL Gtl322 = PDstandardNth2gt23 - 0.5*PDstandardNth3gt22;
+ CCTK_REAL_VEC Gtl322 =
+ kmadd(PDstandardNth3gt22,ToReal(-0.5),PDstandardNth2gt23);
- CCTK_REAL Gtl323 = 0.5*PDstandardNth2gt33;
+ CCTK_REAL_VEC Gtl323 = kmul(PDstandardNth2gt33,ToReal(0.5));
- CCTK_REAL Gtl333 = 0.5*PDstandardNth3gt33;
+ CCTK_REAL_VEC Gtl333 = kmul(PDstandardNth3gt33,ToReal(0.5));
- CCTK_REAL Gt111 = Gtl111*gtu11 + Gtl211*gtu12 + Gtl311*gtu13;
+ CCTK_REAL_VEC Gt111 =
+ kmadd(Gtl111,gtu11,kmadd(Gtl211,gtu12,kmul(Gtl311,gtu13)));
- CCTK_REAL Gt211 = Gtl111*gtu12 + Gtl211*gtu22 + Gtl311*gtu23;
+ CCTK_REAL_VEC Gt211 =
+ kmadd(Gtl111,gtu12,kmadd(Gtl211,gtu22,kmul(Gtl311,gtu23)));
- CCTK_REAL Gt311 = Gtl111*gtu13 + Gtl211*gtu23 + Gtl311*gtu33;
+ CCTK_REAL_VEC Gt311 =
+ kmadd(Gtl111,gtu13,kmadd(Gtl211,gtu23,kmul(Gtl311,gtu33)));
- CCTK_REAL Gt112 = Gtl112*gtu11 + Gtl212*gtu12 + Gtl312*gtu13;
+ CCTK_REAL_VEC Gt112 =
+ kmadd(Gtl112,gtu11,kmadd(Gtl212,gtu12,kmul(Gtl312,gtu13)));
- CCTK_REAL Gt212 = Gtl112*gtu12 + Gtl212*gtu22 + Gtl312*gtu23;
+ CCTK_REAL_VEC Gt212 =
+ kmadd(Gtl112,gtu12,kmadd(Gtl212,gtu22,kmul(Gtl312,gtu23)));
- CCTK_REAL Gt312 = Gtl112*gtu13 + Gtl212*gtu23 + Gtl312*gtu33;
+ CCTK_REAL_VEC Gt312 =
+ kmadd(Gtl112,gtu13,kmadd(Gtl212,gtu23,kmul(Gtl312,gtu33)));
- CCTK_REAL Gt113 = Gtl113*gtu11 + Gtl213*gtu12 + Gtl313*gtu13;
+ CCTK_REAL_VEC Gt113 =
+ kmadd(Gtl113,gtu11,kmadd(Gtl213,gtu12,kmul(Gtl313,gtu13)));
- CCTK_REAL Gt213 = Gtl113*gtu12 + Gtl213*gtu22 + Gtl313*gtu23;
+ CCTK_REAL_VEC Gt213 =
+ kmadd(Gtl113,gtu12,kmadd(Gtl213,gtu22,kmul(Gtl313,gtu23)));
- CCTK_REAL Gt313 = Gtl113*gtu13 + Gtl213*gtu23 + Gtl313*gtu33;
+ CCTK_REAL_VEC Gt313 =
+ kmadd(Gtl113,gtu13,kmadd(Gtl213,gtu23,kmul(Gtl313,gtu33)));
- CCTK_REAL Gt122 = Gtl122*gtu11 + Gtl222*gtu12 + Gtl322*gtu13;
+ CCTK_REAL_VEC Gt122 =
+ kmadd(Gtl122,gtu11,kmadd(Gtl222,gtu12,kmul(Gtl322,gtu13)));
- CCTK_REAL Gt222 = Gtl122*gtu12 + Gtl222*gtu22 + Gtl322*gtu23;
+ CCTK_REAL_VEC Gt222 =
+ kmadd(Gtl122,gtu12,kmadd(Gtl222,gtu22,kmul(Gtl322,gtu23)));
- CCTK_REAL Gt322 = Gtl122*gtu13 + Gtl222*gtu23 + Gtl322*gtu33;
+ CCTK_REAL_VEC Gt322 =
+ kmadd(Gtl122,gtu13,kmadd(Gtl222,gtu23,kmul(Gtl322,gtu33)));
- CCTK_REAL Gt123 = Gtl123*gtu11 + Gtl223*gtu12 + Gtl323*gtu13;
+ CCTK_REAL_VEC Gt123 =
+ kmadd(Gtl123,gtu11,kmadd(Gtl223,gtu12,kmul(Gtl323,gtu13)));
- CCTK_REAL Gt223 = Gtl123*gtu12 + Gtl223*gtu22 + Gtl323*gtu23;
+ CCTK_REAL_VEC Gt223 =
+ kmadd(Gtl123,gtu12,kmadd(Gtl223,gtu22,kmul(Gtl323,gtu23)));
- CCTK_REAL Gt323 = Gtl123*gtu13 + Gtl223*gtu23 + Gtl323*gtu33;
+ CCTK_REAL_VEC Gt323 =
+ kmadd(Gtl123,gtu13,kmadd(Gtl223,gtu23,kmul(Gtl323,gtu33)));
- CCTK_REAL Gt133 = Gtl133*gtu11 + Gtl233*gtu12 + Gtl333*gtu13;
+ CCTK_REAL_VEC Gt133 =
+ kmadd(Gtl133,gtu11,kmadd(Gtl233,gtu12,kmul(Gtl333,gtu13)));
- CCTK_REAL Gt233 = Gtl133*gtu12 + Gtl233*gtu22 + Gtl333*gtu23;
+ CCTK_REAL_VEC Gt233 =
+ kmadd(Gtl133,gtu12,kmadd(Gtl233,gtu22,kmul(Gtl333,gtu23)));
- CCTK_REAL Gt333 = Gtl133*gtu13 + Gtl233*gtu23 + Gtl333*gtu33;
+ CCTK_REAL_VEC Gt333 =
+ kmadd(Gtl133,gtu13,kmadd(Gtl233,gtu23,kmul(Gtl333,gtu33)));
- CCTK_REAL Xtn1 = Gt111*gtu11 + Gt122*gtu22 + 2*(Gt112*gtu12 +
- Gt113*gtu13 + Gt123*gtu23) + Gt133*gtu33;
+ CCTK_REAL_VEC Xtn1 =
+ kmadd(Gt111,gtu11,kmadd(Gt122,gtu22,kmadd(Gt133,gtu33,kmul(kmadd(Gt112,gtu12,kmadd(Gt113,gtu13,kmul(Gt123,gtu23))),ToReal(2)))));
- CCTK_REAL Xtn2 = Gt211*gtu11 + Gt222*gtu22 + 2*(Gt212*gtu12 +
- Gt213*gtu13 + Gt223*gtu23) + Gt233*gtu33;
+ CCTK_REAL_VEC Xtn2 =
+ kmadd(Gt211,gtu11,kmadd(Gt222,gtu22,kmadd(Gt233,gtu33,kmul(kmadd(Gt212,gtu12,kmadd(Gt213,gtu13,kmul(Gt223,gtu23))),ToReal(2)))));
- CCTK_REAL Xtn3 = Gt311*gtu11 + Gt322*gtu22 + 2*(Gt312*gtu12 +
- Gt313*gtu13 + Gt323*gtu23) + Gt333*gtu33;
+ CCTK_REAL_VEC Xtn3 =
+ kmadd(Gt311,gtu11,kmadd(Gt322,gtu22,kmadd(Gt333,gtu33,kmul(kmadd(Gt312,gtu12,kmadd(Gt313,gtu13,kmul(Gt323,gtu23))),ToReal(2)))));
- CCTK_REAL fac1 = IfThen(conformalMethod,-0.5*INV(phiL),1);
+ CCTK_REAL_VEC fac1 =
+ IfThen(conformalMethod,kmul(INV(phiL),ToReal(-0.5)),ToReal(1));
- CCTK_REAL cdphi1 = fac1*PDstandardNth1phi;
+ CCTK_REAL_VEC cdphi1 = kmul(fac1,PDstandardNth1phi);
- CCTK_REAL cdphi2 = fac1*PDstandardNth2phi;
+ CCTK_REAL_VEC cdphi2 = kmul(fac1,PDstandardNth2phi);
- CCTK_REAL cdphi3 = fac1*PDstandardNth3phi;
+ CCTK_REAL_VEC cdphi3 = kmul(fac1,PDstandardNth3phi);
- CCTK_REAL Atm11 = At11L*gtu11 + At12L*gtu12 + At13L*gtu13;
+ CCTK_REAL_VEC Atm11 =
+ kmadd(At11L,gtu11,kmadd(At12L,gtu12,kmul(At13L,gtu13)));
- CCTK_REAL Atm21 = At11L*gtu12 + At12L*gtu22 + At13L*gtu23;
+ CCTK_REAL_VEC Atm21 =
+ kmadd(At11L,gtu12,kmadd(At12L,gtu22,kmul(At13L,gtu23)));
- CCTK_REAL Atm31 = At11L*gtu13 + At12L*gtu23 + At13L*gtu33;
+ CCTK_REAL_VEC Atm31 =
+ kmadd(At11L,gtu13,kmadd(At12L,gtu23,kmul(At13L,gtu33)));
- CCTK_REAL Atm12 = At12L*gtu11 + At22L*gtu12 + At23L*gtu13;
+ CCTK_REAL_VEC Atm12 =
+ kmadd(At12L,gtu11,kmadd(At22L,gtu12,kmul(At23L,gtu13)));
- CCTK_REAL Atm22 = At12L*gtu12 + At22L*gtu22 + At23L*gtu23;
+ CCTK_REAL_VEC Atm22 =
+ kmadd(At12L,gtu12,kmadd(At22L,gtu22,kmul(At23L,gtu23)));
- CCTK_REAL Atm32 = At12L*gtu13 + At22L*gtu23 + At23L*gtu33;
+ CCTK_REAL_VEC Atm32 =
+ kmadd(At12L,gtu13,kmadd(At22L,gtu23,kmul(At23L,gtu33)));
- CCTK_REAL Atm13 = At13L*gtu11 + At23L*gtu12 + At33L*gtu13;
+ CCTK_REAL_VEC Atm13 =
+ kmadd(At13L,gtu11,kmadd(At23L,gtu12,kmul(At33L,gtu13)));
- CCTK_REAL Atm23 = At13L*gtu12 + At23L*gtu22 + At33L*gtu23;
+ CCTK_REAL_VEC Atm23 =
+ kmadd(At13L,gtu12,kmadd(At23L,gtu22,kmul(At33L,gtu23)));
- CCTK_REAL Atm33 = At13L*gtu13 + At23L*gtu23 + At33L*gtu33;
+ CCTK_REAL_VEC Atm33 =
+ kmadd(At13L,gtu13,kmadd(At23L,gtu23,kmul(At33L,gtu33)));
- CCTK_REAL Atu11 = Atm11*gtu11 + Atm12*gtu12 + Atm13*gtu13;
+ CCTK_REAL_VEC Atu11 =
+ kmadd(Atm11,gtu11,kmadd(Atm12,gtu12,kmul(Atm13,gtu13)));
- CCTK_REAL Atu12 = Atm11*gtu12 + Atm12*gtu22 + Atm13*gtu23;
+ CCTK_REAL_VEC Atu12 =
+ kmadd(Atm11,gtu12,kmadd(Atm12,gtu22,kmul(Atm13,gtu23)));
- CCTK_REAL Atu13 = Atm11*gtu13 + Atm12*gtu23 + Atm13*gtu33;
+ CCTK_REAL_VEC Atu13 =
+ kmadd(Atm11,gtu13,kmadd(Atm12,gtu23,kmul(Atm13,gtu33)));
- CCTK_REAL Atu22 = Atm21*gtu12 + Atm22*gtu22 + Atm23*gtu23;
+ CCTK_REAL_VEC Atu22 =
+ kmadd(Atm21,gtu12,kmadd(Atm22,gtu22,kmul(Atm23,gtu23)));
- CCTK_REAL Atu23 = Atm21*gtu13 + Atm22*gtu23 + Atm23*gtu33;
+ CCTK_REAL_VEC Atu23 =
+ kmadd(Atm21,gtu13,kmadd(Atm22,gtu23,kmul(Atm23,gtu33)));
- CCTK_REAL Atu33 = Atm31*gtu13 + Atm32*gtu23 + Atm33*gtu33;
+ CCTK_REAL_VEC Atu33 =
+ kmadd(Atm31,gtu13,kmadd(Atm32,gtu23,kmul(Atm33,gtu33)));
- CCTK_REAL e4phi = IfThen(conformalMethod,INV(SQR(phiL)),exp(4*phiL));
+ CCTK_REAL_VEC e4phi =
+ IfThen(conformalMethod,INV(SQR(phiL)),kexp(kmul(phiL,ToReal(4))));
- CCTK_REAL em4phi = INV(e4phi);
+ CCTK_REAL_VEC em4phi = INV(e4phi);
- CCTK_REAL rho = INV(SQR(alphaL))*(eTttL - 2*(beta2L*eTtyL +
- beta3L*eTtzL) + 2*(beta1L*(-eTtxL + beta2L*eTxyL + beta3L*eTxzL) +
- beta2L*beta3L*eTyzL) + eTxxL*SQR(beta1L) + eTyyL*SQR(beta2L) +
- eTzzL*SQR(beta3L));
+ CCTK_REAL_VEC rho =
+ kmul(INV(SQR(alphaL)),kadd(eTttL,kmadd(eTxxL,SQR(beta1L),kmadd(eTyyL,SQR(beta2L),kmadd(eTzzL,SQR(beta3L),kmadd(kmadd(beta2L,eTtyL,kmul(beta3L,eTtzL)),ToReal(-2),kmul(kmadd(beta2L,kmul(beta3L,eTyzL),kmul(beta1L,kmadd(beta2L,eTxyL,kmsub(beta3L,eTxzL,eTtxL)))),ToReal(2))))))));
- CCTK_REAL S1 = (-eTtxL + beta1L*eTxxL + beta2L*eTxyL +
- beta3L*eTxzL)*INV(alphaL);
+ CCTK_REAL_VEC S1 =
+ kmul(INV(alphaL),kmadd(beta1L,eTxxL,kmadd(beta2L,eTxyL,kmsub(beta3L,eTxzL,eTtxL))));
- CCTK_REAL S2 = (-eTtyL + beta1L*eTxyL + beta2L*eTyyL +
- beta3L*eTyzL)*INV(alphaL);
+ CCTK_REAL_VEC S2 =
+ kmul(INV(alphaL),kmadd(beta1L,eTxyL,kmadd(beta2L,eTyyL,kmsub(beta3L,eTyzL,eTtyL))));
- CCTK_REAL S3 = (-eTtzL + beta1L*eTxzL + beta2L*eTyzL +
- beta3L*eTzzL)*INV(alphaL);
+ CCTK_REAL_VEC S3 =
+ kmul(INV(alphaL),kmadd(beta1L,eTxzL,kmadd(beta2L,eTyzL,kmsub(beta3L,eTzzL,eTtzL))));
- CCTK_REAL trS = em4phi*(eTxxL*gtu11 + eTyyL*gtu22 + 2*(eTxyL*gtu12 +
- eTxzL*gtu13 + eTyzL*gtu23) + eTzzL*gtu33);
+ CCTK_REAL_VEC trS =
+ kmul(em4phi,kmadd(eTxxL,gtu11,kmadd(eTyyL,gtu22,kmadd(eTzzL,gtu33,kmul(kmadd(eTxyL,gtu12,kmadd(eTxzL,gtu13,kmul(eTyzL,gtu23))),ToReal(2))))));
- CCTK_REAL phirhsL =
- IfThen(conformalMethod,phiL*(-0.333333333333333333333333333333*(PDstandardNth1beta1
- + PDstandardNth2beta2 + PDstandardNth3beta3) +
- 0.333333333333333333333333333333*alphaL*trKL),0.166666666666666666666666666667*(PDstandardNth1beta1
- + PDstandardNth2beta2 + PDstandardNth3beta3) -
- 0.166666666666666666666666666667*alphaL*trKL);
+ CCTK_REAL_VEC phirhsL =
+ IfThen(conformalMethod,kmul(phiL,kmadd(kadd(PDstandardNth1beta1,kadd(PDstandardNth2beta2,PDstandardNth3beta3)),ToReal(-0.333333333333333333333333333333),kmul(alphaL,kmul(trKL,ToReal(0.333333333333333333333333333333))))),kmadd(alphaL,kmul(trKL,ToReal(-0.166666666666666666666666666667)),kmul(kadd(PDstandardNth1beta1,kadd(PDstandardNth2beta2,PDstandardNth3beta3)),ToReal(0.166666666666666666666666666667))));
- CCTK_REAL gt11rhsL = -0.666666666666666666666666666667*(3*alphaL*At11L
- - 3*(gt12L*PDstandardNth1beta2 + gt13L*PDstandardNth1beta3) +
- gt11L*(-2*PDstandardNth1beta1 + PDstandardNth2beta2 +
- PDstandardNth3beta3));
+ CCTK_REAL_VEC gt11rhsL =
+ kmul(ToReal(-0.666666666666666666666666666667),kmadd(kmadd(gt12L,PDstandardNth1beta2,kmul(gt13L,PDstandardNth1beta3)),ToReal(-3),kmadd(gt11L,kadd(PDstandardNth2beta2,kmadd(PDstandardNth1beta1,ToReal(-2),PDstandardNth3beta3)),kmul(alphaL,kmul(At11L,ToReal(3))))));
- CCTK_REAL gt12rhsL = 0.333333333333333333333333333333*(-6*alphaL*At12L
- + 3*(gt22L*PDstandardNth1beta2 + gt23L*PDstandardNth1beta3 +
- gt11L*PDstandardNth2beta1 + gt13L*PDstandardNth2beta3) +
- gt12L*(PDstandardNth1beta1 + PDstandardNth2beta2 -
- 2*PDstandardNth3beta3));
+ CCTK_REAL_VEC gt12rhsL =
+ kmul(ToReal(0.333333333333333333333333333333),kmadd(alphaL,kmul(At12L,ToReal(-6)),kmadd(gt12L,kadd(PDstandardNth1beta1,kmadd(PDstandardNth3beta3,ToReal(-2),PDstandardNth2beta2)),kmul(kmadd(gt22L,PDstandardNth1beta2,kmadd(gt23L,PDstandardNth1beta3,kmadd(gt11L,PDstandardNth2beta1,kmul(gt13L,PDstandardNth2beta3)))),ToReal(3)))));
- CCTK_REAL gt13rhsL = 0.333333333333333333333333333333*(-6*alphaL*At13L
- + 3*(gt23L*PDstandardNth1beta2 + gt33L*PDstandardNth1beta3 +
- gt11L*PDstandardNth3beta1 + gt12L*PDstandardNth3beta2) +
- gt13L*(PDstandardNth1beta1 - 2*PDstandardNth2beta2 +
- PDstandardNth3beta3));
-
- CCTK_REAL gt22rhsL = -0.666666666666666666666666666667*(3*alphaL*At22L
- - 3*(gt12L*PDstandardNth2beta1 + gt23L*PDstandardNth2beta3) +
- gt22L*(PDstandardNth1beta1 - 2*PDstandardNth2beta2 +
- PDstandardNth3beta3));
-
- CCTK_REAL gt23rhsL = 0.333333333333333333333333333333*(-6*alphaL*At23L
- + 3*(gt13L*PDstandardNth2beta1 + gt33L*PDstandardNth2beta3 +
- gt12L*PDstandardNth3beta1 + gt22L*PDstandardNth3beta2) +
- gt23L*(-2*PDstandardNth1beta1 + PDstandardNth2beta2 +
- PDstandardNth3beta3));
-
- CCTK_REAL gt33rhsL = -0.666666666666666666666666666667*(3*alphaL*At33L
- - 3*(gt13L*PDstandardNth3beta1 + gt23L*PDstandardNth3beta2) +
- gt33L*(PDstandardNth1beta1 + PDstandardNth2beta2 -
- 2*PDstandardNth3beta3));
-
- CCTK_REAL dotXt1 =
- 0.333333333333333333333333333333*(7*(gtu12*PDstandardNth12beta1 +
- gtu13*PDstandardNth13beta1) + 6*gtu23*PDstandardNth23beta1 +
- 3*(gtu22*PDstandardNth22beta1 + gtu33*PDstandardNth33beta1) -
- 6*(Atu11*PDstandardNth1alpha + Atu12*PDstandardNth2alpha +
- Atu13*PDstandardNth3alpha) + gtu11*(4*PDstandardNth11beta1 +
- PDstandardNth12beta2 + PDstandardNth13beta3 -
- 150.7964473723100754462068823974161384415*alphaL*S1) +
- gtu12*(PDstandardNth22beta2 + PDstandardNth23beta3 -
- 150.7964473723100754462068823974161384415*alphaL*S2) +
- gtu13*(PDstandardNth23beta2 + PDstandardNth33beta3 -
- 150.7964473723100754462068823974161384415*alphaL*S3) +
- (-PDstandardNth1beta1 + 2*PDstandardNth3beta3)*Xtn1 +
- 2*(alphaL*(18*(Atu11*cdphi1 + Atu12*cdphi2 + Atu13*cdphi3) +
- 6*(Atu12*Gt112 + Atu13*Gt113 + Atu23*Gt123) + 3*(Atu11*Gt111 +
- Atu22*Gt122 + Atu33*Gt133) - 2*(gtu11*PDstandardNth1trK +
- gtu12*PDstandardNth2trK + gtu13*PDstandardNth3trK)) +
- PDstandardNth2beta2*Xtn1) - 3*(PDstandardNth2beta1*Xtn2 +
- PDstandardNth3beta1*Xtn3));
-
- CCTK_REAL dotXt2 =
- 0.333333333333333333333333333333*(6*gtu13*PDstandardNth13beta2 +
- 3*(gtu11*PDstandardNth11beta2 + gtu33*PDstandardNth33beta2) -
- 6*(Atu12*PDstandardNth1alpha + Atu22*PDstandardNth2alpha +
- Atu23*PDstandardNth3alpha) + gtu12*(PDstandardNth11beta1 +
- 7*PDstandardNth12beta2 + PDstandardNth13beta3 -
- 150.7964473723100754462068823974161384415*alphaL*S1) +
- gtu22*(PDstandardNth12beta1 + 4*PDstandardNth22beta2 +
- PDstandardNth23beta3 -
- 150.7964473723100754462068823974161384415*alphaL*S2) +
- gtu23*(PDstandardNth13beta1 + 7*PDstandardNth23beta2 +
- PDstandardNth33beta3 -
- 150.7964473723100754462068823974161384415*alphaL*S3) +
- (-PDstandardNth2beta2 + 2*PDstandardNth3beta3)*Xtn2 +
- 2*(alphaL*(18*(Atu12*cdphi1 + Atu22*cdphi2 + Atu23*cdphi3) +
- 6*(Atu12*Gt212 + Atu13*Gt213 + Atu23*Gt223) + 3*(Atu11*Gt211 +
- Atu22*Gt222 + Atu33*Gt233) - 2*(gtu12*PDstandardNth1trK +
- gtu22*PDstandardNth2trK + gtu23*PDstandardNth3trK)) +
- PDstandardNth1beta1*Xtn2) - 3*(PDstandardNth1beta2*Xtn1 +
- PDstandardNth3beta2*Xtn3));
-
- CCTK_REAL dotXt3 =
- 0.333333333333333333333333333333*(6*gtu12*PDstandardNth12beta3 +
- 3*(gtu11*PDstandardNth11beta3 + gtu22*PDstandardNth22beta3) -
- 6*(Atu13*PDstandardNth1alpha + Atu23*PDstandardNth2alpha +
- Atu33*PDstandardNth3alpha) + gtu13*(PDstandardNth11beta1 +
- PDstandardNth12beta2 + 7*PDstandardNth13beta3 -
- 150.7964473723100754462068823974161384415*alphaL*S1) +
- gtu23*(PDstandardNth12beta1 + PDstandardNth22beta2 +
- 7*PDstandardNth23beta3 -
- 150.7964473723100754462068823974161384415*alphaL*S2) +
- gtu33*(PDstandardNth13beta1 + PDstandardNth23beta2 +
- 4*PDstandardNth33beta3 -
- 150.7964473723100754462068823974161384415*alphaL*S3) -
- 3*(PDstandardNth1beta3*Xtn1 + PDstandardNth2beta3*Xtn2) +
- (2*PDstandardNth2beta2 - PDstandardNth3beta3)*Xtn3 +
- 2*(alphaL*(18*(Atu13*cdphi1 + Atu23*cdphi2 + Atu33*cdphi3) +
- 6*(Atu12*Gt312 + Atu13*Gt313 + Atu23*Gt323) + 3*(Atu11*Gt311 +
- Atu22*Gt322 + Atu33*Gt333) - 2*(gtu13*PDstandardNth1trK +
- gtu23*PDstandardNth2trK + gtu33*PDstandardNth3trK)) +
- PDstandardNth1beta1*Xtn3));
-
- CCTK_REAL Xt1rhsL = dotXt1;
-
- CCTK_REAL Xt2rhsL = dotXt2;
-
- CCTK_REAL Xt3rhsL = dotXt3;
-
- CCTK_REAL dottrK = -(em4phi*(gtu11*PDstandardNth11alpha +
- gtu22*PDstandardNth22alpha + gtu33*(PDstandardNth33alpha +
- 2*cdphi3*PDstandardNth3alpha) + 2*(gtu12*PDstandardNth12alpha +
- gtu13*(PDstandardNth13alpha + cdphi1*PDstandardNth3alpha) +
- gtu23*(PDstandardNth23alpha + cdphi2*PDstandardNth3alpha)) +
- PDstandardNth1alpha*(2*(cdphi1*gtu11 + cdphi2*gtu12 + cdphi3*gtu13) -
- Xtn1) + PDstandardNth2alpha*(2*(cdphi1*gtu12 + cdphi2*gtu22 +
- cdphi3*gtu23) - Xtn2) - PDstandardNth3alpha*Xtn3)) +
- alphaL*(2*(Atm12*Atm21 + Atm13*Atm31 + Atm23*Atm32) +
- 12.56637061435917295385057353311801153679*(rho + trS) + SQR(Atm11) +
- SQR(Atm22) + SQR(Atm33) + 0.333333333333333333333333333333*SQR(trKL));
-
- CCTK_REAL trKrhsL = dottrK;
-
- CCTK_REAL alpharhsL =
- -(pow(alphaL,ToReal(harmonicN))*ToReal(harmonicF)*(trKL + (AL -
- trKL)*ToReal(LapseACoeff)));
-
- CCTK_REAL ArhsL = (dottrK -
- AL*ToReal(AlphaDriver))*ToReal(LapseACoeff);
-
- CCTK_REAL eta = fmin(1,INV(rL)*ToReal(SpatialBetaDriverRadius));
-
- CCTK_REAL theta = fmin(1,exp(1 -
- rL*INV(ToReal(SpatialShiftGammaCoeffRadius))));
-
- CCTK_REAL beta1rhsL = theta*(Xt1L + beta1L*eta*ToReal(BetaDriver)*(-1
- + ToReal(ShiftBCoeff)) + (B1L -
- Xt1L)*ToReal(ShiftBCoeff))*ToReal(ShiftGammaCoeff);
-
- CCTK_REAL beta2rhsL = theta*(Xt2L + beta2L*eta*ToReal(BetaDriver)*(-1
- + ToReal(ShiftBCoeff)) + (B2L -
- Xt2L)*ToReal(ShiftBCoeff))*ToReal(ShiftGammaCoeff);
-
- CCTK_REAL beta3rhsL = theta*(Xt3L + beta3L*eta*ToReal(BetaDriver)*(-1
- + ToReal(ShiftBCoeff)) + (B3L -
- Xt3L)*ToReal(ShiftBCoeff))*ToReal(ShiftGammaCoeff);
-
- CCTK_REAL B1rhsL = (dotXt1 -
- B1L*eta*ToReal(BetaDriver))*ToReal(ShiftBCoeff);
-
- CCTK_REAL B2rhsL = (dotXt2 -
- B2L*eta*ToReal(BetaDriver))*ToReal(ShiftBCoeff);
-
- CCTK_REAL B3rhsL = (dotXt3 -
- B3L*eta*ToReal(BetaDriver))*ToReal(ShiftBCoeff);
+ CCTK_REAL_VEC gt13rhsL =
+ kmul(ToReal(0.333333333333333333333333333333),kmadd(alphaL,kmul(At13L,ToReal(-6)),kmadd(gt13L,kadd(PDstandardNth1beta1,kmadd(PDstandardNth2beta2,ToReal(-2),PDstandardNth3beta3)),kmul(kmadd(gt23L,PDstandardNth1beta2,kmadd(gt33L,PDstandardNth1beta3,kmadd(gt11L,PDstandardNth3beta1,kmul(gt12L,PDstandardNth3beta2)))),ToReal(3)))));
+
+ CCTK_REAL_VEC gt22rhsL =
+ kmul(ToReal(-0.666666666666666666666666666667),kmadd(kmadd(gt12L,PDstandardNth2beta1,kmul(gt23L,PDstandardNth2beta3)),ToReal(-3),kmadd(gt22L,kadd(PDstandardNth1beta1,kmadd(PDstandardNth2beta2,ToReal(-2),PDstandardNth3beta3)),kmul(alphaL,kmul(At22L,ToReal(3))))));
+
+ CCTK_REAL_VEC gt23rhsL =
+ kmul(ToReal(0.333333333333333333333333333333),kmadd(alphaL,kmul(At23L,ToReal(-6)),kmadd(gt23L,kadd(PDstandardNth2beta2,kmadd(PDstandardNth1beta1,ToReal(-2),PDstandardNth3beta3)),kmul(kmadd(gt13L,PDstandardNth2beta1,kmadd(gt33L,PDstandardNth2beta3,kmadd(gt12L,PDstandardNth3beta1,kmul(gt22L,PDstandardNth3beta2)))),ToReal(3)))));
+
+ CCTK_REAL_VEC gt33rhsL =
+ kmul(ToReal(-0.666666666666666666666666666667),kmadd(kmadd(gt13L,PDstandardNth3beta1,kmul(gt23L,PDstandardNth3beta2)),ToReal(-3),kmadd(gt33L,kadd(PDstandardNth1beta1,kmadd(PDstandardNth3beta3,ToReal(-2),PDstandardNth2beta2)),kmul(alphaL,kmul(At33L,ToReal(3))))));
+
+ CCTK_REAL_VEC dotXt1 =
+ kmul(ToReal(0.333333333333333333333333333333),kmadd(gtu12,kadd(PDstandardNth22beta2,kmadd(alphaL,kmul(S2,ToReal(-150.7964473723100754462068823974161384415)),PDstandardNth23beta3)),kmadd(gtu13,kadd(PDstandardNth23beta2,kmadd(alphaL,kmul(S3,ToReal(-150.7964473723100754462068823974161384415)),PDstandardNth33beta3)),kmadd(kmadd(Atu11,PDstandardNth1alpha,kmadd(Atu12,PDstandardNth2alpha,kmul(Atu13,PDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(PDstandardNth2beta1,Xtn2,kmul(PDstandardNth3beta1,Xtn3)),ToReal(-3),kmadd(Xtn1,kmsub(PDstandardNth3beta3,ToReal(2),PDstandardNth1beta1),kmadd(kmadd(gtu22,PDstandardNth22beta1,kmul(gtu33,PDstandardNth33beta1)),ToReal(3),kmadd(gtu11,kadd(PDstandardNth12beta2,kadd(PDstandardNth13beta3,kmadd(alphaL,kmul(S1,ToReal(-150.7964473723100754462068823974161384415)),kmul(PDstandardNth11beta1,ToReal(4))))),kmadd(gtu23,kmul(PDstandardNth23beta1,ToReal(6)),kmadd(kmadd(gtu12,PDstandardNth12beta1,kmul(gtu13,PDstandardNth13beta1)),ToReal(7),kmul(ToReal(2),kmadd(PDstandardNth2beta2,Xtn1,kmul(alphaL,kmadd(kmadd(gtu11,PDstandardNth1trK,kmadd(gtu12,PDstandardNth2trK,kmul(gtu13,PDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu11,Gt111,kmadd(Atu22,Gt122,kmul(Atu33,Gt133))),ToReal(3),kmadd(kmadd(Atu12,Gt112,kmadd(Atu13,Gt113,kmul(Atu23,Gt123))),ToReal(6),kmul(kmadd(Atu11,cdphi1,kmadd(Atu12,cdphi2,kmul(Atu13,cdphi3))),ToReal(18))))))))))))))))));
+
+ CCTK_REAL_VEC dotXt2 =
+ kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu12,PDstandardNth1alpha,kmadd(Atu22,PDstandardNth2alpha,kmul(Atu23,PDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(PDstandardNth1beta2,Xtn1,kmul(PDstandardNth3beta2,Xtn3)),ToReal(-3),kmadd(Xtn2,kmsub(PDstandardNth3beta3,ToReal(2),PDstandardNth2beta2),kmadd(kmadd(gtu11,PDstandardNth11beta2,kmul(gtu33,PDstandardNth33beta2)),ToReal(3),kmadd(gtu22,kadd(PDstandardNth12beta1,kadd(PDstandardNth23beta3,kmadd(alphaL,kmul(S2,ToReal(-150.7964473723100754462068823974161384415)),kmul(PDstandardNth22beta2,ToReal(4))))),kmadd(gtu13,kmul(PDstandardNth13beta2,ToReal(6)),kmadd(gtu12,kadd(PDstandardNth11beta1,kadd(PDstandardNth13beta3,kmadd(alphaL,kmul(S1,ToReal(-150.7964473723100754462068823974161384415)),kmul(PDstandardNth12beta2,ToReal(7))))),kmadd(gtu23,kadd(PDstandardNth13beta1,kadd(PDstandardNth33beta3,kmadd(alphaL,kmul(S3,ToReal(-150.7964473723100754462068823974161384415)),kmul(PDstandardNth23beta2,ToReal(7))))),kmul(ToReal(2),kmadd(PDstandardNth1beta1,Xtn2,kmul(alphaL,kmadd(kmadd(gtu12,PDstandardNth1trK,kmadd(gtu22,PDstandardNth2trK,kmul(gtu23,PDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu11,Gt211,kmadd(Atu22,Gt222,kmul(Atu33,Gt233))),ToReal(3),kmadd(kmadd(Atu12,Gt212,kmadd(Atu13,Gt213,kmul(Atu23,Gt223))),ToReal(6),kmul(kmadd(Atu12,cdphi1,kmadd(Atu22,cdphi2,kmul(Atu23,cdphi3))),ToReal(18)))))))))))))))));
+
+ CCTK_REAL_VEC dotXt3 =
+ kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu13,PDstandardNth1alpha,kmadd(Atu23,PDstandardNth2alpha,kmul(Atu33,PDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(PDstandardNth1beta3,Xtn1,kmul(PDstandardNth2beta3,Xtn2)),ToReal(-3),kmadd(Xtn3,kmsub(PDstandardNth2beta2,ToReal(2),PDstandardNth3beta3),kmadd(kmadd(gtu11,PDstandardNth11beta3,kmul(gtu22,PDstandardNth22beta3)),ToReal(3),kmadd(gtu33,kadd(PDstandardNth13beta1,kadd(PDstandardNth23beta2,kmadd(alphaL,kmul(S3,ToReal(-150.7964473723100754462068823974161384415)),kmul(PDstandardNth33beta3,ToReal(4))))),kmadd(gtu12,kmul(PDstandardNth12beta3,ToReal(6)),kmadd(gtu13,kadd(PDstandardNth11beta1,kadd(PDstandardNth12beta2,kmadd(alphaL,kmul(S1,ToReal(-150.7964473723100754462068823974161384415)),kmul(PDstandardNth13beta3,ToReal(7))))),kmadd(gtu23,kadd(PDstandardNth12beta1,kadd(PDstandardNth22beta2,kmadd(alphaL,kmul(S2,ToReal(-150.7964473723100754462068823974161384415)),kmul(PDstandardNth23beta3,ToReal(7))))),kmul(ToReal(2),kmadd(PDstandardNth1beta1,Xtn3,kmul(alphaL,kmadd(kmadd(gtu13,PDstandardNth1trK,kmadd(gtu23,PDstandardNth2trK,kmul(gtu33,PDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu11,Gt311,kmadd(Atu22,Gt322,kmul(Atu33,Gt333))),ToReal(3),kmadd(kmadd(Atu12,Gt312,kmadd(Atu13,Gt313,kmul(Atu23,Gt323))),ToReal(6),kmul(kmadd(Atu13,cdphi1,kmadd(Atu23,cdphi2,kmul(Atu33,cdphi3))),ToReal(18)))))))))))))))));
+
+ CCTK_REAL_VEC Xt1rhsL = dotXt1;
+
+ CCTK_REAL_VEC Xt2rhsL = dotXt2;
+
+ CCTK_REAL_VEC Xt3rhsL = dotXt3;
+
+ CCTK_REAL_VEC dottrK =
+ kmsub(alphaL,kadd(SQR(Atm11),kadd(SQR(Atm22),kadd(SQR(Atm33),kmadd(SQR(trKL),ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(2),kmul(kadd(rho,trS),ToReal(12.56637061435917295385057353311801153679))))))),kmul(em4phi,kmadd(gtu11,PDstandardNth11alpha,kmadd(gtu22,PDstandardNth22alpha,knmsub(PDstandardNth3alpha,Xtn3,kmadd(kmadd(gtu12,PDstandardNth12alpha,kmadd(gtu13,kmadd(cdphi1,PDstandardNth3alpha,PDstandardNth13alpha),kmul(gtu23,kmadd(cdphi2,PDstandardNth3alpha,PDstandardNth23alpha)))),ToReal(2),kmadd(PDstandardNth1alpha,kmsub(kmadd(cdphi1,gtu11,kmadd(cdphi2,gtu12,kmul(cdphi3,gtu13))),ToReal(2),Xtn1),kmadd(PDstandardNth2alpha,kmsub(kmadd(cdphi1,gtu12,kmadd(cdphi2,gtu22,kmul(cdphi3,gtu23))),ToReal(2),Xtn2),kmul(gtu33,kmadd(cdphi3,kmul(PDstandardNth3alpha,ToReal(2)),PDstandardNth33alpha))))))))));
+
+ CCTK_REAL_VEC trKrhsL = dottrK;
+
+ CCTK_REAL_VEC alpharhsL =
+ kneg(kmul(kpow(alphaL,harmonicN),kmul(ToReal(harmonicF),kmadd(ksub(AL,trKL),ToReal(LapseACoeff),trKL))));
+
+ CCTK_REAL_VEC ArhsL =
+ kmul(knmsub(AL,ToReal(AlphaDriver),dottrK),ToReal(LapseACoeff));
+
+ CCTK_REAL_VEC eta =
+ kfmin(ToReal(1),kmul(INV(rL),ToReal(SpatialBetaDriverRadius)));
+
+ CCTK_REAL_VEC theta =
+ kfmin(ToReal(1),kexp(knmsub(rL,INV(ToReal(SpatialShiftGammaCoeffRadius)),ToReal(1))));
+
+ CCTK_REAL_VEC beta1rhsL =
+ kmul(theta,kmul(kadd(Xt1L,kmadd(beta1L,kmul(eta,ToReal(BetaDriver*(-1 +
+ ShiftBCoeff))),kmul(ksub(B1L,Xt1L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff)));
+
+ CCTK_REAL_VEC beta2rhsL =
+ kmul(theta,kmul(kadd(Xt2L,kmadd(beta2L,kmul(eta,ToReal(BetaDriver*(-1 +
+ ShiftBCoeff))),kmul(ksub(B2L,Xt2L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff)));
+
+ CCTK_REAL_VEC beta3rhsL =
+ kmul(theta,kmul(kadd(Xt3L,kmadd(beta3L,kmul(eta,ToReal(BetaDriver*(-1 +
+ ShiftBCoeff))),kmul(ksub(B3L,Xt3L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff)));
+
+ CCTK_REAL_VEC B1rhsL =
+ kmul(knmsub(B1L,kmul(eta,ToReal(BetaDriver)),dotXt1),ToReal(ShiftBCoeff));
+
+ CCTK_REAL_VEC B2rhsL =
+ kmul(knmsub(B2L,kmul(eta,ToReal(BetaDriver)),dotXt2),ToReal(ShiftBCoeff));
+
+ CCTK_REAL_VEC B3rhsL =
+ kmul(knmsub(B3L,kmul(eta,ToReal(BetaDriver)),dotXt3),ToReal(ShiftBCoeff));
+
+ /* If necessary, store only partial vectors when the vector overlaps both lc_imin and lc_imax */
+
+ if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0))
+ {
+ ptrdiff_t const elt_count_lo = lc_imin-i;
+ ptrdiff_t const elt_count_hi = lc_imax-i;
+ vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi);
+ break;
+ }
+
+ /* If necessary, store only partial vectors when the vector starts before lc_imin */
+
+ if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0))
+ {
+ ptrdiff_t const elt_count = lc_imin-i;
+ vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count);
+ vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count);
+ vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count);
+ vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count);
+ vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count);
+ vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count);
+ vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count);
+ vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count);
+ vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count);
+ vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count);
+ vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count);
+ vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count);
+ vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count);
+ vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count);
+ vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count);
+ vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count);
+ vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count);
+ vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count);
+ vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count);
+ continue;
+ }
+
+ /* If necessary, store only partial vectors when the vector extends past lc_imax */
+
+ if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0))
+ {
+ ptrdiff_t const elt_count = lc_imax-i;
+ vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count);
+ vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count);
+ vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count);
+ vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count);
+ vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count);
+ vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count);
+ vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count);
+ vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count);
+ vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count);
+ vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count);
+ vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count);
+ vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count);
+ vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count);
+ vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count);
+ vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count);
+ vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count);
+ vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count);
+ vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count);
+ vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count);
+ break;
+ }
/* Copy local copies back to grid functions */
- alpharhs[index] = alpharhsL;
- Arhs[index] = ArhsL;
- B1rhs[index] = B1rhsL;
- B2rhs[index] = B2rhsL;
- B3rhs[index] = B3rhsL;
- beta1rhs[index] = beta1rhsL;
- beta2rhs[index] = beta2rhsL;
- beta3rhs[index] = beta3rhsL;
- gt11rhs[index] = gt11rhsL;
- gt12rhs[index] = gt12rhsL;
- gt13rhs[index] = gt13rhsL;
- gt22rhs[index] = gt22rhsL;
- gt23rhs[index] = gt23rhsL;
- gt33rhs[index] = gt33rhsL;
- phirhs[index] = phirhsL;
- trKrhs[index] = trKrhsL;
- Xt1rhs[index] = Xt1rhsL;
- Xt2rhs[index] = Xt2rhsL;
- Xt3rhs[index] = Xt3rhsL;
+ vec_store_nta(alpharhs[index],alpharhsL);
+ vec_store_nta(Arhs[index],ArhsL);
+ vec_store_nta(B1rhs[index],B1rhsL);
+ vec_store_nta(B2rhs[index],B2rhsL);
+ vec_store_nta(B3rhs[index],B3rhsL);
+ vec_store_nta(beta1rhs[index],beta1rhsL);
+ vec_store_nta(beta2rhs[index],beta2rhsL);
+ vec_store_nta(beta3rhs[index],beta3rhsL);
+ vec_store_nta(gt11rhs[index],gt11rhsL);
+ vec_store_nta(gt12rhs[index],gt12rhsL);
+ vec_store_nta(gt13rhs[index],gt13rhsL);
+ vec_store_nta(gt22rhs[index],gt22rhsL);
+ vec_store_nta(gt23rhs[index],gt23rhsL);
+ vec_store_nta(gt33rhs[index],gt33rhsL);
+ vec_store_nta(phirhs[index],phirhsL);
+ vec_store_nta(trKrhs[index],trKrhsL);
+ vec_store_nta(Xt1rhs[index],Xt1rhsL);
+ vec_store_nta(Xt2rhs[index],Xt2rhsL);
+ vec_store_nta(Xt3rhs[index],Xt3rhsL);
}
- LC_ENDLOOP3 (ML_BSSN_RHS1);
+ LC_ENDLOOP3VEC (ML_BSSN_RHS1);
}
extern "C" void ML_BSSN_RHS1(CCTK_ARGUMENTS)
diff --git a/ML_BSSN/src/ML_BSSN_RHS2.cc b/ML_BSSN/src/ML_BSSN_RHS2.cc
index e78e16c..8710f4b 100644
--- a/ML_BSSN/src/ML_BSSN_RHS2.cc
+++ b/ML_BSSN/src/ML_BSSN_RHS2.cc
@@ -13,13 +13,14 @@
#include "GenericFD.h"
#include "Differencing.h"
#include "loopcontrol.h"
+#include "vectors.h"
/* Define macros used in calculations */
#define INITVALUE (42)
#define QAD(x) (SQR(SQR(x)))
-#define INV(x) ((1.0) / (x))
-#define SQR(x) ((x) * (x))
-#define CUB(x) ((x) * (x) * (x))
+#define INV(x) (kdiv(ToReal(1.0),x))
+#define SQR(x) (kmul(x,x))
+#define CUB(x) (kmul(x,SQR(x)))
extern "C" void ML_BSSN_RHS2_SelectBCs(CCTK_ARGUMENTS)
{
@@ -65,84 +66,85 @@ static void ML_BSSN_RHS2_Body(cGH const * restrict const cctkGH, int const dir,
ptrdiff_t const cdi = sizeof(CCTK_REAL) * di;
ptrdiff_t const cdj = sizeof(CCTK_REAL) * dj;
ptrdiff_t const cdk = sizeof(CCTK_REAL) * dk;
- CCTK_REAL const dx = ToReal(CCTK_DELTA_SPACE(0));
- CCTK_REAL const dy = ToReal(CCTK_DELTA_SPACE(1));
- CCTK_REAL const dz = ToReal(CCTK_DELTA_SPACE(2));
- CCTK_REAL const dt = ToReal(CCTK_DELTA_TIME);
- CCTK_REAL const dxi = INV(dx);
- CCTK_REAL const dyi = INV(dy);
- CCTK_REAL const dzi = INV(dz);
- CCTK_REAL const khalf = 0.5;
- CCTK_REAL const kthird = 1/3.0;
- CCTK_REAL const ktwothird = 2.0/3.0;
- CCTK_REAL const kfourthird = 4.0/3.0;
- CCTK_REAL const keightthird = 8.0/3.0;
- CCTK_REAL const hdxi = 0.5 * dxi;
- CCTK_REAL const hdyi = 0.5 * dyi;
- CCTK_REAL const hdzi = 0.5 * dzi;
+ CCTK_REAL_VEC const dx = ToReal(CCTK_DELTA_SPACE(0));
+ CCTK_REAL_VEC const dy = ToReal(CCTK_DELTA_SPACE(1));
+ CCTK_REAL_VEC const dz = ToReal(CCTK_DELTA_SPACE(2));
+ CCTK_REAL_VEC const dt = ToReal(CCTK_DELTA_TIME);
+ CCTK_REAL_VEC const dxi = INV(dx);
+ CCTK_REAL_VEC const dyi = INV(dy);
+ CCTK_REAL_VEC const dzi = INV(dz);
+ CCTK_REAL_VEC const khalf = ToReal(0.5);
+ CCTK_REAL_VEC const kthird = ToReal(1.0/3.0);
+ CCTK_REAL_VEC const ktwothird = ToReal(2.0/3.0);
+ CCTK_REAL_VEC const kfourthird = ToReal(4.0/3.0);
+ CCTK_REAL_VEC const keightthird = ToReal(8.0/3.0);
+ CCTK_REAL_VEC const hdxi = kmul(ToReal(0.5), dxi);
+ CCTK_REAL_VEC const hdyi = kmul(ToReal(0.5), dyi);
+ CCTK_REAL_VEC const hdzi = kmul(ToReal(0.5), dzi);
/* Initialize predefined quantities */
- CCTK_REAL const p1o12dx = 0.0833333333333333333333333333333*INV(dx);
- CCTK_REAL const p1o12dy = 0.0833333333333333333333333333333*INV(dy);
- CCTK_REAL const p1o12dz = 0.0833333333333333333333333333333*INV(dz);
- CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx)*INV(dy);
- CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx)*INV(dz);
- CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy)*INV(dz);
- CCTK_REAL const p1o24dx = 0.0416666666666666666666666666667*INV(dx);
- CCTK_REAL const p1o24dy = 0.0416666666666666666666666666667*INV(dy);
- CCTK_REAL const p1o24dz = 0.0416666666666666666666666666667*INV(dz);
- CCTK_REAL const p1o64dx = 0.015625*INV(dx);
- CCTK_REAL const p1o64dy = 0.015625*INV(dy);
- CCTK_REAL const p1o64dz = 0.015625*INV(dz);
- CCTK_REAL const p1odx = INV(dx);
- CCTK_REAL const p1ody = INV(dy);
- CCTK_REAL const p1odz = INV(dz);
- CCTK_REAL const pm1o12dx2 = -0.0833333333333333333333333333333*INV(SQR(dx));
- CCTK_REAL const pm1o12dy2 = -0.0833333333333333333333333333333*INV(SQR(dy));
- CCTK_REAL const pm1o12dz2 = -0.0833333333333333333333333333333*INV(SQR(dz));
+ CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o24dx = kmul(INV(dx),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o24dy = kmul(INV(dy),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o24dz = kmul(INV(dz),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625));
+ CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625));
+ CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625));
+ CCTK_REAL_VEC const p1odx = INV(dx);
+ CCTK_REAL_VEC const p1ody = INV(dy);
+ CCTK_REAL_VEC const p1odz = INV(dz);
+ CCTK_REAL_VEC const pm1o12dx2 = kmul(INV(SQR(dx)),ToReal(-0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const pm1o12dy2 = kmul(INV(SQR(dy)),ToReal(-0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const pm1o12dz2 = kmul(INV(SQR(dz)),ToReal(-0.0833333333333333333333333333333));
/* Loop over the grid points */
#pragma omp parallel
- LC_LOOP3 (ML_BSSN_RHS2,
+ LC_LOOP3VEC (ML_BSSN_RHS2,
i,j,k, min[0],min[1],min[2], max[0],max[1],max[2],
- cctk_lsh[0],cctk_lsh[1],cctk_lsh[2])
+ cctk_lsh[0],cctk_lsh[1],cctk_lsh[2],
+ CCTK_REAL_VEC_SIZE)
{
ptrdiff_t const index = di*i + dj*j + dk*k;
/* Assign local copies of grid functions */
- CCTK_REAL alphaL = alpha[index];
- CCTK_REAL At11L = At11[index];
- CCTK_REAL At12L = At12[index];
- CCTK_REAL At13L = At13[index];
- CCTK_REAL At22L = At22[index];
- CCTK_REAL At23L = At23[index];
- CCTK_REAL At33L = At33[index];
- CCTK_REAL beta1L = beta1[index];
- CCTK_REAL beta2L = beta2[index];
- CCTK_REAL beta3L = beta3[index];
- CCTK_REAL gt11L = gt11[index];
- CCTK_REAL gt12L = gt12[index];
- CCTK_REAL gt13L = gt13[index];
- CCTK_REAL gt22L = gt22[index];
- CCTK_REAL gt23L = gt23[index];
- CCTK_REAL gt33L = gt33[index];
- CCTK_REAL phiL = phi[index];
- CCTK_REAL trKL = trK[index];
- CCTK_REAL Xt1L = Xt1[index];
- CCTK_REAL Xt2L = Xt2[index];
- CCTK_REAL Xt3L = Xt3[index];
-
- CCTK_REAL eTxxL, eTxyL, eTxzL, eTyyL, eTyzL, eTzzL;
+ CCTK_REAL_VEC alphaL = vec_load(alpha[index]);
+ CCTK_REAL_VEC At11L = vec_load(At11[index]);
+ CCTK_REAL_VEC At12L = vec_load(At12[index]);
+ CCTK_REAL_VEC At13L = vec_load(At13[index]);
+ CCTK_REAL_VEC At22L = vec_load(At22[index]);
+ CCTK_REAL_VEC At23L = vec_load(At23[index]);
+ CCTK_REAL_VEC At33L = vec_load(At33[index]);
+ CCTK_REAL_VEC beta1L = vec_load(beta1[index]);
+ CCTK_REAL_VEC beta2L = vec_load(beta2[index]);
+ CCTK_REAL_VEC beta3L = vec_load(beta3[index]);
+ CCTK_REAL_VEC gt11L = vec_load(gt11[index]);
+ CCTK_REAL_VEC gt12L = vec_load(gt12[index]);
+ CCTK_REAL_VEC gt13L = vec_load(gt13[index]);
+ CCTK_REAL_VEC gt22L = vec_load(gt22[index]);
+ CCTK_REAL_VEC gt23L = vec_load(gt23[index]);
+ CCTK_REAL_VEC gt33L = vec_load(gt33[index]);
+ CCTK_REAL_VEC phiL = vec_load(phi[index]);
+ CCTK_REAL_VEC trKL = vec_load(trK[index]);
+ CCTK_REAL_VEC Xt1L = vec_load(Xt1[index]);
+ CCTK_REAL_VEC Xt2L = vec_load(Xt2[index]);
+ CCTK_REAL_VEC Xt3L = vec_load(Xt3[index]);
+
+ CCTK_REAL_VEC eTxxL, eTxyL, eTxzL, eTyyL, eTyzL, eTzzL;
if (*stress_energy_state)
{
- eTxxL = eTxx[index];
- eTxyL = eTxy[index];
- eTxzL = eTxz[index];
- eTyyL = eTyy[index];
- eTyzL = eTyz[index];
- eTzzL = eTzz[index];
+ eTxxL = vec_load(eTxx[index]);
+ eTxyL = vec_load(eTxy[index]);
+ eTxzL = vec_load(eTxz[index]);
+ eTyyL = vec_load(eTyy[index]);
+ eTyzL = vec_load(eTyz[index]);
+ eTzzL = vec_load(eTzz[index]);
}
else
{
@@ -157,96 +159,96 @@ static void ML_BSSN_RHS2_Body(cGH const * restrict const cctkGH, int const dir,
/* Include user supplied include files */
/* Precompute derivatives */
- CCTK_REAL const PDstandardNth1alpha = PDstandardNth1(&alpha[index]);
- CCTK_REAL const PDstandardNth2alpha = PDstandardNth2(&alpha[index]);
- CCTK_REAL const PDstandardNth3alpha = PDstandardNth3(&alpha[index]);
- CCTK_REAL const PDstandardNth11alpha = PDstandardNth11(&alpha[index]);
- CCTK_REAL const PDstandardNth22alpha = PDstandardNth22(&alpha[index]);
- CCTK_REAL const PDstandardNth33alpha = PDstandardNth33(&alpha[index]);
- CCTK_REAL const PDstandardNth12alpha = PDstandardNth12(&alpha[index]);
- CCTK_REAL const PDstandardNth13alpha = PDstandardNth13(&alpha[index]);
- CCTK_REAL const PDstandardNth23alpha = PDstandardNth23(&alpha[index]);
- CCTK_REAL const PDstandardNth1beta1 = PDstandardNth1(&beta1[index]);
- CCTK_REAL const PDstandardNth2beta1 = PDstandardNth2(&beta1[index]);
- CCTK_REAL const PDstandardNth3beta1 = PDstandardNth3(&beta1[index]);
- CCTK_REAL const PDstandardNth1beta2 = PDstandardNth1(&beta2[index]);
- CCTK_REAL const PDstandardNth2beta2 = PDstandardNth2(&beta2[index]);
- CCTK_REAL const PDstandardNth3beta2 = PDstandardNth3(&beta2[index]);
- CCTK_REAL const PDstandardNth1beta3 = PDstandardNth1(&beta3[index]);
- CCTK_REAL const PDstandardNth2beta3 = PDstandardNth2(&beta3[index]);
- CCTK_REAL const PDstandardNth3beta3 = PDstandardNth3(&beta3[index]);
- CCTK_REAL const PDstandardNth1gt11 = PDstandardNth1(&gt11[index]);
- CCTK_REAL const PDstandardNth2gt11 = PDstandardNth2(&gt11[index]);
- CCTK_REAL const PDstandardNth3gt11 = PDstandardNth3(&gt11[index]);
- CCTK_REAL const PDstandardNth11gt11 = PDstandardNth11(&gt11[index]);
- CCTK_REAL const PDstandardNth22gt11 = PDstandardNth22(&gt11[index]);
- CCTK_REAL const PDstandardNth33gt11 = PDstandardNth33(&gt11[index]);
- CCTK_REAL const PDstandardNth12gt11 = PDstandardNth12(&gt11[index]);
- CCTK_REAL const PDstandardNth13gt11 = PDstandardNth13(&gt11[index]);
- CCTK_REAL const PDstandardNth23gt11 = PDstandardNth23(&gt11[index]);
- CCTK_REAL const PDstandardNth1gt12 = PDstandardNth1(&gt12[index]);
- CCTK_REAL const PDstandardNth2gt12 = PDstandardNth2(&gt12[index]);
- CCTK_REAL const PDstandardNth3gt12 = PDstandardNth3(&gt12[index]);
- CCTK_REAL const PDstandardNth11gt12 = PDstandardNth11(&gt12[index]);
- CCTK_REAL const PDstandardNth22gt12 = PDstandardNth22(&gt12[index]);
- CCTK_REAL const PDstandardNth33gt12 = PDstandardNth33(&gt12[index]);
- CCTK_REAL const PDstandardNth12gt12 = PDstandardNth12(&gt12[index]);
- CCTK_REAL const PDstandardNth13gt12 = PDstandardNth13(&gt12[index]);
- CCTK_REAL const PDstandardNth23gt12 = PDstandardNth23(&gt12[index]);
- CCTK_REAL const PDstandardNth1gt13 = PDstandardNth1(&gt13[index]);
- CCTK_REAL const PDstandardNth2gt13 = PDstandardNth2(&gt13[index]);
- CCTK_REAL const PDstandardNth3gt13 = PDstandardNth3(&gt13[index]);
- CCTK_REAL const PDstandardNth11gt13 = PDstandardNth11(&gt13[index]);
- CCTK_REAL const PDstandardNth22gt13 = PDstandardNth22(&gt13[index]);
- CCTK_REAL const PDstandardNth33gt13 = PDstandardNth33(&gt13[index]);
- CCTK_REAL const PDstandardNth12gt13 = PDstandardNth12(&gt13[index]);
- CCTK_REAL const PDstandardNth13gt13 = PDstandardNth13(&gt13[index]);
- CCTK_REAL const PDstandardNth23gt13 = PDstandardNth23(&gt13[index]);
- CCTK_REAL const PDstandardNth1gt22 = PDstandardNth1(&gt22[index]);
- CCTK_REAL const PDstandardNth2gt22 = PDstandardNth2(&gt22[index]);
- CCTK_REAL const PDstandardNth3gt22 = PDstandardNth3(&gt22[index]);
- CCTK_REAL const PDstandardNth11gt22 = PDstandardNth11(&gt22[index]);
- CCTK_REAL const PDstandardNth22gt22 = PDstandardNth22(&gt22[index]);
- CCTK_REAL const PDstandardNth33gt22 = PDstandardNth33(&gt22[index]);
- CCTK_REAL const PDstandardNth12gt22 = PDstandardNth12(&gt22[index]);
- CCTK_REAL const PDstandardNth13gt22 = PDstandardNth13(&gt22[index]);
- CCTK_REAL const PDstandardNth23gt22 = PDstandardNth23(&gt22[index]);
- CCTK_REAL const PDstandardNth1gt23 = PDstandardNth1(&gt23[index]);
- CCTK_REAL const PDstandardNth2gt23 = PDstandardNth2(&gt23[index]);
- CCTK_REAL const PDstandardNth3gt23 = PDstandardNth3(&gt23[index]);
- CCTK_REAL const PDstandardNth11gt23 = PDstandardNth11(&gt23[index]);
- CCTK_REAL const PDstandardNth22gt23 = PDstandardNth22(&gt23[index]);
- CCTK_REAL const PDstandardNth33gt23 = PDstandardNth33(&gt23[index]);
- CCTK_REAL const PDstandardNth12gt23 = PDstandardNth12(&gt23[index]);
- CCTK_REAL const PDstandardNth13gt23 = PDstandardNth13(&gt23[index]);
- CCTK_REAL const PDstandardNth23gt23 = PDstandardNth23(&gt23[index]);
- CCTK_REAL const PDstandardNth1gt33 = PDstandardNth1(&gt33[index]);
- CCTK_REAL const PDstandardNth2gt33 = PDstandardNth2(&gt33[index]);
- CCTK_REAL const PDstandardNth3gt33 = PDstandardNth3(&gt33[index]);
- CCTK_REAL const PDstandardNth11gt33 = PDstandardNth11(&gt33[index]);
- CCTK_REAL const PDstandardNth22gt33 = PDstandardNth22(&gt33[index]);
- CCTK_REAL const PDstandardNth33gt33 = PDstandardNth33(&gt33[index]);
- CCTK_REAL const PDstandardNth12gt33 = PDstandardNth12(&gt33[index]);
- CCTK_REAL const PDstandardNth13gt33 = PDstandardNth13(&gt33[index]);
- CCTK_REAL const PDstandardNth23gt33 = PDstandardNth23(&gt33[index]);
- CCTK_REAL const PDstandardNth1phi = PDstandardNth1(&phi[index]);
- CCTK_REAL const PDstandardNth2phi = PDstandardNth2(&phi[index]);
- CCTK_REAL const PDstandardNth3phi = PDstandardNth3(&phi[index]);
- CCTK_REAL const PDstandardNth11phi = PDstandardNth11(&phi[index]);
- CCTK_REAL const PDstandardNth22phi = PDstandardNth22(&phi[index]);
- CCTK_REAL const PDstandardNth33phi = PDstandardNth33(&phi[index]);
- CCTK_REAL const PDstandardNth12phi = PDstandardNth12(&phi[index]);
- CCTK_REAL const PDstandardNth13phi = PDstandardNth13(&phi[index]);
- CCTK_REAL const PDstandardNth23phi = PDstandardNth23(&phi[index]);
- CCTK_REAL const PDstandardNth1Xt1 = PDstandardNth1(&Xt1[index]);
- CCTK_REAL const PDstandardNth2Xt1 = PDstandardNth2(&Xt1[index]);
- CCTK_REAL const PDstandardNth3Xt1 = PDstandardNth3(&Xt1[index]);
- CCTK_REAL const PDstandardNth1Xt2 = PDstandardNth1(&Xt2[index]);
- CCTK_REAL const PDstandardNth2Xt2 = PDstandardNth2(&Xt2[index]);
- CCTK_REAL const PDstandardNth3Xt2 = PDstandardNth3(&Xt2[index]);
- CCTK_REAL const PDstandardNth1Xt3 = PDstandardNth1(&Xt3[index]);
- CCTK_REAL const PDstandardNth2Xt3 = PDstandardNth2(&Xt3[index]);
- CCTK_REAL const PDstandardNth3Xt3 = PDstandardNth3(&Xt3[index]);
+ CCTK_REAL_VEC const PDstandardNth1alpha = PDstandardNth1(&alpha[index]);
+ CCTK_REAL_VEC const PDstandardNth2alpha = PDstandardNth2(&alpha[index]);
+ CCTK_REAL_VEC const PDstandardNth3alpha = PDstandardNth3(&alpha[index]);
+ CCTK_REAL_VEC const PDstandardNth11alpha = PDstandardNth11(&alpha[index]);
+ CCTK_REAL_VEC const PDstandardNth22alpha = PDstandardNth22(&alpha[index]);
+ CCTK_REAL_VEC const PDstandardNth33alpha = PDstandardNth33(&alpha[index]);
+ CCTK_REAL_VEC const PDstandardNth12alpha = PDstandardNth12(&alpha[index]);
+ CCTK_REAL_VEC const PDstandardNth13alpha = PDstandardNth13(&alpha[index]);
+ CCTK_REAL_VEC const PDstandardNth23alpha = PDstandardNth23(&alpha[index]);
+ CCTK_REAL_VEC const PDstandardNth1beta1 = PDstandardNth1(&beta1[index]);
+ CCTK_REAL_VEC const PDstandardNth2beta1 = PDstandardNth2(&beta1[index]);
+ CCTK_REAL_VEC const PDstandardNth3beta1 = PDstandardNth3(&beta1[index]);
+ CCTK_REAL_VEC const PDstandardNth1beta2 = PDstandardNth1(&beta2[index]);
+ CCTK_REAL_VEC const PDstandardNth2beta2 = PDstandardNth2(&beta2[index]);
+ CCTK_REAL_VEC const PDstandardNth3beta2 = PDstandardNth3(&beta2[index]);
+ CCTK_REAL_VEC const PDstandardNth1beta3 = PDstandardNth1(&beta3[index]);
+ CCTK_REAL_VEC const PDstandardNth2beta3 = PDstandardNth2(&beta3[index]);
+ CCTK_REAL_VEC const PDstandardNth3beta3 = PDstandardNth3(&beta3[index]);
+ CCTK_REAL_VEC const PDstandardNth1gt11 = PDstandardNth1(&gt11[index]);
+ CCTK_REAL_VEC const PDstandardNth2gt11 = PDstandardNth2(&gt11[index]);
+ CCTK_REAL_VEC const PDstandardNth3gt11 = PDstandardNth3(&gt11[index]);
+ CCTK_REAL_VEC const PDstandardNth11gt11 = PDstandardNth11(&gt11[index]);
+ CCTK_REAL_VEC const PDstandardNth22gt11 = PDstandardNth22(&gt11[index]);
+ CCTK_REAL_VEC const PDstandardNth33gt11 = PDstandardNth33(&gt11[index]);
+ CCTK_REAL_VEC const PDstandardNth12gt11 = PDstandardNth12(&gt11[index]);
+ CCTK_REAL_VEC const PDstandardNth13gt11 = PDstandardNth13(&gt11[index]);
+ CCTK_REAL_VEC const PDstandardNth23gt11 = PDstandardNth23(&gt11[index]);
+ CCTK_REAL_VEC const PDstandardNth1gt12 = PDstandardNth1(&gt12[index]);
+ CCTK_REAL_VEC const PDstandardNth2gt12 = PDstandardNth2(&gt12[index]);
+ CCTK_REAL_VEC const PDstandardNth3gt12 = PDstandardNth3(&gt12[index]);
+ CCTK_REAL_VEC const PDstandardNth11gt12 = PDstandardNth11(&gt12[index]);
+ CCTK_REAL_VEC const PDstandardNth22gt12 = PDstandardNth22(&gt12[index]);
+ CCTK_REAL_VEC const PDstandardNth33gt12 = PDstandardNth33(&gt12[index]);
+ CCTK_REAL_VEC const PDstandardNth12gt12 = PDstandardNth12(&gt12[index]);
+ CCTK_REAL_VEC const PDstandardNth13gt12 = PDstandardNth13(&gt12[index]);
+ CCTK_REAL_VEC const PDstandardNth23gt12 = PDstandardNth23(&gt12[index]);
+ CCTK_REAL_VEC const PDstandardNth1gt13 = PDstandardNth1(&gt13[index]);
+ CCTK_REAL_VEC const PDstandardNth2gt13 = PDstandardNth2(&gt13[index]);
+ CCTK_REAL_VEC const PDstandardNth3gt13 = PDstandardNth3(&gt13[index]);
+ CCTK_REAL_VEC const PDstandardNth11gt13 = PDstandardNth11(&gt13[index]);
+ CCTK_REAL_VEC const PDstandardNth22gt13 = PDstandardNth22(&gt13[index]);
+ CCTK_REAL_VEC const PDstandardNth33gt13 = PDstandardNth33(&gt13[index]);
+ CCTK_REAL_VEC const PDstandardNth12gt13 = PDstandardNth12(&gt13[index]);
+ CCTK_REAL_VEC const PDstandardNth13gt13 = PDstandardNth13(&gt13[index]);
+ CCTK_REAL_VEC const PDstandardNth23gt13 = PDstandardNth23(&gt13[index]);
+ CCTK_REAL_VEC const PDstandardNth1gt22 = PDstandardNth1(&gt22[index]);
+ CCTK_REAL_VEC const PDstandardNth2gt22 = PDstandardNth2(&gt22[index]);
+ CCTK_REAL_VEC const PDstandardNth3gt22 = PDstandardNth3(&gt22[index]);
+ CCTK_REAL_VEC const PDstandardNth11gt22 = PDstandardNth11(&gt22[index]);
+ CCTK_REAL_VEC const PDstandardNth22gt22 = PDstandardNth22(&gt22[index]);
+ CCTK_REAL_VEC const PDstandardNth33gt22 = PDstandardNth33(&gt22[index]);
+ CCTK_REAL_VEC const PDstandardNth12gt22 = PDstandardNth12(&gt22[index]);
+ CCTK_REAL_VEC const PDstandardNth13gt22 = PDstandardNth13(&gt22[index]);
+ CCTK_REAL_VEC const PDstandardNth23gt22 = PDstandardNth23(&gt22[index]);
+ CCTK_REAL_VEC const PDstandardNth1gt23 = PDstandardNth1(&gt23[index]);
+ CCTK_REAL_VEC const PDstandardNth2gt23 = PDstandardNth2(&gt23[index]);
+ CCTK_REAL_VEC const PDstandardNth3gt23 = PDstandardNth3(&gt23[index]);
+ CCTK_REAL_VEC const PDstandardNth11gt23 = PDstandardNth11(&gt23[index]);
+ CCTK_REAL_VEC const PDstandardNth22gt23 = PDstandardNth22(&gt23[index]);
+ CCTK_REAL_VEC const PDstandardNth33gt23 = PDstandardNth33(&gt23[index]);
+ CCTK_REAL_VEC const PDstandardNth12gt23 = PDstandardNth12(&gt23[index]);
+ CCTK_REAL_VEC const PDstandardNth13gt23 = PDstandardNth13(&gt23[index]);
+ CCTK_REAL_VEC const PDstandardNth23gt23 = PDstandardNth23(&gt23[index]);
+ CCTK_REAL_VEC const PDstandardNth1gt33 = PDstandardNth1(&gt33[index]);
+ CCTK_REAL_VEC const PDstandardNth2gt33 = PDstandardNth2(&gt33[index]);
+ CCTK_REAL_VEC const PDstandardNth3gt33 = PDstandardNth3(&gt33[index]);
+ CCTK_REAL_VEC const PDstandardNth11gt33 = PDstandardNth11(&gt33[index]);
+ CCTK_REAL_VEC const PDstandardNth22gt33 = PDstandardNth22(&gt33[index]);
+ CCTK_REAL_VEC const PDstandardNth33gt33 = PDstandardNth33(&gt33[index]);
+ CCTK_REAL_VEC const PDstandardNth12gt33 = PDstandardNth12(&gt33[index]);
+ CCTK_REAL_VEC const PDstandardNth13gt33 = PDstandardNth13(&gt33[index]);
+ CCTK_REAL_VEC const PDstandardNth23gt33 = PDstandardNth23(&gt33[index]);
+ CCTK_REAL_VEC const PDstandardNth1phi = PDstandardNth1(&phi[index]);
+ CCTK_REAL_VEC const PDstandardNth2phi = PDstandardNth2(&phi[index]);
+ CCTK_REAL_VEC const PDstandardNth3phi = PDstandardNth3(&phi[index]);
+ CCTK_REAL_VEC const PDstandardNth11phi = PDstandardNth11(&phi[index]);
+ CCTK_REAL_VEC const PDstandardNth22phi = PDstandardNth22(&phi[index]);
+ CCTK_REAL_VEC const PDstandardNth33phi = PDstandardNth33(&phi[index]);
+ CCTK_REAL_VEC const PDstandardNth12phi = PDstandardNth12(&phi[index]);
+ CCTK_REAL_VEC const PDstandardNth13phi = PDstandardNth13(&phi[index]);
+ CCTK_REAL_VEC const PDstandardNth23phi = PDstandardNth23(&phi[index]);
+ CCTK_REAL_VEC const PDstandardNth1Xt1 = PDstandardNth1(&Xt1[index]);
+ CCTK_REAL_VEC const PDstandardNth2Xt1 = PDstandardNth2(&Xt1[index]);
+ CCTK_REAL_VEC const PDstandardNth3Xt1 = PDstandardNth3(&Xt1[index]);
+ CCTK_REAL_VEC const PDstandardNth1Xt2 = PDstandardNth1(&Xt2[index]);
+ CCTK_REAL_VEC const PDstandardNth2Xt2 = PDstandardNth2(&Xt2[index]);
+ CCTK_REAL_VEC const PDstandardNth3Xt2 = PDstandardNth3(&Xt2[index]);
+ CCTK_REAL_VEC const PDstandardNth1Xt3 = PDstandardNth1(&Xt3[index]);
+ CCTK_REAL_VEC const PDstandardNth2Xt3 = PDstandardNth2(&Xt3[index]);
+ CCTK_REAL_VEC const PDstandardNth3Xt3 = PDstandardNth3(&Xt3[index]);
/* Calculate temporaries and grid functions */
ptrdiff_t dir1 = Sign(beta1L);
@@ -255,465 +257,440 @@ static void ML_BSSN_RHS2_Body(cGH const * restrict const cctkGH, int const dir,
ptrdiff_t dir3 = Sign(beta3L);
- CCTK_REAL detgt = 1;
+ CCTK_REAL_VEC detgt = ToReal(1);
- CCTK_REAL gtu11 = INV(detgt)*(gt22L*gt33L - SQR(gt23L));
+ CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L)));
- CCTK_REAL gtu12 = (gt13L*gt23L - gt12L*gt33L)*INV(detgt);
+ CCTK_REAL_VEC gtu12 =
+ kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L)));
- CCTK_REAL gtu13 = (-(gt13L*gt22L) + gt12L*gt23L)*INV(detgt);
+ CCTK_REAL_VEC gtu13 =
+ kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L)));
- CCTK_REAL gtu22 = INV(detgt)*(gt11L*gt33L - SQR(gt13L));
+ CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L)));
- CCTK_REAL gtu23 = (gt12L*gt13L - gt11L*gt23L)*INV(detgt);
+ CCTK_REAL_VEC gtu23 =
+ kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L)));
- CCTK_REAL gtu33 = INV(detgt)*(gt11L*gt22L - SQR(gt12L));
+ CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L)));
- CCTK_REAL Gtl111 = 0.5*PDstandardNth1gt11;
+ CCTK_REAL_VEC Gtl111 = kmul(PDstandardNth1gt11,ToReal(0.5));
- CCTK_REAL Gtl112 = 0.5*PDstandardNth2gt11;
+ CCTK_REAL_VEC Gtl112 = kmul(PDstandardNth2gt11,ToReal(0.5));
- CCTK_REAL Gtl113 = 0.5*PDstandardNth3gt11;
+ CCTK_REAL_VEC Gtl113 = kmul(PDstandardNth3gt11,ToReal(0.5));
- CCTK_REAL Gtl122 = -0.5*PDstandardNth1gt22 + PDstandardNth2gt12;
+ CCTK_REAL_VEC Gtl122 =
+ kmadd(PDstandardNth1gt22,ToReal(-0.5),PDstandardNth2gt12);
- CCTK_REAL Gtl123 = 0.5*(-PDstandardNth1gt23 + PDstandardNth2gt13 +
- PDstandardNth3gt12);
+ CCTK_REAL_VEC Gtl123 =
+ kmul(kadd(PDstandardNth2gt13,ksub(PDstandardNth3gt12,PDstandardNth1gt23)),ToReal(0.5));
- CCTK_REAL Gtl133 = -0.5*PDstandardNth1gt33 + PDstandardNth3gt13;
+ CCTK_REAL_VEC Gtl133 =
+ kmadd(PDstandardNth1gt33,ToReal(-0.5),PDstandardNth3gt13);
- CCTK_REAL Gtl211 = PDstandardNth1gt12 - 0.5*PDstandardNth2gt11;
+ CCTK_REAL_VEC Gtl211 =
+ kmadd(PDstandardNth2gt11,ToReal(-0.5),PDstandardNth1gt12);
- CCTK_REAL Gtl212 = 0.5*PDstandardNth1gt22;
+ CCTK_REAL_VEC Gtl212 = kmul(PDstandardNth1gt22,ToReal(0.5));
- CCTK_REAL Gtl213 = 0.5*(PDstandardNth1gt23 - PDstandardNth2gt13 +
- PDstandardNth3gt12);
+ CCTK_REAL_VEC Gtl213 =
+ kmul(kadd(PDstandardNth1gt23,ksub(PDstandardNth3gt12,PDstandardNth2gt13)),ToReal(0.5));
- CCTK_REAL Gtl222 = 0.5*PDstandardNth2gt22;
+ CCTK_REAL_VEC Gtl222 = kmul(PDstandardNth2gt22,ToReal(0.5));
- CCTK_REAL Gtl223 = 0.5*PDstandardNth3gt22;
+ CCTK_REAL_VEC Gtl223 = kmul(PDstandardNth3gt22,ToReal(0.5));
- CCTK_REAL Gtl233 = -0.5*PDstandardNth2gt33 + PDstandardNth3gt23;
+ CCTK_REAL_VEC Gtl233 =
+ kmadd(PDstandardNth2gt33,ToReal(-0.5),PDstandardNth3gt23);
- CCTK_REAL Gtl311 = PDstandardNth1gt13 - 0.5*PDstandardNth3gt11;
+ CCTK_REAL_VEC Gtl311 =
+ kmadd(PDstandardNth3gt11,ToReal(-0.5),PDstandardNth1gt13);
- CCTK_REAL Gtl312 = 0.5*(PDstandardNth1gt23 + PDstandardNth2gt13 -
- PDstandardNth3gt12);
+ CCTK_REAL_VEC Gtl312 =
+ kmul(kadd(PDstandardNth1gt23,ksub(PDstandardNth2gt13,PDstandardNth3gt12)),ToReal(0.5));
- CCTK_REAL Gtl313 = 0.5*PDstandardNth1gt33;
+ CCTK_REAL_VEC Gtl313 = kmul(PDstandardNth1gt33,ToReal(0.5));
- CCTK_REAL Gtl322 = PDstandardNth2gt23 - 0.5*PDstandardNth3gt22;
+ CCTK_REAL_VEC Gtl322 =
+ kmadd(PDstandardNth3gt22,ToReal(-0.5),PDstandardNth2gt23);
- CCTK_REAL Gtl323 = 0.5*PDstandardNth2gt33;
+ CCTK_REAL_VEC Gtl323 = kmul(PDstandardNth2gt33,ToReal(0.5));
- CCTK_REAL Gtl333 = 0.5*PDstandardNth3gt33;
+ CCTK_REAL_VEC Gtl333 = kmul(PDstandardNth3gt33,ToReal(0.5));
- CCTK_REAL Gtlu111 = Gtl111*gtu11 + Gtl112*gtu12 + Gtl113*gtu13;
+ CCTK_REAL_VEC Gtlu111 =
+ kmadd(Gtl111,gtu11,kmadd(Gtl112,gtu12,kmul(Gtl113,gtu13)));
- CCTK_REAL Gtlu112 = Gtl111*gtu12 + Gtl112*gtu22 + Gtl113*gtu23;
+ CCTK_REAL_VEC Gtlu112 =
+ kmadd(Gtl111,gtu12,kmadd(Gtl112,gtu22,kmul(Gtl113,gtu23)));
- CCTK_REAL Gtlu113 = Gtl111*gtu13 + Gtl112*gtu23 + Gtl113*gtu33;
+ CCTK_REAL_VEC Gtlu113 =
+ kmadd(Gtl111,gtu13,kmadd(Gtl112,gtu23,kmul(Gtl113,gtu33)));
- CCTK_REAL Gtlu121 = Gtl112*gtu11 + Gtl122*gtu12 + Gtl123*gtu13;
+ CCTK_REAL_VEC Gtlu121 =
+ kmadd(Gtl112,gtu11,kmadd(Gtl122,gtu12,kmul(Gtl123,gtu13)));
- CCTK_REAL Gtlu122 = Gtl112*gtu12 + Gtl122*gtu22 + Gtl123*gtu23;
+ CCTK_REAL_VEC Gtlu122 =
+ kmadd(Gtl112,gtu12,kmadd(Gtl122,gtu22,kmul(Gtl123,gtu23)));
- CCTK_REAL Gtlu123 = Gtl112*gtu13 + Gtl122*gtu23 + Gtl123*gtu33;
+ CCTK_REAL_VEC Gtlu123 =
+ kmadd(Gtl112,gtu13,kmadd(Gtl122,gtu23,kmul(Gtl123,gtu33)));
- CCTK_REAL Gtlu131 = Gtl113*gtu11 + Gtl123*gtu12 + Gtl133*gtu13;
+ CCTK_REAL_VEC Gtlu131 =
+ kmadd(Gtl113,gtu11,kmadd(Gtl123,gtu12,kmul(Gtl133,gtu13)));
- CCTK_REAL Gtlu132 = Gtl113*gtu12 + Gtl123*gtu22 + Gtl133*gtu23;
+ CCTK_REAL_VEC Gtlu132 =
+ kmadd(Gtl113,gtu12,kmadd(Gtl123,gtu22,kmul(Gtl133,gtu23)));
- CCTK_REAL Gtlu133 = Gtl113*gtu13 + Gtl123*gtu23 + Gtl133*gtu33;
+ CCTK_REAL_VEC Gtlu133 =
+ kmadd(Gtl113,gtu13,kmadd(Gtl123,gtu23,kmul(Gtl133,gtu33)));
- CCTK_REAL Gtlu211 = Gtl211*gtu11 + Gtl212*gtu12 + Gtl213*gtu13;
+ CCTK_REAL_VEC Gtlu211 =
+ kmadd(Gtl211,gtu11,kmadd(Gtl212,gtu12,kmul(Gtl213,gtu13)));
- CCTK_REAL Gtlu212 = Gtl211*gtu12 + Gtl212*gtu22 + Gtl213*gtu23;
+ CCTK_REAL_VEC Gtlu212 =
+ kmadd(Gtl211,gtu12,kmadd(Gtl212,gtu22,kmul(Gtl213,gtu23)));
- CCTK_REAL Gtlu213 = Gtl211*gtu13 + Gtl212*gtu23 + Gtl213*gtu33;
+ CCTK_REAL_VEC Gtlu213 =
+ kmadd(Gtl211,gtu13,kmadd(Gtl212,gtu23,kmul(Gtl213,gtu33)));
- CCTK_REAL Gtlu221 = Gtl212*gtu11 + Gtl222*gtu12 + Gtl223*gtu13;
+ CCTK_REAL_VEC Gtlu221 =
+ kmadd(Gtl212,gtu11,kmadd(Gtl222,gtu12,kmul(Gtl223,gtu13)));
- CCTK_REAL Gtlu222 = Gtl212*gtu12 + Gtl222*gtu22 + Gtl223*gtu23;
+ CCTK_REAL_VEC Gtlu222 =
+ kmadd(Gtl212,gtu12,kmadd(Gtl222,gtu22,kmul(Gtl223,gtu23)));
- CCTK_REAL Gtlu223 = Gtl212*gtu13 + Gtl222*gtu23 + Gtl223*gtu33;
+ CCTK_REAL_VEC Gtlu223 =
+ kmadd(Gtl212,gtu13,kmadd(Gtl222,gtu23,kmul(Gtl223,gtu33)));
- CCTK_REAL Gtlu231 = Gtl213*gtu11 + Gtl223*gtu12 + Gtl233*gtu13;
+ CCTK_REAL_VEC Gtlu231 =
+ kmadd(Gtl213,gtu11,kmadd(Gtl223,gtu12,kmul(Gtl233,gtu13)));
- CCTK_REAL Gtlu232 = Gtl213*gtu12 + Gtl223*gtu22 + Gtl233*gtu23;
+ CCTK_REAL_VEC Gtlu232 =
+ kmadd(Gtl213,gtu12,kmadd(Gtl223,gtu22,kmul(Gtl233,gtu23)));
- CCTK_REAL Gtlu233 = Gtl213*gtu13 + Gtl223*gtu23 + Gtl233*gtu33;
+ CCTK_REAL_VEC Gtlu233 =
+ kmadd(Gtl213,gtu13,kmadd(Gtl223,gtu23,kmul(Gtl233,gtu33)));
- CCTK_REAL Gtlu311 = Gtl311*gtu11 + Gtl312*gtu12 + Gtl313*gtu13;
+ CCTK_REAL_VEC Gtlu311 =
+ kmadd(Gtl311,gtu11,kmadd(Gtl312,gtu12,kmul(Gtl313,gtu13)));
- CCTK_REAL Gtlu312 = Gtl311*gtu12 + Gtl312*gtu22 + Gtl313*gtu23;
+ CCTK_REAL_VEC Gtlu312 =
+ kmadd(Gtl311,gtu12,kmadd(Gtl312,gtu22,kmul(Gtl313,gtu23)));
- CCTK_REAL Gtlu313 = Gtl311*gtu13 + Gtl312*gtu23 + Gtl313*gtu33;
+ CCTK_REAL_VEC Gtlu313 =
+ kmadd(Gtl311,gtu13,kmadd(Gtl312,gtu23,kmul(Gtl313,gtu33)));
- CCTK_REAL Gtlu321 = Gtl312*gtu11 + Gtl322*gtu12 + Gtl323*gtu13;
+ CCTK_REAL_VEC Gtlu321 =
+ kmadd(Gtl312,gtu11,kmadd(Gtl322,gtu12,kmul(Gtl323,gtu13)));
- CCTK_REAL Gtlu322 = Gtl312*gtu12 + Gtl322*gtu22 + Gtl323*gtu23;
+ CCTK_REAL_VEC Gtlu322 =
+ kmadd(Gtl312,gtu12,kmadd(Gtl322,gtu22,kmul(Gtl323,gtu23)));
- CCTK_REAL Gtlu323 = Gtl312*gtu13 + Gtl322*gtu23 + Gtl323*gtu33;
+ CCTK_REAL_VEC Gtlu323 =
+ kmadd(Gtl312,gtu13,kmadd(Gtl322,gtu23,kmul(Gtl323,gtu33)));
- CCTK_REAL Gtlu331 = Gtl313*gtu11 + Gtl323*gtu12 + Gtl333*gtu13;
+ CCTK_REAL_VEC Gtlu331 =
+ kmadd(Gtl313,gtu11,kmadd(Gtl323,gtu12,kmul(Gtl333,gtu13)));
- CCTK_REAL Gtlu332 = Gtl313*gtu12 + Gtl323*gtu22 + Gtl333*gtu23;
+ CCTK_REAL_VEC Gtlu332 =
+ kmadd(Gtl313,gtu12,kmadd(Gtl323,gtu22,kmul(Gtl333,gtu23)));
- CCTK_REAL Gtlu333 = Gtl313*gtu13 + Gtl323*gtu23 + Gtl333*gtu33;
+ CCTK_REAL_VEC Gtlu333 =
+ kmadd(Gtl313,gtu13,kmadd(Gtl323,gtu23,kmul(Gtl333,gtu33)));
- CCTK_REAL Gt111 = Gtl111*gtu11 + Gtl211*gtu12 + Gtl311*gtu13;
+ CCTK_REAL_VEC Gt111 =
+ kmadd(Gtl111,gtu11,kmadd(Gtl211,gtu12,kmul(Gtl311,gtu13)));
- CCTK_REAL Gt211 = Gtl111*gtu12 + Gtl211*gtu22 + Gtl311*gtu23;
+ CCTK_REAL_VEC Gt211 =
+ kmadd(Gtl111,gtu12,kmadd(Gtl211,gtu22,kmul(Gtl311,gtu23)));
- CCTK_REAL Gt311 = Gtl111*gtu13 + Gtl211*gtu23 + Gtl311*gtu33;
+ CCTK_REAL_VEC Gt311 =
+ kmadd(Gtl111,gtu13,kmadd(Gtl211,gtu23,kmul(Gtl311,gtu33)));
- CCTK_REAL Gt112 = Gtl112*gtu11 + Gtl212*gtu12 + Gtl312*gtu13;
+ CCTK_REAL_VEC Gt112 =
+ kmadd(Gtl112,gtu11,kmadd(Gtl212,gtu12,kmul(Gtl312,gtu13)));
- CCTK_REAL Gt212 = Gtl112*gtu12 + Gtl212*gtu22 + Gtl312*gtu23;
+ CCTK_REAL_VEC Gt212 =
+ kmadd(Gtl112,gtu12,kmadd(Gtl212,gtu22,kmul(Gtl312,gtu23)));
- CCTK_REAL Gt312 = Gtl112*gtu13 + Gtl212*gtu23 + Gtl312*gtu33;
+ CCTK_REAL_VEC Gt312 =
+ kmadd(Gtl112,gtu13,kmadd(Gtl212,gtu23,kmul(Gtl312,gtu33)));
- CCTK_REAL Gt113 = Gtl113*gtu11 + Gtl213*gtu12 + Gtl313*gtu13;
+ CCTK_REAL_VEC Gt113 =
+ kmadd(Gtl113,gtu11,kmadd(Gtl213,gtu12,kmul(Gtl313,gtu13)));
- CCTK_REAL Gt213 = Gtl113*gtu12 + Gtl213*gtu22 + Gtl313*gtu23;
+ CCTK_REAL_VEC Gt213 =
+ kmadd(Gtl113,gtu12,kmadd(Gtl213,gtu22,kmul(Gtl313,gtu23)));
- CCTK_REAL Gt313 = Gtl113*gtu13 + Gtl213*gtu23 + Gtl313*gtu33;
+ CCTK_REAL_VEC Gt313 =
+ kmadd(Gtl113,gtu13,kmadd(Gtl213,gtu23,kmul(Gtl313,gtu33)));
- CCTK_REAL Gt122 = Gtl122*gtu11 + Gtl222*gtu12 + Gtl322*gtu13;
+ CCTK_REAL_VEC Gt122 =
+ kmadd(Gtl122,gtu11,kmadd(Gtl222,gtu12,kmul(Gtl322,gtu13)));
- CCTK_REAL Gt222 = Gtl122*gtu12 + Gtl222*gtu22 + Gtl322*gtu23;
+ CCTK_REAL_VEC Gt222 =
+ kmadd(Gtl122,gtu12,kmadd(Gtl222,gtu22,kmul(Gtl322,gtu23)));
- CCTK_REAL Gt322 = Gtl122*gtu13 + Gtl222*gtu23 + Gtl322*gtu33;
+ CCTK_REAL_VEC Gt322 =
+ kmadd(Gtl122,gtu13,kmadd(Gtl222,gtu23,kmul(Gtl322,gtu33)));
- CCTK_REAL Gt123 = Gtl123*gtu11 + Gtl223*gtu12 + Gtl323*gtu13;
+ CCTK_REAL_VEC Gt123 =
+ kmadd(Gtl123,gtu11,kmadd(Gtl223,gtu12,kmul(Gtl323,gtu13)));
- CCTK_REAL Gt223 = Gtl123*gtu12 + Gtl223*gtu22 + Gtl323*gtu23;
+ CCTK_REAL_VEC Gt223 =
+ kmadd(Gtl123,gtu12,kmadd(Gtl223,gtu22,kmul(Gtl323,gtu23)));
- CCTK_REAL Gt323 = Gtl123*gtu13 + Gtl223*gtu23 + Gtl323*gtu33;
+ CCTK_REAL_VEC Gt323 =
+ kmadd(Gtl123,gtu13,kmadd(Gtl223,gtu23,kmul(Gtl323,gtu33)));
- CCTK_REAL Gt133 = Gtl133*gtu11 + Gtl233*gtu12 + Gtl333*gtu13;
+ CCTK_REAL_VEC Gt133 =
+ kmadd(Gtl133,gtu11,kmadd(Gtl233,gtu12,kmul(Gtl333,gtu13)));
- CCTK_REAL Gt233 = Gtl133*gtu12 + Gtl233*gtu22 + Gtl333*gtu23;
+ CCTK_REAL_VEC Gt233 =
+ kmadd(Gtl133,gtu12,kmadd(Gtl233,gtu22,kmul(Gtl333,gtu23)));
- CCTK_REAL Gt333 = Gtl133*gtu13 + Gtl233*gtu23 + Gtl333*gtu33;
+ CCTK_REAL_VEC Gt333 =
+ kmadd(Gtl133,gtu13,kmadd(Gtl233,gtu23,kmul(Gtl333,gtu33)));
- CCTK_REAL Xtn1 = Gt111*gtu11 + Gt122*gtu22 + 2*(Gt112*gtu12 +
- Gt113*gtu13 + Gt123*gtu23) + Gt133*gtu33;
+ CCTK_REAL_VEC Xtn1 =
+ kmadd(Gt111,gtu11,kmadd(Gt122,gtu22,kmadd(Gt133,gtu33,kmul(kmadd(Gt112,gtu12,kmadd(Gt113,gtu13,kmul(Gt123,gtu23))),ToReal(2)))));
- CCTK_REAL Xtn2 = Gt211*gtu11 + Gt222*gtu22 + 2*(Gt212*gtu12 +
- Gt213*gtu13 + Gt223*gtu23) + Gt233*gtu33;
+ CCTK_REAL_VEC Xtn2 =
+ kmadd(Gt211,gtu11,kmadd(Gt222,gtu22,kmadd(Gt233,gtu33,kmul(kmadd(Gt212,gtu12,kmadd(Gt213,gtu13,kmul(Gt223,gtu23))),ToReal(2)))));
- CCTK_REAL Xtn3 = Gt311*gtu11 + Gt322*gtu22 + 2*(Gt312*gtu12 +
- Gt313*gtu13 + Gt323*gtu23) + Gt333*gtu33;
+ CCTK_REAL_VEC Xtn3 =
+ kmadd(Gt311,gtu11,kmadd(Gt322,gtu22,kmadd(Gt333,gtu33,kmul(kmadd(Gt312,gtu12,kmadd(Gt313,gtu13,kmul(Gt323,gtu23))),ToReal(2)))));
- CCTK_REAL Rt11 = 0.5*(6*(Gt111*Gtlu111 + Gt112*Gtlu112 +
- Gt113*Gtlu113) + 4*(Gt211*Gtlu121 + Gt212*Gtlu122 + Gt213*Gtlu123 +
- Gt311*Gtlu131 + Gt312*Gtlu132 + Gt313*Gtlu133) -
- gtu11*PDstandardNth11gt11 - 2*gtu12*PDstandardNth12gt11 -
- 2*gtu13*PDstandardNth13gt11 + 2*(Gt211*Gtlu211 + Gt212*Gtlu212 +
- Gt213*Gtlu213 + Gt311*Gtlu311 + Gt312*Gtlu312 + Gt313*Gtlu313 +
- gt11L*PDstandardNth1Xt1) + 2*gt12L*PDstandardNth1Xt2 +
- 2*gt13L*PDstandardNth1Xt3 - gtu22*PDstandardNth22gt11 -
- 2*gtu23*PDstandardNth23gt11 - gtu33*PDstandardNth33gt11 + 2*Gtl111*Xtn1
- + 2*Gtl112*Xtn2 + 2*Gtl113*Xtn3);
+ CCTK_REAL_VEC Rt11 =
+ kmul(ToReal(0.5),kmadd(gtu12,kmul(PDstandardNth12gt11,ToReal(-2)),kmadd(gtu13,kmul(PDstandardNth13gt11,ToReal(-2)),kmadd(gtu23,kmul(PDstandardNth23gt11,ToReal(-2)),knmsub(gtu11,PDstandardNth11gt11,knmsub(gtu22,PDstandardNth22gt11,knmsub(gtu33,PDstandardNth33gt11,kmadd(kmadd(Gt211,Gtlu211,kmadd(Gt212,Gtlu212,kmadd(Gt213,Gtlu213,kmadd(Gt311,Gtlu311,kmadd(Gt312,Gtlu312,kmadd(Gt313,Gtlu313,kmul(gt11L,PDstandardNth1Xt1))))))),ToReal(2),kmadd(gt12L,kmul(PDstandardNth1Xt2,ToReal(2)),kmadd(gt13L,kmul(PDstandardNth1Xt3,ToReal(2)),kmadd(Gtl111,kmul(Xtn1,ToReal(2)),kmadd(Gtl112,kmul(Xtn2,ToReal(2)),kmadd(Gtl113,kmul(Xtn3,ToReal(2)),kmadd(kmadd(Gt211,Gtlu121,kmadd(Gt212,Gtlu122,kmadd(Gt213,Gtlu123,kmadd(Gt311,Gtlu131,kmadd(Gt312,Gtlu132,kmul(Gt313,Gtlu133)))))),ToReal(4),kmul(kmadd(Gt111,Gtlu111,kmadd(Gt112,Gtlu112,kmul(Gt113,Gtlu113))),ToReal(6))))))))))))))));
- CCTK_REAL Rt12 = 0.5*(4*(Gt211*Gtlu221 + Gt212*Gtlu222 +
- Gt213*Gtlu223) + 2*(Gt112*Gtlu111 + Gt122*Gtlu112 + Gt123*Gtlu113 +
- Gt111*Gtlu121 + Gt212*Gtlu121 + Gt112*Gtlu122 + Gt222*Gtlu122 +
- Gt113*Gtlu123 + Gt223*Gtlu123 + Gt312*Gtlu131 + Gt322*Gtlu132 +
- Gt323*Gtlu133 + Gt111*Gtlu211 + Gt112*Gtlu212 + Gt113*Gtlu213 +
- Gt311*Gtlu231 + Gt312*Gtlu232 + Gt313*Gtlu233 + Gt311*Gtlu321 +
- Gt312*Gtlu322 + Gt313*Gtlu323) - gtu11*PDstandardNth11gt12 -
- 2*gtu12*PDstandardNth12gt12 - 2*gtu13*PDstandardNth13gt12 +
- gt12L*PDstandardNth1Xt1 + gt22L*PDstandardNth1Xt2 +
- gt23L*PDstandardNth1Xt3 - gtu22*PDstandardNth22gt12 -
- 2*gtu23*PDstandardNth23gt12 + gt11L*PDstandardNth2Xt1 +
- gt12L*PDstandardNth2Xt2 + gt13L*PDstandardNth2Xt3 -
- gtu33*PDstandardNth33gt12 + Gtl112*Xtn1 + Gtl211*Xtn1 + Gtl122*Xtn2 +
- Gtl212*Xtn2 + Gtl123*Xtn3 + Gtl213*Xtn3);
+ CCTK_REAL_VEC Rt12 =
+ kmul(ToReal(0.5),kmadd(gt12L,PDstandardNth1Xt1,kmadd(gt22L,PDstandardNth1Xt2,kmadd(gt23L,PDstandardNth1Xt3,kmadd(gt11L,PDstandardNth2Xt1,kmadd(gt12L,PDstandardNth2Xt2,kmadd(gt13L,PDstandardNth2Xt3,kmadd(Gtl112,Xtn1,kmadd(Gtl211,Xtn1,kmadd(Gtl122,Xtn2,kmadd(Gtl212,Xtn2,kmadd(Gtl123,Xtn3,kmadd(Gtl213,Xtn3,kmadd(gtu12,kmul(PDstandardNth12gt12,ToReal(-2)),kmadd(gtu13,kmul(PDstandardNth13gt12,ToReal(-2)),kmadd(gtu23,kmul(PDstandardNth23gt12,ToReal(-2)),knmsub(gtu11,PDstandardNth11gt12,knmsub(gtu22,PDstandardNth22gt12,knmsub(gtu33,PDstandardNth33gt12,kmadd(kmadd(Gt112,Gtlu111,kmadd(Gt122,Gtlu112,kmadd(Gt123,Gtlu113,kmadd(Gt111,Gtlu121,kmadd(Gt212,Gtlu121,kmadd(Gt112,Gtlu122,kmadd(Gt222,Gtlu122,kmadd(Gt113,Gtlu123,kmadd(Gt223,Gtlu123,kmadd(Gt312,Gtlu131,kmadd(Gt322,Gtlu132,kmadd(Gt323,Gtlu133,kmadd(Gt111,Gtlu211,kmadd(Gt112,Gtlu212,kmadd(Gt113,Gtlu213,kmadd(Gt311,Gtlu231,kmadd(Gt312,Gtlu232,kmadd(Gt313,Gtlu233,kmadd(Gt311,Gtlu321,kmadd(Gt312,Gtlu322,kmul(Gt313,Gtlu323))))))))))))))))))))),ToReal(2),kmul(kmadd(Gt211,Gtlu221,kmadd(Gt212,Gtlu222,kmul(Gt213,Gtlu223))),ToReal(4))))))))))))))))))))));
- CCTK_REAL Rt13 = 0.5*(2*(Gt113*Gtlu111 + Gt123*Gtlu112 + Gt133*Gtlu113
- + Gt213*Gtlu121 + Gt223*Gtlu122 + Gt233*Gtlu123 + Gt111*Gtlu131 +
- Gt313*Gtlu131 + Gt112*Gtlu132 + Gt323*Gtlu132 + Gt113*Gtlu133 +
- Gt333*Gtlu133 + Gt211*Gtlu231 + Gt212*Gtlu232 + Gt213*Gtlu233 +
- Gt111*Gtlu311 + Gt112*Gtlu312 + Gt113*Gtlu313 + Gt211*Gtlu321 +
- Gt212*Gtlu322 + Gt213*Gtlu323) + 4*(Gt311*Gtlu331 + Gt312*Gtlu332 +
- Gt313*Gtlu333) - gtu11*PDstandardNth11gt13 -
- 2*gtu12*PDstandardNth12gt13 - 2*gtu13*PDstandardNth13gt13 +
- gt13L*PDstandardNth1Xt1 + gt23L*PDstandardNth1Xt2 +
- gt33L*PDstandardNth1Xt3 - gtu22*PDstandardNth22gt13 -
- 2*gtu23*PDstandardNth23gt13 - gtu33*PDstandardNth33gt13 +
- gt11L*PDstandardNth3Xt1 + gt12L*PDstandardNth3Xt2 +
- gt13L*PDstandardNth3Xt3 + Gtl113*Xtn1 + Gtl311*Xtn1 + Gtl123*Xtn2 +
- Gtl312*Xtn2 + Gtl133*Xtn3 + Gtl313*Xtn3);
+ CCTK_REAL_VEC Rt13 =
+ kmul(ToReal(0.5),kmadd(gt13L,PDstandardNth1Xt1,kmadd(gt23L,PDstandardNth1Xt2,kmadd(gt33L,PDstandardNth1Xt3,kmadd(gt11L,PDstandardNth3Xt1,kmadd(gt12L,PDstandardNth3Xt2,kmadd(gt13L,PDstandardNth3Xt3,kmadd(Gtl113,Xtn1,kmadd(Gtl311,Xtn1,kmadd(Gtl123,Xtn2,kmadd(Gtl312,Xtn2,kmadd(Gtl133,Xtn3,kmadd(Gtl313,Xtn3,kmadd(gtu12,kmul(PDstandardNth12gt13,ToReal(-2)),kmadd(gtu13,kmul(PDstandardNth13gt13,ToReal(-2)),kmadd(gtu23,kmul(PDstandardNth23gt13,ToReal(-2)),knmsub(gtu11,PDstandardNth11gt13,knmsub(gtu22,PDstandardNth22gt13,knmsub(gtu33,PDstandardNth33gt13,kmadd(kmadd(Gt113,Gtlu111,kmadd(Gt123,Gtlu112,kmadd(Gt133,Gtlu113,kmadd(Gt213,Gtlu121,kmadd(Gt223,Gtlu122,kmadd(Gt233,Gtlu123,kmadd(Gt111,Gtlu131,kmadd(Gt313,Gtlu131,kmadd(Gt112,Gtlu132,kmadd(Gt323,Gtlu132,kmadd(Gt113,Gtlu133,kmadd(Gt333,Gtlu133,kmadd(Gt211,Gtlu231,kmadd(Gt212,Gtlu232,kmadd(Gt213,Gtlu233,kmadd(Gt111,Gtlu311,kmadd(Gt112,Gtlu312,kmadd(Gt113,Gtlu313,kmadd(Gt211,Gtlu321,kmadd(Gt212,Gtlu322,kmul(Gt213,Gtlu323))))))))))))))))))))),ToReal(2),kmul(kmadd(Gt311,Gtlu331,kmadd(Gt312,Gtlu332,kmul(Gt313,Gtlu333))),ToReal(4))))))))))))))))))))));
- CCTK_REAL Rt22 = 0.5*(6*(Gt212*Gtlu221 + Gt222*Gtlu222 +
- Gt223*Gtlu223) + 4*(Gt112*Gtlu211 + Gt122*Gtlu212 + Gt123*Gtlu213 +
- Gt312*Gtlu231 + Gt322*Gtlu232 + Gt323*Gtlu233) -
- gtu11*PDstandardNth11gt22 - 2*gtu12*PDstandardNth12gt22 -
- 2*gtu13*PDstandardNth13gt22 - gtu22*PDstandardNth22gt22 -
- 2*gtu23*PDstandardNth23gt22 + 2*(Gt112*Gtlu121 + Gt122*Gtlu122 +
- Gt123*Gtlu123 + Gt312*Gtlu321 + Gt322*Gtlu322 + Gt323*Gtlu323 +
- gt12L*PDstandardNth2Xt1) + 2*gt22L*PDstandardNth2Xt2 +
- 2*gt23L*PDstandardNth2Xt3 - gtu33*PDstandardNth33gt22 + 2*Gtl212*Xtn1 +
- 2*Gtl222*Xtn2 + 2*Gtl223*Xtn3);
-
- CCTK_REAL Rt23 = 0.5*(2*(Gt112*Gtlu131 + Gt122*Gtlu132 + Gt123*Gtlu133
- + Gt113*Gtlu211 + Gt123*Gtlu212 + Gt133*Gtlu213 + Gt213*Gtlu221 +
- Gt223*Gtlu222 + Gt233*Gtlu223 + Gt212*Gtlu231 + Gt313*Gtlu231 +
- Gt222*Gtlu232 + Gt323*Gtlu232 + Gt223*Gtlu233 + Gt333*Gtlu233 +
- Gt112*Gtlu311 + Gt122*Gtlu312 + Gt123*Gtlu313 + Gt212*Gtlu321 +
- Gt222*Gtlu322 + Gt223*Gtlu323) + 4*(Gt312*Gtlu331 + Gt322*Gtlu332 +
- Gt323*Gtlu333) - gtu11*PDstandardNth11gt23 -
- 2*gtu12*PDstandardNth12gt23 - 2*gtu13*PDstandardNth13gt23 -
- gtu22*PDstandardNth22gt23 - 2*gtu23*PDstandardNth23gt23 +
- gt13L*PDstandardNth2Xt1 + gt23L*PDstandardNth2Xt2 +
- gt33L*PDstandardNth2Xt3 - gtu33*PDstandardNth33gt23 +
- gt12L*PDstandardNth3Xt1 + gt22L*PDstandardNth3Xt2 +
- gt23L*PDstandardNth3Xt3 + Gtl213*Xtn1 + Gtl312*Xtn1 + Gtl223*Xtn2 +
- Gtl322*Xtn2 + Gtl233*Xtn3 + Gtl323*Xtn3);
-
- CCTK_REAL Rt33 = 0.5*(4*(Gt113*Gtlu311 + Gt123*Gtlu312 + Gt133*Gtlu313
- + Gt213*Gtlu321 + Gt223*Gtlu322 + Gt233*Gtlu323) + 6*(Gt313*Gtlu331 +
- Gt323*Gtlu332 + Gt333*Gtlu333) - gtu11*PDstandardNth11gt33 -
- 2*gtu12*PDstandardNth12gt33 - 2*gtu13*PDstandardNth13gt33 -
- gtu22*PDstandardNth22gt33 - 2*gtu23*PDstandardNth23gt33 -
- gtu33*PDstandardNth33gt33 + 2*(Gt113*Gtlu131 + Gt123*Gtlu132 +
- Gt133*Gtlu133 + Gt213*Gtlu231 + Gt223*Gtlu232 + Gt233*Gtlu233 +
- gt13L*PDstandardNth3Xt1) + 2*gt23L*PDstandardNth3Xt2 +
- 2*gt33L*PDstandardNth3Xt3 + 2*Gtl313*Xtn1 + 2*Gtl323*Xtn2 +
- 2*Gtl333*Xtn3);
-
- CCTK_REAL fac1 = IfThen(conformalMethod,-0.5*INV(phiL),1);
-
- CCTK_REAL cdphi1 = fac1*PDstandardNth1phi;
-
- CCTK_REAL cdphi2 = fac1*PDstandardNth2phi;
-
- CCTK_REAL cdphi3 = fac1*PDstandardNth3phi;
-
- CCTK_REAL fac2 = IfThen(conformalMethod,0.5*INV(SQR(phiL)),0);
-
- CCTK_REAL cdphi211 = -(fac1*(-PDstandardNth11phi +
- Gt111*PDstandardNth1phi + Gt211*PDstandardNth2phi +
- Gt311*PDstandardNth3phi)) + fac2*SQR(PDstandardNth1phi);
-
- CCTK_REAL cdphi212 = fac2*PDstandardNth1phi*PDstandardNth2phi -
- fac1*(-PDstandardNth12phi + Gt112*PDstandardNth1phi +
- Gt212*PDstandardNth2phi + Gt312*PDstandardNth3phi);
-
- CCTK_REAL cdphi213 = fac2*PDstandardNth1phi*PDstandardNth3phi -
- fac1*(-PDstandardNth13phi + Gt113*PDstandardNth1phi +
- Gt213*PDstandardNth2phi + Gt313*PDstandardNth3phi);
+ CCTK_REAL_VEC Rt22 =
+ kmul(ToReal(0.5),kmadd(gtu12,kmul(PDstandardNth12gt22,ToReal(-2)),kmadd(gtu13,kmul(PDstandardNth13gt22,ToReal(-2)),kmadd(gtu23,kmul(PDstandardNth23gt22,ToReal(-2)),knmsub(gtu11,PDstandardNth11gt22,knmsub(gtu22,PDstandardNth22gt22,knmsub(gtu33,PDstandardNth33gt22,kmadd(kmadd(Gt112,Gtlu121,kmadd(Gt122,Gtlu122,kmadd(Gt123,Gtlu123,kmadd(Gt312,Gtlu321,kmadd(Gt322,Gtlu322,kmadd(Gt323,Gtlu323,kmul(gt12L,PDstandardNth2Xt1))))))),ToReal(2),kmadd(gt22L,kmul(PDstandardNth2Xt2,ToReal(2)),kmadd(gt23L,kmul(PDstandardNth2Xt3,ToReal(2)),kmadd(Gtl212,kmul(Xtn1,ToReal(2)),kmadd(Gtl222,kmul(Xtn2,ToReal(2)),kmadd(Gtl223,kmul(Xtn3,ToReal(2)),kmadd(kmadd(Gt112,Gtlu211,kmadd(Gt122,Gtlu212,kmadd(Gt123,Gtlu213,kmadd(Gt312,Gtlu231,kmadd(Gt322,Gtlu232,kmul(Gt323,Gtlu233)))))),ToReal(4),kmul(kmadd(Gt212,Gtlu221,kmadd(Gt222,Gtlu222,kmul(Gt223,Gtlu223))),ToReal(6))))))))))))))));
- CCTK_REAL cdphi222 = -(fac1*(Gt122*PDstandardNth1phi -
- PDstandardNth22phi + Gt222*PDstandardNth2phi +
- Gt322*PDstandardNth3phi)) + fac2*SQR(PDstandardNth2phi);
+ CCTK_REAL_VEC Rt23 =
+ kmul(ToReal(0.5),kmadd(gt13L,PDstandardNth2Xt1,kmadd(gt23L,PDstandardNth2Xt2,kmadd(gt33L,PDstandardNth2Xt3,kmadd(gt12L,PDstandardNth3Xt1,kmadd(gt22L,PDstandardNth3Xt2,kmadd(gt23L,PDstandardNth3Xt3,kmadd(Gtl213,Xtn1,kmadd(Gtl312,Xtn1,kmadd(Gtl223,Xtn2,kmadd(Gtl322,Xtn2,kmadd(Gtl233,Xtn3,kmadd(Gtl323,Xtn3,kmadd(gtu12,kmul(PDstandardNth12gt23,ToReal(-2)),kmadd(gtu13,kmul(PDstandardNth13gt23,ToReal(-2)),kmadd(gtu23,kmul(PDstandardNth23gt23,ToReal(-2)),knmsub(gtu11,PDstandardNth11gt23,knmsub(gtu22,PDstandardNth22gt23,knmsub(gtu33,PDstandardNth33gt23,kmadd(kmadd(Gt112,Gtlu131,kmadd(Gt122,Gtlu132,kmadd(Gt123,Gtlu133,kmadd(Gt113,Gtlu211,kmadd(Gt123,Gtlu212,kmadd(Gt133,Gtlu213,kmadd(Gt213,Gtlu221,kmadd(Gt223,Gtlu222,kmadd(Gt233,Gtlu223,kmadd(Gt212,Gtlu231,kmadd(Gt313,Gtlu231,kmadd(Gt222,Gtlu232,kmadd(Gt323,Gtlu232,kmadd(Gt223,Gtlu233,kmadd(Gt333,Gtlu233,kmadd(Gt112,Gtlu311,kmadd(Gt122,Gtlu312,kmadd(Gt123,Gtlu313,kmadd(Gt212,Gtlu321,kmadd(Gt222,Gtlu322,kmul(Gt223,Gtlu323))))))))))))))))))))),ToReal(2),kmul(kmadd(Gt312,Gtlu331,kmadd(Gt322,Gtlu332,kmul(Gt323,Gtlu333))),ToReal(4))))))))))))))))))))));
- CCTK_REAL cdphi223 = fac2*PDstandardNth2phi*PDstandardNth3phi -
- fac1*(Gt123*PDstandardNth1phi - PDstandardNth23phi +
- Gt223*PDstandardNth2phi + Gt323*PDstandardNth3phi);
+ CCTK_REAL_VEC Rt33 =
+ kmul(ToReal(0.5),kmadd(gtu12,kmul(PDstandardNth12gt33,ToReal(-2)),kmadd(gtu13,kmul(PDstandardNth13gt33,ToReal(-2)),kmadd(gtu23,kmul(PDstandardNth23gt33,ToReal(-2)),knmsub(gtu11,PDstandardNth11gt33,knmsub(gtu22,PDstandardNth22gt33,knmsub(gtu33,PDstandardNth33gt33,kmadd(kmadd(Gt113,Gtlu131,kmadd(Gt123,Gtlu132,kmadd(Gt133,Gtlu133,kmadd(Gt213,Gtlu231,kmadd(Gt223,Gtlu232,kmadd(Gt233,Gtlu233,kmul(gt13L,PDstandardNth3Xt1))))))),ToReal(2),kmadd(gt23L,kmul(PDstandardNth3Xt2,ToReal(2)),kmadd(gt33L,kmul(PDstandardNth3Xt3,ToReal(2)),kmadd(Gtl313,kmul(Xtn1,ToReal(2)),kmadd(Gtl323,kmul(Xtn2,ToReal(2)),kmadd(Gtl333,kmul(Xtn3,ToReal(2)),kmadd(kmadd(Gt113,Gtlu311,kmadd(Gt123,Gtlu312,kmadd(Gt133,Gtlu313,kmadd(Gt213,Gtlu321,kmadd(Gt223,Gtlu322,kmul(Gt233,Gtlu323)))))),ToReal(4),kmul(kmadd(Gt313,Gtlu331,kmadd(Gt323,Gtlu332,kmul(Gt333,Gtlu333))),ToReal(6))))))))))))))));
- CCTK_REAL cdphi233 = -(fac1*(Gt133*PDstandardNth1phi +
- Gt233*PDstandardNth2phi - PDstandardNth33phi +
- Gt333*PDstandardNth3phi)) + fac2*SQR(PDstandardNth3phi);
+ CCTK_REAL_VEC fac1 =
+ IfThen(conformalMethod,kmul(INV(phiL),ToReal(-0.5)),ToReal(1));
- CCTK_REAL Rphi11 = -2*(cdphi211 + 2*(-1 + gt11L*gtu11)*SQR(cdphi1) +
- gt11L*(cdphi211*gtu11 + 4*(cdphi1*(cdphi2*gtu12 + cdphi3*gtu13) +
- cdphi2*cdphi3*gtu23) + cdphi233*gtu33 + gtu22*(cdphi222 +
- 2*SQR(cdphi2)) + 2*(cdphi212*gtu12 + cdphi213*gtu13 + cdphi223*gtu23 +
- gtu33*SQR(cdphi3))));
+ CCTK_REAL_VEC cdphi1 = kmul(fac1,PDstandardNth1phi);
- CCTK_REAL Rphi12 = -2*(cdphi212 + cdphi1*(cdphi2*(-2 + 4*gt12L*gtu12)
- + 4*cdphi3*gt12L*gtu13) + gt12L*(cdphi211*gtu11 + 4*cdphi2*cdphi3*gtu23
- + 2*(cdphi212*gtu12 + cdphi213*gtu13 + cdphi223*gtu23 +
- gtu11*SQR(cdphi1)) + gtu22*(cdphi222 + 2*SQR(cdphi2)) + gtu33*(cdphi233
- + 2*SQR(cdphi3))));
+ CCTK_REAL_VEC cdphi2 = kmul(fac1,PDstandardNth2phi);
- CCTK_REAL Rphi13 = -2*(cdphi213 + cdphi1*(4*cdphi2*gt13L*gtu12 +
- cdphi3*(-2 + 4*gt13L*gtu13)) + gt13L*(cdphi211*gtu11 +
- 4*cdphi2*cdphi3*gtu23 + 2*(cdphi212*gtu12 + cdphi213*gtu13 +
- cdphi223*gtu23 + gtu11*SQR(cdphi1)) + gtu22*(cdphi222 + 2*SQR(cdphi2))
- + gtu33*(cdphi233 + 2*SQR(cdphi3))));
+ CCTK_REAL_VEC cdphi3 = kmul(fac1,PDstandardNth3phi);
- CCTK_REAL Rphi22 = -2*(cdphi222 + 2*(-1 + gt22L*gtu22)*SQR(cdphi2) +
- gt22L*(cdphi222*gtu22 + 4*(cdphi1*cdphi3*gtu13 + cdphi2*(cdphi1*gtu12 +
- cdphi3*gtu23)) + cdphi233*gtu33 + gtu11*(cdphi211 + 2*SQR(cdphi1)) +
- 2*(cdphi212*gtu12 + cdphi213*gtu13 + cdphi223*gtu23 +
- gtu33*SQR(cdphi3))));
+ CCTK_REAL_VEC fac2 =
+ IfThen(conformalMethod,kmul(INV(SQR(phiL)),ToReal(0.5)),ToReal(0));
- CCTK_REAL Rphi23 = -2*(cdphi223 + cdphi2*(4*cdphi1*gt23L*gtu12 +
- cdphi3*(-2 + 4*gt23L*gtu23)) + gt23L*(4*cdphi1*cdphi3*gtu13 +
- cdphi222*gtu22 + gtu11*(cdphi211 + 2*SQR(cdphi1)) + 2*(cdphi212*gtu12 +
- cdphi213*gtu13 + cdphi223*gtu23 + gtu22*SQR(cdphi2)) + gtu33*(cdphi233
- + 2*SQR(cdphi3))));
+ CCTK_REAL_VEC cdphi211 =
+ kmsub(fac2,SQR(PDstandardNth1phi),kmul(fac1,kmadd(Gt111,PDstandardNth1phi,kmadd(Gt211,PDstandardNth2phi,kmsub(Gt311,PDstandardNth3phi,PDstandardNth11phi)))));
- CCTK_REAL Rphi33 = -2*(cdphi233 + gt33L*((4*cdphi1*cdphi2 +
- 2*cdphi212)*gtu12 + 4*cdphi3*(cdphi1*gtu13 + cdphi2*gtu23) +
- 2*(cdphi213*gtu13 + cdphi223*gtu23) + cdphi233*gtu33 + gtu11*(cdphi211
- + 2*SQR(cdphi1)) + gtu22*(cdphi222 + 2*SQR(cdphi2))) + 2*(-1 +
- gt33L*gtu33)*SQR(cdphi3));
+ CCTK_REAL_VEC cdphi212 =
+ kmsub(fac2,kmul(PDstandardNth1phi,PDstandardNth2phi),kmul(fac1,kmadd(Gt112,PDstandardNth1phi,kmadd(Gt212,PDstandardNth2phi,kmsub(Gt312,PDstandardNth3phi,PDstandardNth12phi)))));
- CCTK_REAL Atm11 = At11L*gtu11 + At12L*gtu12 + At13L*gtu13;
+ CCTK_REAL_VEC cdphi213 =
+ kmsub(fac2,kmul(PDstandardNth1phi,PDstandardNth3phi),kmul(fac1,kmadd(Gt113,PDstandardNth1phi,kmadd(Gt213,PDstandardNth2phi,kmsub(Gt313,PDstandardNth3phi,PDstandardNth13phi)))));
- CCTK_REAL Atm21 = At11L*gtu12 + At12L*gtu22 + At13L*gtu23;
+ CCTK_REAL_VEC cdphi222 =
+ kmsub(fac2,SQR(PDstandardNth2phi),kmul(fac1,kmadd(Gt122,PDstandardNth1phi,kmadd(Gt222,PDstandardNth2phi,kmsub(Gt322,PDstandardNth3phi,PDstandardNth22phi)))));
- CCTK_REAL Atm31 = At11L*gtu13 + At12L*gtu23 + At13L*gtu33;
+ CCTK_REAL_VEC cdphi223 =
+ kmsub(fac2,kmul(PDstandardNth2phi,PDstandardNth3phi),kmul(fac1,kmadd(Gt123,PDstandardNth1phi,kmadd(Gt223,PDstandardNth2phi,kmsub(Gt323,PDstandardNth3phi,PDstandardNth23phi)))));
- CCTK_REAL Atm12 = At12L*gtu11 + At22L*gtu12 + At23L*gtu13;
+ CCTK_REAL_VEC cdphi233 =
+ kmsub(fac2,SQR(PDstandardNth3phi),kmul(fac1,kmadd(Gt133,PDstandardNth1phi,kmadd(Gt233,PDstandardNth2phi,kmsub(Gt333,PDstandardNth3phi,PDstandardNth33phi)))));
- CCTK_REAL Atm22 = At12L*gtu12 + At22L*gtu22 + At23L*gtu23;
+ CCTK_REAL_VEC Rphi11 =
+ kmul(ToReal(-2),kadd(cdphi211,kmadd(SQR(cdphi1),kmul(kmadd(gt11L,gtu11,ToReal(-1)),ToReal(2)),kmul(gt11L,kmadd(cdphi211,gtu11,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu33,SQR(cdphi3))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmul(kmadd(cdphi1,kmadd(cdphi2,gtu12,kmul(cdphi3,gtu13)),kmul(cdphi2,kmul(cdphi3,gtu23))),ToReal(4))))))))));
- CCTK_REAL Atm32 = At12L*gtu13 + At22L*gtu23 + At23L*gtu33;
+ CCTK_REAL_VEC Rphi12 =
+ kmul(ToReal(-2),kadd(cdphi212,kmadd(gt12L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(cdphi3,kmul(gt12L,kmul(gtu13,ToReal(4))),kmul(cdphi2,kmadd(gt12L,kmul(gtu12,ToReal(4)),ToReal(-2))))))));
- CCTK_REAL Atm13 = At13L*gtu11 + At23L*gtu12 + At33L*gtu13;
+ CCTK_REAL_VEC Rphi13 =
+ kmul(ToReal(-2),kadd(cdphi213,kmadd(gt13L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(cdphi2,kmul(gt13L,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt13L,kmul(gtu13,ToReal(4)),ToReal(-2))))))));
- CCTK_REAL Atm23 = At13L*gtu12 + At23L*gtu22 + At33L*gtu23;
+ CCTK_REAL_VEC Rphi22 =
+ kmul(ToReal(-2),kadd(cdphi222,kmadd(SQR(cdphi2),kmul(kmadd(gt22L,gtu22,ToReal(-1)),ToReal(2)),kmul(gt22L,kmadd(cdphi222,gtu22,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu33,SQR(cdphi3))))),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmul(kmadd(cdphi1,kmul(cdphi3,gtu13),kmul(cdphi2,kmadd(cdphi1,gtu12,kmul(cdphi3,gtu23)))),ToReal(4))))))))));
- CCTK_REAL Atm33 = At13L*gtu13 + At23L*gtu23 + At33L*gtu33;
+ CCTK_REAL_VEC Rphi23 =
+ kmul(ToReal(-2),kadd(cdphi223,kmadd(gt23L,kmadd(cdphi222,gtu22,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu22,SQR(cdphi2))))),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi1,kmul(cdphi3,kmul(gtu13,ToReal(4)))))))),kmul(cdphi2,kmadd(cdphi1,kmul(gt23L,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt23L,kmul(gtu23,ToReal(4)),ToReal(-2))))))));
- CCTK_REAL e4phi = IfThen(conformalMethod,INV(SQR(phiL)),exp(4*phiL));
+ CCTK_REAL_VEC Rphi33 =
+ kmul(ToReal(-2),kadd(cdphi233,kmadd(SQR(cdphi3),kmul(kmadd(gt33L,gtu33,ToReal(-1)),ToReal(2)),kmul(gt33L,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi213,gtu13,kmul(cdphi223,gtu23)),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(cdphi3,kmul(kmadd(cdphi1,gtu13,kmul(cdphi2,gtu23)),ToReal(4)),kmul(gtu12,kmadd(cdphi212,ToReal(2),kmul(cdphi1,kmul(cdphi2,ToReal(4))))))))))))));
- CCTK_REAL em4phi = INV(e4phi);
+ CCTK_REAL_VEC Atm11 =
+ kmadd(At11L,gtu11,kmadd(At12L,gtu12,kmul(At13L,gtu13)));
- CCTK_REAL g11 = e4phi*gt11L;
+ CCTK_REAL_VEC Atm21 =
+ kmadd(At11L,gtu12,kmadd(At12L,gtu22,kmul(At13L,gtu23)));
- CCTK_REAL g12 = e4phi*gt12L;
+ CCTK_REAL_VEC Atm31 =
+ kmadd(At11L,gtu13,kmadd(At12L,gtu23,kmul(At13L,gtu33)));
- CCTK_REAL g13 = e4phi*gt13L;
+ CCTK_REAL_VEC Atm12 =
+ kmadd(At12L,gtu11,kmadd(At22L,gtu12,kmul(At23L,gtu13)));
- CCTK_REAL g22 = e4phi*gt22L;
+ CCTK_REAL_VEC Atm22 =
+ kmadd(At12L,gtu12,kmadd(At22L,gtu22,kmul(At23L,gtu23)));
- CCTK_REAL g23 = e4phi*gt23L;
+ CCTK_REAL_VEC Atm32 =
+ kmadd(At12L,gtu13,kmadd(At22L,gtu23,kmul(At23L,gtu33)));
- CCTK_REAL g33 = e4phi*gt33L;
+ CCTK_REAL_VEC Atm13 =
+ kmadd(At13L,gtu11,kmadd(At23L,gtu12,kmul(At33L,gtu13)));
- CCTK_REAL gu11 = em4phi*gtu11;
+ CCTK_REAL_VEC Atm23 =
+ kmadd(At13L,gtu12,kmadd(At23L,gtu22,kmul(At33L,gtu23)));
- CCTK_REAL gu12 = em4phi*gtu12;
+ CCTK_REAL_VEC Atm33 =
+ kmadd(At13L,gtu13,kmadd(At23L,gtu23,kmul(At33L,gtu33)));
- CCTK_REAL gu13 = em4phi*gtu13;
+ CCTK_REAL_VEC e4phi =
+ IfThen(conformalMethod,INV(SQR(phiL)),kexp(kmul(phiL,ToReal(4))));
- CCTK_REAL gu22 = em4phi*gtu22;
+ CCTK_REAL_VEC em4phi = INV(e4phi);
- CCTK_REAL gu23 = em4phi*gtu23;
+ CCTK_REAL_VEC g11 = kmul(e4phi,gt11L);
- CCTK_REAL gu33 = em4phi*gtu33;
+ CCTK_REAL_VEC g12 = kmul(e4phi,gt12L);
- CCTK_REAL R11 = Rphi11 + Rt11;
+ CCTK_REAL_VEC g13 = kmul(e4phi,gt13L);
- CCTK_REAL R12 = Rphi12 + Rt12;
+ CCTK_REAL_VEC g22 = kmul(e4phi,gt22L);
- CCTK_REAL R13 = Rphi13 + Rt13;
+ CCTK_REAL_VEC g23 = kmul(e4phi,gt23L);
- CCTK_REAL R22 = Rphi22 + Rt22;
+ CCTK_REAL_VEC g33 = kmul(e4phi,gt33L);
- CCTK_REAL R23 = Rphi23 + Rt23;
+ CCTK_REAL_VEC gu11 = kmul(em4phi,gtu11);
- CCTK_REAL R33 = Rphi33 + Rt33;
+ CCTK_REAL_VEC gu12 = kmul(em4phi,gtu12);
- CCTK_REAL trS = em4phi*(eTxxL*gtu11 + eTyyL*gtu22 + 2*(eTxyL*gtu12 +
- eTxzL*gtu13 + eTyzL*gtu23) + eTzzL*gtu33);
+ CCTK_REAL_VEC gu13 = kmul(em4phi,gtu13);
- CCTK_REAL Ats11 = -PDstandardNth11alpha + (4*cdphi1 +
- Gt111)*PDstandardNth1alpha + Gt211*PDstandardNth2alpha +
- Gt311*PDstandardNth3alpha + alphaL*R11;
+ CCTK_REAL_VEC gu22 = kmul(em4phi,gtu22);
- CCTK_REAL Ats12 = -PDstandardNth12alpha + (2*cdphi2 +
- Gt112)*PDstandardNth1alpha + (2*cdphi1 + Gt212)*PDstandardNth2alpha +
- Gt312*PDstandardNth3alpha + alphaL*R12;
+ CCTK_REAL_VEC gu23 = kmul(em4phi,gtu23);
- CCTK_REAL Ats13 = -PDstandardNth13alpha + (2*cdphi3 +
- Gt113)*PDstandardNth1alpha + Gt213*PDstandardNth2alpha + (2*cdphi1 +
- Gt313)*PDstandardNth3alpha + alphaL*R13;
+ CCTK_REAL_VEC gu33 = kmul(em4phi,gtu33);
- CCTK_REAL Ats22 = Gt122*PDstandardNth1alpha - PDstandardNth22alpha +
- (4*cdphi2 + Gt222)*PDstandardNth2alpha + Gt322*PDstandardNth3alpha +
- alphaL*R22;
+ CCTK_REAL_VEC R11 = kadd(Rphi11,Rt11);
- CCTK_REAL Ats23 = Gt123*PDstandardNth1alpha - PDstandardNth23alpha +
- (2*cdphi3 + Gt223)*PDstandardNth2alpha + (2*cdphi2 +
- Gt323)*PDstandardNth3alpha + alphaL*R23;
+ CCTK_REAL_VEC R12 = kadd(Rphi12,Rt12);
- CCTK_REAL Ats33 = Gt133*PDstandardNth1alpha +
- Gt233*PDstandardNth2alpha - PDstandardNth33alpha + (4*cdphi3 +
- Gt333)*PDstandardNth3alpha + alphaL*R33;
+ CCTK_REAL_VEC R13 = kadd(Rphi13,Rt13);
- CCTK_REAL trAts = Ats11*gu11 + Ats22*gu22 + 2*(Ats12*gu12 + Ats13*gu13
- + Ats23*gu23) + Ats33*gu33;
+ CCTK_REAL_VEC R22 = kadd(Rphi22,Rt22);
- CCTK_REAL At11rhsL = -2.*alphaL*(At11L*Atm11 + At12L*Atm21 +
- At13L*Atm31) + 2.*(At12L*PDstandardNth1beta2 +
- At13L*PDstandardNth1beta3) +
- At11L*(1.333333333333333333333333333333333333333*PDstandardNth1beta1 -
- 0.6666666666666666666666666666666666666667*(PDstandardNth2beta2 +
- PDstandardNth3beta3) + alphaL*trKL) + em4phi*(Ats11 -
- 0.3333333333333333333333333333333333333333*g11*trAts +
- alphaL*(-25.13274122871834590770114706623602307358*eTxxL +
- 8.377580409572781969233715688745341024526*g11*trS));
+ CCTK_REAL_VEC R23 = kadd(Rphi23,Rt23);
- CCTK_REAL At12rhsL = -2.*alphaL*(At11L*Atm12 + At12L*Atm22 +
- At13L*Atm32) + At22L*PDstandardNth1beta2 + At23L*PDstandardNth1beta3 +
- At11L*PDstandardNth2beta1 + At13L*PDstandardNth2beta3 +
- At12L*(0.3333333333333333333333333333333333333333*(PDstandardNth1beta1
- + PDstandardNth2beta2) -
- 0.6666666666666666666666666666666666666667*PDstandardNth3beta3 +
- alphaL*trKL) + em4phi*(Ats12 -
- 0.3333333333333333333333333333333333333333*g12*trAts +
- alphaL*(-25.13274122871834590770114706623602307358*eTxyL +
- 8.377580409572781969233715688745341024526*g12*trS));
+ CCTK_REAL_VEC R33 = kadd(Rphi33,Rt33);
- CCTK_REAL At13rhsL = -2.*alphaL*(At11L*Atm13 + At12L*Atm23 +
- At13L*Atm33) + At23L*PDstandardNth1beta2 + At33L*PDstandardNth1beta3 +
- At11L*PDstandardNth3beta1 + At12L*PDstandardNth3beta2 +
- At13L*(-0.6666666666666666666666666666666666666667*PDstandardNth2beta2
- + 0.3333333333333333333333333333333333333333*(PDstandardNth1beta1 +
- PDstandardNth3beta3) + alphaL*trKL) + em4phi*(Ats13 -
- 0.3333333333333333333333333333333333333333*g13*trAts +
- alphaL*(-25.13274122871834590770114706623602307358*eTxzL +
- 8.377580409572781969233715688745341024526*g13*trS));
+ CCTK_REAL_VEC trS =
+ kmul(em4phi,kmadd(eTxxL,gtu11,kmadd(eTyyL,gtu22,kmadd(eTzzL,gtu33,kmul(kmadd(eTxyL,gtu12,kmadd(eTxzL,gtu13,kmul(eTyzL,gtu23))),ToReal(2))))));
- CCTK_REAL At22rhsL = -2.*alphaL*(At12L*Atm12 + At22L*Atm22 +
- At23L*Atm32) + 2.*(At12L*PDstandardNth2beta1 +
- At23L*PDstandardNth2beta3) +
- At22L*(1.333333333333333333333333333333333333333*PDstandardNth2beta2 -
- 0.6666666666666666666666666666666666666667*(PDstandardNth1beta1 +
- PDstandardNth3beta3) + alphaL*trKL) + em4phi*(Ats22 -
- 0.3333333333333333333333333333333333333333*g22*trAts +
- alphaL*(-25.13274122871834590770114706623602307358*eTyyL +
- 8.377580409572781969233715688745341024526*g22*trS));
-
- CCTK_REAL At23rhsL = -2.*alphaL*(At12L*Atm13 + At22L*Atm23 +
- At23L*Atm33) + At13L*PDstandardNth2beta1 + At33L*PDstandardNth2beta3 +
- At12L*PDstandardNth3beta1 + At22L*PDstandardNth3beta2 +
- At23L*(-0.6666666666666666666666666666666666666667*PDstandardNth1beta1
- + 0.3333333333333333333333333333333333333333*(PDstandardNth2beta2 +
- PDstandardNth3beta3) + alphaL*trKL) + em4phi*(Ats23 -
- 0.3333333333333333333333333333333333333333*g23*trAts +
- alphaL*(-25.13274122871834590770114706623602307358*eTyzL +
- 8.377580409572781969233715688745341024526*g23*trS));
-
- CCTK_REAL At33rhsL = -2.*alphaL*(At13L*Atm13 + At23L*Atm23 +
- At33L*Atm33) + 2.*(At13L*PDstandardNth3beta1 +
- At23L*PDstandardNth3beta2) +
- At33L*(-0.6666666666666666666666666666666666666667*(PDstandardNth1beta1
- + PDstandardNth2beta2) +
- 1.333333333333333333333333333333333333333*PDstandardNth3beta3 +
- alphaL*trKL) + em4phi*(Ats33 -
- 0.3333333333333333333333333333333333333333*g33*trAts +
- alphaL*(-25.13274122871834590770114706623602307358*eTzzL +
- 8.377580409572781969233715688745341024526*g33*trS));
+ CCTK_REAL_VEC Ats11 =
+ kmadd(Gt211,PDstandardNth2alpha,kmadd(Gt311,PDstandardNth3alpha,kmadd(alphaL,R11,kmsub(PDstandardNth1alpha,kmadd(cdphi1,ToReal(4),Gt111),PDstandardNth11alpha))));
+
+ CCTK_REAL_VEC Ats12 =
+ kmadd(Gt312,PDstandardNth3alpha,kmadd(alphaL,R12,ksub(kmadd(PDstandardNth2alpha,kmadd(cdphi1,ToReal(2),Gt212),kmul(PDstandardNth1alpha,kmadd(cdphi2,ToReal(2),Gt112))),PDstandardNth12alpha)));
+
+ CCTK_REAL_VEC Ats13 =
+ kmadd(Gt213,PDstandardNth2alpha,kmadd(alphaL,R13,ksub(kmadd(PDstandardNth3alpha,kmadd(cdphi1,ToReal(2),Gt313),kmul(PDstandardNth1alpha,kmadd(cdphi3,ToReal(2),Gt113))),PDstandardNth13alpha)));
+
+ CCTK_REAL_VEC Ats22 =
+ kmadd(Gt122,PDstandardNth1alpha,kmadd(Gt322,PDstandardNth3alpha,kmadd(alphaL,R22,kmsub(PDstandardNth2alpha,kmadd(cdphi2,ToReal(4),Gt222),PDstandardNth22alpha))));
+
+ CCTK_REAL_VEC Ats23 =
+ kmadd(Gt123,PDstandardNth1alpha,kmadd(alphaL,R23,ksub(kmadd(PDstandardNth3alpha,kmadd(cdphi2,ToReal(2),Gt323),kmul(PDstandardNth2alpha,kmadd(cdphi3,ToReal(2),Gt223))),PDstandardNth23alpha)));
+
+ CCTK_REAL_VEC Ats33 =
+ kmadd(Gt133,PDstandardNth1alpha,kmadd(Gt233,PDstandardNth2alpha,kmadd(alphaL,R33,kmsub(PDstandardNth3alpha,kmadd(cdphi3,ToReal(4),Gt333),PDstandardNth33alpha))));
+
+ CCTK_REAL_VEC trAts =
+ kmadd(Ats11,gu11,kmadd(Ats22,gu22,kmadd(Ats33,gu33,kmul(kmadd(Ats12,gu12,kmadd(Ats13,gu13,kmul(Ats23,gu23))),ToReal(2)))));
+
+ CCTK_REAL_VEC At11rhsL =
+ kmadd(alphaL,kmul(kmadd(At11L,Atm11,kmadd(At12L,Atm21,kmul(At13L,Atm31))),ToReal(-2.)),kmadd(At11L,kmadd(alphaL,trKL,kmadd(kadd(PDstandardNth2beta2,PDstandardNth3beta3),ToReal(-0.6666666666666666666666666666666666666667),kmul(PDstandardNth1beta1,ToReal(1.333333333333333333333333333333333333333)))),kmadd(kmadd(At12L,PDstandardNth1beta2,kmul(At13L,PDstandardNth1beta3)),ToReal(2.),kmul(em4phi,kadd(Ats11,kmadd(g11,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),kmul(alphaL,kmadd(eTxxL,ToReal(-25.13274122871834590770114706623602307358),kmul(g11,kmul(trS,ToReal(8.377580409572781969233715688745341024526)))))))))));
+
+ CCTK_REAL_VEC At12rhsL =
+ kmadd(At22L,PDstandardNth1beta2,kmadd(At23L,PDstandardNth1beta3,kmadd(At11L,PDstandardNth2beta1,kmadd(At13L,PDstandardNth2beta3,kmadd(alphaL,kmul(kmadd(At11L,Atm12,kmadd(At12L,Atm22,kmul(At13L,Atm32))),ToReal(-2.)),kmadd(At12L,kmadd(alphaL,trKL,kmadd(PDstandardNth3beta3,ToReal(-0.6666666666666666666666666666666666666667),kmul(kadd(PDstandardNth1beta1,PDstandardNth2beta2),ToReal(0.3333333333333333333333333333333333333333)))),kmul(em4phi,kadd(Ats12,kmadd(g12,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),kmul(alphaL,kmadd(eTxyL,ToReal(-25.13274122871834590770114706623602307358),kmul(g12,kmul(trS,ToReal(8.377580409572781969233715688745341024526))))))))))))));
+
+ CCTK_REAL_VEC At13rhsL =
+ kmadd(At23L,PDstandardNth1beta2,kmadd(At33L,PDstandardNth1beta3,kmadd(At11L,PDstandardNth3beta1,kmadd(At12L,PDstandardNth3beta2,kmadd(alphaL,kmul(kmadd(At11L,Atm13,kmadd(At12L,Atm23,kmul(At13L,Atm33))),ToReal(-2.)),kmadd(At13L,kmadd(alphaL,trKL,kmadd(PDstandardNth2beta2,ToReal(-0.6666666666666666666666666666666666666667),kmul(kadd(PDstandardNth1beta1,PDstandardNth3beta3),ToReal(0.3333333333333333333333333333333333333333)))),kmul(em4phi,kadd(Ats13,kmadd(g13,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),kmul(alphaL,kmadd(eTxzL,ToReal(-25.13274122871834590770114706623602307358),kmul(g13,kmul(trS,ToReal(8.377580409572781969233715688745341024526))))))))))))));
+
+ CCTK_REAL_VEC At22rhsL =
+ kmadd(alphaL,kmul(kmadd(At12L,Atm12,kmadd(At22L,Atm22,kmul(At23L,Atm32))),ToReal(-2.)),kmadd(At22L,kmadd(alphaL,trKL,kmadd(kadd(PDstandardNth1beta1,PDstandardNth3beta3),ToReal(-0.6666666666666666666666666666666666666667),kmul(PDstandardNth2beta2,ToReal(1.333333333333333333333333333333333333333)))),kmadd(kmadd(At12L,PDstandardNth2beta1,kmul(At23L,PDstandardNth2beta3)),ToReal(2.),kmul(em4phi,kadd(Ats22,kmadd(g22,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),kmul(alphaL,kmadd(eTyyL,ToReal(-25.13274122871834590770114706623602307358),kmul(g22,kmul(trS,ToReal(8.377580409572781969233715688745341024526)))))))))));
+
+ CCTK_REAL_VEC At23rhsL =
+ kmadd(At13L,PDstandardNth2beta1,kmadd(At33L,PDstandardNth2beta3,kmadd(At12L,PDstandardNth3beta1,kmadd(At22L,PDstandardNth3beta2,kmadd(alphaL,kmul(kmadd(At12L,Atm13,kmadd(At22L,Atm23,kmul(At23L,Atm33))),ToReal(-2.)),kmadd(At23L,kmadd(alphaL,trKL,kmadd(PDstandardNth1beta1,ToReal(-0.6666666666666666666666666666666666666667),kmul(kadd(PDstandardNth2beta2,PDstandardNth3beta3),ToReal(0.3333333333333333333333333333333333333333)))),kmul(em4phi,kadd(Ats23,kmadd(g23,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),kmul(alphaL,kmadd(eTyzL,ToReal(-25.13274122871834590770114706623602307358),kmul(g23,kmul(trS,ToReal(8.377580409572781969233715688745341024526))))))))))))));
+
+ CCTK_REAL_VEC At33rhsL =
+ kmadd(alphaL,kmul(kmadd(At13L,Atm13,kmadd(At23L,Atm23,kmul(At33L,Atm33))),ToReal(-2.)),kmadd(At33L,kmadd(alphaL,trKL,kmadd(kadd(PDstandardNth1beta1,PDstandardNth2beta2),ToReal(-0.6666666666666666666666666666666666666667),kmul(PDstandardNth3beta3,ToReal(1.333333333333333333333333333333333333333)))),kmadd(kmadd(At13L,PDstandardNth3beta1,kmul(At23L,PDstandardNth3beta2)),ToReal(2.),kmul(em4phi,kadd(Ats33,kmadd(g33,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),kmul(alphaL,kmadd(eTzzL,ToReal(-25.13274122871834590770114706623602307358),kmul(g33,kmul(trS,ToReal(8.377580409572781969233715688745341024526)))))))))));
+
+ /* If necessary, store only partial vectors after the first iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0))
+ {
+ ptrdiff_t const elt_count_lo = lc_imin-i;
+ ptrdiff_t const elt_count_hi = lc_imax-i;
+ vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi);
+ break;
+ }
+
+ /* If necessary, store only partial vectors after the first iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0))
+ {
+ ptrdiff_t const elt_count = lc_imin-i;
+ vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count);
+ vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count);
+ vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count);
+ vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count);
+ vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count);
+ vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count);
+ continue;
+ }
+
+ /* If necessary, store only partial vectors after the last iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0))
+ {
+ ptrdiff_t const elt_count = lc_imax-i;
+ vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count);
+ vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count);
+ vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count);
+ vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count);
+ vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count);
+ vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count);
+ break;
+ }
/* Copy local copies back to grid functions */
- At11rhs[index] = At11rhsL;
- At12rhs[index] = At12rhsL;
- At13rhs[index] = At13rhsL;
- At22rhs[index] = At22rhsL;
- At23rhs[index] = At23rhsL;
- At33rhs[index] = At33rhsL;
+ vec_store_nta(At11rhs[index],At11rhsL);
+ vec_store_nta(At12rhs[index],At12rhsL);
+ vec_store_nta(At13rhs[index],At13rhsL);
+ vec_store_nta(At22rhs[index],At22rhsL);
+ vec_store_nta(At23rhs[index],At23rhsL);
+ vec_store_nta(At33rhs[index],At33rhsL);
}
- LC_ENDLOOP3 (ML_BSSN_RHS2);
+ LC_ENDLOOP3VEC (ML_BSSN_RHS2);
}
extern "C" void ML_BSSN_RHS2(CCTK_ARGUMENTS)
diff --git a/ML_BSSN/src/ML_BSSN_RHSStaticBoundary.cc b/ML_BSSN/src/ML_BSSN_RHSStaticBoundary.cc
index d5809e7..6d1c90d 100644
--- a/ML_BSSN/src/ML_BSSN_RHSStaticBoundary.cc
+++ b/ML_BSSN/src/ML_BSSN_RHSStaticBoundary.cc
@@ -13,13 +13,14 @@
#include "GenericFD.h"
#include "Differencing.h"
#include "loopcontrol.h"
+#include "vectors.h"
/* Define macros used in calculations */
#define INITVALUE (42)
#define QAD(x) (SQR(SQR(x)))
-#define INV(x) ((1.0) / (x))
-#define SQR(x) ((x) * (x))
-#define CUB(x) ((x) * (x) * (x))
+#define INV(x) (kdiv(ToReal(1.0),x))
+#define SQR(x) (kmul(x,x))
+#define CUB(x) (kmul(x,SQR(x)))
extern "C" void ML_BSSN_RHSStaticBoundary_SelectBCs(CCTK_ARGUMENTS)
{
@@ -88,47 +89,48 @@ static void ML_BSSN_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, in
ptrdiff_t const cdi = sizeof(CCTK_REAL) * di;
ptrdiff_t const cdj = sizeof(CCTK_REAL) * dj;
ptrdiff_t const cdk = sizeof(CCTK_REAL) * dk;
- CCTK_REAL const dx = ToReal(CCTK_DELTA_SPACE(0));
- CCTK_REAL const dy = ToReal(CCTK_DELTA_SPACE(1));
- CCTK_REAL const dz = ToReal(CCTK_DELTA_SPACE(2));
- CCTK_REAL const dt = ToReal(CCTK_DELTA_TIME);
- CCTK_REAL const dxi = INV(dx);
- CCTK_REAL const dyi = INV(dy);
- CCTK_REAL const dzi = INV(dz);
- CCTK_REAL const khalf = 0.5;
- CCTK_REAL const kthird = 1/3.0;
- CCTK_REAL const ktwothird = 2.0/3.0;
- CCTK_REAL const kfourthird = 4.0/3.0;
- CCTK_REAL const keightthird = 8.0/3.0;
- CCTK_REAL const hdxi = 0.5 * dxi;
- CCTK_REAL const hdyi = 0.5 * dyi;
- CCTK_REAL const hdzi = 0.5 * dzi;
+ CCTK_REAL_VEC const dx = ToReal(CCTK_DELTA_SPACE(0));
+ CCTK_REAL_VEC const dy = ToReal(CCTK_DELTA_SPACE(1));
+ CCTK_REAL_VEC const dz = ToReal(CCTK_DELTA_SPACE(2));
+ CCTK_REAL_VEC const dt = ToReal(CCTK_DELTA_TIME);
+ CCTK_REAL_VEC const dxi = INV(dx);
+ CCTK_REAL_VEC const dyi = INV(dy);
+ CCTK_REAL_VEC const dzi = INV(dz);
+ CCTK_REAL_VEC const khalf = ToReal(0.5);
+ CCTK_REAL_VEC const kthird = ToReal(1.0/3.0);
+ CCTK_REAL_VEC const ktwothird = ToReal(2.0/3.0);
+ CCTK_REAL_VEC const kfourthird = ToReal(4.0/3.0);
+ CCTK_REAL_VEC const keightthird = ToReal(8.0/3.0);
+ CCTK_REAL_VEC const hdxi = kmul(ToReal(0.5), dxi);
+ CCTK_REAL_VEC const hdyi = kmul(ToReal(0.5), dyi);
+ CCTK_REAL_VEC const hdzi = kmul(ToReal(0.5), dzi);
/* Initialize predefined quantities */
- CCTK_REAL const p1o12dx = 0.0833333333333333333333333333333*INV(dx);
- CCTK_REAL const p1o12dy = 0.0833333333333333333333333333333*INV(dy);
- CCTK_REAL const p1o12dz = 0.0833333333333333333333333333333*INV(dz);
- CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx)*INV(dy);
- CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx)*INV(dz);
- CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy)*INV(dz);
- CCTK_REAL const p1o24dx = 0.0416666666666666666666666666667*INV(dx);
- CCTK_REAL const p1o24dy = 0.0416666666666666666666666666667*INV(dy);
- CCTK_REAL const p1o24dz = 0.0416666666666666666666666666667*INV(dz);
- CCTK_REAL const p1o64dx = 0.015625*INV(dx);
- CCTK_REAL const p1o64dy = 0.015625*INV(dy);
- CCTK_REAL const p1o64dz = 0.015625*INV(dz);
- CCTK_REAL const p1odx = INV(dx);
- CCTK_REAL const p1ody = INV(dy);
- CCTK_REAL const p1odz = INV(dz);
- CCTK_REAL const pm1o12dx2 = -0.0833333333333333333333333333333*INV(SQR(dx));
- CCTK_REAL const pm1o12dy2 = -0.0833333333333333333333333333333*INV(SQR(dy));
- CCTK_REAL const pm1o12dz2 = -0.0833333333333333333333333333333*INV(SQR(dz));
+ CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o24dx = kmul(INV(dx),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o24dy = kmul(INV(dy),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o24dz = kmul(INV(dz),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625));
+ CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625));
+ CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625));
+ CCTK_REAL_VEC const p1odx = INV(dx);
+ CCTK_REAL_VEC const p1ody = INV(dy);
+ CCTK_REAL_VEC const p1odz = INV(dz);
+ CCTK_REAL_VEC const pm1o12dx2 = kmul(INV(SQR(dx)),ToReal(-0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const pm1o12dy2 = kmul(INV(SQR(dy)),ToReal(-0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const pm1o12dz2 = kmul(INV(SQR(dz)),ToReal(-0.0833333333333333333333333333333));
/* Loop over the grid points */
#pragma omp parallel
- LC_LOOP3 (ML_BSSN_RHSStaticBoundary,
+ LC_LOOP3VEC (ML_BSSN_RHSStaticBoundary,
i,j,k, min[0],min[1],min[2], max[0],max[1],max[2],
- cctk_lsh[0],cctk_lsh[1],cctk_lsh[2])
+ cctk_lsh[0],cctk_lsh[1],cctk_lsh[2],
+ CCTK_REAL_VEC_SIZE)
{
ptrdiff_t const index = di*i + dj*j + dk*k;
@@ -141,84 +143,184 @@ static void ML_BSSN_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, in
/* Precompute derivatives */
/* Calculate temporaries and grid functions */
- CCTK_REAL phirhsL = 0;
+ CCTK_REAL_VEC phirhsL = ToReal(0);
- CCTK_REAL gt11rhsL = 0;
+ CCTK_REAL_VEC gt11rhsL = ToReal(0);
- CCTK_REAL gt12rhsL = 0;
+ CCTK_REAL_VEC gt12rhsL = ToReal(0);
- CCTK_REAL gt13rhsL = 0;
+ CCTK_REAL_VEC gt13rhsL = ToReal(0);
- CCTK_REAL gt22rhsL = 0;
+ CCTK_REAL_VEC gt22rhsL = ToReal(0);
- CCTK_REAL gt23rhsL = 0;
+ CCTK_REAL_VEC gt23rhsL = ToReal(0);
- CCTK_REAL gt33rhsL = 0;
+ CCTK_REAL_VEC gt33rhsL = ToReal(0);
- CCTK_REAL trKrhsL = 0;
+ CCTK_REAL_VEC trKrhsL = ToReal(0);
- CCTK_REAL At11rhsL = 0;
+ CCTK_REAL_VEC At11rhsL = ToReal(0);
- CCTK_REAL At12rhsL = 0;
+ CCTK_REAL_VEC At12rhsL = ToReal(0);
- CCTK_REAL At13rhsL = 0;
+ CCTK_REAL_VEC At13rhsL = ToReal(0);
- CCTK_REAL At22rhsL = 0;
+ CCTK_REAL_VEC At22rhsL = ToReal(0);
- CCTK_REAL At23rhsL = 0;
+ CCTK_REAL_VEC At23rhsL = ToReal(0);
- CCTK_REAL At33rhsL = 0;
+ CCTK_REAL_VEC At33rhsL = ToReal(0);
- CCTK_REAL Xt1rhsL = 0;
+ CCTK_REAL_VEC Xt1rhsL = ToReal(0);
- CCTK_REAL Xt2rhsL = 0;
+ CCTK_REAL_VEC Xt2rhsL = ToReal(0);
- CCTK_REAL Xt3rhsL = 0;
+ CCTK_REAL_VEC Xt3rhsL = ToReal(0);
- CCTK_REAL alpharhsL = 0;
+ CCTK_REAL_VEC alpharhsL = ToReal(0);
- CCTK_REAL ArhsL = 0;
+ CCTK_REAL_VEC ArhsL = ToReal(0);
- CCTK_REAL beta1rhsL = 0;
+ CCTK_REAL_VEC beta1rhsL = ToReal(0);
- CCTK_REAL beta2rhsL = 0;
+ CCTK_REAL_VEC beta2rhsL = ToReal(0);
- CCTK_REAL beta3rhsL = 0;
+ CCTK_REAL_VEC beta3rhsL = ToReal(0);
- CCTK_REAL B1rhsL = 0;
+ CCTK_REAL_VEC B1rhsL = ToReal(0);
- CCTK_REAL B2rhsL = 0;
+ CCTK_REAL_VEC B2rhsL = ToReal(0);
- CCTK_REAL B3rhsL = 0;
+ CCTK_REAL_VEC B3rhsL = ToReal(0);
+
+ /* If necessary, store only partial vectors after the first iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0))
+ {
+ ptrdiff_t const elt_count_lo = lc_imin-i;
+ ptrdiff_t const elt_count_hi = lc_imax-i;
+ vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi);
+ break;
+ }
+
+ /* If necessary, store only partial vectors after the first iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0))
+ {
+ ptrdiff_t const elt_count = lc_imin-i;
+ vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count);
+ vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count);
+ vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count);
+ vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count);
+ vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count);
+ vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count);
+ vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count);
+ vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count);
+ vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count);
+ vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count);
+ vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count);
+ vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count);
+ vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count);
+ vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count);
+ vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count);
+ vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count);
+ vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count);
+ vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count);
+ vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count);
+ vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count);
+ vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count);
+ vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count);
+ vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count);
+ vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count);
+ vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count);
+ continue;
+ }
+
+ /* If necessary, store only partial vectors after the last iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0))
+ {
+ ptrdiff_t const elt_count = lc_imax-i;
+ vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count);
+ vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count);
+ vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count);
+ vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count);
+ vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count);
+ vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count);
+ vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count);
+ vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count);
+ vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count);
+ vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count);
+ vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count);
+ vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count);
+ vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count);
+ vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count);
+ vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count);
+ vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count);
+ vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count);
+ vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count);
+ vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count);
+ vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count);
+ vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count);
+ vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count);
+ vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count);
+ vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count);
+ vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count);
+ break;
+ }
/* Copy local copies back to grid functions */
- alpharhs[index] = alpharhsL;
- Arhs[index] = ArhsL;
- At11rhs[index] = At11rhsL;
- At12rhs[index] = At12rhsL;
- At13rhs[index] = At13rhsL;
- At22rhs[index] = At22rhsL;
- At23rhs[index] = At23rhsL;
- At33rhs[index] = At33rhsL;
- B1rhs[index] = B1rhsL;
- B2rhs[index] = B2rhsL;
- B3rhs[index] = B3rhsL;
- beta1rhs[index] = beta1rhsL;
- beta2rhs[index] = beta2rhsL;
- beta3rhs[index] = beta3rhsL;
- gt11rhs[index] = gt11rhsL;
- gt12rhs[index] = gt12rhsL;
- gt13rhs[index] = gt13rhsL;
- gt22rhs[index] = gt22rhsL;
- gt23rhs[index] = gt23rhsL;
- gt33rhs[index] = gt33rhsL;
- phirhs[index] = phirhsL;
- trKrhs[index] = trKrhsL;
- Xt1rhs[index] = Xt1rhsL;
- Xt2rhs[index] = Xt2rhsL;
- Xt3rhs[index] = Xt3rhsL;
+ vec_store_nta(alpharhs[index],alpharhsL);
+ vec_store_nta(Arhs[index],ArhsL);
+ vec_store_nta(At11rhs[index],At11rhsL);
+ vec_store_nta(At12rhs[index],At12rhsL);
+ vec_store_nta(At13rhs[index],At13rhsL);
+ vec_store_nta(At22rhs[index],At22rhsL);
+ vec_store_nta(At23rhs[index],At23rhsL);
+ vec_store_nta(At33rhs[index],At33rhsL);
+ vec_store_nta(B1rhs[index],B1rhsL);
+ vec_store_nta(B2rhs[index],B2rhsL);
+ vec_store_nta(B3rhs[index],B3rhsL);
+ vec_store_nta(beta1rhs[index],beta1rhsL);
+ vec_store_nta(beta2rhs[index],beta2rhsL);
+ vec_store_nta(beta3rhs[index],beta3rhsL);
+ vec_store_nta(gt11rhs[index],gt11rhsL);
+ vec_store_nta(gt12rhs[index],gt12rhsL);
+ vec_store_nta(gt13rhs[index],gt13rhsL);
+ vec_store_nta(gt22rhs[index],gt22rhsL);
+ vec_store_nta(gt23rhs[index],gt23rhsL);
+ vec_store_nta(gt33rhs[index],gt33rhsL);
+ vec_store_nta(phirhs[index],phirhsL);
+ vec_store_nta(trKrhs[index],trKrhsL);
+ vec_store_nta(Xt1rhs[index],Xt1rhsL);
+ vec_store_nta(Xt2rhs[index],Xt2rhsL);
+ vec_store_nta(Xt3rhs[index],Xt3rhsL);
}
- LC_ENDLOOP3 (ML_BSSN_RHSStaticBoundary);
+ LC_ENDLOOP3VEC (ML_BSSN_RHSStaticBoundary);
}
extern "C" void ML_BSSN_RHSStaticBoundary(CCTK_ARGUMENTS)
diff --git a/ML_BSSN/src/ML_BSSN_boundary.cc b/ML_BSSN/src/ML_BSSN_boundary.cc
index ac1e90f..b00443d 100644
--- a/ML_BSSN/src/ML_BSSN_boundary.cc
+++ b/ML_BSSN/src/ML_BSSN_boundary.cc
@@ -13,13 +13,14 @@
#include "GenericFD.h"
#include "Differencing.h"
#include "loopcontrol.h"
+#include "vectors.h"
/* Define macros used in calculations */
#define INITVALUE (42)
#define QAD(x) (SQR(SQR(x)))
-#define INV(x) ((1.0) / (x))
-#define SQR(x) ((x) * (x))
-#define CUB(x) ((x) * (x) * (x))
+#define INV(x) (kdiv(ToReal(1.0),x))
+#define SQR(x) (kmul(x,x))
+#define CUB(x) (kmul(x,SQR(x)))
extern "C" void ML_BSSN_boundary_SelectBCs(CCTK_ARGUMENTS)
{
@@ -88,47 +89,48 @@ static void ML_BSSN_boundary_Body(cGH const * restrict const cctkGH, int const d
ptrdiff_t const cdi = sizeof(CCTK_REAL) * di;
ptrdiff_t const cdj = sizeof(CCTK_REAL) * dj;
ptrdiff_t const cdk = sizeof(CCTK_REAL) * dk;
- CCTK_REAL const dx = ToReal(CCTK_DELTA_SPACE(0));
- CCTK_REAL const dy = ToReal(CCTK_DELTA_SPACE(1));
- CCTK_REAL const dz = ToReal(CCTK_DELTA_SPACE(2));
- CCTK_REAL const dt = ToReal(CCTK_DELTA_TIME);
- CCTK_REAL const dxi = INV(dx);
- CCTK_REAL const dyi = INV(dy);
- CCTK_REAL const dzi = INV(dz);
- CCTK_REAL const khalf = 0.5;
- CCTK_REAL const kthird = 1/3.0;
- CCTK_REAL const ktwothird = 2.0/3.0;
- CCTK_REAL const kfourthird = 4.0/3.0;
- CCTK_REAL const keightthird = 8.0/3.0;
- CCTK_REAL const hdxi = 0.5 * dxi;
- CCTK_REAL const hdyi = 0.5 * dyi;
- CCTK_REAL const hdzi = 0.5 * dzi;
+ CCTK_REAL_VEC const dx = ToReal(CCTK_DELTA_SPACE(0));
+ CCTK_REAL_VEC const dy = ToReal(CCTK_DELTA_SPACE(1));
+ CCTK_REAL_VEC const dz = ToReal(CCTK_DELTA_SPACE(2));
+ CCTK_REAL_VEC const dt = ToReal(CCTK_DELTA_TIME);
+ CCTK_REAL_VEC const dxi = INV(dx);
+ CCTK_REAL_VEC const dyi = INV(dy);
+ CCTK_REAL_VEC const dzi = INV(dz);
+ CCTK_REAL_VEC const khalf = ToReal(0.5);
+ CCTK_REAL_VEC const kthird = ToReal(1.0/3.0);
+ CCTK_REAL_VEC const ktwothird = ToReal(2.0/3.0);
+ CCTK_REAL_VEC const kfourthird = ToReal(4.0/3.0);
+ CCTK_REAL_VEC const keightthird = ToReal(8.0/3.0);
+ CCTK_REAL_VEC const hdxi = kmul(ToReal(0.5), dxi);
+ CCTK_REAL_VEC const hdyi = kmul(ToReal(0.5), dyi);
+ CCTK_REAL_VEC const hdzi = kmul(ToReal(0.5), dzi);
/* Initialize predefined quantities */
- CCTK_REAL const p1o12dx = 0.0833333333333333333333333333333*INV(dx);
- CCTK_REAL const p1o12dy = 0.0833333333333333333333333333333*INV(dy);
- CCTK_REAL const p1o12dz = 0.0833333333333333333333333333333*INV(dz);
- CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx)*INV(dy);
- CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx)*INV(dz);
- CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy)*INV(dz);
- CCTK_REAL const p1o24dx = 0.0416666666666666666666666666667*INV(dx);
- CCTK_REAL const p1o24dy = 0.0416666666666666666666666666667*INV(dy);
- CCTK_REAL const p1o24dz = 0.0416666666666666666666666666667*INV(dz);
- CCTK_REAL const p1o64dx = 0.015625*INV(dx);
- CCTK_REAL const p1o64dy = 0.015625*INV(dy);
- CCTK_REAL const p1o64dz = 0.015625*INV(dz);
- CCTK_REAL const p1odx = INV(dx);
- CCTK_REAL const p1ody = INV(dy);
- CCTK_REAL const p1odz = INV(dz);
- CCTK_REAL const pm1o12dx2 = -0.0833333333333333333333333333333*INV(SQR(dx));
- CCTK_REAL const pm1o12dy2 = -0.0833333333333333333333333333333*INV(SQR(dy));
- CCTK_REAL const pm1o12dz2 = -0.0833333333333333333333333333333*INV(SQR(dz));
+ CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o24dx = kmul(INV(dx),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o24dy = kmul(INV(dy),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o24dz = kmul(INV(dz),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625));
+ CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625));
+ CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625));
+ CCTK_REAL_VEC const p1odx = INV(dx);
+ CCTK_REAL_VEC const p1ody = INV(dy);
+ CCTK_REAL_VEC const p1odz = INV(dz);
+ CCTK_REAL_VEC const pm1o12dx2 = kmul(INV(SQR(dx)),ToReal(-0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const pm1o12dy2 = kmul(INV(SQR(dy)),ToReal(-0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const pm1o12dz2 = kmul(INV(SQR(dz)),ToReal(-0.0833333333333333333333333333333));
/* Loop over the grid points */
#pragma omp parallel
- LC_LOOP3 (ML_BSSN_boundary,
+ LC_LOOP3VEC (ML_BSSN_boundary,
i,j,k, min[0],min[1],min[2], max[0],max[1],max[2],
- cctk_lsh[0],cctk_lsh[1],cctk_lsh[2])
+ cctk_lsh[0],cctk_lsh[1],cctk_lsh[2],
+ CCTK_REAL_VEC_SIZE)
{
ptrdiff_t const index = di*i + dj*j + dk*k;
@@ -141,84 +143,184 @@ static void ML_BSSN_boundary_Body(cGH const * restrict const cctkGH, int const d
/* Precompute derivatives */
/* Calculate temporaries and grid functions */
- CCTK_REAL phiL = IfThen(conformalMethod,1,0);
+ CCTK_REAL_VEC phiL = IfThen(conformalMethod,ToReal(1),ToReal(0));
- CCTK_REAL gt11L = 1;
+ CCTK_REAL_VEC gt11L = ToReal(1);
- CCTK_REAL gt12L = 0;
+ CCTK_REAL_VEC gt12L = ToReal(0);
- CCTK_REAL gt13L = 0;
+ CCTK_REAL_VEC gt13L = ToReal(0);
- CCTK_REAL gt22L = 1;
+ CCTK_REAL_VEC gt22L = ToReal(1);
- CCTK_REAL gt23L = 0;
+ CCTK_REAL_VEC gt23L = ToReal(0);
- CCTK_REAL gt33L = 1;
+ CCTK_REAL_VEC gt33L = ToReal(1);
- CCTK_REAL trKL = 0;
+ CCTK_REAL_VEC trKL = ToReal(0);
- CCTK_REAL At11L = 0;
+ CCTK_REAL_VEC At11L = ToReal(0);
- CCTK_REAL At12L = 0;
+ CCTK_REAL_VEC At12L = ToReal(0);
- CCTK_REAL At13L = 0;
+ CCTK_REAL_VEC At13L = ToReal(0);
- CCTK_REAL At22L = 0;
+ CCTK_REAL_VEC At22L = ToReal(0);
- CCTK_REAL At23L = 0;
+ CCTK_REAL_VEC At23L = ToReal(0);
- CCTK_REAL At33L = 0;
+ CCTK_REAL_VEC At33L = ToReal(0);
- CCTK_REAL Xt1L = 0;
+ CCTK_REAL_VEC Xt1L = ToReal(0);
- CCTK_REAL Xt2L = 0;
+ CCTK_REAL_VEC Xt2L = ToReal(0);
- CCTK_REAL Xt3L = 0;
+ CCTK_REAL_VEC Xt3L = ToReal(0);
- CCTK_REAL alphaL = 1;
+ CCTK_REAL_VEC alphaL = ToReal(1);
- CCTK_REAL AL = 0;
+ CCTK_REAL_VEC AL = ToReal(0);
- CCTK_REAL beta1L = 0;
+ CCTK_REAL_VEC beta1L = ToReal(0);
- CCTK_REAL beta2L = 0;
+ CCTK_REAL_VEC beta2L = ToReal(0);
- CCTK_REAL beta3L = 0;
+ CCTK_REAL_VEC beta3L = ToReal(0);
- CCTK_REAL B1L = 0;
+ CCTK_REAL_VEC B1L = ToReal(0);
- CCTK_REAL B2L = 0;
+ CCTK_REAL_VEC B2L = ToReal(0);
- CCTK_REAL B3L = 0;
+ CCTK_REAL_VEC B3L = ToReal(0);
+
+ /* If necessary, store only partial vectors after the first iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0))
+ {
+ ptrdiff_t const elt_count_lo = lc_imin-i;
+ ptrdiff_t const elt_count_hi = lc_imax-i;
+ vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi);
+ break;
+ }
+
+ /* If necessary, store only partial vectors after the first iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0))
+ {
+ ptrdiff_t const elt_count = lc_imin-i;
+ vec_store_nta_partial_hi(A[index],AL,elt_count);
+ vec_store_nta_partial_hi(alpha[index],alphaL,elt_count);
+ vec_store_nta_partial_hi(At11[index],At11L,elt_count);
+ vec_store_nta_partial_hi(At12[index],At12L,elt_count);
+ vec_store_nta_partial_hi(At13[index],At13L,elt_count);
+ vec_store_nta_partial_hi(At22[index],At22L,elt_count);
+ vec_store_nta_partial_hi(At23[index],At23L,elt_count);
+ vec_store_nta_partial_hi(At33[index],At33L,elt_count);
+ vec_store_nta_partial_hi(B1[index],B1L,elt_count);
+ vec_store_nta_partial_hi(B2[index],B2L,elt_count);
+ vec_store_nta_partial_hi(B3[index],B3L,elt_count);
+ vec_store_nta_partial_hi(beta1[index],beta1L,elt_count);
+ vec_store_nta_partial_hi(beta2[index],beta2L,elt_count);
+ vec_store_nta_partial_hi(beta3[index],beta3L,elt_count);
+ vec_store_nta_partial_hi(gt11[index],gt11L,elt_count);
+ vec_store_nta_partial_hi(gt12[index],gt12L,elt_count);
+ vec_store_nta_partial_hi(gt13[index],gt13L,elt_count);
+ vec_store_nta_partial_hi(gt22[index],gt22L,elt_count);
+ vec_store_nta_partial_hi(gt23[index],gt23L,elt_count);
+ vec_store_nta_partial_hi(gt33[index],gt33L,elt_count);
+ vec_store_nta_partial_hi(phi[index],phiL,elt_count);
+ vec_store_nta_partial_hi(trK[index],trKL,elt_count);
+ vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count);
+ vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count);
+ vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count);
+ continue;
+ }
+
+ /* If necessary, store only partial vectors after the last iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0))
+ {
+ ptrdiff_t const elt_count = lc_imax-i;
+ vec_store_nta_partial_lo(A[index],AL,elt_count);
+ vec_store_nta_partial_lo(alpha[index],alphaL,elt_count);
+ vec_store_nta_partial_lo(At11[index],At11L,elt_count);
+ vec_store_nta_partial_lo(At12[index],At12L,elt_count);
+ vec_store_nta_partial_lo(At13[index],At13L,elt_count);
+ vec_store_nta_partial_lo(At22[index],At22L,elt_count);
+ vec_store_nta_partial_lo(At23[index],At23L,elt_count);
+ vec_store_nta_partial_lo(At33[index],At33L,elt_count);
+ vec_store_nta_partial_lo(B1[index],B1L,elt_count);
+ vec_store_nta_partial_lo(B2[index],B2L,elt_count);
+ vec_store_nta_partial_lo(B3[index],B3L,elt_count);
+ vec_store_nta_partial_lo(beta1[index],beta1L,elt_count);
+ vec_store_nta_partial_lo(beta2[index],beta2L,elt_count);
+ vec_store_nta_partial_lo(beta3[index],beta3L,elt_count);
+ vec_store_nta_partial_lo(gt11[index],gt11L,elt_count);
+ vec_store_nta_partial_lo(gt12[index],gt12L,elt_count);
+ vec_store_nta_partial_lo(gt13[index],gt13L,elt_count);
+ vec_store_nta_partial_lo(gt22[index],gt22L,elt_count);
+ vec_store_nta_partial_lo(gt23[index],gt23L,elt_count);
+ vec_store_nta_partial_lo(gt33[index],gt33L,elt_count);
+ vec_store_nta_partial_lo(phi[index],phiL,elt_count);
+ vec_store_nta_partial_lo(trK[index],trKL,elt_count);
+ vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count);
+ vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count);
+ vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count);
+ break;
+ }
/* Copy local copies back to grid functions */
- A[index] = AL;
- alpha[index] = alphaL;
- At11[index] = At11L;
- At12[index] = At12L;
- At13[index] = At13L;
- At22[index] = At22L;
- At23[index] = At23L;
- At33[index] = At33L;
- B1[index] = B1L;
- B2[index] = B2L;
- B3[index] = B3L;
- beta1[index] = beta1L;
- beta2[index] = beta2L;
- beta3[index] = beta3L;
- gt11[index] = gt11L;
- gt12[index] = gt12L;
- gt13[index] = gt13L;
- gt22[index] = gt22L;
- gt23[index] = gt23L;
- gt33[index] = gt33L;
- phi[index] = phiL;
- trK[index] = trKL;
- Xt1[index] = Xt1L;
- Xt2[index] = Xt2L;
- Xt3[index] = Xt3L;
+ vec_store_nta(A[index],AL);
+ vec_store_nta(alpha[index],alphaL);
+ vec_store_nta(At11[index],At11L);
+ vec_store_nta(At12[index],At12L);
+ vec_store_nta(At13[index],At13L);
+ vec_store_nta(At22[index],At22L);
+ vec_store_nta(At23[index],At23L);
+ vec_store_nta(At33[index],At33L);
+ vec_store_nta(B1[index],B1L);
+ vec_store_nta(B2[index],B2L);
+ vec_store_nta(B3[index],B3L);
+ vec_store_nta(beta1[index],beta1L);
+ vec_store_nta(beta2[index],beta2L);
+ vec_store_nta(beta3[index],beta3L);
+ vec_store_nta(gt11[index],gt11L);
+ vec_store_nta(gt12[index],gt12L);
+ vec_store_nta(gt13[index],gt13L);
+ vec_store_nta(gt22[index],gt22L);
+ vec_store_nta(gt23[index],gt23L);
+ vec_store_nta(gt33[index],gt33L);
+ vec_store_nta(phi[index],phiL);
+ vec_store_nta(trK[index],trKL);
+ vec_store_nta(Xt1[index],Xt1L);
+ vec_store_nta(Xt2[index],Xt2L);
+ vec_store_nta(Xt3[index],Xt3L);
}
- LC_ENDLOOP3 (ML_BSSN_boundary);
+ LC_ENDLOOP3VEC (ML_BSSN_boundary);
}
extern "C" void ML_BSSN_boundary(CCTK_ARGUMENTS)
diff --git a/ML_BSSN/src/ML_BSSN_constraints1.cc b/ML_BSSN/src/ML_BSSN_constraints1.cc
index 77156fc..3fb83d9 100644
--- a/ML_BSSN/src/ML_BSSN_constraints1.cc
+++ b/ML_BSSN/src/ML_BSSN_constraints1.cc
@@ -13,13 +13,14 @@
#include "GenericFD.h"
#include "Differencing.h"
#include "loopcontrol.h"
+#include "vectors.h"
/* Define macros used in calculations */
#define INITVALUE (42)
#define QAD(x) (SQR(SQR(x)))
-#define INV(x) ((1.0) / (x))
-#define SQR(x) ((x) * (x))
-#define CUB(x) ((x) * (x) * (x))
+#define INV(x) (kdiv(ToReal(1.0),x))
+#define SQR(x) (kmul(x,x))
+#define CUB(x) (kmul(x,SQR(x)))
extern "C" void ML_BSSN_constraints1_SelectBCs(CCTK_ARGUMENTS)
{
@@ -65,88 +66,89 @@ static void ML_BSSN_constraints1_Body(cGH const * restrict const cctkGH, int con
ptrdiff_t const cdi = sizeof(CCTK_REAL) * di;
ptrdiff_t const cdj = sizeof(CCTK_REAL) * dj;
ptrdiff_t const cdk = sizeof(CCTK_REAL) * dk;
- CCTK_REAL const dx = ToReal(CCTK_DELTA_SPACE(0));
- CCTK_REAL const dy = ToReal(CCTK_DELTA_SPACE(1));
- CCTK_REAL const dz = ToReal(CCTK_DELTA_SPACE(2));
- CCTK_REAL const dt = ToReal(CCTK_DELTA_TIME);
- CCTK_REAL const dxi = INV(dx);
- CCTK_REAL const dyi = INV(dy);
- CCTK_REAL const dzi = INV(dz);
- CCTK_REAL const khalf = 0.5;
- CCTK_REAL const kthird = 1/3.0;
- CCTK_REAL const ktwothird = 2.0/3.0;
- CCTK_REAL const kfourthird = 4.0/3.0;
- CCTK_REAL const keightthird = 8.0/3.0;
- CCTK_REAL const hdxi = 0.5 * dxi;
- CCTK_REAL const hdyi = 0.5 * dyi;
- CCTK_REAL const hdzi = 0.5 * dzi;
+ CCTK_REAL_VEC const dx = ToReal(CCTK_DELTA_SPACE(0));
+ CCTK_REAL_VEC const dy = ToReal(CCTK_DELTA_SPACE(1));
+ CCTK_REAL_VEC const dz = ToReal(CCTK_DELTA_SPACE(2));
+ CCTK_REAL_VEC const dt = ToReal(CCTK_DELTA_TIME);
+ CCTK_REAL_VEC const dxi = INV(dx);
+ CCTK_REAL_VEC const dyi = INV(dy);
+ CCTK_REAL_VEC const dzi = INV(dz);
+ CCTK_REAL_VEC const khalf = ToReal(0.5);
+ CCTK_REAL_VEC const kthird = ToReal(1.0/3.0);
+ CCTK_REAL_VEC const ktwothird = ToReal(2.0/3.0);
+ CCTK_REAL_VEC const kfourthird = ToReal(4.0/3.0);
+ CCTK_REAL_VEC const keightthird = ToReal(8.0/3.0);
+ CCTK_REAL_VEC const hdxi = kmul(ToReal(0.5), dxi);
+ CCTK_REAL_VEC const hdyi = kmul(ToReal(0.5), dyi);
+ CCTK_REAL_VEC const hdzi = kmul(ToReal(0.5), dzi);
/* Initialize predefined quantities */
- CCTK_REAL const p1o12dx = 0.0833333333333333333333333333333*INV(dx);
- CCTK_REAL const p1o12dy = 0.0833333333333333333333333333333*INV(dy);
- CCTK_REAL const p1o12dz = 0.0833333333333333333333333333333*INV(dz);
- CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx)*INV(dy);
- CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx)*INV(dz);
- CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy)*INV(dz);
- CCTK_REAL const p1o24dx = 0.0416666666666666666666666666667*INV(dx);
- CCTK_REAL const p1o24dy = 0.0416666666666666666666666666667*INV(dy);
- CCTK_REAL const p1o24dz = 0.0416666666666666666666666666667*INV(dz);
- CCTK_REAL const p1o64dx = 0.015625*INV(dx);
- CCTK_REAL const p1o64dy = 0.015625*INV(dy);
- CCTK_REAL const p1o64dz = 0.015625*INV(dz);
- CCTK_REAL const p1odx = INV(dx);
- CCTK_REAL const p1ody = INV(dy);
- CCTK_REAL const p1odz = INV(dz);
- CCTK_REAL const pm1o12dx2 = -0.0833333333333333333333333333333*INV(SQR(dx));
- CCTK_REAL const pm1o12dy2 = -0.0833333333333333333333333333333*INV(SQR(dy));
- CCTK_REAL const pm1o12dz2 = -0.0833333333333333333333333333333*INV(SQR(dz));
+ CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o24dx = kmul(INV(dx),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o24dy = kmul(INV(dy),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o24dz = kmul(INV(dz),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625));
+ CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625));
+ CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625));
+ CCTK_REAL_VEC const p1odx = INV(dx);
+ CCTK_REAL_VEC const p1ody = INV(dy);
+ CCTK_REAL_VEC const p1odz = INV(dz);
+ CCTK_REAL_VEC const pm1o12dx2 = kmul(INV(SQR(dx)),ToReal(-0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const pm1o12dy2 = kmul(INV(SQR(dy)),ToReal(-0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const pm1o12dz2 = kmul(INV(SQR(dz)),ToReal(-0.0833333333333333333333333333333));
/* Loop over the grid points */
#pragma omp parallel
- LC_LOOP3 (ML_BSSN_constraints1,
+ LC_LOOP3VEC (ML_BSSN_constraints1,
i,j,k, min[0],min[1],min[2], max[0],max[1],max[2],
- cctk_lsh[0],cctk_lsh[1],cctk_lsh[2])
+ cctk_lsh[0],cctk_lsh[1],cctk_lsh[2],
+ CCTK_REAL_VEC_SIZE)
{
ptrdiff_t const index = di*i + dj*j + dk*k;
/* Assign local copies of grid functions */
- CCTK_REAL alphaL = alpha[index];
- CCTK_REAL At11L = At11[index];
- CCTK_REAL At12L = At12[index];
- CCTK_REAL At13L = At13[index];
- CCTK_REAL At22L = At22[index];
- CCTK_REAL At23L = At23[index];
- CCTK_REAL At33L = At33[index];
- CCTK_REAL beta1L = beta1[index];
- CCTK_REAL beta2L = beta2[index];
- CCTK_REAL beta3L = beta3[index];
- CCTK_REAL gt11L = gt11[index];
- CCTK_REAL gt12L = gt12[index];
- CCTK_REAL gt13L = gt13[index];
- CCTK_REAL gt22L = gt22[index];
- CCTK_REAL gt23L = gt23[index];
- CCTK_REAL gt33L = gt33[index];
- CCTK_REAL phiL = phi[index];
- CCTK_REAL trKL = trK[index];
- CCTK_REAL Xt1L = Xt1[index];
- CCTK_REAL Xt2L = Xt2[index];
- CCTK_REAL Xt3L = Xt3[index];
-
- CCTK_REAL eTttL, eTtxL, eTtyL, eTtzL, eTxxL, eTxyL, eTxzL, eTyyL, eTyzL, eTzzL;
+ CCTK_REAL_VEC alphaL = vec_load(alpha[index]);
+ CCTK_REAL_VEC At11L = vec_load(At11[index]);
+ CCTK_REAL_VEC At12L = vec_load(At12[index]);
+ CCTK_REAL_VEC At13L = vec_load(At13[index]);
+ CCTK_REAL_VEC At22L = vec_load(At22[index]);
+ CCTK_REAL_VEC At23L = vec_load(At23[index]);
+ CCTK_REAL_VEC At33L = vec_load(At33[index]);
+ CCTK_REAL_VEC beta1L = vec_load(beta1[index]);
+ CCTK_REAL_VEC beta2L = vec_load(beta2[index]);
+ CCTK_REAL_VEC beta3L = vec_load(beta3[index]);
+ CCTK_REAL_VEC gt11L = vec_load(gt11[index]);
+ CCTK_REAL_VEC gt12L = vec_load(gt12[index]);
+ CCTK_REAL_VEC gt13L = vec_load(gt13[index]);
+ CCTK_REAL_VEC gt22L = vec_load(gt22[index]);
+ CCTK_REAL_VEC gt23L = vec_load(gt23[index]);
+ CCTK_REAL_VEC gt33L = vec_load(gt33[index]);
+ CCTK_REAL_VEC phiL = vec_load(phi[index]);
+ CCTK_REAL_VEC trKL = vec_load(trK[index]);
+ CCTK_REAL_VEC Xt1L = vec_load(Xt1[index]);
+ CCTK_REAL_VEC Xt2L = vec_load(Xt2[index]);
+ CCTK_REAL_VEC Xt3L = vec_load(Xt3[index]);
+
+ CCTK_REAL_VEC eTttL, eTtxL, eTtyL, eTtzL, eTxxL, eTxyL, eTxzL, eTyyL, eTyzL, eTzzL;
if (*stress_energy_state)
{
- eTttL = eTtt[index];
- eTtxL = eTtx[index];
- eTtyL = eTty[index];
- eTtzL = eTtz[index];
- eTxxL = eTxx[index];
- eTxyL = eTxy[index];
- eTxzL = eTxz[index];
- eTyyL = eTyy[index];
- eTyzL = eTyz[index];
- eTzzL = eTzz[index];
+ eTttL = vec_load(eTtt[index]);
+ eTtxL = vec_load(eTtx[index]);
+ eTtyL = vec_load(eTty[index]);
+ eTtzL = vec_load(eTtz[index]);
+ eTxxL = vec_load(eTxx[index]);
+ eTxyL = vec_load(eTxy[index]);
+ eTxzL = vec_load(eTxz[index]);
+ eTyyL = vec_load(eTyy[index]);
+ eTyzL = vec_load(eTyz[index]);
+ eTzzL = vec_load(eTzz[index]);
}
else
{
@@ -165,443 +167,449 @@ static void ML_BSSN_constraints1_Body(cGH const * restrict const cctkGH, int con
/* Include user supplied include files */
/* Precompute derivatives */
- CCTK_REAL const PDstandardNth1gt11 = PDstandardNth1(&gt11[index]);
- CCTK_REAL const PDstandardNth2gt11 = PDstandardNth2(&gt11[index]);
- CCTK_REAL const PDstandardNth3gt11 = PDstandardNth3(&gt11[index]);
- CCTK_REAL const PDstandardNth11gt11 = PDstandardNth11(&gt11[index]);
- CCTK_REAL const PDstandardNth22gt11 = PDstandardNth22(&gt11[index]);
- CCTK_REAL const PDstandardNth33gt11 = PDstandardNth33(&gt11[index]);
- CCTK_REAL const PDstandardNth12gt11 = PDstandardNth12(&gt11[index]);
- CCTK_REAL const PDstandardNth13gt11 = PDstandardNth13(&gt11[index]);
- CCTK_REAL const PDstandardNth23gt11 = PDstandardNth23(&gt11[index]);
- CCTK_REAL const PDstandardNth1gt12 = PDstandardNth1(&gt12[index]);
- CCTK_REAL const PDstandardNth2gt12 = PDstandardNth2(&gt12[index]);
- CCTK_REAL const PDstandardNth3gt12 = PDstandardNth3(&gt12[index]);
- CCTK_REAL const PDstandardNth11gt12 = PDstandardNth11(&gt12[index]);
- CCTK_REAL const PDstandardNth22gt12 = PDstandardNth22(&gt12[index]);
- CCTK_REAL const PDstandardNth33gt12 = PDstandardNth33(&gt12[index]);
- CCTK_REAL const PDstandardNth12gt12 = PDstandardNth12(&gt12[index]);
- CCTK_REAL const PDstandardNth13gt12 = PDstandardNth13(&gt12[index]);
- CCTK_REAL const PDstandardNth23gt12 = PDstandardNth23(&gt12[index]);
- CCTK_REAL const PDstandardNth1gt13 = PDstandardNth1(&gt13[index]);
- CCTK_REAL const PDstandardNth2gt13 = PDstandardNth2(&gt13[index]);
- CCTK_REAL const PDstandardNth3gt13 = PDstandardNth3(&gt13[index]);
- CCTK_REAL const PDstandardNth11gt13 = PDstandardNth11(&gt13[index]);
- CCTK_REAL const PDstandardNth22gt13 = PDstandardNth22(&gt13[index]);
- CCTK_REAL const PDstandardNth33gt13 = PDstandardNth33(&gt13[index]);
- CCTK_REAL const PDstandardNth12gt13 = PDstandardNth12(&gt13[index]);
- CCTK_REAL const PDstandardNth13gt13 = PDstandardNth13(&gt13[index]);
- CCTK_REAL const PDstandardNth23gt13 = PDstandardNth23(&gt13[index]);
- CCTK_REAL const PDstandardNth1gt22 = PDstandardNth1(&gt22[index]);
- CCTK_REAL const PDstandardNth2gt22 = PDstandardNth2(&gt22[index]);
- CCTK_REAL const PDstandardNth3gt22 = PDstandardNth3(&gt22[index]);
- CCTK_REAL const PDstandardNth11gt22 = PDstandardNth11(&gt22[index]);
- CCTK_REAL const PDstandardNth22gt22 = PDstandardNth22(&gt22[index]);
- CCTK_REAL const PDstandardNth33gt22 = PDstandardNth33(&gt22[index]);
- CCTK_REAL const PDstandardNth12gt22 = PDstandardNth12(&gt22[index]);
- CCTK_REAL const PDstandardNth13gt22 = PDstandardNth13(&gt22[index]);
- CCTK_REAL const PDstandardNth23gt22 = PDstandardNth23(&gt22[index]);
- CCTK_REAL const PDstandardNth1gt23 = PDstandardNth1(&gt23[index]);
- CCTK_REAL const PDstandardNth2gt23 = PDstandardNth2(&gt23[index]);
- CCTK_REAL const PDstandardNth3gt23 = PDstandardNth3(&gt23[index]);
- CCTK_REAL const PDstandardNth11gt23 = PDstandardNth11(&gt23[index]);
- CCTK_REAL const PDstandardNth22gt23 = PDstandardNth22(&gt23[index]);
- CCTK_REAL const PDstandardNth33gt23 = PDstandardNth33(&gt23[index]);
- CCTK_REAL const PDstandardNth12gt23 = PDstandardNth12(&gt23[index]);
- CCTK_REAL const PDstandardNth13gt23 = PDstandardNth13(&gt23[index]);
- CCTK_REAL const PDstandardNth23gt23 = PDstandardNth23(&gt23[index]);
- CCTK_REAL const PDstandardNth1gt33 = PDstandardNth1(&gt33[index]);
- CCTK_REAL const PDstandardNth2gt33 = PDstandardNth2(&gt33[index]);
- CCTK_REAL const PDstandardNth3gt33 = PDstandardNth3(&gt33[index]);
- CCTK_REAL const PDstandardNth11gt33 = PDstandardNth11(&gt33[index]);
- CCTK_REAL const PDstandardNth22gt33 = PDstandardNth22(&gt33[index]);
- CCTK_REAL const PDstandardNth33gt33 = PDstandardNth33(&gt33[index]);
- CCTK_REAL const PDstandardNth12gt33 = PDstandardNth12(&gt33[index]);
- CCTK_REAL const PDstandardNth13gt33 = PDstandardNth13(&gt33[index]);
- CCTK_REAL const PDstandardNth23gt33 = PDstandardNth23(&gt33[index]);
- CCTK_REAL const PDstandardNth1phi = PDstandardNth1(&phi[index]);
- CCTK_REAL const PDstandardNth2phi = PDstandardNth2(&phi[index]);
- CCTK_REAL const PDstandardNth3phi = PDstandardNth3(&phi[index]);
- CCTK_REAL const PDstandardNth11phi = PDstandardNth11(&phi[index]);
- CCTK_REAL const PDstandardNth22phi = PDstandardNth22(&phi[index]);
- CCTK_REAL const PDstandardNth33phi = PDstandardNth33(&phi[index]);
- CCTK_REAL const PDstandardNth12phi = PDstandardNth12(&phi[index]);
- CCTK_REAL const PDstandardNth13phi = PDstandardNth13(&phi[index]);
- CCTK_REAL const PDstandardNth23phi = PDstandardNth23(&phi[index]);
- CCTK_REAL const PDstandardNth1Xt1 = PDstandardNth1(&Xt1[index]);
- CCTK_REAL const PDstandardNth2Xt1 = PDstandardNth2(&Xt1[index]);
- CCTK_REAL const PDstandardNth3Xt1 = PDstandardNth3(&Xt1[index]);
- CCTK_REAL const PDstandardNth1Xt2 = PDstandardNth1(&Xt2[index]);
- CCTK_REAL const PDstandardNth2Xt2 = PDstandardNth2(&Xt2[index]);
- CCTK_REAL const PDstandardNth3Xt2 = PDstandardNth3(&Xt2[index]);
- CCTK_REAL const PDstandardNth1Xt3 = PDstandardNth1(&Xt3[index]);
- CCTK_REAL const PDstandardNth2Xt3 = PDstandardNth2(&Xt3[index]);
- CCTK_REAL const PDstandardNth3Xt3 = PDstandardNth3(&Xt3[index]);
+ CCTK_REAL_VEC const PDstandardNth1gt11 = PDstandardNth1(&gt11[index]);
+ CCTK_REAL_VEC const PDstandardNth2gt11 = PDstandardNth2(&gt11[index]);
+ CCTK_REAL_VEC const PDstandardNth3gt11 = PDstandardNth3(&gt11[index]);
+ CCTK_REAL_VEC const PDstandardNth11gt11 = PDstandardNth11(&gt11[index]);
+ CCTK_REAL_VEC const PDstandardNth22gt11 = PDstandardNth22(&gt11[index]);
+ CCTK_REAL_VEC const PDstandardNth33gt11 = PDstandardNth33(&gt11[index]);
+ CCTK_REAL_VEC const PDstandardNth12gt11 = PDstandardNth12(&gt11[index]);
+ CCTK_REAL_VEC const PDstandardNth13gt11 = PDstandardNth13(&gt11[index]);
+ CCTK_REAL_VEC const PDstandardNth23gt11 = PDstandardNth23(&gt11[index]);
+ CCTK_REAL_VEC const PDstandardNth1gt12 = PDstandardNth1(&gt12[index]);
+ CCTK_REAL_VEC const PDstandardNth2gt12 = PDstandardNth2(&gt12[index]);
+ CCTK_REAL_VEC const PDstandardNth3gt12 = PDstandardNth3(&gt12[index]);
+ CCTK_REAL_VEC const PDstandardNth11gt12 = PDstandardNth11(&gt12[index]);
+ CCTK_REAL_VEC const PDstandardNth22gt12 = PDstandardNth22(&gt12[index]);
+ CCTK_REAL_VEC const PDstandardNth33gt12 = PDstandardNth33(&gt12[index]);
+ CCTK_REAL_VEC const PDstandardNth12gt12 = PDstandardNth12(&gt12[index]);
+ CCTK_REAL_VEC const PDstandardNth13gt12 = PDstandardNth13(&gt12[index]);
+ CCTK_REAL_VEC const PDstandardNth23gt12 = PDstandardNth23(&gt12[index]);
+ CCTK_REAL_VEC const PDstandardNth1gt13 = PDstandardNth1(&gt13[index]);
+ CCTK_REAL_VEC const PDstandardNth2gt13 = PDstandardNth2(&gt13[index]);
+ CCTK_REAL_VEC const PDstandardNth3gt13 = PDstandardNth3(&gt13[index]);
+ CCTK_REAL_VEC const PDstandardNth11gt13 = PDstandardNth11(&gt13[index]);
+ CCTK_REAL_VEC const PDstandardNth22gt13 = PDstandardNth22(&gt13[index]);
+ CCTK_REAL_VEC const PDstandardNth33gt13 = PDstandardNth33(&gt13[index]);
+ CCTK_REAL_VEC const PDstandardNth12gt13 = PDstandardNth12(&gt13[index]);
+ CCTK_REAL_VEC const PDstandardNth13gt13 = PDstandardNth13(&gt13[index]);
+ CCTK_REAL_VEC const PDstandardNth23gt13 = PDstandardNth23(&gt13[index]);
+ CCTK_REAL_VEC const PDstandardNth1gt22 = PDstandardNth1(&gt22[index]);
+ CCTK_REAL_VEC const PDstandardNth2gt22 = PDstandardNth2(&gt22[index]);
+ CCTK_REAL_VEC const PDstandardNth3gt22 = PDstandardNth3(&gt22[index]);
+ CCTK_REAL_VEC const PDstandardNth11gt22 = PDstandardNth11(&gt22[index]);
+ CCTK_REAL_VEC const PDstandardNth22gt22 = PDstandardNth22(&gt22[index]);
+ CCTK_REAL_VEC const PDstandardNth33gt22 = PDstandardNth33(&gt22[index]);
+ CCTK_REAL_VEC const PDstandardNth12gt22 = PDstandardNth12(&gt22[index]);
+ CCTK_REAL_VEC const PDstandardNth13gt22 = PDstandardNth13(&gt22[index]);
+ CCTK_REAL_VEC const PDstandardNth23gt22 = PDstandardNth23(&gt22[index]);
+ CCTK_REAL_VEC const PDstandardNth1gt23 = PDstandardNth1(&gt23[index]);
+ CCTK_REAL_VEC const PDstandardNth2gt23 = PDstandardNth2(&gt23[index]);
+ CCTK_REAL_VEC const PDstandardNth3gt23 = PDstandardNth3(&gt23[index]);
+ CCTK_REAL_VEC const PDstandardNth11gt23 = PDstandardNth11(&gt23[index]);
+ CCTK_REAL_VEC const PDstandardNth22gt23 = PDstandardNth22(&gt23[index]);
+ CCTK_REAL_VEC const PDstandardNth33gt23 = PDstandardNth33(&gt23[index]);
+ CCTK_REAL_VEC const PDstandardNth12gt23 = PDstandardNth12(&gt23[index]);
+ CCTK_REAL_VEC const PDstandardNth13gt23 = PDstandardNth13(&gt23[index]);
+ CCTK_REAL_VEC const PDstandardNth23gt23 = PDstandardNth23(&gt23[index]);
+ CCTK_REAL_VEC const PDstandardNth1gt33 = PDstandardNth1(&gt33[index]);
+ CCTK_REAL_VEC const PDstandardNth2gt33 = PDstandardNth2(&gt33[index]);
+ CCTK_REAL_VEC const PDstandardNth3gt33 = PDstandardNth3(&gt33[index]);
+ CCTK_REAL_VEC const PDstandardNth11gt33 = PDstandardNth11(&gt33[index]);
+ CCTK_REAL_VEC const PDstandardNth22gt33 = PDstandardNth22(&gt33[index]);
+ CCTK_REAL_VEC const PDstandardNth33gt33 = PDstandardNth33(&gt33[index]);
+ CCTK_REAL_VEC const PDstandardNth12gt33 = PDstandardNth12(&gt33[index]);
+ CCTK_REAL_VEC const PDstandardNth13gt33 = PDstandardNth13(&gt33[index]);
+ CCTK_REAL_VEC const PDstandardNth23gt33 = PDstandardNth23(&gt33[index]);
+ CCTK_REAL_VEC const PDstandardNth1phi = PDstandardNth1(&phi[index]);
+ CCTK_REAL_VEC const PDstandardNth2phi = PDstandardNth2(&phi[index]);
+ CCTK_REAL_VEC const PDstandardNth3phi = PDstandardNth3(&phi[index]);
+ CCTK_REAL_VEC const PDstandardNth11phi = PDstandardNth11(&phi[index]);
+ CCTK_REAL_VEC const PDstandardNth22phi = PDstandardNth22(&phi[index]);
+ CCTK_REAL_VEC const PDstandardNth33phi = PDstandardNth33(&phi[index]);
+ CCTK_REAL_VEC const PDstandardNth12phi = PDstandardNth12(&phi[index]);
+ CCTK_REAL_VEC const PDstandardNth13phi = PDstandardNth13(&phi[index]);
+ CCTK_REAL_VEC const PDstandardNth23phi = PDstandardNth23(&phi[index]);
+ CCTK_REAL_VEC const PDstandardNth1Xt1 = PDstandardNth1(&Xt1[index]);
+ CCTK_REAL_VEC const PDstandardNth2Xt1 = PDstandardNth2(&Xt1[index]);
+ CCTK_REAL_VEC const PDstandardNth3Xt1 = PDstandardNth3(&Xt1[index]);
+ CCTK_REAL_VEC const PDstandardNth1Xt2 = PDstandardNth1(&Xt2[index]);
+ CCTK_REAL_VEC const PDstandardNth2Xt2 = PDstandardNth2(&Xt2[index]);
+ CCTK_REAL_VEC const PDstandardNth3Xt2 = PDstandardNth3(&Xt2[index]);
+ CCTK_REAL_VEC const PDstandardNth1Xt3 = PDstandardNth1(&Xt3[index]);
+ CCTK_REAL_VEC const PDstandardNth2Xt3 = PDstandardNth2(&Xt3[index]);
+ CCTK_REAL_VEC const PDstandardNth3Xt3 = PDstandardNth3(&Xt3[index]);
/* Calculate temporaries and grid functions */
- CCTK_REAL detgt = 1;
+ CCTK_REAL_VEC detgt = ToReal(1);
- CCTK_REAL gtu11 = INV(detgt)*(gt22L*gt33L - SQR(gt23L));
+ CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L)));
- CCTK_REAL gtu12 = (gt13L*gt23L - gt12L*gt33L)*INV(detgt);
+ CCTK_REAL_VEC gtu12 =
+ kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L)));
- CCTK_REAL gtu13 = (-(gt13L*gt22L) + gt12L*gt23L)*INV(detgt);
+ CCTK_REAL_VEC gtu13 =
+ kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L)));
- CCTK_REAL gtu22 = INV(detgt)*(gt11L*gt33L - SQR(gt13L));
+ CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L)));
- CCTK_REAL gtu23 = (gt12L*gt13L - gt11L*gt23L)*INV(detgt);
+ CCTK_REAL_VEC gtu23 =
+ kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L)));
- CCTK_REAL gtu33 = INV(detgt)*(gt11L*gt22L - SQR(gt12L));
+ CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L)));
- CCTK_REAL Gtl111 = 0.5*PDstandardNth1gt11;
+ CCTK_REAL_VEC Gtl111 = kmul(PDstandardNth1gt11,ToReal(0.5));
- CCTK_REAL Gtl112 = 0.5*PDstandardNth2gt11;
+ CCTK_REAL_VEC Gtl112 = kmul(PDstandardNth2gt11,ToReal(0.5));
- CCTK_REAL Gtl113 = 0.5*PDstandardNth3gt11;
+ CCTK_REAL_VEC Gtl113 = kmul(PDstandardNth3gt11,ToReal(0.5));
- CCTK_REAL Gtl122 = -0.5*PDstandardNth1gt22 + PDstandardNth2gt12;
+ CCTK_REAL_VEC Gtl122 =
+ kmadd(PDstandardNth1gt22,ToReal(-0.5),PDstandardNth2gt12);
- CCTK_REAL Gtl123 = 0.5*(-PDstandardNth1gt23 + PDstandardNth2gt13 +
- PDstandardNth3gt12);
+ CCTK_REAL_VEC Gtl123 =
+ kmul(kadd(PDstandardNth2gt13,ksub(PDstandardNth3gt12,PDstandardNth1gt23)),ToReal(0.5));
- CCTK_REAL Gtl133 = -0.5*PDstandardNth1gt33 + PDstandardNth3gt13;
+ CCTK_REAL_VEC Gtl133 =
+ kmadd(PDstandardNth1gt33,ToReal(-0.5),PDstandardNth3gt13);
- CCTK_REAL Gtl211 = PDstandardNth1gt12 - 0.5*PDstandardNth2gt11;
+ CCTK_REAL_VEC Gtl211 =
+ kmadd(PDstandardNth2gt11,ToReal(-0.5),PDstandardNth1gt12);
- CCTK_REAL Gtl212 = 0.5*PDstandardNth1gt22;
+ CCTK_REAL_VEC Gtl212 = kmul(PDstandardNth1gt22,ToReal(0.5));
- CCTK_REAL Gtl213 = 0.5*(PDstandardNth1gt23 - PDstandardNth2gt13 +
- PDstandardNth3gt12);
+ CCTK_REAL_VEC Gtl213 =
+ kmul(kadd(PDstandardNth1gt23,ksub(PDstandardNth3gt12,PDstandardNth2gt13)),ToReal(0.5));
- CCTK_REAL Gtl222 = 0.5*PDstandardNth2gt22;
+ CCTK_REAL_VEC Gtl222 = kmul(PDstandardNth2gt22,ToReal(0.5));
- CCTK_REAL Gtl223 = 0.5*PDstandardNth3gt22;
+ CCTK_REAL_VEC Gtl223 = kmul(PDstandardNth3gt22,ToReal(0.5));
- CCTK_REAL Gtl233 = -0.5*PDstandardNth2gt33 + PDstandardNth3gt23;
+ CCTK_REAL_VEC Gtl233 =
+ kmadd(PDstandardNth2gt33,ToReal(-0.5),PDstandardNth3gt23);
- CCTK_REAL Gtl311 = PDstandardNth1gt13 - 0.5*PDstandardNth3gt11;
+ CCTK_REAL_VEC Gtl311 =
+ kmadd(PDstandardNth3gt11,ToReal(-0.5),PDstandardNth1gt13);
- CCTK_REAL Gtl312 = 0.5*(PDstandardNth1gt23 + PDstandardNth2gt13 -
- PDstandardNth3gt12);
+ CCTK_REAL_VEC Gtl312 =
+ kmul(kadd(PDstandardNth1gt23,ksub(PDstandardNth2gt13,PDstandardNth3gt12)),ToReal(0.5));
- CCTK_REAL Gtl313 = 0.5*PDstandardNth1gt33;
+ CCTK_REAL_VEC Gtl313 = kmul(PDstandardNth1gt33,ToReal(0.5));
- CCTK_REAL Gtl322 = PDstandardNth2gt23 - 0.5*PDstandardNth3gt22;
+ CCTK_REAL_VEC Gtl322 =
+ kmadd(PDstandardNth3gt22,ToReal(-0.5),PDstandardNth2gt23);
- CCTK_REAL Gtl323 = 0.5*PDstandardNth2gt33;
+ CCTK_REAL_VEC Gtl323 = kmul(PDstandardNth2gt33,ToReal(0.5));
- CCTK_REAL Gtl333 = 0.5*PDstandardNth3gt33;
+ CCTK_REAL_VEC Gtl333 = kmul(PDstandardNth3gt33,ToReal(0.5));
- CCTK_REAL Gtlu111 = Gtl111*gtu11 + Gtl112*gtu12 + Gtl113*gtu13;
+ CCTK_REAL_VEC Gtlu111 =
+ kmadd(Gtl111,gtu11,kmadd(Gtl112,gtu12,kmul(Gtl113,gtu13)));
- CCTK_REAL Gtlu112 = Gtl111*gtu12 + Gtl112*gtu22 + Gtl113*gtu23;
+ CCTK_REAL_VEC Gtlu112 =
+ kmadd(Gtl111,gtu12,kmadd(Gtl112,gtu22,kmul(Gtl113,gtu23)));
- CCTK_REAL Gtlu113 = Gtl111*gtu13 + Gtl112*gtu23 + Gtl113*gtu33;
+ CCTK_REAL_VEC Gtlu113 =
+ kmadd(Gtl111,gtu13,kmadd(Gtl112,gtu23,kmul(Gtl113,gtu33)));
- CCTK_REAL Gtlu121 = Gtl112*gtu11 + Gtl122*gtu12 + Gtl123*gtu13;
+ CCTK_REAL_VEC Gtlu121 =
+ kmadd(Gtl112,gtu11,kmadd(Gtl122,gtu12,kmul(Gtl123,gtu13)));
- CCTK_REAL Gtlu122 = Gtl112*gtu12 + Gtl122*gtu22 + Gtl123*gtu23;
+ CCTK_REAL_VEC Gtlu122 =
+ kmadd(Gtl112,gtu12,kmadd(Gtl122,gtu22,kmul(Gtl123,gtu23)));
- CCTK_REAL Gtlu123 = Gtl112*gtu13 + Gtl122*gtu23 + Gtl123*gtu33;
+ CCTK_REAL_VEC Gtlu123 =
+ kmadd(Gtl112,gtu13,kmadd(Gtl122,gtu23,kmul(Gtl123,gtu33)));
- CCTK_REAL Gtlu131 = Gtl113*gtu11 + Gtl123*gtu12 + Gtl133*gtu13;
+ CCTK_REAL_VEC Gtlu131 =
+ kmadd(Gtl113,gtu11,kmadd(Gtl123,gtu12,kmul(Gtl133,gtu13)));
- CCTK_REAL Gtlu132 = Gtl113*gtu12 + Gtl123*gtu22 + Gtl133*gtu23;
+ CCTK_REAL_VEC Gtlu132 =
+ kmadd(Gtl113,gtu12,kmadd(Gtl123,gtu22,kmul(Gtl133,gtu23)));
- CCTK_REAL Gtlu133 = Gtl113*gtu13 + Gtl123*gtu23 + Gtl133*gtu33;
+ CCTK_REAL_VEC Gtlu133 =
+ kmadd(Gtl113,gtu13,kmadd(Gtl123,gtu23,kmul(Gtl133,gtu33)));
- CCTK_REAL Gtlu211 = Gtl211*gtu11 + Gtl212*gtu12 + Gtl213*gtu13;
+ CCTK_REAL_VEC Gtlu211 =
+ kmadd(Gtl211,gtu11,kmadd(Gtl212,gtu12,kmul(Gtl213,gtu13)));
- CCTK_REAL Gtlu212 = Gtl211*gtu12 + Gtl212*gtu22 + Gtl213*gtu23;
+ CCTK_REAL_VEC Gtlu212 =
+ kmadd(Gtl211,gtu12,kmadd(Gtl212,gtu22,kmul(Gtl213,gtu23)));
- CCTK_REAL Gtlu213 = Gtl211*gtu13 + Gtl212*gtu23 + Gtl213*gtu33;
+ CCTK_REAL_VEC Gtlu213 =
+ kmadd(Gtl211,gtu13,kmadd(Gtl212,gtu23,kmul(Gtl213,gtu33)));
- CCTK_REAL Gtlu221 = Gtl212*gtu11 + Gtl222*gtu12 + Gtl223*gtu13;
+ CCTK_REAL_VEC Gtlu221 =
+ kmadd(Gtl212,gtu11,kmadd(Gtl222,gtu12,kmul(Gtl223,gtu13)));
- CCTK_REAL Gtlu222 = Gtl212*gtu12 + Gtl222*gtu22 + Gtl223*gtu23;
+ CCTK_REAL_VEC Gtlu222 =
+ kmadd(Gtl212,gtu12,kmadd(Gtl222,gtu22,kmul(Gtl223,gtu23)));
- CCTK_REAL Gtlu223 = Gtl212*gtu13 + Gtl222*gtu23 + Gtl223*gtu33;
+ CCTK_REAL_VEC Gtlu223 =
+ kmadd(Gtl212,gtu13,kmadd(Gtl222,gtu23,kmul(Gtl223,gtu33)));
- CCTK_REAL Gtlu231 = Gtl213*gtu11 + Gtl223*gtu12 + Gtl233*gtu13;
+ CCTK_REAL_VEC Gtlu231 =
+ kmadd(Gtl213,gtu11,kmadd(Gtl223,gtu12,kmul(Gtl233,gtu13)));
- CCTK_REAL Gtlu232 = Gtl213*gtu12 + Gtl223*gtu22 + Gtl233*gtu23;
+ CCTK_REAL_VEC Gtlu232 =
+ kmadd(Gtl213,gtu12,kmadd(Gtl223,gtu22,kmul(Gtl233,gtu23)));
- CCTK_REAL Gtlu233 = Gtl213*gtu13 + Gtl223*gtu23 + Gtl233*gtu33;
+ CCTK_REAL_VEC Gtlu233 =
+ kmadd(Gtl213,gtu13,kmadd(Gtl223,gtu23,kmul(Gtl233,gtu33)));
- CCTK_REAL Gtlu311 = Gtl311*gtu11 + Gtl312*gtu12 + Gtl313*gtu13;
+ CCTK_REAL_VEC Gtlu311 =
+ kmadd(Gtl311,gtu11,kmadd(Gtl312,gtu12,kmul(Gtl313,gtu13)));
- CCTK_REAL Gtlu312 = Gtl311*gtu12 + Gtl312*gtu22 + Gtl313*gtu23;
+ CCTK_REAL_VEC Gtlu312 =
+ kmadd(Gtl311,gtu12,kmadd(Gtl312,gtu22,kmul(Gtl313,gtu23)));
- CCTK_REAL Gtlu313 = Gtl311*gtu13 + Gtl312*gtu23 + Gtl313*gtu33;
+ CCTK_REAL_VEC Gtlu313 =
+ kmadd(Gtl311,gtu13,kmadd(Gtl312,gtu23,kmul(Gtl313,gtu33)));
- CCTK_REAL Gtlu321 = Gtl312*gtu11 + Gtl322*gtu12 + Gtl323*gtu13;
+ CCTK_REAL_VEC Gtlu321 =
+ kmadd(Gtl312,gtu11,kmadd(Gtl322,gtu12,kmul(Gtl323,gtu13)));
- CCTK_REAL Gtlu322 = Gtl312*gtu12 + Gtl322*gtu22 + Gtl323*gtu23;
+ CCTK_REAL_VEC Gtlu322 =
+ kmadd(Gtl312,gtu12,kmadd(Gtl322,gtu22,kmul(Gtl323,gtu23)));
- CCTK_REAL Gtlu323 = Gtl312*gtu13 + Gtl322*gtu23 + Gtl323*gtu33;
+ CCTK_REAL_VEC Gtlu323 =
+ kmadd(Gtl312,gtu13,kmadd(Gtl322,gtu23,kmul(Gtl323,gtu33)));
- CCTK_REAL Gtlu331 = Gtl313*gtu11 + Gtl323*gtu12 + Gtl333*gtu13;
+ CCTK_REAL_VEC Gtlu331 =
+ kmadd(Gtl313,gtu11,kmadd(Gtl323,gtu12,kmul(Gtl333,gtu13)));
- CCTK_REAL Gtlu332 = Gtl313*gtu12 + Gtl323*gtu22 + Gtl333*gtu23;
+ CCTK_REAL_VEC Gtlu332 =
+ kmadd(Gtl313,gtu12,kmadd(Gtl323,gtu22,kmul(Gtl333,gtu23)));
- CCTK_REAL Gtlu333 = Gtl313*gtu13 + Gtl323*gtu23 + Gtl333*gtu33;
+ CCTK_REAL_VEC Gtlu333 =
+ kmadd(Gtl313,gtu13,kmadd(Gtl323,gtu23,kmul(Gtl333,gtu33)));
- CCTK_REAL Gt111 = Gtl111*gtu11 + Gtl211*gtu12 + Gtl311*gtu13;
+ CCTK_REAL_VEC Gt111 =
+ kmadd(Gtl111,gtu11,kmadd(Gtl211,gtu12,kmul(Gtl311,gtu13)));
- CCTK_REAL Gt211 = Gtl111*gtu12 + Gtl211*gtu22 + Gtl311*gtu23;
+ CCTK_REAL_VEC Gt211 =
+ kmadd(Gtl111,gtu12,kmadd(Gtl211,gtu22,kmul(Gtl311,gtu23)));
- CCTK_REAL Gt311 = Gtl111*gtu13 + Gtl211*gtu23 + Gtl311*gtu33;
+ CCTK_REAL_VEC Gt311 =
+ kmadd(Gtl111,gtu13,kmadd(Gtl211,gtu23,kmul(Gtl311,gtu33)));
- CCTK_REAL Gt112 = Gtl112*gtu11 + Gtl212*gtu12 + Gtl312*gtu13;
+ CCTK_REAL_VEC Gt112 =
+ kmadd(Gtl112,gtu11,kmadd(Gtl212,gtu12,kmul(Gtl312,gtu13)));
- CCTK_REAL Gt212 = Gtl112*gtu12 + Gtl212*gtu22 + Gtl312*gtu23;
+ CCTK_REAL_VEC Gt212 =
+ kmadd(Gtl112,gtu12,kmadd(Gtl212,gtu22,kmul(Gtl312,gtu23)));
- CCTK_REAL Gt312 = Gtl112*gtu13 + Gtl212*gtu23 + Gtl312*gtu33;
+ CCTK_REAL_VEC Gt312 =
+ kmadd(Gtl112,gtu13,kmadd(Gtl212,gtu23,kmul(Gtl312,gtu33)));
- CCTK_REAL Gt113 = Gtl113*gtu11 + Gtl213*gtu12 + Gtl313*gtu13;
+ CCTK_REAL_VEC Gt113 =
+ kmadd(Gtl113,gtu11,kmadd(Gtl213,gtu12,kmul(Gtl313,gtu13)));
- CCTK_REAL Gt213 = Gtl113*gtu12 + Gtl213*gtu22 + Gtl313*gtu23;
+ CCTK_REAL_VEC Gt213 =
+ kmadd(Gtl113,gtu12,kmadd(Gtl213,gtu22,kmul(Gtl313,gtu23)));
- CCTK_REAL Gt313 = Gtl113*gtu13 + Gtl213*gtu23 + Gtl313*gtu33;
+ CCTK_REAL_VEC Gt313 =
+ kmadd(Gtl113,gtu13,kmadd(Gtl213,gtu23,kmul(Gtl313,gtu33)));
- CCTK_REAL Gt122 = Gtl122*gtu11 + Gtl222*gtu12 + Gtl322*gtu13;
+ CCTK_REAL_VEC Gt122 =
+ kmadd(Gtl122,gtu11,kmadd(Gtl222,gtu12,kmul(Gtl322,gtu13)));
- CCTK_REAL Gt222 = Gtl122*gtu12 + Gtl222*gtu22 + Gtl322*gtu23;
+ CCTK_REAL_VEC Gt222 =
+ kmadd(Gtl122,gtu12,kmadd(Gtl222,gtu22,kmul(Gtl322,gtu23)));
- CCTK_REAL Gt322 = Gtl122*gtu13 + Gtl222*gtu23 + Gtl322*gtu33;
+ CCTK_REAL_VEC Gt322 =
+ kmadd(Gtl122,gtu13,kmadd(Gtl222,gtu23,kmul(Gtl322,gtu33)));
- CCTK_REAL Gt123 = Gtl123*gtu11 + Gtl223*gtu12 + Gtl323*gtu13;
+ CCTK_REAL_VEC Gt123 =
+ kmadd(Gtl123,gtu11,kmadd(Gtl223,gtu12,kmul(Gtl323,gtu13)));
- CCTK_REAL Gt223 = Gtl123*gtu12 + Gtl223*gtu22 + Gtl323*gtu23;
+ CCTK_REAL_VEC Gt223 =
+ kmadd(Gtl123,gtu12,kmadd(Gtl223,gtu22,kmul(Gtl323,gtu23)));
- CCTK_REAL Gt323 = Gtl123*gtu13 + Gtl223*gtu23 + Gtl323*gtu33;
+ CCTK_REAL_VEC Gt323 =
+ kmadd(Gtl123,gtu13,kmadd(Gtl223,gtu23,kmul(Gtl323,gtu33)));
- CCTK_REAL Gt133 = Gtl133*gtu11 + Gtl233*gtu12 + Gtl333*gtu13;
+ CCTK_REAL_VEC Gt133 =
+ kmadd(Gtl133,gtu11,kmadd(Gtl233,gtu12,kmul(Gtl333,gtu13)));
- CCTK_REAL Gt233 = Gtl133*gtu12 + Gtl233*gtu22 + Gtl333*gtu23;
+ CCTK_REAL_VEC Gt233 =
+ kmadd(Gtl133,gtu12,kmadd(Gtl233,gtu22,kmul(Gtl333,gtu23)));
- CCTK_REAL Gt333 = Gtl133*gtu13 + Gtl233*gtu23 + Gtl333*gtu33;
+ CCTK_REAL_VEC Gt333 =
+ kmadd(Gtl133,gtu13,kmadd(Gtl233,gtu23,kmul(Gtl333,gtu33)));
- CCTK_REAL Xtn1 = Gt111*gtu11 + Gt122*gtu22 + 2*(Gt112*gtu12 +
- Gt113*gtu13 + Gt123*gtu23) + Gt133*gtu33;
+ CCTK_REAL_VEC Xtn1 =
+ kmadd(Gt111,gtu11,kmadd(Gt122,gtu22,kmadd(Gt133,gtu33,kmul(kmadd(Gt112,gtu12,kmadd(Gt113,gtu13,kmul(Gt123,gtu23))),ToReal(2)))));
- CCTK_REAL Xtn2 = Gt211*gtu11 + Gt222*gtu22 + 2*(Gt212*gtu12 +
- Gt213*gtu13 + Gt223*gtu23) + Gt233*gtu33;
+ CCTK_REAL_VEC Xtn2 =
+ kmadd(Gt211,gtu11,kmadd(Gt222,gtu22,kmadd(Gt233,gtu33,kmul(kmadd(Gt212,gtu12,kmadd(Gt213,gtu13,kmul(Gt223,gtu23))),ToReal(2)))));
- CCTK_REAL Xtn3 = Gt311*gtu11 + Gt322*gtu22 + 2*(Gt312*gtu12 +
- Gt313*gtu13 + Gt323*gtu23) + Gt333*gtu33;
+ CCTK_REAL_VEC Xtn3 =
+ kmadd(Gt311,gtu11,kmadd(Gt322,gtu22,kmadd(Gt333,gtu33,kmul(kmadd(Gt312,gtu12,kmadd(Gt313,gtu13,kmul(Gt323,gtu23))),ToReal(2)))));
- CCTK_REAL Rt11 = 0.5*(6*(Gt111*Gtlu111 + Gt112*Gtlu112 +
- Gt113*Gtlu113) + 4*(Gt211*Gtlu121 + Gt212*Gtlu122 + Gt213*Gtlu123 +
- Gt311*Gtlu131 + Gt312*Gtlu132 + Gt313*Gtlu133) -
- gtu11*PDstandardNth11gt11 - 2*gtu12*PDstandardNth12gt11 -
- 2*gtu13*PDstandardNth13gt11 + 2*(Gt211*Gtlu211 + Gt212*Gtlu212 +
- Gt213*Gtlu213 + Gt311*Gtlu311 + Gt312*Gtlu312 + Gt313*Gtlu313 +
- gt11L*PDstandardNth1Xt1) + 2*gt12L*PDstandardNth1Xt2 +
- 2*gt13L*PDstandardNth1Xt3 - gtu22*PDstandardNth22gt11 -
- 2*gtu23*PDstandardNth23gt11 - gtu33*PDstandardNth33gt11 + 2*Gtl111*Xtn1
- + 2*Gtl112*Xtn2 + 2*Gtl113*Xtn3);
+ CCTK_REAL_VEC Rt11 =
+ kmul(ToReal(0.5),kmadd(gtu12,kmul(PDstandardNth12gt11,ToReal(-2)),kmadd(gtu13,kmul(PDstandardNth13gt11,ToReal(-2)),kmadd(gtu23,kmul(PDstandardNth23gt11,ToReal(-2)),knmsub(gtu11,PDstandardNth11gt11,knmsub(gtu22,PDstandardNth22gt11,knmsub(gtu33,PDstandardNth33gt11,kmadd(kmadd(Gt211,Gtlu211,kmadd(Gt212,Gtlu212,kmadd(Gt213,Gtlu213,kmadd(Gt311,Gtlu311,kmadd(Gt312,Gtlu312,kmadd(Gt313,Gtlu313,kmul(gt11L,PDstandardNth1Xt1))))))),ToReal(2),kmadd(gt12L,kmul(PDstandardNth1Xt2,ToReal(2)),kmadd(gt13L,kmul(PDstandardNth1Xt3,ToReal(2)),kmadd(Gtl111,kmul(Xtn1,ToReal(2)),kmadd(Gtl112,kmul(Xtn2,ToReal(2)),kmadd(Gtl113,kmul(Xtn3,ToReal(2)),kmadd(kmadd(Gt211,Gtlu121,kmadd(Gt212,Gtlu122,kmadd(Gt213,Gtlu123,kmadd(Gt311,Gtlu131,kmadd(Gt312,Gtlu132,kmul(Gt313,Gtlu133)))))),ToReal(4),kmul(kmadd(Gt111,Gtlu111,kmadd(Gt112,Gtlu112,kmul(Gt113,Gtlu113))),ToReal(6))))))))))))))));
- CCTK_REAL Rt12 = 0.5*(4*(Gt211*Gtlu221 + Gt212*Gtlu222 +
- Gt213*Gtlu223) + 2*(Gt112*Gtlu111 + Gt122*Gtlu112 + Gt123*Gtlu113 +
- Gt111*Gtlu121 + Gt212*Gtlu121 + Gt112*Gtlu122 + Gt222*Gtlu122 +
- Gt113*Gtlu123 + Gt223*Gtlu123 + Gt312*Gtlu131 + Gt322*Gtlu132 +
- Gt323*Gtlu133 + Gt111*Gtlu211 + Gt112*Gtlu212 + Gt113*Gtlu213 +
- Gt311*Gtlu231 + Gt312*Gtlu232 + Gt313*Gtlu233 + Gt311*Gtlu321 +
- Gt312*Gtlu322 + Gt313*Gtlu323) - gtu11*PDstandardNth11gt12 -
- 2*gtu12*PDstandardNth12gt12 - 2*gtu13*PDstandardNth13gt12 +
- gt12L*PDstandardNth1Xt1 + gt22L*PDstandardNth1Xt2 +
- gt23L*PDstandardNth1Xt3 - gtu22*PDstandardNth22gt12 -
- 2*gtu23*PDstandardNth23gt12 + gt11L*PDstandardNth2Xt1 +
- gt12L*PDstandardNth2Xt2 + gt13L*PDstandardNth2Xt3 -
- gtu33*PDstandardNth33gt12 + Gtl112*Xtn1 + Gtl211*Xtn1 + Gtl122*Xtn2 +
- Gtl212*Xtn2 + Gtl123*Xtn3 + Gtl213*Xtn3);
+ CCTK_REAL_VEC Rt12 =
+ kmul(ToReal(0.5),kmadd(gt12L,PDstandardNth1Xt1,kmadd(gt22L,PDstandardNth1Xt2,kmadd(gt23L,PDstandardNth1Xt3,kmadd(gt11L,PDstandardNth2Xt1,kmadd(gt12L,PDstandardNth2Xt2,kmadd(gt13L,PDstandardNth2Xt3,kmadd(Gtl112,Xtn1,kmadd(Gtl211,Xtn1,kmadd(Gtl122,Xtn2,kmadd(Gtl212,Xtn2,kmadd(Gtl123,Xtn3,kmadd(Gtl213,Xtn3,kmadd(gtu12,kmul(PDstandardNth12gt12,ToReal(-2)),kmadd(gtu13,kmul(PDstandardNth13gt12,ToReal(-2)),kmadd(gtu23,kmul(PDstandardNth23gt12,ToReal(-2)),knmsub(gtu11,PDstandardNth11gt12,knmsub(gtu22,PDstandardNth22gt12,knmsub(gtu33,PDstandardNth33gt12,kmadd(kmadd(Gt112,Gtlu111,kmadd(Gt122,Gtlu112,kmadd(Gt123,Gtlu113,kmadd(Gt111,Gtlu121,kmadd(Gt212,Gtlu121,kmadd(Gt112,Gtlu122,kmadd(Gt222,Gtlu122,kmadd(Gt113,Gtlu123,kmadd(Gt223,Gtlu123,kmadd(Gt312,Gtlu131,kmadd(Gt322,Gtlu132,kmadd(Gt323,Gtlu133,kmadd(Gt111,Gtlu211,kmadd(Gt112,Gtlu212,kmadd(Gt113,Gtlu213,kmadd(Gt311,Gtlu231,kmadd(Gt312,Gtlu232,kmadd(Gt313,Gtlu233,kmadd(Gt311,Gtlu321,kmadd(Gt312,Gtlu322,kmul(Gt313,Gtlu323))))))))))))))))))))),ToReal(2),kmul(kmadd(Gt211,Gtlu221,kmadd(Gt212,Gtlu222,kmul(Gt213,Gtlu223))),ToReal(4))))))))))))))))))))));
- CCTK_REAL Rt13 = 0.5*(2*(Gt113*Gtlu111 + Gt123*Gtlu112 + Gt133*Gtlu113
- + Gt213*Gtlu121 + Gt223*Gtlu122 + Gt233*Gtlu123 + Gt111*Gtlu131 +
- Gt313*Gtlu131 + Gt112*Gtlu132 + Gt323*Gtlu132 + Gt113*Gtlu133 +
- Gt333*Gtlu133 + Gt211*Gtlu231 + Gt212*Gtlu232 + Gt213*Gtlu233 +
- Gt111*Gtlu311 + Gt112*Gtlu312 + Gt113*Gtlu313 + Gt211*Gtlu321 +
- Gt212*Gtlu322 + Gt213*Gtlu323) + 4*(Gt311*Gtlu331 + Gt312*Gtlu332 +
- Gt313*Gtlu333) - gtu11*PDstandardNth11gt13 -
- 2*gtu12*PDstandardNth12gt13 - 2*gtu13*PDstandardNth13gt13 +
- gt13L*PDstandardNth1Xt1 + gt23L*PDstandardNth1Xt2 +
- gt33L*PDstandardNth1Xt3 - gtu22*PDstandardNth22gt13 -
- 2*gtu23*PDstandardNth23gt13 - gtu33*PDstandardNth33gt13 +
- gt11L*PDstandardNth3Xt1 + gt12L*PDstandardNth3Xt2 +
- gt13L*PDstandardNth3Xt3 + Gtl113*Xtn1 + Gtl311*Xtn1 + Gtl123*Xtn2 +
- Gtl312*Xtn2 + Gtl133*Xtn3 + Gtl313*Xtn3);
+ CCTK_REAL_VEC Rt13 =
+ kmul(ToReal(0.5),kmadd(gt13L,PDstandardNth1Xt1,kmadd(gt23L,PDstandardNth1Xt2,kmadd(gt33L,PDstandardNth1Xt3,kmadd(gt11L,PDstandardNth3Xt1,kmadd(gt12L,PDstandardNth3Xt2,kmadd(gt13L,PDstandardNth3Xt3,kmadd(Gtl113,Xtn1,kmadd(Gtl311,Xtn1,kmadd(Gtl123,Xtn2,kmadd(Gtl312,Xtn2,kmadd(Gtl133,Xtn3,kmadd(Gtl313,Xtn3,kmadd(gtu12,kmul(PDstandardNth12gt13,ToReal(-2)),kmadd(gtu13,kmul(PDstandardNth13gt13,ToReal(-2)),kmadd(gtu23,kmul(PDstandardNth23gt13,ToReal(-2)),knmsub(gtu11,PDstandardNth11gt13,knmsub(gtu22,PDstandardNth22gt13,knmsub(gtu33,PDstandardNth33gt13,kmadd(kmadd(Gt113,Gtlu111,kmadd(Gt123,Gtlu112,kmadd(Gt133,Gtlu113,kmadd(Gt213,Gtlu121,kmadd(Gt223,Gtlu122,kmadd(Gt233,Gtlu123,kmadd(Gt111,Gtlu131,kmadd(Gt313,Gtlu131,kmadd(Gt112,Gtlu132,kmadd(Gt323,Gtlu132,kmadd(Gt113,Gtlu133,kmadd(Gt333,Gtlu133,kmadd(Gt211,Gtlu231,kmadd(Gt212,Gtlu232,kmadd(Gt213,Gtlu233,kmadd(Gt111,Gtlu311,kmadd(Gt112,Gtlu312,kmadd(Gt113,Gtlu313,kmadd(Gt211,Gtlu321,kmadd(Gt212,Gtlu322,kmul(Gt213,Gtlu323))))))))))))))))))))),ToReal(2),kmul(kmadd(Gt311,Gtlu331,kmadd(Gt312,Gtlu332,kmul(Gt313,Gtlu333))),ToReal(4))))))))))))))))))))));
- CCTK_REAL Rt22 = 0.5*(6*(Gt212*Gtlu221 + Gt222*Gtlu222 +
- Gt223*Gtlu223) + 4*(Gt112*Gtlu211 + Gt122*Gtlu212 + Gt123*Gtlu213 +
- Gt312*Gtlu231 + Gt322*Gtlu232 + Gt323*Gtlu233) -
- gtu11*PDstandardNth11gt22 - 2*gtu12*PDstandardNth12gt22 -
- 2*gtu13*PDstandardNth13gt22 - gtu22*PDstandardNth22gt22 -
- 2*gtu23*PDstandardNth23gt22 + 2*(Gt112*Gtlu121 + Gt122*Gtlu122 +
- Gt123*Gtlu123 + Gt312*Gtlu321 + Gt322*Gtlu322 + Gt323*Gtlu323 +
- gt12L*PDstandardNth2Xt1) + 2*gt22L*PDstandardNth2Xt2 +
- 2*gt23L*PDstandardNth2Xt3 - gtu33*PDstandardNth33gt22 + 2*Gtl212*Xtn1 +
- 2*Gtl222*Xtn2 + 2*Gtl223*Xtn3);
-
- CCTK_REAL Rt23 = 0.5*(2*(Gt112*Gtlu131 + Gt122*Gtlu132 + Gt123*Gtlu133
- + Gt113*Gtlu211 + Gt123*Gtlu212 + Gt133*Gtlu213 + Gt213*Gtlu221 +
- Gt223*Gtlu222 + Gt233*Gtlu223 + Gt212*Gtlu231 + Gt313*Gtlu231 +
- Gt222*Gtlu232 + Gt323*Gtlu232 + Gt223*Gtlu233 + Gt333*Gtlu233 +
- Gt112*Gtlu311 + Gt122*Gtlu312 + Gt123*Gtlu313 + Gt212*Gtlu321 +
- Gt222*Gtlu322 + Gt223*Gtlu323) + 4*(Gt312*Gtlu331 + Gt322*Gtlu332 +
- Gt323*Gtlu333) - gtu11*PDstandardNth11gt23 -
- 2*gtu12*PDstandardNth12gt23 - 2*gtu13*PDstandardNth13gt23 -
- gtu22*PDstandardNth22gt23 - 2*gtu23*PDstandardNth23gt23 +
- gt13L*PDstandardNth2Xt1 + gt23L*PDstandardNth2Xt2 +
- gt33L*PDstandardNth2Xt3 - gtu33*PDstandardNth33gt23 +
- gt12L*PDstandardNth3Xt1 + gt22L*PDstandardNth3Xt2 +
- gt23L*PDstandardNth3Xt3 + Gtl213*Xtn1 + Gtl312*Xtn1 + Gtl223*Xtn2 +
- Gtl322*Xtn2 + Gtl233*Xtn3 + Gtl323*Xtn3);
-
- CCTK_REAL Rt33 = 0.5*(4*(Gt113*Gtlu311 + Gt123*Gtlu312 + Gt133*Gtlu313
- + Gt213*Gtlu321 + Gt223*Gtlu322 + Gt233*Gtlu323) + 6*(Gt313*Gtlu331 +
- Gt323*Gtlu332 + Gt333*Gtlu333) - gtu11*PDstandardNth11gt33 -
- 2*gtu12*PDstandardNth12gt33 - 2*gtu13*PDstandardNth13gt33 -
- gtu22*PDstandardNth22gt33 - 2*gtu23*PDstandardNth23gt33 -
- gtu33*PDstandardNth33gt33 + 2*(Gt113*Gtlu131 + Gt123*Gtlu132 +
- Gt133*Gtlu133 + Gt213*Gtlu231 + Gt223*Gtlu232 + Gt233*Gtlu233 +
- gt13L*PDstandardNth3Xt1) + 2*gt23L*PDstandardNth3Xt2 +
- 2*gt33L*PDstandardNth3Xt3 + 2*Gtl313*Xtn1 + 2*Gtl323*Xtn2 +
- 2*Gtl333*Xtn3);
-
- CCTK_REAL fac1 = IfThen(conformalMethod,-0.5*INV(phiL),1);
-
- CCTK_REAL cdphi1 = fac1*PDstandardNth1phi;
-
- CCTK_REAL cdphi2 = fac1*PDstandardNth2phi;
-
- CCTK_REAL cdphi3 = fac1*PDstandardNth3phi;
-
- CCTK_REAL fac2 = IfThen(conformalMethod,0.5*INV(SQR(phiL)),0);
-
- CCTK_REAL cdphi211 = -(fac1*(-PDstandardNth11phi +
- Gt111*PDstandardNth1phi + Gt211*PDstandardNth2phi +
- Gt311*PDstandardNth3phi)) + fac2*SQR(PDstandardNth1phi);
-
- CCTK_REAL cdphi212 = fac2*PDstandardNth1phi*PDstandardNth2phi -
- fac1*(-PDstandardNth12phi + Gt112*PDstandardNth1phi +
- Gt212*PDstandardNth2phi + Gt312*PDstandardNth3phi);
-
- CCTK_REAL cdphi213 = fac2*PDstandardNth1phi*PDstandardNth3phi -
- fac1*(-PDstandardNth13phi + Gt113*PDstandardNth1phi +
- Gt213*PDstandardNth2phi + Gt313*PDstandardNth3phi);
-
- CCTK_REAL cdphi222 = -(fac1*(Gt122*PDstandardNth1phi -
- PDstandardNth22phi + Gt222*PDstandardNth2phi +
- Gt322*PDstandardNth3phi)) + fac2*SQR(PDstandardNth2phi);
-
- CCTK_REAL cdphi223 = fac2*PDstandardNth2phi*PDstandardNth3phi -
- fac1*(Gt123*PDstandardNth1phi - PDstandardNth23phi +
- Gt223*PDstandardNth2phi + Gt323*PDstandardNth3phi);
-
- CCTK_REAL cdphi233 = -(fac1*(Gt133*PDstandardNth1phi +
- Gt233*PDstandardNth2phi - PDstandardNth33phi +
- Gt333*PDstandardNth3phi)) + fac2*SQR(PDstandardNth3phi);
-
- CCTK_REAL Rphi11 = -2*(cdphi211 + 2*(-1 + gt11L*gtu11)*SQR(cdphi1) +
- gt11L*(cdphi211*gtu11 + 4*(cdphi1*(cdphi2*gtu12 + cdphi3*gtu13) +
- cdphi2*cdphi3*gtu23) + cdphi233*gtu33 + gtu22*(cdphi222 +
- 2*SQR(cdphi2)) + 2*(cdphi212*gtu12 + cdphi213*gtu13 + cdphi223*gtu23 +
- gtu33*SQR(cdphi3))));
+ CCTK_REAL_VEC Rt22 =
+ kmul(ToReal(0.5),kmadd(gtu12,kmul(PDstandardNth12gt22,ToReal(-2)),kmadd(gtu13,kmul(PDstandardNth13gt22,ToReal(-2)),kmadd(gtu23,kmul(PDstandardNth23gt22,ToReal(-2)),knmsub(gtu11,PDstandardNth11gt22,knmsub(gtu22,PDstandardNth22gt22,knmsub(gtu33,PDstandardNth33gt22,kmadd(kmadd(Gt112,Gtlu121,kmadd(Gt122,Gtlu122,kmadd(Gt123,Gtlu123,kmadd(Gt312,Gtlu321,kmadd(Gt322,Gtlu322,kmadd(Gt323,Gtlu323,kmul(gt12L,PDstandardNth2Xt1))))))),ToReal(2),kmadd(gt22L,kmul(PDstandardNth2Xt2,ToReal(2)),kmadd(gt23L,kmul(PDstandardNth2Xt3,ToReal(2)),kmadd(Gtl212,kmul(Xtn1,ToReal(2)),kmadd(Gtl222,kmul(Xtn2,ToReal(2)),kmadd(Gtl223,kmul(Xtn3,ToReal(2)),kmadd(kmadd(Gt112,Gtlu211,kmadd(Gt122,Gtlu212,kmadd(Gt123,Gtlu213,kmadd(Gt312,Gtlu231,kmadd(Gt322,Gtlu232,kmul(Gt323,Gtlu233)))))),ToReal(4),kmul(kmadd(Gt212,Gtlu221,kmadd(Gt222,Gtlu222,kmul(Gt223,Gtlu223))),ToReal(6))))))))))))))));
- CCTK_REAL Rphi12 = -2*(cdphi212 + cdphi1*(cdphi2*(-2 + 4*gt12L*gtu12)
- + 4*cdphi3*gt12L*gtu13) + gt12L*(cdphi211*gtu11 + 4*cdphi2*cdphi3*gtu23
- + 2*(cdphi212*gtu12 + cdphi213*gtu13 + cdphi223*gtu23 +
- gtu11*SQR(cdphi1)) + gtu22*(cdphi222 + 2*SQR(cdphi2)) + gtu33*(cdphi233
- + 2*SQR(cdphi3))));
+ CCTK_REAL_VEC Rt23 =
+ kmul(ToReal(0.5),kmadd(gt13L,PDstandardNth2Xt1,kmadd(gt23L,PDstandardNth2Xt2,kmadd(gt33L,PDstandardNth2Xt3,kmadd(gt12L,PDstandardNth3Xt1,kmadd(gt22L,PDstandardNth3Xt2,kmadd(gt23L,PDstandardNth3Xt3,kmadd(Gtl213,Xtn1,kmadd(Gtl312,Xtn1,kmadd(Gtl223,Xtn2,kmadd(Gtl322,Xtn2,kmadd(Gtl233,Xtn3,kmadd(Gtl323,Xtn3,kmadd(gtu12,kmul(PDstandardNth12gt23,ToReal(-2)),kmadd(gtu13,kmul(PDstandardNth13gt23,ToReal(-2)),kmadd(gtu23,kmul(PDstandardNth23gt23,ToReal(-2)),knmsub(gtu11,PDstandardNth11gt23,knmsub(gtu22,PDstandardNth22gt23,knmsub(gtu33,PDstandardNth33gt23,kmadd(kmadd(Gt112,Gtlu131,kmadd(Gt122,Gtlu132,kmadd(Gt123,Gtlu133,kmadd(Gt113,Gtlu211,kmadd(Gt123,Gtlu212,kmadd(Gt133,Gtlu213,kmadd(Gt213,Gtlu221,kmadd(Gt223,Gtlu222,kmadd(Gt233,Gtlu223,kmadd(Gt212,Gtlu231,kmadd(Gt313,Gtlu231,kmadd(Gt222,Gtlu232,kmadd(Gt323,Gtlu232,kmadd(Gt223,Gtlu233,kmadd(Gt333,Gtlu233,kmadd(Gt112,Gtlu311,kmadd(Gt122,Gtlu312,kmadd(Gt123,Gtlu313,kmadd(Gt212,Gtlu321,kmadd(Gt222,Gtlu322,kmul(Gt223,Gtlu323))))))))))))))))))))),ToReal(2),kmul(kmadd(Gt312,Gtlu331,kmadd(Gt322,Gtlu332,kmul(Gt323,Gtlu333))),ToReal(4))))))))))))))))))))));
- CCTK_REAL Rphi13 = -2*(cdphi213 + cdphi1*(4*cdphi2*gt13L*gtu12 +
- cdphi3*(-2 + 4*gt13L*gtu13)) + gt13L*(cdphi211*gtu11 +
- 4*cdphi2*cdphi3*gtu23 + 2*(cdphi212*gtu12 + cdphi213*gtu13 +
- cdphi223*gtu23 + gtu11*SQR(cdphi1)) + gtu22*(cdphi222 + 2*SQR(cdphi2))
- + gtu33*(cdphi233 + 2*SQR(cdphi3))));
+ CCTK_REAL_VEC Rt33 =
+ kmul(ToReal(0.5),kmadd(gtu12,kmul(PDstandardNth12gt33,ToReal(-2)),kmadd(gtu13,kmul(PDstandardNth13gt33,ToReal(-2)),kmadd(gtu23,kmul(PDstandardNth23gt33,ToReal(-2)),knmsub(gtu11,PDstandardNth11gt33,knmsub(gtu22,PDstandardNth22gt33,knmsub(gtu33,PDstandardNth33gt33,kmadd(kmadd(Gt113,Gtlu131,kmadd(Gt123,Gtlu132,kmadd(Gt133,Gtlu133,kmadd(Gt213,Gtlu231,kmadd(Gt223,Gtlu232,kmadd(Gt233,Gtlu233,kmul(gt13L,PDstandardNth3Xt1))))))),ToReal(2),kmadd(gt23L,kmul(PDstandardNth3Xt2,ToReal(2)),kmadd(gt33L,kmul(PDstandardNth3Xt3,ToReal(2)),kmadd(Gtl313,kmul(Xtn1,ToReal(2)),kmadd(Gtl323,kmul(Xtn2,ToReal(2)),kmadd(Gtl333,kmul(Xtn3,ToReal(2)),kmadd(kmadd(Gt113,Gtlu311,kmadd(Gt123,Gtlu312,kmadd(Gt133,Gtlu313,kmadd(Gt213,Gtlu321,kmadd(Gt223,Gtlu322,kmul(Gt233,Gtlu323)))))),ToReal(4),kmul(kmadd(Gt313,Gtlu331,kmadd(Gt323,Gtlu332,kmul(Gt333,Gtlu333))),ToReal(6))))))))))))))));
- CCTK_REAL Rphi22 = -2*(cdphi222 + 2*(-1 + gt22L*gtu22)*SQR(cdphi2) +
- gt22L*(cdphi222*gtu22 + 4*(cdphi1*cdphi3*gtu13 + cdphi2*(cdphi1*gtu12 +
- cdphi3*gtu23)) + cdphi233*gtu33 + gtu11*(cdphi211 + 2*SQR(cdphi1)) +
- 2*(cdphi212*gtu12 + cdphi213*gtu13 + cdphi223*gtu23 +
- gtu33*SQR(cdphi3))));
+ CCTK_REAL_VEC fac1 =
+ IfThen(conformalMethod,kmul(INV(phiL),ToReal(-0.5)),ToReal(1));
- CCTK_REAL Rphi23 = -2*(cdphi223 + cdphi2*(4*cdphi1*gt23L*gtu12 +
- cdphi3*(-2 + 4*gt23L*gtu23)) + gt23L*(4*cdphi1*cdphi3*gtu13 +
- cdphi222*gtu22 + gtu11*(cdphi211 + 2*SQR(cdphi1)) + 2*(cdphi212*gtu12 +
- cdphi213*gtu13 + cdphi223*gtu23 + gtu22*SQR(cdphi2)) + gtu33*(cdphi233
- + 2*SQR(cdphi3))));
+ CCTK_REAL_VEC cdphi1 = kmul(fac1,PDstandardNth1phi);
- CCTK_REAL Rphi33 = -2*(cdphi233 + gt33L*((4*cdphi1*cdphi2 +
- 2*cdphi212)*gtu12 + 4*cdphi3*(cdphi1*gtu13 + cdphi2*gtu23) +
- 2*(cdphi213*gtu13 + cdphi223*gtu23) + cdphi233*gtu33 + gtu11*(cdphi211
- + 2*SQR(cdphi1)) + gtu22*(cdphi222 + 2*SQR(cdphi2))) + 2*(-1 +
- gt33L*gtu33)*SQR(cdphi3));
+ CCTK_REAL_VEC cdphi2 = kmul(fac1,PDstandardNth2phi);
- CCTK_REAL e4phi = IfThen(conformalMethod,INV(SQR(phiL)),exp(4*phiL));
+ CCTK_REAL_VEC cdphi3 = kmul(fac1,PDstandardNth3phi);
- CCTK_REAL em4phi = INV(e4phi);
+ CCTK_REAL_VEC fac2 =
+ IfThen(conformalMethod,kmul(INV(SQR(phiL)),ToReal(0.5)),ToReal(0));
- CCTK_REAL gu11 = em4phi*gtu11;
+ CCTK_REAL_VEC cdphi211 =
+ kmsub(fac2,SQR(PDstandardNth1phi),kmul(fac1,kmadd(Gt111,PDstandardNth1phi,kmadd(Gt211,PDstandardNth2phi,kmsub(Gt311,PDstandardNth3phi,PDstandardNth11phi)))));
- CCTK_REAL gu12 = em4phi*gtu12;
+ CCTK_REAL_VEC cdphi212 =
+ kmsub(fac2,kmul(PDstandardNth1phi,PDstandardNth2phi),kmul(fac1,kmadd(Gt112,PDstandardNth1phi,kmadd(Gt212,PDstandardNth2phi,kmsub(Gt312,PDstandardNth3phi,PDstandardNth12phi)))));
- CCTK_REAL gu13 = em4phi*gtu13;
+ CCTK_REAL_VEC cdphi213 =
+ kmsub(fac2,kmul(PDstandardNth1phi,PDstandardNth3phi),kmul(fac1,kmadd(Gt113,PDstandardNth1phi,kmadd(Gt213,PDstandardNth2phi,kmsub(Gt313,PDstandardNth3phi,PDstandardNth13phi)))));
- CCTK_REAL gu22 = em4phi*gtu22;
+ CCTK_REAL_VEC cdphi222 =
+ kmsub(fac2,SQR(PDstandardNth2phi),kmul(fac1,kmadd(Gt122,PDstandardNth1phi,kmadd(Gt222,PDstandardNth2phi,kmsub(Gt322,PDstandardNth3phi,PDstandardNth22phi)))));
- CCTK_REAL gu23 = em4phi*gtu23;
+ CCTK_REAL_VEC cdphi223 =
+ kmsub(fac2,kmul(PDstandardNth2phi,PDstandardNth3phi),kmul(fac1,kmadd(Gt123,PDstandardNth1phi,kmadd(Gt223,PDstandardNth2phi,kmsub(Gt323,PDstandardNth3phi,PDstandardNth23phi)))));
- CCTK_REAL gu33 = em4phi*gtu33;
+ CCTK_REAL_VEC cdphi233 =
+ kmsub(fac2,SQR(PDstandardNth3phi),kmul(fac1,kmadd(Gt133,PDstandardNth1phi,kmadd(Gt233,PDstandardNth2phi,kmsub(Gt333,PDstandardNth3phi,PDstandardNth33phi)))));
- CCTK_REAL R11 = Rphi11 + Rt11;
+ CCTK_REAL_VEC Rphi11 =
+ kmul(ToReal(-2),kadd(cdphi211,kmadd(SQR(cdphi1),kmul(kmadd(gt11L,gtu11,ToReal(-1)),ToReal(2)),kmul(gt11L,kmadd(cdphi211,gtu11,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu33,SQR(cdphi3))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmul(kmadd(cdphi1,kmadd(cdphi2,gtu12,kmul(cdphi3,gtu13)),kmul(cdphi2,kmul(cdphi3,gtu23))),ToReal(4))))))))));
- CCTK_REAL R12 = Rphi12 + Rt12;
+ CCTK_REAL_VEC Rphi12 =
+ kmul(ToReal(-2),kadd(cdphi212,kmadd(gt12L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(cdphi3,kmul(gt12L,kmul(gtu13,ToReal(4))),kmul(cdphi2,kmadd(gt12L,kmul(gtu12,ToReal(4)),ToReal(-2))))))));
- CCTK_REAL R13 = Rphi13 + Rt13;
+ CCTK_REAL_VEC Rphi13 =
+ kmul(ToReal(-2),kadd(cdphi213,kmadd(gt13L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(cdphi2,kmul(gt13L,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt13L,kmul(gtu13,ToReal(4)),ToReal(-2))))))));
- CCTK_REAL R22 = Rphi22 + Rt22;
+ CCTK_REAL_VEC Rphi22 =
+ kmul(ToReal(-2),kadd(cdphi222,kmadd(SQR(cdphi2),kmul(kmadd(gt22L,gtu22,ToReal(-1)),ToReal(2)),kmul(gt22L,kmadd(cdphi222,gtu22,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu33,SQR(cdphi3))))),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmul(kmadd(cdphi1,kmul(cdphi3,gtu13),kmul(cdphi2,kmadd(cdphi1,gtu12,kmul(cdphi3,gtu23)))),ToReal(4))))))))));
- CCTK_REAL R23 = Rphi23 + Rt23;
+ CCTK_REAL_VEC Rphi23 =
+ kmul(ToReal(-2),kadd(cdphi223,kmadd(gt23L,kmadd(cdphi222,gtu22,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu22,SQR(cdphi2))))),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi1,kmul(cdphi3,kmul(gtu13,ToReal(4)))))))),kmul(cdphi2,kmadd(cdphi1,kmul(gt23L,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt23L,kmul(gtu23,ToReal(4)),ToReal(-2))))))));
- CCTK_REAL R33 = Rphi33 + Rt33;
+ CCTK_REAL_VEC Rphi33 =
+ kmul(ToReal(-2),kadd(cdphi233,kmadd(SQR(cdphi3),kmul(kmadd(gt33L,gtu33,ToReal(-1)),ToReal(2)),kmul(gt33L,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi213,gtu13,kmul(cdphi223,gtu23)),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(cdphi3,kmul(kmadd(cdphi1,gtu13,kmul(cdphi2,gtu23)),ToReal(4)),kmul(gtu12,kmadd(cdphi212,ToReal(2),kmul(cdphi1,kmul(cdphi2,ToReal(4))))))))))))));
- CCTK_REAL trR = gu11*R11 + gu22*R22 + 2*(gu12*R12 + gu13*R13 +
- gu23*R23) + gu33*R33;
+ CCTK_REAL_VEC e4phi =
+ IfThen(conformalMethod,INV(SQR(phiL)),kexp(kmul(phiL,ToReal(4))));
- CCTK_REAL Atm11 = At11L*gtu11 + At12L*gtu12 + At13L*gtu13;
+ CCTK_REAL_VEC em4phi = INV(e4phi);
- CCTK_REAL Atm21 = At11L*gtu12 + At12L*gtu22 + At13L*gtu23;
+ CCTK_REAL_VEC gu11 = kmul(em4phi,gtu11);
- CCTK_REAL Atm31 = At11L*gtu13 + At12L*gtu23 + At13L*gtu33;
+ CCTK_REAL_VEC gu12 = kmul(em4phi,gtu12);
- CCTK_REAL Atm12 = At12L*gtu11 + At22L*gtu12 + At23L*gtu13;
+ CCTK_REAL_VEC gu13 = kmul(em4phi,gtu13);
- CCTK_REAL Atm22 = At12L*gtu12 + At22L*gtu22 + At23L*gtu23;
+ CCTK_REAL_VEC gu22 = kmul(em4phi,gtu22);
- CCTK_REAL Atm32 = At12L*gtu13 + At22L*gtu23 + At23L*gtu33;
+ CCTK_REAL_VEC gu23 = kmul(em4phi,gtu23);
- CCTK_REAL Atm13 = At13L*gtu11 + At23L*gtu12 + At33L*gtu13;
+ CCTK_REAL_VEC gu33 = kmul(em4phi,gtu33);
- CCTK_REAL Atm23 = At13L*gtu12 + At23L*gtu22 + At33L*gtu23;
+ CCTK_REAL_VEC R11 = kadd(Rphi11,Rt11);
- CCTK_REAL Atm33 = At13L*gtu13 + At23L*gtu23 + At33L*gtu33;
+ CCTK_REAL_VEC R12 = kadd(Rphi12,Rt12);
- CCTK_REAL rho = INV(SQR(alphaL))*(eTttL - 2*(beta2L*eTtyL +
- beta3L*eTtzL) + 2*(beta1L*(-eTtxL + beta2L*eTxyL + beta3L*eTxzL) +
- beta2L*beta3L*eTyzL) + eTxxL*SQR(beta1L) + eTyyL*SQR(beta2L) +
- eTzzL*SQR(beta3L));
+ CCTK_REAL_VEC R13 = kadd(Rphi13,Rt13);
- CCTK_REAL HL = -2.*(Atm12*Atm21 + Atm13*Atm31 + Atm23*Atm32) -
- 50.26548245743669181540229413247204614715*rho + trR - 1.*(SQR(Atm11) +
- SQR(Atm22) + SQR(Atm33)) +
- 0.6666666666666666666666666666666666666667*SQR(trKL);
+ CCTK_REAL_VEC R22 = kadd(Rphi22,Rt22);
+
+ CCTK_REAL_VEC R23 = kadd(Rphi23,Rt23);
+
+ CCTK_REAL_VEC R33 = kadd(Rphi33,Rt33);
+
+ CCTK_REAL_VEC trR =
+ kmadd(gu11,R11,kmadd(gu22,R22,kmadd(gu33,R33,kmul(kmadd(gu12,R12,kmadd(gu13,R13,kmul(gu23,R23))),ToReal(2)))));
+
+ CCTK_REAL_VEC Atm11 =
+ kmadd(At11L,gtu11,kmadd(At12L,gtu12,kmul(At13L,gtu13)));
+
+ CCTK_REAL_VEC Atm21 =
+ kmadd(At11L,gtu12,kmadd(At12L,gtu22,kmul(At13L,gtu23)));
+
+ CCTK_REAL_VEC Atm31 =
+ kmadd(At11L,gtu13,kmadd(At12L,gtu23,kmul(At13L,gtu33)));
+
+ CCTK_REAL_VEC Atm12 =
+ kmadd(At12L,gtu11,kmadd(At22L,gtu12,kmul(At23L,gtu13)));
+
+ CCTK_REAL_VEC Atm22 =
+ kmadd(At12L,gtu12,kmadd(At22L,gtu22,kmul(At23L,gtu23)));
+
+ CCTK_REAL_VEC Atm32 =
+ kmadd(At12L,gtu13,kmadd(At22L,gtu23,kmul(At23L,gtu33)));
+
+ CCTK_REAL_VEC Atm13 =
+ kmadd(At13L,gtu11,kmadd(At23L,gtu12,kmul(At33L,gtu13)));
+
+ CCTK_REAL_VEC Atm23 =
+ kmadd(At13L,gtu12,kmadd(At23L,gtu22,kmul(At33L,gtu23)));
+
+ CCTK_REAL_VEC Atm33 =
+ kmadd(At13L,gtu13,kmadd(At23L,gtu23,kmul(At33L,gtu33)));
+
+ CCTK_REAL_VEC rho =
+ kmul(INV(SQR(alphaL)),kadd(eTttL,kmadd(eTxxL,SQR(beta1L),kmadd(eTyyL,SQR(beta2L),kmadd(eTzzL,SQR(beta3L),kmadd(kmadd(beta2L,eTtyL,kmul(beta3L,eTtzL)),ToReal(-2),kmul(kmadd(beta2L,kmul(beta3L,eTyzL),kmul(beta1L,kmadd(beta2L,eTxyL,kmsub(beta3L,eTxzL,eTtxL)))),ToReal(2))))))));
+
+ CCTK_REAL_VEC HL =
+ kadd(trR,kmadd(rho,ToReal(-50.26548245743669181540229413247204614715),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(-2.),kmadd(kadd(SQR(Atm11),kadd(SQR(Atm22),SQR(Atm33))),ToReal(-1.),kmul(SQR(trKL),ToReal(0.6666666666666666666666666666666666666667))))));
+
+ /* If necessary, store only a partial vector when it overlaps both the lower and the upper loop bound */
+
+ if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0))
+ {
+ ptrdiff_t const elt_count_lo = lc_imin-i;
+ ptrdiff_t const elt_count_hi = lc_imax-i;
+ vec_store_nta_partial_mid(H[index],HL,elt_count_lo,elt_count_hi);
+ break;
+ }
+
+ /* If necessary, store only partial vectors after the first iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0))
+ {
+ ptrdiff_t const elt_count = lc_imin-i;
+ vec_store_nta_partial_hi(H[index],HL,elt_count);
+ continue;
+ }
+
+ /* If necessary, store only partial vectors after the last iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0))
+ {
+ ptrdiff_t const elt_count = lc_imax-i;
+ vec_store_nta_partial_lo(H[index],HL,elt_count);
+ break;
+ }
/* Copy local copies back to grid functions */
- H[index] = HL;
+ vec_store_nta(H[index],HL);
}
- LC_ENDLOOP3 (ML_BSSN_constraints1);
+ LC_ENDLOOP3VEC (ML_BSSN_constraints1);
}
extern "C" void ML_BSSN_constraints1(CCTK_ARGUMENTS)
diff --git a/ML_BSSN/src/ML_BSSN_constraints2.cc b/ML_BSSN/src/ML_BSSN_constraints2.cc
index bc123f0..442917a 100644
--- a/ML_BSSN/src/ML_BSSN_constraints2.cc
+++ b/ML_BSSN/src/ML_BSSN_constraints2.cc
@@ -13,13 +13,14 @@
#include "GenericFD.h"
#include "Differencing.h"
#include "loopcontrol.h"
+#include "vectors.h"
/* Define macros used in calculations */
#define INITVALUE (42)
#define QAD(x) (SQR(SQR(x)))
-#define INV(x) ((1.0) / (x))
-#define SQR(x) ((x) * (x))
-#define CUB(x) ((x) * (x) * (x))
+#define INV(x) (kdiv(ToReal(1.0),x))
+#define SQR(x) (kmul(x,x))
+#define CUB(x) (kmul(x,SQR(x)))
extern "C" void ML_BSSN_constraints2_SelectBCs(CCTK_ARGUMENTS)
{
@@ -74,87 +75,88 @@ static void ML_BSSN_constraints2_Body(cGH const * restrict const cctkGH, int con
ptrdiff_t const cdi = sizeof(CCTK_REAL) * di;
ptrdiff_t const cdj = sizeof(CCTK_REAL) * dj;
ptrdiff_t const cdk = sizeof(CCTK_REAL) * dk;
- CCTK_REAL const dx = ToReal(CCTK_DELTA_SPACE(0));
- CCTK_REAL const dy = ToReal(CCTK_DELTA_SPACE(1));
- CCTK_REAL const dz = ToReal(CCTK_DELTA_SPACE(2));
- CCTK_REAL const dt = ToReal(CCTK_DELTA_TIME);
- CCTK_REAL const dxi = INV(dx);
- CCTK_REAL const dyi = INV(dy);
- CCTK_REAL const dzi = INV(dz);
- CCTK_REAL const khalf = 0.5;
- CCTK_REAL const kthird = 1/3.0;
- CCTK_REAL const ktwothird = 2.0/3.0;
- CCTK_REAL const kfourthird = 4.0/3.0;
- CCTK_REAL const keightthird = 8.0/3.0;
- CCTK_REAL const hdxi = 0.5 * dxi;
- CCTK_REAL const hdyi = 0.5 * dyi;
- CCTK_REAL const hdzi = 0.5 * dzi;
+ CCTK_REAL_VEC const dx = ToReal(CCTK_DELTA_SPACE(0));
+ CCTK_REAL_VEC const dy = ToReal(CCTK_DELTA_SPACE(1));
+ CCTK_REAL_VEC const dz = ToReal(CCTK_DELTA_SPACE(2));
+ CCTK_REAL_VEC const dt = ToReal(CCTK_DELTA_TIME);
+ CCTK_REAL_VEC const dxi = INV(dx);
+ CCTK_REAL_VEC const dyi = INV(dy);
+ CCTK_REAL_VEC const dzi = INV(dz);
+ CCTK_REAL_VEC const khalf = ToReal(0.5);
+ CCTK_REAL_VEC const kthird = ToReal(1.0/3.0);
+ CCTK_REAL_VEC const ktwothird = ToReal(2.0/3.0);
+ CCTK_REAL_VEC const kfourthird = ToReal(4.0/3.0);
+ CCTK_REAL_VEC const keightthird = ToReal(8.0/3.0);
+ CCTK_REAL_VEC const hdxi = kmul(ToReal(0.5), dxi);
+ CCTK_REAL_VEC const hdyi = kmul(ToReal(0.5), dyi);
+ CCTK_REAL_VEC const hdzi = kmul(ToReal(0.5), dzi);
/* Initialize predefined quantities */
- CCTK_REAL const p1o12dx = 0.0833333333333333333333333333333*INV(dx);
- CCTK_REAL const p1o12dy = 0.0833333333333333333333333333333*INV(dy);
- CCTK_REAL const p1o12dz = 0.0833333333333333333333333333333*INV(dz);
- CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx)*INV(dy);
- CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx)*INV(dz);
- CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy)*INV(dz);
- CCTK_REAL const p1o24dx = 0.0416666666666666666666666666667*INV(dx);
- CCTK_REAL const p1o24dy = 0.0416666666666666666666666666667*INV(dy);
- CCTK_REAL const p1o24dz = 0.0416666666666666666666666666667*INV(dz);
- CCTK_REAL const p1o64dx = 0.015625*INV(dx);
- CCTK_REAL const p1o64dy = 0.015625*INV(dy);
- CCTK_REAL const p1o64dz = 0.015625*INV(dz);
- CCTK_REAL const p1odx = INV(dx);
- CCTK_REAL const p1ody = INV(dy);
- CCTK_REAL const p1odz = INV(dz);
- CCTK_REAL const pm1o12dx2 = -0.0833333333333333333333333333333*INV(SQR(dx));
- CCTK_REAL const pm1o12dy2 = -0.0833333333333333333333333333333*INV(SQR(dy));
- CCTK_REAL const pm1o12dz2 = -0.0833333333333333333333333333333*INV(SQR(dz));
+ CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o24dx = kmul(INV(dx),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o24dy = kmul(INV(dy),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o24dz = kmul(INV(dz),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625));
+ CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625));
+ CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625));
+ CCTK_REAL_VEC const p1odx = INV(dx);
+ CCTK_REAL_VEC const p1ody = INV(dy);
+ CCTK_REAL_VEC const p1odz = INV(dz);
+ CCTK_REAL_VEC const pm1o12dx2 = kmul(INV(SQR(dx)),ToReal(-0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const pm1o12dy2 = kmul(INV(SQR(dy)),ToReal(-0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const pm1o12dz2 = kmul(INV(SQR(dz)),ToReal(-0.0833333333333333333333333333333));
/* Loop over the grid points */
#pragma omp parallel
- LC_LOOP3 (ML_BSSN_constraints2,
+ LC_LOOP3VEC (ML_BSSN_constraints2,
i,j,k, min[0],min[1],min[2], max[0],max[1],max[2],
- cctk_lsh[0],cctk_lsh[1],cctk_lsh[2])
+ cctk_lsh[0],cctk_lsh[1],cctk_lsh[2],
+ CCTK_REAL_VEC_SIZE)
{
ptrdiff_t const index = di*i + dj*j + dk*k;
/* Assign local copies of grid functions */
- CCTK_REAL alphaL = alpha[index];
- CCTK_REAL At11L = At11[index];
- CCTK_REAL At12L = At12[index];
- CCTK_REAL At13L = At13[index];
- CCTK_REAL At22L = At22[index];
- CCTK_REAL At23L = At23[index];
- CCTK_REAL At33L = At33[index];
- CCTK_REAL beta1L = beta1[index];
- CCTK_REAL beta2L = beta2[index];
- CCTK_REAL beta3L = beta3[index];
- CCTK_REAL gt11L = gt11[index];
- CCTK_REAL gt12L = gt12[index];
- CCTK_REAL gt13L = gt13[index];
- CCTK_REAL gt22L = gt22[index];
- CCTK_REAL gt23L = gt23[index];
- CCTK_REAL gt33L = gt33[index];
- CCTK_REAL phiL = phi[index];
- CCTK_REAL trKL = trK[index];
- CCTK_REAL Xt1L = Xt1[index];
- CCTK_REAL Xt2L = Xt2[index];
- CCTK_REAL Xt3L = Xt3[index];
-
- CCTK_REAL eTtxL, eTtyL, eTtzL, eTxxL, eTxyL, eTxzL, eTyyL, eTyzL, eTzzL;
+ CCTK_REAL_VEC alphaL = vec_load(alpha[index]);
+ CCTK_REAL_VEC At11L = vec_load(At11[index]);
+ CCTK_REAL_VEC At12L = vec_load(At12[index]);
+ CCTK_REAL_VEC At13L = vec_load(At13[index]);
+ CCTK_REAL_VEC At22L = vec_load(At22[index]);
+ CCTK_REAL_VEC At23L = vec_load(At23[index]);
+ CCTK_REAL_VEC At33L = vec_load(At33[index]);
+ CCTK_REAL_VEC beta1L = vec_load(beta1[index]);
+ CCTK_REAL_VEC beta2L = vec_load(beta2[index]);
+ CCTK_REAL_VEC beta3L = vec_load(beta3[index]);
+ CCTK_REAL_VEC gt11L = vec_load(gt11[index]);
+ CCTK_REAL_VEC gt12L = vec_load(gt12[index]);
+ CCTK_REAL_VEC gt13L = vec_load(gt13[index]);
+ CCTK_REAL_VEC gt22L = vec_load(gt22[index]);
+ CCTK_REAL_VEC gt23L = vec_load(gt23[index]);
+ CCTK_REAL_VEC gt33L = vec_load(gt33[index]);
+ CCTK_REAL_VEC phiL = vec_load(phi[index]);
+ CCTK_REAL_VEC trKL = vec_load(trK[index]);
+ CCTK_REAL_VEC Xt1L = vec_load(Xt1[index]);
+ CCTK_REAL_VEC Xt2L = vec_load(Xt2[index]);
+ CCTK_REAL_VEC Xt3L = vec_load(Xt3[index]);
+
+ CCTK_REAL_VEC eTtxL, eTtyL, eTtzL, eTxxL, eTxyL, eTxzL, eTyyL, eTyzL, eTzzL;
if (*stress_energy_state)
{
- eTtxL = eTtx[index];
- eTtyL = eTty[index];
- eTtzL = eTtz[index];
- eTxxL = eTxx[index];
- eTxyL = eTxy[index];
- eTxzL = eTxz[index];
- eTyyL = eTyy[index];
- eTyzL = eTyz[index];
- eTzzL = eTzz[index];
+ eTtxL = vec_load(eTtx[index]);
+ eTtyL = vec_load(eTty[index]);
+ eTtzL = vec_load(eTtz[index]);
+ eTxxL = vec_load(eTxx[index]);
+ eTxyL = vec_load(eTxy[index]);
+ eTxzL = vec_load(eTxz[index]);
+ eTyyL = vec_load(eTyy[index]);
+ eTyzL = vec_load(eTyz[index]);
+ eTzzL = vec_load(eTzz[index]);
}
else
{
@@ -172,236 +174,267 @@ static void ML_BSSN_constraints2_Body(cGH const * restrict const cctkGH, int con
/* Include user supplied include files */
/* Precompute derivatives */
- CCTK_REAL const PDstandardNth1At11 = PDstandardNth1(&At11[index]);
- CCTK_REAL const PDstandardNth2At11 = PDstandardNth2(&At11[index]);
- CCTK_REAL const PDstandardNth3At11 = PDstandardNth3(&At11[index]);
- CCTK_REAL const PDstandardNth1At12 = PDstandardNth1(&At12[index]);
- CCTK_REAL const PDstandardNth2At12 = PDstandardNth2(&At12[index]);
- CCTK_REAL const PDstandardNth3At12 = PDstandardNth3(&At12[index]);
- CCTK_REAL const PDstandardNth1At13 = PDstandardNth1(&At13[index]);
- CCTK_REAL const PDstandardNth2At13 = PDstandardNth2(&At13[index]);
- CCTK_REAL const PDstandardNth3At13 = PDstandardNth3(&At13[index]);
- CCTK_REAL const PDstandardNth1At22 = PDstandardNth1(&At22[index]);
- CCTK_REAL const PDstandardNth2At22 = PDstandardNth2(&At22[index]);
- CCTK_REAL const PDstandardNth3At22 = PDstandardNth3(&At22[index]);
- CCTK_REAL const PDstandardNth1At23 = PDstandardNth1(&At23[index]);
- CCTK_REAL const PDstandardNth2At23 = PDstandardNth2(&At23[index]);
- CCTK_REAL const PDstandardNth3At23 = PDstandardNth3(&At23[index]);
- CCTK_REAL const PDstandardNth1At33 = PDstandardNth1(&At33[index]);
- CCTK_REAL const PDstandardNth2At33 = PDstandardNth2(&At33[index]);
- CCTK_REAL const PDstandardNth3At33 = PDstandardNth3(&At33[index]);
- CCTK_REAL const PDstandardNth1gt11 = PDstandardNth1(&gt11[index]);
- CCTK_REAL const PDstandardNth2gt11 = PDstandardNth2(&gt11[index]);
- CCTK_REAL const PDstandardNth3gt11 = PDstandardNth3(&gt11[index]);
- CCTK_REAL const PDstandardNth1gt12 = PDstandardNth1(&gt12[index]);
- CCTK_REAL const PDstandardNth2gt12 = PDstandardNth2(&gt12[index]);
- CCTK_REAL const PDstandardNth3gt12 = PDstandardNth3(&gt12[index]);
- CCTK_REAL const PDstandardNth1gt13 = PDstandardNth1(&gt13[index]);
- CCTK_REAL const PDstandardNth2gt13 = PDstandardNth2(&gt13[index]);
- CCTK_REAL const PDstandardNth3gt13 = PDstandardNth3(&gt13[index]);
- CCTK_REAL const PDstandardNth1gt22 = PDstandardNth1(&gt22[index]);
- CCTK_REAL const PDstandardNth2gt22 = PDstandardNth2(&gt22[index]);
- CCTK_REAL const PDstandardNth3gt22 = PDstandardNth3(&gt22[index]);
- CCTK_REAL const PDstandardNth1gt23 = PDstandardNth1(&gt23[index]);
- CCTK_REAL const PDstandardNth2gt23 = PDstandardNth2(&gt23[index]);
- CCTK_REAL const PDstandardNth3gt23 = PDstandardNth3(&gt23[index]);
- CCTK_REAL const PDstandardNth1gt33 = PDstandardNth1(&gt33[index]);
- CCTK_REAL const PDstandardNth2gt33 = PDstandardNth2(&gt33[index]);
- CCTK_REAL const PDstandardNth3gt33 = PDstandardNth3(&gt33[index]);
- CCTK_REAL const PDstandardNth1phi = PDstandardNth1(&phi[index]);
- CCTK_REAL const PDstandardNth2phi = PDstandardNth2(&phi[index]);
- CCTK_REAL const PDstandardNth3phi = PDstandardNth3(&phi[index]);
- CCTK_REAL const PDstandardNth1trK = PDstandardNth1(&trK[index]);
- CCTK_REAL const PDstandardNth2trK = PDstandardNth2(&trK[index]);
- CCTK_REAL const PDstandardNth3trK = PDstandardNth3(&trK[index]);
+ CCTK_REAL_VEC const PDstandardNth1At11 = PDstandardNth1(&At11[index]);
+ CCTK_REAL_VEC const PDstandardNth2At11 = PDstandardNth2(&At11[index]);
+ CCTK_REAL_VEC const PDstandardNth3At11 = PDstandardNth3(&At11[index]);
+ CCTK_REAL_VEC const PDstandardNth1At12 = PDstandardNth1(&At12[index]);
+ CCTK_REAL_VEC const PDstandardNth2At12 = PDstandardNth2(&At12[index]);
+ CCTK_REAL_VEC const PDstandardNth3At12 = PDstandardNth3(&At12[index]);
+ CCTK_REAL_VEC const PDstandardNth1At13 = PDstandardNth1(&At13[index]);
+ CCTK_REAL_VEC const PDstandardNth2At13 = PDstandardNth2(&At13[index]);
+ CCTK_REAL_VEC const PDstandardNth3At13 = PDstandardNth3(&At13[index]);
+ CCTK_REAL_VEC const PDstandardNth1At22 = PDstandardNth1(&At22[index]);
+ CCTK_REAL_VEC const PDstandardNth2At22 = PDstandardNth2(&At22[index]);
+ CCTK_REAL_VEC const PDstandardNth3At22 = PDstandardNth3(&At22[index]);
+ CCTK_REAL_VEC const PDstandardNth1At23 = PDstandardNth1(&At23[index]);
+ CCTK_REAL_VEC const PDstandardNth2At23 = PDstandardNth2(&At23[index]);
+ CCTK_REAL_VEC const PDstandardNth3At23 = PDstandardNth3(&At23[index]);
+ CCTK_REAL_VEC const PDstandardNth1At33 = PDstandardNth1(&At33[index]);
+ CCTK_REAL_VEC const PDstandardNth2At33 = PDstandardNth2(&At33[index]);
+ CCTK_REAL_VEC const PDstandardNth3At33 = PDstandardNth3(&At33[index]);
+ CCTK_REAL_VEC const PDstandardNth1gt11 = PDstandardNth1(&gt11[index]);
+ CCTK_REAL_VEC const PDstandardNth2gt11 = PDstandardNth2(&gt11[index]);
+ CCTK_REAL_VEC const PDstandardNth3gt11 = PDstandardNth3(&gt11[index]);
+ CCTK_REAL_VEC const PDstandardNth1gt12 = PDstandardNth1(&gt12[index]);
+ CCTK_REAL_VEC const PDstandardNth2gt12 = PDstandardNth2(&gt12[index]);
+ CCTK_REAL_VEC const PDstandardNth3gt12 = PDstandardNth3(&gt12[index]);
+ CCTK_REAL_VEC const PDstandardNth1gt13 = PDstandardNth1(&gt13[index]);
+ CCTK_REAL_VEC const PDstandardNth2gt13 = PDstandardNth2(&gt13[index]);
+ CCTK_REAL_VEC const PDstandardNth3gt13 = PDstandardNth3(&gt13[index]);
+ CCTK_REAL_VEC const PDstandardNth1gt22 = PDstandardNth1(&gt22[index]);
+ CCTK_REAL_VEC const PDstandardNth2gt22 = PDstandardNth2(&gt22[index]);
+ CCTK_REAL_VEC const PDstandardNth3gt22 = PDstandardNth3(&gt22[index]);
+ CCTK_REAL_VEC const PDstandardNth1gt23 = PDstandardNth1(&gt23[index]);
+ CCTK_REAL_VEC const PDstandardNth2gt23 = PDstandardNth2(&gt23[index]);
+ CCTK_REAL_VEC const PDstandardNth3gt23 = PDstandardNth3(&gt23[index]);
+ CCTK_REAL_VEC const PDstandardNth1gt33 = PDstandardNth1(&gt33[index]);
+ CCTK_REAL_VEC const PDstandardNth2gt33 = PDstandardNth2(&gt33[index]);
+ CCTK_REAL_VEC const PDstandardNth3gt33 = PDstandardNth3(&gt33[index]);
+ CCTK_REAL_VEC const PDstandardNth1phi = PDstandardNth1(&phi[index]);
+ CCTK_REAL_VEC const PDstandardNth2phi = PDstandardNth2(&phi[index]);
+ CCTK_REAL_VEC const PDstandardNth3phi = PDstandardNth3(&phi[index]);
+ CCTK_REAL_VEC const PDstandardNth1trK = PDstandardNth1(&trK[index]);
+ CCTK_REAL_VEC const PDstandardNth2trK = PDstandardNth2(&trK[index]);
+ CCTK_REAL_VEC const PDstandardNth3trK = PDstandardNth3(&trK[index]);
/* Calculate temporaries and grid functions */
- CCTK_REAL detgt = 1;
+ CCTK_REAL_VEC detgt = ToReal(1);
- CCTK_REAL gtu11 = INV(detgt)*(gt22L*gt33L - SQR(gt23L));
+ CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L)));
- CCTK_REAL gtu12 = (gt13L*gt23L - gt12L*gt33L)*INV(detgt);
+ CCTK_REAL_VEC gtu12 =
+ kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L)));
- CCTK_REAL gtu13 = (-(gt13L*gt22L) + gt12L*gt23L)*INV(detgt);
+ CCTK_REAL_VEC gtu13 =
+ kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L)));
- CCTK_REAL gtu22 = INV(detgt)*(gt11L*gt33L - SQR(gt13L));
+ CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L)));
- CCTK_REAL gtu23 = (gt12L*gt13L - gt11L*gt23L)*INV(detgt);
+ CCTK_REAL_VEC gtu23 =
+ kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L)));
- CCTK_REAL gtu33 = INV(detgt)*(gt11L*gt22L - SQR(gt12L));
+ CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L)));
- CCTK_REAL Gtl111 = 0.5*PDstandardNth1gt11;
+ CCTK_REAL_VEC Gtl111 = kmul(PDstandardNth1gt11,ToReal(0.5));
- CCTK_REAL Gtl112 = 0.5*PDstandardNth2gt11;
+ CCTK_REAL_VEC Gtl112 = kmul(PDstandardNth2gt11,ToReal(0.5));
- CCTK_REAL Gtl113 = 0.5*PDstandardNth3gt11;
+ CCTK_REAL_VEC Gtl113 = kmul(PDstandardNth3gt11,ToReal(0.5));
- CCTK_REAL Gtl122 = -0.5*PDstandardNth1gt22 + PDstandardNth2gt12;
+ CCTK_REAL_VEC Gtl122 =
+ kmadd(PDstandardNth1gt22,ToReal(-0.5),PDstandardNth2gt12);
- CCTK_REAL Gtl123 = 0.5*(-PDstandardNth1gt23 + PDstandardNth2gt13 +
- PDstandardNth3gt12);
+ CCTK_REAL_VEC Gtl123 =
+ kmul(kadd(PDstandardNth2gt13,ksub(PDstandardNth3gt12,PDstandardNth1gt23)),ToReal(0.5));
- CCTK_REAL Gtl133 = -0.5*PDstandardNth1gt33 + PDstandardNth3gt13;
+ CCTK_REAL_VEC Gtl133 =
+ kmadd(PDstandardNth1gt33,ToReal(-0.5),PDstandardNth3gt13);
- CCTK_REAL Gtl211 = PDstandardNth1gt12 - 0.5*PDstandardNth2gt11;
+ CCTK_REAL_VEC Gtl211 =
+ kmadd(PDstandardNth2gt11,ToReal(-0.5),PDstandardNth1gt12);
- CCTK_REAL Gtl212 = 0.5*PDstandardNth1gt22;
+ CCTK_REAL_VEC Gtl212 = kmul(PDstandardNth1gt22,ToReal(0.5));
- CCTK_REAL Gtl213 = 0.5*(PDstandardNth1gt23 - PDstandardNth2gt13 +
- PDstandardNth3gt12);
+ CCTK_REAL_VEC Gtl213 =
+ kmul(kadd(PDstandardNth1gt23,ksub(PDstandardNth3gt12,PDstandardNth2gt13)),ToReal(0.5));
- CCTK_REAL Gtl222 = 0.5*PDstandardNth2gt22;
+ CCTK_REAL_VEC Gtl222 = kmul(PDstandardNth2gt22,ToReal(0.5));
- CCTK_REAL Gtl223 = 0.5*PDstandardNth3gt22;
+ CCTK_REAL_VEC Gtl223 = kmul(PDstandardNth3gt22,ToReal(0.5));
- CCTK_REAL Gtl233 = -0.5*PDstandardNth2gt33 + PDstandardNth3gt23;
+ CCTK_REAL_VEC Gtl233 =
+ kmadd(PDstandardNth2gt33,ToReal(-0.5),PDstandardNth3gt23);
- CCTK_REAL Gtl311 = PDstandardNth1gt13 - 0.5*PDstandardNth3gt11;
+ CCTK_REAL_VEC Gtl311 =
+ kmadd(PDstandardNth3gt11,ToReal(-0.5),PDstandardNth1gt13);
- CCTK_REAL Gtl312 = 0.5*(PDstandardNth1gt23 + PDstandardNth2gt13 -
- PDstandardNth3gt12);
+ CCTK_REAL_VEC Gtl312 =
+ kmul(kadd(PDstandardNth1gt23,ksub(PDstandardNth2gt13,PDstandardNth3gt12)),ToReal(0.5));
- CCTK_REAL Gtl313 = 0.5*PDstandardNth1gt33;
+ CCTK_REAL_VEC Gtl313 = kmul(PDstandardNth1gt33,ToReal(0.5));
- CCTK_REAL Gtl322 = PDstandardNth2gt23 - 0.5*PDstandardNth3gt22;
+ CCTK_REAL_VEC Gtl322 =
+ kmadd(PDstandardNth3gt22,ToReal(-0.5),PDstandardNth2gt23);
- CCTK_REAL Gtl323 = 0.5*PDstandardNth2gt33;
+ CCTK_REAL_VEC Gtl323 = kmul(PDstandardNth2gt33,ToReal(0.5));
- CCTK_REAL Gtl333 = 0.5*PDstandardNth3gt33;
+ CCTK_REAL_VEC Gtl333 = kmul(PDstandardNth3gt33,ToReal(0.5));
- CCTK_REAL Gt111 = Gtl111*gtu11 + Gtl211*gtu12 + Gtl311*gtu13;
+ CCTK_REAL_VEC Gt111 =
+ kmadd(Gtl111,gtu11,kmadd(Gtl211,gtu12,kmul(Gtl311,gtu13)));
- CCTK_REAL Gt211 = Gtl111*gtu12 + Gtl211*gtu22 + Gtl311*gtu23;
+ CCTK_REAL_VEC Gt211 =
+ kmadd(Gtl111,gtu12,kmadd(Gtl211,gtu22,kmul(Gtl311,gtu23)));
- CCTK_REAL Gt311 = Gtl111*gtu13 + Gtl211*gtu23 + Gtl311*gtu33;
+ CCTK_REAL_VEC Gt311 =
+ kmadd(Gtl111,gtu13,kmadd(Gtl211,gtu23,kmul(Gtl311,gtu33)));
- CCTK_REAL Gt112 = Gtl112*gtu11 + Gtl212*gtu12 + Gtl312*gtu13;
+ CCTK_REAL_VEC Gt112 =
+ kmadd(Gtl112,gtu11,kmadd(Gtl212,gtu12,kmul(Gtl312,gtu13)));
- CCTK_REAL Gt212 = Gtl112*gtu12 + Gtl212*gtu22 + Gtl312*gtu23;
+ CCTK_REAL_VEC Gt212 =
+ kmadd(Gtl112,gtu12,kmadd(Gtl212,gtu22,kmul(Gtl312,gtu23)));
- CCTK_REAL Gt312 = Gtl112*gtu13 + Gtl212*gtu23 + Gtl312*gtu33;
+ CCTK_REAL_VEC Gt312 =
+ kmadd(Gtl112,gtu13,kmadd(Gtl212,gtu23,kmul(Gtl312,gtu33)));
- CCTK_REAL Gt113 = Gtl113*gtu11 + Gtl213*gtu12 + Gtl313*gtu13;
+ CCTK_REAL_VEC Gt113 =
+ kmadd(Gtl113,gtu11,kmadd(Gtl213,gtu12,kmul(Gtl313,gtu13)));
- CCTK_REAL Gt213 = Gtl113*gtu12 + Gtl213*gtu22 + Gtl313*gtu23;
+ CCTK_REAL_VEC Gt213 =
+ kmadd(Gtl113,gtu12,kmadd(Gtl213,gtu22,kmul(Gtl313,gtu23)));
- CCTK_REAL Gt313 = Gtl113*gtu13 + Gtl213*gtu23 + Gtl313*gtu33;
+ CCTK_REAL_VEC Gt313 =
+ kmadd(Gtl113,gtu13,kmadd(Gtl213,gtu23,kmul(Gtl313,gtu33)));
- CCTK_REAL Gt122 = Gtl122*gtu11 + Gtl222*gtu12 + Gtl322*gtu13;
+ CCTK_REAL_VEC Gt122 =
+ kmadd(Gtl122,gtu11,kmadd(Gtl222,gtu12,kmul(Gtl322,gtu13)));
- CCTK_REAL Gt222 = Gtl122*gtu12 + Gtl222*gtu22 + Gtl322*gtu23;
+ CCTK_REAL_VEC Gt222 =
+ kmadd(Gtl122,gtu12,kmadd(Gtl222,gtu22,kmul(Gtl322,gtu23)));
- CCTK_REAL Gt322 = Gtl122*gtu13 + Gtl222*gtu23 + Gtl322*gtu33;
+ CCTK_REAL_VEC Gt322 =
+ kmadd(Gtl122,gtu13,kmadd(Gtl222,gtu23,kmul(Gtl322,gtu33)));
- CCTK_REAL Gt123 = Gtl123*gtu11 + Gtl223*gtu12 + Gtl323*gtu13;
+ CCTK_REAL_VEC Gt123 =
+ kmadd(Gtl123,gtu11,kmadd(Gtl223,gtu12,kmul(Gtl323,gtu13)));
- CCTK_REAL Gt223 = Gtl123*gtu12 + Gtl223*gtu22 + Gtl323*gtu23;
+ CCTK_REAL_VEC Gt223 =
+ kmadd(Gtl123,gtu12,kmadd(Gtl223,gtu22,kmul(Gtl323,gtu23)));
- CCTK_REAL Gt323 = Gtl123*gtu13 + Gtl223*gtu23 + Gtl323*gtu33;
+ CCTK_REAL_VEC Gt323 =
+ kmadd(Gtl123,gtu13,kmadd(Gtl223,gtu23,kmul(Gtl323,gtu33)));
- CCTK_REAL Gt133 = Gtl133*gtu11 + Gtl233*gtu12 + Gtl333*gtu13;
+ CCTK_REAL_VEC Gt133 =
+ kmadd(Gtl133,gtu11,kmadd(Gtl233,gtu12,kmul(Gtl333,gtu13)));
- CCTK_REAL Gt233 = Gtl133*gtu12 + Gtl233*gtu22 + Gtl333*gtu23;
+ CCTK_REAL_VEC Gt233 =
+ kmadd(Gtl133,gtu12,kmadd(Gtl233,gtu22,kmul(Gtl333,gtu23)));
- CCTK_REAL Gt333 = Gtl133*gtu13 + Gtl233*gtu23 + Gtl333*gtu33;
+ CCTK_REAL_VEC Gt333 =
+ kmadd(Gtl133,gtu13,kmadd(Gtl233,gtu23,kmul(Gtl333,gtu33)));
- CCTK_REAL fac1 = IfThen(conformalMethod,-0.5*INV(phiL),1);
+ CCTK_REAL_VEC fac1 =
+ IfThen(conformalMethod,kmul(INV(phiL),ToReal(-0.5)),ToReal(1));
- CCTK_REAL cdphi1 = fac1*PDstandardNth1phi;
+ CCTK_REAL_VEC cdphi1 = kmul(fac1,PDstandardNth1phi);
- CCTK_REAL cdphi2 = fac1*PDstandardNth2phi;
+ CCTK_REAL_VEC cdphi2 = kmul(fac1,PDstandardNth2phi);
- CCTK_REAL cdphi3 = fac1*PDstandardNth3phi;
+ CCTK_REAL_VEC cdphi3 = kmul(fac1,PDstandardNth3phi);
- CCTK_REAL S1 = (-eTtxL + beta1L*eTxxL + beta2L*eTxyL +
- beta3L*eTxzL)*INV(alphaL);
+ CCTK_REAL_VEC S1 =
+ kmul(INV(alphaL),kmadd(beta1L,eTxxL,kmadd(beta2L,eTxyL,kmsub(beta3L,eTxzL,eTtxL))));
- CCTK_REAL S2 = (-eTtyL + beta1L*eTxyL + beta2L*eTyyL +
- beta3L*eTyzL)*INV(alphaL);
+ CCTK_REAL_VEC S2 =
+ kmul(INV(alphaL),kmadd(beta1L,eTxyL,kmadd(beta2L,eTyyL,kmsub(beta3L,eTyzL,eTtyL))));
- CCTK_REAL S3 = (-eTtzL + beta1L*eTxzL + beta2L*eTyzL +
- beta3L*eTzzL)*INV(alphaL);
+ CCTK_REAL_VEC S3 =
+ kmul(INV(alphaL),kmadd(beta1L,eTxzL,kmadd(beta2L,eTyzL,kmsub(beta3L,eTzzL,eTtzL))));
- CCTK_REAL M1L = -2.*((At12L*Gt211 + At13L*Gt311)*gtu11 +
- At11L*Gt123*gtu23) + At12L*((6.*cdphi1 - 1.*Gt111)*gtu12 -
- 3.*Gt213*gtu13 + 6.*(cdphi2*gtu22 + cdphi3*gtu23) - 1.*Gt233*gtu33) -
- 1.*((At22L*Gt212 + At12L*(Gt112 + Gt222) + At23L*Gt312)*gtu22 +
- (At13L*Gt112 + At12L*Gt113 + At23L*Gt212)*gtu23 + (At13L*Gt113 +
- At23L*Gt213 + At33L*Gt313)*gtu33) + At11L*((6.*cdphi1 - 2.*Gt111)*gtu11
- + 6.*(cdphi2*gtu12 + cdphi3*gtu13) - 1.*(Gt122*gtu22 + Gt133*gtu33)) +
- At13L*((6.*cdphi1 - 1.*Gt111)*gtu13 + 6.*(cdphi2*gtu23 + cdphi3*gtu33)
- - 1.*(Gt322*gtu22 + Gt333*gtu33)) + gtu11*PDstandardNth1At11 -
- 0.6666666666666666666666666666666666666667*PDstandardNth1trK +
- gtu12*(-1.*(At22L*Gt211 + At23L*Gt311) - 3.*(At11L*Gt112 + At12L*Gt212
- + At13L*Gt312) + PDstandardNth1At12 + PDstandardNth2At11) +
- gtu22*PDstandardNth2At12 + gtu13*(-1.*(At23L*Gt211 + At33L*Gt311) -
- 3.*(At11L*Gt113 + At13L*Gt313) + PDstandardNth1At13 +
- PDstandardNth3At11) + gtu23*(-1.*(At22L*Gt213 + At33L*Gt312 +
- At23L*Gt313) - 2.*(At12L*Gt223 + At13L*Gt323) + PDstandardNth2At13 +
- PDstandardNth3At12) + gtu33*PDstandardNth3At13 -
- 25.13274122871834590770114706623602307358*S1;
+ CCTK_REAL_VEC M1L =
+ kmadd(gtu11,PDstandardNth1At11,kmadd(gtu22,PDstandardNth2At12,kmadd(gtu33,PDstandardNth3At13,kmadd(S1,ToReal(-25.13274122871834590770114706623602307358),kmadd(kmadd(kmadd(At12L,Gt211,kmul(At13L,Gt311)),gtu11,kmul(At11L,kmul(Gt123,gtu23))),ToReal(-2.),kmadd(kmadd(kmadd(At22L,Gt212,kmadd(At12L,kadd(Gt112,Gt222),kmul(At23L,Gt312))),gtu22,kmadd(kmadd(At13L,Gt112,kmadd(At12L,Gt113,kmul(At23L,Gt212))),gtu23,kmul(kmadd(At13L,Gt113,kmadd(At23L,Gt213,kmul(At33L,Gt313))),gtu33))),ToReal(-1.),kmadd(gtu12,kadd(PDstandardNth1At12,kadd(PDstandardNth2At11,kmadd(kmadd(At11L,Gt112,kmadd(At12L,Gt212,kmul(At13L,Gt312))),ToReal(-3.),kmul(kmadd(At22L,Gt211,kmul(At23L,Gt311)),ToReal(-1.))))),kmadd(gtu13,kadd(PDstandardNth1At13,kadd(PDstandardNth3At11,kmadd(kmadd(At11L,Gt113,kmul(At13L,Gt313)),ToReal(-3.),kmul(kmadd(At23L,Gt211,kmul(At33L,Gt311)),ToReal(-1.))))),kmadd(gtu23,kadd(PDstandardNth2At13,kadd(PDstandardNth3At12,kmadd(kmadd(At12L,Gt223,kmul(At13L,Gt323)),ToReal(-2.),kmul(kmadd(At22L,Gt213,kmadd(At33L,Gt312,kmul(At23L,Gt313))),ToReal(-1.))))),kmadd(PDstandardNth1trK,ToReal(-0.6666666666666666666666666666666666666667),kmadd(At11L,kmadd(kmadd(Gt122,gtu22,kmul(Gt133,gtu33)),ToReal(-1.),kmadd(kmadd(cdphi2,gtu12,kmul(cdphi3,gtu13)),ToReal(6.),kmul(gtu11,kmadd(Gt111,ToReal(-2.),kmul(cdphi1,ToReal(6.)))))),kmadd(At12L,kmadd(Gt213,kmul(gtu13,ToReal(-3.)),kmadd(Gt233,kmul(gtu33,ToReal(-1.)),kmadd(kmadd(cdphi2,gtu22,kmul(cdphi3,gtu23)),ToReal(6.),kmul(gtu12,kmadd(Gt111,ToReal(-1.),kmul(cdphi1,ToReal(6.))))))),kmul(At13L,kmadd(kmadd(Gt322,gtu22,kmul(Gt333,gtu33)),ToReal(-1.),kmadd(kmadd(cdphi2,gtu23,kmul(cdphi3,gtu33)),ToReal(6.),kmul(gtu13,kmadd(Gt111,ToReal(-1.),kmul(cdphi1,ToReal(6.)))))))))))))))))));
- CCTK_REAL M2L = At12L*((6.*cdphi1 - 1.*Gt111)*gtu11 + 6.*(cdphi2*gtu12
- + cdphi3*gtu13) - 2.*Gt122*gtu22 - 3.*Gt123*gtu23 - 1.*Gt133*gtu33) +
- At22L*((6.*cdphi2 - 2.*Gt222)*gtu22 + 6.*cdphi3*gtu23 - 1.*Gt233*gtu33)
- + At23L*(-2.*Gt322*gtu22 - 1.*Gt333*gtu33 + 6.*(cdphi2*gtu23 +
- cdphi3*gtu33)) - 1.*((At11L*Gt112 + At22L*Gt211 + At12L*Gt212 +
- At23L*Gt311 + At13L*Gt312)*gtu11 + Gt122*(At11L*gtu12 + At13L*gtu23) +
- (At23L*Gt223 + At33L*Gt323)*gtu33 + At13L*(Gt112*gtu13 + Gt123*gtu33))
- + gtu11*PDstandardNth1At12 + gtu12*(At22L*(6.*cdphi1 - 3.*Gt212) +
- At12L*(-3.*Gt112 - 1.*Gt222) - 3.*At23L*Gt312 - 1.*At13L*Gt322 +
- PDstandardNth1At22 + PDstandardNth2At12) + gtu22*PDstandardNth2At22 -
- 0.6666666666666666666666666666666666666667*PDstandardNth2trK +
- gtu13*(-2.*(At12L*Gt113 + At22L*Gt213) + At23L*(6.*cdphi1 - 1.*Gt212 -
- 2.*Gt313) - 1.*(At11L*Gt123 + At12L*Gt223 + At33L*Gt312 + At13L*Gt323)
- + PDstandardNth1At23 + PDstandardNth3At12) + gtu23*(-1.*(At23L*Gt222 +
- At33L*Gt322) - 3.*(At22L*Gt223 + At23L*Gt323) + PDstandardNth2At23 +
- PDstandardNth3At22) + gtu33*PDstandardNth3At23 -
- 25.13274122871834590770114706623602307358*S2;
+ CCTK_REAL_VEC M2L =
+ kmadd(gtu11,PDstandardNth1At12,kmadd(gtu22,PDstandardNth2At22,kmadd(gtu33,PDstandardNth3At23,kmadd(S2,ToReal(-25.13274122871834590770114706623602307358),kmadd(kmadd(kmadd(At11L,Gt112,kmadd(At22L,Gt211,kmadd(At12L,Gt212,kmadd(At23L,Gt311,kmul(At13L,Gt312))))),gtu11,kmadd(Gt122,kmadd(At11L,gtu12,kmul(At13L,gtu23)),kmadd(kmadd(At23L,Gt223,kmul(At33L,Gt323)),gtu33,kmul(At13L,kmadd(Gt112,gtu13,kmul(Gt123,gtu33)))))),ToReal(-1.),kmadd(gtu23,kadd(PDstandardNth2At23,kadd(PDstandardNth3At22,kmadd(kmadd(At22L,Gt223,kmul(At23L,Gt323)),ToReal(-3.),kmul(kmadd(At23L,Gt222,kmul(At33L,Gt322)),ToReal(-1.))))),kmadd(PDstandardNth2trK,ToReal(-0.6666666666666666666666666666666666666667),kmadd(At23L,kmadd(Gt322,kmul(gtu22,ToReal(-2.)),kmadd(Gt333,kmul(gtu33,ToReal(-1.)),kmul(kmadd(cdphi2,gtu23,kmul(cdphi3,gtu33)),ToReal(6.)))),kmadd(gtu12,kadd(PDstandardNth1At22,kadd(PDstandardNth2At12,kmadd(At23L,kmul(Gt312,ToReal(-3.)),kmadd(At13L,kmul(Gt322,ToReal(-1.)),kmadd(At12L,kmadd(Gt112,ToReal(-3.),kmul(Gt222,ToReal(-1.))),kmul(At22L,kmadd(Gt212,ToReal(-3.),kmul(cdphi1,ToReal(6.))))))))),kmadd(At12L,kmadd(Gt123,kmul(gtu23,ToReal(-3.)),kmadd(Gt122,kmul(gtu22,ToReal(-2.)),kmadd(Gt133,kmul(gtu33,ToReal(-1.)),kmadd(kmadd(cdphi2,gtu12,kmul(cdphi3,gtu13)),ToReal(6.),kmul(gtu11,kmadd(Gt111,ToReal(-1.),kmul(cdphi1,ToReal(6.)))))))),kmadd(gtu13,kadd(PDstandardNth1At23,kadd(PDstandardNth3At12,kmadd(kmadd(At12L,Gt113,kmul(At22L,Gt213)),ToReal(-2.),kmadd(kmadd(At11L,Gt123,kmadd(At12L,Gt223,kmadd(At33L,Gt312,kmul(At13L,Gt323)))),ToReal(-1.),kmul(At23L,kmadd(Gt313,ToReal(-2.),kmadd(Gt212,ToReal(-1.),kmul(cdphi1,ToReal(6.))))))))),kmul(At22L,kmadd(Gt233,kmul(gtu33,ToReal(-1.)),kmadd(cdphi3,kmul(gtu23,ToReal(6.)),kmul(gtu22,kmadd(Gt222,ToReal(-2.),kmul(cdphi2,ToReal(6.))))))))))))))))));
- CCTK_REAL M3L = -1.*((At11L*Gt113 + At23L*Gt211 + At12L*Gt213 +
- At33L*Gt311)*gtu11 + (At22L*Gt223 + At33L*Gt322 + At23L*Gt323)*gtu22 +
- At12L*(Gt113*gtu12 + Gt123*gtu22) + Gt133*(At11L*gtu13 + At12L*gtu23))
- + At13L*((6.*cdphi1 - 1.*(Gt111 + Gt313))*gtu11 + 6.*(cdphi2*gtu12 +
- cdphi3*gtu13) - 1.*Gt122*gtu22 - 3.*Gt123*gtu23 - 2.*Gt133*gtu33) +
- At23L*((6.*cdphi2 - 1.*Gt222)*gtu22 + 6.*cdphi3*gtu23 - 2.*Gt233*gtu33)
- + gtu11*PDstandardNth1At13 + gtu12*(-2.*(At13L*Gt112 + At33L*Gt312) +
- At23L*(6.*cdphi1 - 2.*Gt212 - 1.*Gt313) - 1.*(At11L*Gt123 + At22L*Gt213
- + At12L*Gt223 + At13L*Gt323) + PDstandardNth1At23 + PDstandardNth2At13)
- + gtu22*PDstandardNth2At23 + gtu13*(-3.*(At13L*Gt113 + At23L*Gt213) +
- At33L*(6.*cdphi1 - 3.*Gt313) - 1.*(At12L*Gt233 + At13L*Gt333) +
- PDstandardNth1At33 + PDstandardNth3At13) + gtu23*(-1.*At22L*Gt233 +
- At33L*(6.*cdphi2 - 3.*Gt323) + At23L*(-3.*Gt223 - 1.*Gt333) +
- PDstandardNth2At33 + PDstandardNth3At23) + gtu33*(At33L*(6.*cdphi3 -
- 2.*Gt333) + PDstandardNth3At33) -
- 0.6666666666666666666666666666666666666667*PDstandardNth3trK -
- 25.13274122871834590770114706623602307358*S3;
+ CCTK_REAL_VEC M3L =
+ kmadd(gtu11,PDstandardNth1At13,kmadd(gtu22,PDstandardNth2At23,kmadd(S3,ToReal(-25.13274122871834590770114706623602307358),kmadd(kmadd(kmadd(At11L,Gt113,kmadd(At23L,Gt211,kmadd(At12L,Gt213,kmul(At33L,Gt311)))),gtu11,kmadd(kmadd(At22L,Gt223,kmadd(At33L,Gt322,kmul(At23L,Gt323))),gtu22,kmadd(At12L,kmadd(Gt113,gtu12,kmul(Gt123,gtu22)),kmul(Gt133,kmadd(At11L,gtu13,kmul(At12L,gtu23)))))),ToReal(-1.),kmadd(PDstandardNth3trK,ToReal(-0.6666666666666666666666666666666666666667),kmadd(gtu13,kadd(PDstandardNth1At33,kadd(PDstandardNth3At13,kmadd(kmadd(At13L,Gt113,kmul(At23L,Gt213)),ToReal(-3.),kmadd(kmadd(At12L,Gt233,kmul(At13L,Gt333)),ToReal(-1.),kmul(At33L,kmadd(Gt313,ToReal(-3.),kmul(cdphi1,ToReal(6.)))))))),kmadd(gtu12,kadd(PDstandardNth1At23,kadd(PDstandardNth2At13,kmadd(kmadd(At13L,Gt112,kmul(At33L,Gt312)),ToReal(-2.),kmadd(kmadd(At11L,Gt123,kmadd(At22L,Gt213,kmadd(At12L,Gt223,kmul(At13L,Gt323)))),ToReal(-1.),kmul(At23L,kmadd(Gt212,ToReal(-2.),kmadd(Gt313,ToReal(-1.),kmul(cdphi1,ToReal(6.))))))))),kmadd(At13L,kmadd(Gt123,kmul(gtu23,ToReal(-3.)),kmadd(Gt133,kmul(gtu33,ToReal(-2.)),kmadd(Gt122,kmul(gtu22,ToReal(-1.)),kmadd(kmadd(cdphi2,gtu12,kmul(cdphi3,gtu13)),ToReal(6.),kmul(gtu11,kmadd(kadd(Gt111,Gt313),ToReal(-1.),kmul(cdphi1,ToReal(6.)))))))),kmadd(gtu23,kadd(PDstandardNth2At33,kadd(PDstandardNth3At23,kmadd(At22L,kmul(Gt233,ToReal(-1.)),kmadd(At23L,kmadd(Gt223,ToReal(-3.),kmul(Gt333,ToReal(-1.))),kmul(At33L,kmadd(Gt323,ToReal(-3.),kmul(cdphi2,ToReal(6.)))))))),kmadd(At23L,kmadd(Gt233,kmul(gtu33,ToReal(-2.)),kmadd(cdphi3,kmul(gtu23,ToReal(6.)),kmul(gtu22,kmadd(Gt222,ToReal(-1.),kmul(cdphi2,ToReal(6.)))))),kmul(gtu33,kmadd(At33L,kmadd(Gt333,ToReal(-2.),kmul(cdphi3,ToReal(6.))),PDstandardNth3At33))))))))))));
- CCTK_REAL cSL = Log(detgt);
+ CCTK_REAL_VEC cSL = klog(detgt);
- CCTK_REAL cXt1L = Gt111*gtu11 + Gt122*gtu22 + 2*(Gt112*gtu12 +
- Gt113*gtu13 + Gt123*gtu23) + Gt133*gtu33 - Xt1L;
+ CCTK_REAL_VEC cXt1L =
+ kmadd(Gt111,gtu11,kmadd(Gt122,gtu22,kmadd(Gt133,gtu33,kmsub(kmadd(Gt112,gtu12,kmadd(Gt113,gtu13,kmul(Gt123,gtu23))),ToReal(2),Xt1L))));
- CCTK_REAL cXt2L = Gt211*gtu11 + Gt222*gtu22 + 2*(Gt212*gtu12 +
- Gt213*gtu13 + Gt223*gtu23) + Gt233*gtu33 - Xt2L;
+ CCTK_REAL_VEC cXt2L =
+ kmadd(Gt211,gtu11,kmadd(Gt222,gtu22,kmadd(Gt233,gtu33,kmsub(kmadd(Gt212,gtu12,kmadd(Gt213,gtu13,kmul(Gt223,gtu23))),ToReal(2),Xt2L))));
- CCTK_REAL cXt3L = Gt311*gtu11 + Gt322*gtu22 + 2*(Gt312*gtu12 +
- Gt313*gtu13 + Gt323*gtu23) + Gt333*gtu33 - Xt3L;
-
- CCTK_REAL cAL = At11L*gtu11 + At22L*gtu22 + 2*(At12L*gtu12 +
- At13L*gtu13 + At23L*gtu23) + At33L*gtu33;
+ CCTK_REAL_VEC cXt3L =
+ kmadd(Gt311,gtu11,kmadd(Gt322,gtu22,kmadd(Gt333,gtu33,kmsub(kmadd(Gt312,gtu12,kmadd(Gt313,gtu13,kmul(Gt323,gtu23))),ToReal(2),Xt3L))));
+
+ CCTK_REAL_VEC cAL =
+ kmadd(At11L,gtu11,kmadd(At22L,gtu22,kmadd(At33L,gtu33,kmul(kmadd(At12L,gtu12,kmadd(At13L,gtu13,kmul(At23L,gtu23))),ToReal(2)))));
+
+ /* If necessary, store only partial vectors after the first iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0))
+ {
+ ptrdiff_t const elt_count_lo = lc_imin-i;
+ ptrdiff_t const elt_count_hi = lc_imax-i;
+ vec_store_nta_partial_mid(cA[index],cAL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(cS[index],cSL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(cXt1[index],cXt1L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(cXt2[index],cXt2L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(cXt3[index],cXt3L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(M1[index],M1L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(M2[index],M2L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(M3[index],M3L,elt_count_lo,elt_count_hi);
+ break;
+ }
+
+ /* If necessary, store only partial vectors after the first iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0))
+ {
+ ptrdiff_t const elt_count = lc_imin-i;
+ vec_store_nta_partial_hi(cA[index],cAL,elt_count);
+ vec_store_nta_partial_hi(cS[index],cSL,elt_count);
+ vec_store_nta_partial_hi(cXt1[index],cXt1L,elt_count);
+ vec_store_nta_partial_hi(cXt2[index],cXt2L,elt_count);
+ vec_store_nta_partial_hi(cXt3[index],cXt3L,elt_count);
+ vec_store_nta_partial_hi(M1[index],M1L,elt_count);
+ vec_store_nta_partial_hi(M2[index],M2L,elt_count);
+ vec_store_nta_partial_hi(M3[index],M3L,elt_count);
+ continue;
+ }
+
+ /* If necessary, store only partial vectors after the last iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0))
+ {
+ ptrdiff_t const elt_count = lc_imax-i;
+ vec_store_nta_partial_lo(cA[index],cAL,elt_count);
+ vec_store_nta_partial_lo(cS[index],cSL,elt_count);
+ vec_store_nta_partial_lo(cXt1[index],cXt1L,elt_count);
+ vec_store_nta_partial_lo(cXt2[index],cXt2L,elt_count);
+ vec_store_nta_partial_lo(cXt3[index],cXt3L,elt_count);
+ vec_store_nta_partial_lo(M1[index],M1L,elt_count);
+ vec_store_nta_partial_lo(M2[index],M2L,elt_count);
+ vec_store_nta_partial_lo(M3[index],M3L,elt_count);
+ break;
+ }
/* Copy local copies back to grid functions */
- cA[index] = cAL;
- cS[index] = cSL;
- cXt1[index] = cXt1L;
- cXt2[index] = cXt2L;
- cXt3[index] = cXt3L;
- M1[index] = M1L;
- M2[index] = M2L;
- M3[index] = M3L;
+ vec_store_nta(cA[index],cAL);
+ vec_store_nta(cS[index],cSL);
+ vec_store_nta(cXt1[index],cXt1L);
+ vec_store_nta(cXt2[index],cXt2L);
+ vec_store_nta(cXt3[index],cXt3L);
+ vec_store_nta(M1[index],M1L);
+ vec_store_nta(M2[index],M2L);
+ vec_store_nta(M3[index],M3L);
}
- LC_ENDLOOP3 (ML_BSSN_constraints2);
+ LC_ENDLOOP3VEC (ML_BSSN_constraints2);
}
extern "C" void ML_BSSN_constraints2(CCTK_ARGUMENTS)
diff --git a/ML_BSSN/src/ML_BSSN_convertFromADMBase.cc b/ML_BSSN/src/ML_BSSN_convertFromADMBase.cc
index 500c6b2..74f09e9 100644
--- a/ML_BSSN/src/ML_BSSN_convertFromADMBase.cc
+++ b/ML_BSSN/src/ML_BSSN_convertFromADMBase.cc
@@ -13,13 +13,14 @@
#include "GenericFD.h"
#include "Differencing.h"
#include "loopcontrol.h"
+#include "vectors.h"
/* Define macros used in calculations */
#define INITVALUE (42)
#define QAD(x) (SQR(SQR(x)))
-#define INV(x) ((1.0) / (x))
-#define SQR(x) ((x) * (x))
-#define CUB(x) ((x) * (x) * (x))
+#define INV(x) (kdiv(ToReal(1.0),x))
+#define SQR(x) (kmul(x,x))
+#define CUB(x) (kmul(x,SQR(x)))
static void ML_BSSN_convertFromADMBase_Body(cGH const * restrict const cctkGH, int const dir, int const face, CCTK_REAL const normal[3], CCTK_REAL const tangentA[3], CCTK_REAL const tangentB[3], int const min[3], int const max[3], int const n_subblock_gfs, CCTK_REAL * restrict const subblock_gfs[])
{
@@ -52,70 +53,71 @@ static void ML_BSSN_convertFromADMBase_Body(cGH const * restrict const cctkGH, i
ptrdiff_t const cdi = sizeof(CCTK_REAL) * di;
ptrdiff_t const cdj = sizeof(CCTK_REAL) * dj;
ptrdiff_t const cdk = sizeof(CCTK_REAL) * dk;
- CCTK_REAL const dx = ToReal(CCTK_DELTA_SPACE(0));
- CCTK_REAL const dy = ToReal(CCTK_DELTA_SPACE(1));
- CCTK_REAL const dz = ToReal(CCTK_DELTA_SPACE(2));
- CCTK_REAL const dt = ToReal(CCTK_DELTA_TIME);
- CCTK_REAL const dxi = INV(dx);
- CCTK_REAL const dyi = INV(dy);
- CCTK_REAL const dzi = INV(dz);
- CCTK_REAL const khalf = 0.5;
- CCTK_REAL const kthird = 1/3.0;
- CCTK_REAL const ktwothird = 2.0/3.0;
- CCTK_REAL const kfourthird = 4.0/3.0;
- CCTK_REAL const keightthird = 8.0/3.0;
- CCTK_REAL const hdxi = 0.5 * dxi;
- CCTK_REAL const hdyi = 0.5 * dyi;
- CCTK_REAL const hdzi = 0.5 * dzi;
+ CCTK_REAL_VEC const dx = ToReal(CCTK_DELTA_SPACE(0));
+ CCTK_REAL_VEC const dy = ToReal(CCTK_DELTA_SPACE(1));
+ CCTK_REAL_VEC const dz = ToReal(CCTK_DELTA_SPACE(2));
+ CCTK_REAL_VEC const dt = ToReal(CCTK_DELTA_TIME);
+ CCTK_REAL_VEC const dxi = INV(dx);
+ CCTK_REAL_VEC const dyi = INV(dy);
+ CCTK_REAL_VEC const dzi = INV(dz);
+ CCTK_REAL_VEC const khalf = ToReal(0.5);
+ CCTK_REAL_VEC const kthird = ToReal(1.0/3.0);
+ CCTK_REAL_VEC const ktwothird = ToReal(2.0/3.0);
+ CCTK_REAL_VEC const kfourthird = ToReal(4.0/3.0);
+ CCTK_REAL_VEC const keightthird = ToReal(8.0/3.0);
+ CCTK_REAL_VEC const hdxi = kmul(ToReal(0.5), dxi);
+ CCTK_REAL_VEC const hdyi = kmul(ToReal(0.5), dyi);
+ CCTK_REAL_VEC const hdzi = kmul(ToReal(0.5), dzi);
/* Initialize predefined quantities */
- CCTK_REAL const p1o12dx = 0.0833333333333333333333333333333*INV(dx);
- CCTK_REAL const p1o12dy = 0.0833333333333333333333333333333*INV(dy);
- CCTK_REAL const p1o12dz = 0.0833333333333333333333333333333*INV(dz);
- CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx)*INV(dy);
- CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx)*INV(dz);
- CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy)*INV(dz);
- CCTK_REAL const p1o24dx = 0.0416666666666666666666666666667*INV(dx);
- CCTK_REAL const p1o24dy = 0.0416666666666666666666666666667*INV(dy);
- CCTK_REAL const p1o24dz = 0.0416666666666666666666666666667*INV(dz);
- CCTK_REAL const p1o64dx = 0.015625*INV(dx);
- CCTK_REAL const p1o64dy = 0.015625*INV(dy);
- CCTK_REAL const p1o64dz = 0.015625*INV(dz);
- CCTK_REAL const p1odx = INV(dx);
- CCTK_REAL const p1ody = INV(dy);
- CCTK_REAL const p1odz = INV(dz);
- CCTK_REAL const pm1o12dx2 = -0.0833333333333333333333333333333*INV(SQR(dx));
- CCTK_REAL const pm1o12dy2 = -0.0833333333333333333333333333333*INV(SQR(dy));
- CCTK_REAL const pm1o12dz2 = -0.0833333333333333333333333333333*INV(SQR(dz));
+ CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o24dx = kmul(INV(dx),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o24dy = kmul(INV(dy),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o24dz = kmul(INV(dz),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625));
+ CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625));
+ CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625));
+ CCTK_REAL_VEC const p1odx = INV(dx);
+ CCTK_REAL_VEC const p1ody = INV(dy);
+ CCTK_REAL_VEC const p1odz = INV(dz);
+ CCTK_REAL_VEC const pm1o12dx2 = kmul(INV(SQR(dx)),ToReal(-0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const pm1o12dy2 = kmul(INV(SQR(dy)),ToReal(-0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const pm1o12dz2 = kmul(INV(SQR(dz)),ToReal(-0.0833333333333333333333333333333));
/* Loop over the grid points */
#pragma omp parallel
- LC_LOOP3 (ML_BSSN_convertFromADMBase,
+ LC_LOOP3VEC (ML_BSSN_convertFromADMBase,
i,j,k, min[0],min[1],min[2], max[0],max[1],max[2],
- cctk_lsh[0],cctk_lsh[1],cctk_lsh[2])
+ cctk_lsh[0],cctk_lsh[1],cctk_lsh[2],
+ CCTK_REAL_VEC_SIZE)
{
ptrdiff_t const index = di*i + dj*j + dk*k;
/* Assign local copies of grid functions */
- CCTK_REAL alpL = alp[index];
- CCTK_REAL betaxL = betax[index];
- CCTK_REAL betayL = betay[index];
- CCTK_REAL betazL = betaz[index];
- CCTK_REAL gxxL = gxx[index];
- CCTK_REAL gxyL = gxy[index];
- CCTK_REAL gxzL = gxz[index];
- CCTK_REAL gyyL = gyy[index];
- CCTK_REAL gyzL = gyz[index];
- CCTK_REAL gzzL = gzz[index];
- CCTK_REAL kxxL = kxx[index];
- CCTK_REAL kxyL = kxy[index];
- CCTK_REAL kxzL = kxz[index];
- CCTK_REAL kyyL = kyy[index];
- CCTK_REAL kyzL = kyz[index];
- CCTK_REAL kzzL = kzz[index];
- CCTK_REAL phiL = phi[index];
- CCTK_REAL trKL = trK[index];
+ CCTK_REAL_VEC alpL = vec_load(alp[index]);
+ CCTK_REAL_VEC betaxL = vec_load(betax[index]);
+ CCTK_REAL_VEC betayL = vec_load(betay[index]);
+ CCTK_REAL_VEC betazL = vec_load(betaz[index]);
+ CCTK_REAL_VEC gxxL = vec_load(gxx[index]);
+ CCTK_REAL_VEC gxyL = vec_load(gxy[index]);
+ CCTK_REAL_VEC gxzL = vec_load(gxz[index]);
+ CCTK_REAL_VEC gyyL = vec_load(gyy[index]);
+ CCTK_REAL_VEC gyzL = vec_load(gyz[index]);
+ CCTK_REAL_VEC gzzL = vec_load(gzz[index]);
+ CCTK_REAL_VEC kxxL = vec_load(kxx[index]);
+ CCTK_REAL_VEC kxyL = vec_load(kxy[index]);
+ CCTK_REAL_VEC kxzL = vec_load(kxz[index]);
+ CCTK_REAL_VEC kyyL = vec_load(kyy[index]);
+ CCTK_REAL_VEC kyzL = vec_load(kyz[index]);
+ CCTK_REAL_VEC kzzL = vec_load(kzz[index]);
+ CCTK_REAL_VEC phiL = vec_load(phi[index]);
+ CCTK_REAL_VEC trKL = vec_load(trK[index]);
/* Include user supplied include files */
@@ -123,110 +125,189 @@ static void ML_BSSN_convertFromADMBase_Body(cGH const * restrict const cctkGH, i
/* Precompute derivatives */
/* Calculate temporaries and grid functions */
- CCTK_REAL g11 = gxxL;
+ CCTK_REAL_VEC g11 = gxxL;
- CCTK_REAL g12 = gxyL;
+ CCTK_REAL_VEC g12 = gxyL;
- CCTK_REAL g13 = gxzL;
+ CCTK_REAL_VEC g13 = gxzL;
- CCTK_REAL g22 = gyyL;
+ CCTK_REAL_VEC g22 = gyyL;
- CCTK_REAL g23 = gyzL;
+ CCTK_REAL_VEC g23 = gyzL;
- CCTK_REAL g33 = gzzL;
+ CCTK_REAL_VEC g33 = gzzL;
- CCTK_REAL detg = 2*g12*g13*g23 + g33*(g11*g22 - SQR(g12)) -
- g22*SQR(g13) - g11*SQR(g23);
+ CCTK_REAL_VEC detg =
+ knmsub(g22,SQR(g13),knmsub(g11,SQR(g23),kmadd(g33,kmsub(g11,g22,SQR(g12)),kmul(g12,kmul(g13,kmul(g23,ToReal(2)))))));
- CCTK_REAL gu11 = INV(detg)*(g22*g33 - SQR(g23));
+ CCTK_REAL_VEC gu11 = kmul(INV(detg),kmsub(g22,g33,SQR(g23)));
- CCTK_REAL gu12 = (g13*g23 - g12*g33)*INV(detg);
+ CCTK_REAL_VEC gu12 = kmul(INV(detg),kmsub(g13,g23,kmul(g12,g33)));
- CCTK_REAL gu13 = (-(g13*g22) + g12*g23)*INV(detg);
+ CCTK_REAL_VEC gu13 = kmul(INV(detg),kmsub(g12,g23,kmul(g13,g22)));
- CCTK_REAL gu22 = INV(detg)*(g11*g33 - SQR(g13));
+ CCTK_REAL_VEC gu22 = kmul(INV(detg),kmsub(g11,g33,SQR(g13)));
- CCTK_REAL gu23 = (g12*g13 - g11*g23)*INV(detg);
+ CCTK_REAL_VEC gu23 = kmul(INV(detg),kmsub(g12,g13,kmul(g11,g23)));
- CCTK_REAL gu33 = INV(detg)*(g11*g22 - SQR(g12));
+ CCTK_REAL_VEC gu33 = kmul(INV(detg),kmsub(g11,g22,SQR(g12)));
- CCTK_REAL em4phi;
+ CCTK_REAL_VEC em4phi;
if (conformalMethod)
{
- phiL = pow(detg,-0.166666666666666666666666666667);
+ phiL = kpow(detg,-0.166666666666666666666666666667);
em4phi = SQR(phiL);
}
else
{
- phiL = 0.0833333333333333333333333333333*Log(detg);
+ phiL = kmul(klog(detg),ToReal(0.0833333333333333333333333333333));
- em4phi = exp(-4*phiL);
+ em4phi = kexp(kmul(phiL,ToReal(-4)));
}
- CCTK_REAL gt11L = em4phi*g11;
+ CCTK_REAL_VEC gt11L = kmul(em4phi,g11);
- CCTK_REAL gt12L = em4phi*g12;
+ CCTK_REAL_VEC gt12L = kmul(em4phi,g12);
- CCTK_REAL gt13L = em4phi*g13;
+ CCTK_REAL_VEC gt13L = kmul(em4phi,g13);
- CCTK_REAL gt22L = em4phi*g22;
+ CCTK_REAL_VEC gt22L = kmul(em4phi,g22);
- CCTK_REAL gt23L = em4phi*g23;
+ CCTK_REAL_VEC gt23L = kmul(em4phi,g23);
- CCTK_REAL gt33L = em4phi*g33;
+ CCTK_REAL_VEC gt33L = kmul(em4phi,g33);
- trKL = gu11*kxxL + gu22*kyyL + 2*(gu12*kxyL + gu13*kxzL + gu23*kyzL) +
- gu33*kzzL;
+ trKL =
+ kmadd(gu11,kxxL,kmadd(gu22,kyyL,kmadd(gu33,kzzL,kmul(kmadd(gu12,kxyL,kmadd(gu13,kxzL,kmul(gu23,kyzL))),ToReal(2)))));
- CCTK_REAL At11L = em4phi*(kxxL -
- 0.333333333333333333333333333333*g11*trKL);
+ CCTK_REAL_VEC At11L =
+ kmul(em4phi,kmadd(g11,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kxxL));
- CCTK_REAL At12L = em4phi*(kxyL -
- 0.333333333333333333333333333333*g12*trKL);
+ CCTK_REAL_VEC At12L =
+ kmul(em4phi,kmadd(g12,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kxyL));
- CCTK_REAL At13L = em4phi*(kxzL -
- 0.333333333333333333333333333333*g13*trKL);
+ CCTK_REAL_VEC At13L =
+ kmul(em4phi,kmadd(g13,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kxzL));
- CCTK_REAL At22L = em4phi*(kyyL -
- 0.333333333333333333333333333333*g22*trKL);
+ CCTK_REAL_VEC At22L =
+ kmul(em4phi,kmadd(g22,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kyyL));
- CCTK_REAL At23L = em4phi*(kyzL -
- 0.333333333333333333333333333333*g23*trKL);
+ CCTK_REAL_VEC At23L =
+ kmul(em4phi,kmadd(g23,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kyzL));
- CCTK_REAL At33L = em4phi*(kzzL -
- 0.333333333333333333333333333333*g33*trKL);
+ CCTK_REAL_VEC At33L =
+ kmul(em4phi,kmadd(g33,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kzzL));
- CCTK_REAL alphaL = alpL;
+ CCTK_REAL_VEC alphaL = alpL;
- CCTK_REAL beta1L = betaxL;
+ CCTK_REAL_VEC beta1L = betaxL;
- CCTK_REAL beta2L = betayL;
+ CCTK_REAL_VEC beta2L = betayL;
- CCTK_REAL beta3L = betazL;
+ CCTK_REAL_VEC beta3L = betazL;
+
+ /* If necessary, store only partial vectors when the vector starts before lc_imin and also ends beyond lc_imax */
+
+ if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0))
+ {
+ ptrdiff_t const elt_count_lo = lc_imin-i;
+ ptrdiff_t const elt_count_hi = lc_imax-i;
+ vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi);
+ break;
+ }
+
+ /* If necessary, store only partial vectors after the first iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0))
+ {
+ ptrdiff_t const elt_count = lc_imin-i;
+ vec_store_nta_partial_hi(alpha[index],alphaL,elt_count);
+ vec_store_nta_partial_hi(At11[index],At11L,elt_count);
+ vec_store_nta_partial_hi(At12[index],At12L,elt_count);
+ vec_store_nta_partial_hi(At13[index],At13L,elt_count);
+ vec_store_nta_partial_hi(At22[index],At22L,elt_count);
+ vec_store_nta_partial_hi(At23[index],At23L,elt_count);
+ vec_store_nta_partial_hi(At33[index],At33L,elt_count);
+ vec_store_nta_partial_hi(beta1[index],beta1L,elt_count);
+ vec_store_nta_partial_hi(beta2[index],beta2L,elt_count);
+ vec_store_nta_partial_hi(beta3[index],beta3L,elt_count);
+ vec_store_nta_partial_hi(gt11[index],gt11L,elt_count);
+ vec_store_nta_partial_hi(gt12[index],gt12L,elt_count);
+ vec_store_nta_partial_hi(gt13[index],gt13L,elt_count);
+ vec_store_nta_partial_hi(gt22[index],gt22L,elt_count);
+ vec_store_nta_partial_hi(gt23[index],gt23L,elt_count);
+ vec_store_nta_partial_hi(gt33[index],gt33L,elt_count);
+ vec_store_nta_partial_hi(phi[index],phiL,elt_count);
+ vec_store_nta_partial_hi(trK[index],trKL,elt_count);
+ continue;
+ }
+
+ /* If necessary, store only partial vectors after the last iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0))
+ {
+ ptrdiff_t const elt_count = lc_imax-i;
+ vec_store_nta_partial_lo(alpha[index],alphaL,elt_count);
+ vec_store_nta_partial_lo(At11[index],At11L,elt_count);
+ vec_store_nta_partial_lo(At12[index],At12L,elt_count);
+ vec_store_nta_partial_lo(At13[index],At13L,elt_count);
+ vec_store_nta_partial_lo(At22[index],At22L,elt_count);
+ vec_store_nta_partial_lo(At23[index],At23L,elt_count);
+ vec_store_nta_partial_lo(At33[index],At33L,elt_count);
+ vec_store_nta_partial_lo(beta1[index],beta1L,elt_count);
+ vec_store_nta_partial_lo(beta2[index],beta2L,elt_count);
+ vec_store_nta_partial_lo(beta3[index],beta3L,elt_count);
+ vec_store_nta_partial_lo(gt11[index],gt11L,elt_count);
+ vec_store_nta_partial_lo(gt12[index],gt12L,elt_count);
+ vec_store_nta_partial_lo(gt13[index],gt13L,elt_count);
+ vec_store_nta_partial_lo(gt22[index],gt22L,elt_count);
+ vec_store_nta_partial_lo(gt23[index],gt23L,elt_count);
+ vec_store_nta_partial_lo(gt33[index],gt33L,elt_count);
+ vec_store_nta_partial_lo(phi[index],phiL,elt_count);
+ vec_store_nta_partial_lo(trK[index],trKL,elt_count);
+ break;
+ }
/* Copy local copies back to grid functions */
- alpha[index] = alphaL;
- At11[index] = At11L;
- At12[index] = At12L;
- At13[index] = At13L;
- At22[index] = At22L;
- At23[index] = At23L;
- At33[index] = At33L;
- beta1[index] = beta1L;
- beta2[index] = beta2L;
- beta3[index] = beta3L;
- gt11[index] = gt11L;
- gt12[index] = gt12L;
- gt13[index] = gt13L;
- gt22[index] = gt22L;
- gt23[index] = gt23L;
- gt33[index] = gt33L;
- phi[index] = phiL;
- trK[index] = trKL;
+ vec_store_nta(alpha[index],alphaL);
+ vec_store_nta(At11[index],At11L);
+ vec_store_nta(At12[index],At12L);
+ vec_store_nta(At13[index],At13L);
+ vec_store_nta(At22[index],At22L);
+ vec_store_nta(At23[index],At23L);
+ vec_store_nta(At33[index],At33L);
+ vec_store_nta(beta1[index],beta1L);
+ vec_store_nta(beta2[index],beta2L);
+ vec_store_nta(beta3[index],beta3L);
+ vec_store_nta(gt11[index],gt11L);
+ vec_store_nta(gt12[index],gt12L);
+ vec_store_nta(gt13[index],gt13L);
+ vec_store_nta(gt22[index],gt22L);
+ vec_store_nta(gt23[index],gt23L);
+ vec_store_nta(gt33[index],gt33L);
+ vec_store_nta(phi[index],phiL);
+ vec_store_nta(trK[index],trKL);
}
- LC_ENDLOOP3 (ML_BSSN_convertFromADMBase);
+ LC_ENDLOOP3VEC (ML_BSSN_convertFromADMBase);
}
extern "C" void ML_BSSN_convertFromADMBase(CCTK_ARGUMENTS)
diff --git a/ML_BSSN/src/ML_BSSN_convertFromADMBaseGamma.cc b/ML_BSSN/src/ML_BSSN_convertFromADMBaseGamma.cc
index 2eebecc..4d543bd 100644
--- a/ML_BSSN/src/ML_BSSN_convertFromADMBaseGamma.cc
+++ b/ML_BSSN/src/ML_BSSN_convertFromADMBaseGamma.cc
@@ -13,13 +13,14 @@
#include "GenericFD.h"
#include "Differencing.h"
#include "loopcontrol.h"
+#include "vectors.h"
/* Define macros used in calculations */
#define INITVALUE (42)
#define QAD(x) (SQR(SQR(x)))
-#define INV(x) ((1.0) / (x))
-#define SQR(x) ((x) * (x))
-#define CUB(x) ((x) * (x) * (x))
+#define INV(x) (kdiv(ToReal(1.0),x))
+#define SQR(x) (kmul(x,x))
+#define CUB(x) (kmul(x,SQR(x)))
extern "C" void ML_BSSN_convertFromADMBaseGamma_SelectBCs(CCTK_ARGUMENTS)
{
@@ -71,114 +72,115 @@ static void ML_BSSN_convertFromADMBaseGamma_Body(cGH const * restrict const cctk
ptrdiff_t const cdi = sizeof(CCTK_REAL) * di;
ptrdiff_t const cdj = sizeof(CCTK_REAL) * dj;
ptrdiff_t const cdk = sizeof(CCTK_REAL) * dk;
- CCTK_REAL const dx = ToReal(CCTK_DELTA_SPACE(0));
- CCTK_REAL const dy = ToReal(CCTK_DELTA_SPACE(1));
- CCTK_REAL const dz = ToReal(CCTK_DELTA_SPACE(2));
- CCTK_REAL const dt = ToReal(CCTK_DELTA_TIME);
- CCTK_REAL const dxi = INV(dx);
- CCTK_REAL const dyi = INV(dy);
- CCTK_REAL const dzi = INV(dz);
- CCTK_REAL const khalf = 0.5;
- CCTK_REAL const kthird = 1/3.0;
- CCTK_REAL const ktwothird = 2.0/3.0;
- CCTK_REAL const kfourthird = 4.0/3.0;
- CCTK_REAL const keightthird = 8.0/3.0;
- CCTK_REAL const hdxi = 0.5 * dxi;
- CCTK_REAL const hdyi = 0.5 * dyi;
- CCTK_REAL const hdzi = 0.5 * dzi;
+ CCTK_REAL_VEC const dx = ToReal(CCTK_DELTA_SPACE(0));
+ CCTK_REAL_VEC const dy = ToReal(CCTK_DELTA_SPACE(1));
+ CCTK_REAL_VEC const dz = ToReal(CCTK_DELTA_SPACE(2));
+ CCTK_REAL_VEC const dt = ToReal(CCTK_DELTA_TIME);
+ CCTK_REAL_VEC const dxi = INV(dx);
+ CCTK_REAL_VEC const dyi = INV(dy);
+ CCTK_REAL_VEC const dzi = INV(dz);
+ CCTK_REAL_VEC const khalf = ToReal(0.5);
+ CCTK_REAL_VEC const kthird = ToReal(1.0/3.0);
+ CCTK_REAL_VEC const ktwothird = ToReal(2.0/3.0);
+ CCTK_REAL_VEC const kfourthird = ToReal(4.0/3.0);
+ CCTK_REAL_VEC const keightthird = ToReal(8.0/3.0);
+ CCTK_REAL_VEC const hdxi = kmul(ToReal(0.5), dxi);
+ CCTK_REAL_VEC const hdyi = kmul(ToReal(0.5), dyi);
+ CCTK_REAL_VEC const hdzi = kmul(ToReal(0.5), dzi);
/* Initialize predefined quantities */
- CCTK_REAL const p1o12dx = 0.0833333333333333333333333333333*INV(dx);
- CCTK_REAL const p1o12dy = 0.0833333333333333333333333333333*INV(dy);
- CCTK_REAL const p1o12dz = 0.0833333333333333333333333333333*INV(dz);
- CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx)*INV(dy);
- CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx)*INV(dz);
- CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy)*INV(dz);
- CCTK_REAL const p1o24dx = 0.0416666666666666666666666666667*INV(dx);
- CCTK_REAL const p1o24dy = 0.0416666666666666666666666666667*INV(dy);
- CCTK_REAL const p1o24dz = 0.0416666666666666666666666666667*INV(dz);
- CCTK_REAL const p1o64dx = 0.015625*INV(dx);
- CCTK_REAL const p1o64dy = 0.015625*INV(dy);
- CCTK_REAL const p1o64dz = 0.015625*INV(dz);
- CCTK_REAL const p1odx = INV(dx);
- CCTK_REAL const p1ody = INV(dy);
- CCTK_REAL const p1odz = INV(dz);
- CCTK_REAL const pm1o12dx2 = -0.0833333333333333333333333333333*INV(SQR(dx));
- CCTK_REAL const pm1o12dy2 = -0.0833333333333333333333333333333*INV(SQR(dy));
- CCTK_REAL const pm1o12dz2 = -0.0833333333333333333333333333333*INV(SQR(dz));
+ CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o24dx = kmul(INV(dx),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o24dy = kmul(INV(dy),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o24dz = kmul(INV(dz),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625));
+ CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625));
+ CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625));
+ CCTK_REAL_VEC const p1odx = INV(dx);
+ CCTK_REAL_VEC const p1ody = INV(dy);
+ CCTK_REAL_VEC const p1odz = INV(dz);
+ CCTK_REAL_VEC const pm1o12dx2 = kmul(INV(SQR(dx)),ToReal(-0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const pm1o12dy2 = kmul(INV(SQR(dy)),ToReal(-0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const pm1o12dz2 = kmul(INV(SQR(dz)),ToReal(-0.0833333333333333333333333333333));
/* Loop over the grid points */
#pragma omp parallel
- LC_LOOP3 (ML_BSSN_convertFromADMBaseGamma,
+ LC_LOOP3VEC (ML_BSSN_convertFromADMBaseGamma,
i,j,k, min[0],min[1],min[2], max[0],max[1],max[2],
- cctk_lsh[0],cctk_lsh[1],cctk_lsh[2])
+ cctk_lsh[0],cctk_lsh[1],cctk_lsh[2],
+ CCTK_REAL_VEC_SIZE)
{
ptrdiff_t const index = di*i + dj*j + dk*k;
/* Assign local copies of grid functions */
- CCTK_REAL alphaL = alpha[index];
- CCTK_REAL beta1L = beta1[index];
- CCTK_REAL beta2L = beta2[index];
- CCTK_REAL beta3L = beta3[index];
- CCTK_REAL dtalpL = dtalp[index];
- CCTK_REAL dtbetaxL = dtbetax[index];
- CCTK_REAL dtbetayL = dtbetay[index];
- CCTK_REAL dtbetazL = dtbetaz[index];
- CCTK_REAL gt11L = gt11[index];
- CCTK_REAL gt12L = gt12[index];
- CCTK_REAL gt13L = gt13[index];
- CCTK_REAL gt22L = gt22[index];
- CCTK_REAL gt23L = gt23[index];
- CCTK_REAL gt33L = gt33[index];
- CCTK_REAL rL = r[index];
+ CCTK_REAL_VEC alphaL = vec_load(alpha[index]);
+ CCTK_REAL_VEC beta1L = vec_load(beta1[index]);
+ CCTK_REAL_VEC beta2L = vec_load(beta2[index]);
+ CCTK_REAL_VEC beta3L = vec_load(beta3[index]);
+ CCTK_REAL_VEC dtalpL = vec_load(dtalp[index]);
+ CCTK_REAL_VEC dtbetaxL = vec_load(dtbetax[index]);
+ CCTK_REAL_VEC dtbetayL = vec_load(dtbetay[index]);
+ CCTK_REAL_VEC dtbetazL = vec_load(dtbetaz[index]);
+ CCTK_REAL_VEC gt11L = vec_load(gt11[index]);
+ CCTK_REAL_VEC gt12L = vec_load(gt12[index]);
+ CCTK_REAL_VEC gt13L = vec_load(gt13[index]);
+ CCTK_REAL_VEC gt22L = vec_load(gt22[index]);
+ CCTK_REAL_VEC gt23L = vec_load(gt23[index]);
+ CCTK_REAL_VEC gt33L = vec_load(gt33[index]);
+ CCTK_REAL_VEC rL = vec_load(r[index]);
/* Include user supplied include files */
/* Precompute derivatives */
- CCTK_REAL const PDupwindNthAnti1alpha = PDupwindNthAnti1(&alpha[index]);
- CCTK_REAL const PDupwindNthSymm1alpha = PDupwindNthSymm1(&alpha[index]);
- CCTK_REAL const PDupwindNthAnti2alpha = PDupwindNthAnti2(&alpha[index]);
- CCTK_REAL const PDupwindNthSymm2alpha = PDupwindNthSymm2(&alpha[index]);
- CCTK_REAL const PDupwindNthAnti3alpha = PDupwindNthAnti3(&alpha[index]);
- CCTK_REAL const PDupwindNthSymm3alpha = PDupwindNthSymm3(&alpha[index]);
- CCTK_REAL const PDupwindNthAnti1beta1 = PDupwindNthAnti1(&beta1[index]);
- CCTK_REAL const PDupwindNthSymm1beta1 = PDupwindNthSymm1(&beta1[index]);
- CCTK_REAL const PDupwindNthAnti2beta1 = PDupwindNthAnti2(&beta1[index]);
- CCTK_REAL const PDupwindNthSymm2beta1 = PDupwindNthSymm2(&beta1[index]);
- CCTK_REAL const PDupwindNthAnti3beta1 = PDupwindNthAnti3(&beta1[index]);
- CCTK_REAL const PDupwindNthSymm3beta1 = PDupwindNthSymm3(&beta1[index]);
- CCTK_REAL const PDupwindNthAnti1beta2 = PDupwindNthAnti1(&beta2[index]);
- CCTK_REAL const PDupwindNthSymm1beta2 = PDupwindNthSymm1(&beta2[index]);
- CCTK_REAL const PDupwindNthAnti2beta2 = PDupwindNthAnti2(&beta2[index]);
- CCTK_REAL const PDupwindNthSymm2beta2 = PDupwindNthSymm2(&beta2[index]);
- CCTK_REAL const PDupwindNthAnti3beta2 = PDupwindNthAnti3(&beta2[index]);
- CCTK_REAL const PDupwindNthSymm3beta2 = PDupwindNthSymm3(&beta2[index]);
- CCTK_REAL const PDupwindNthAnti1beta3 = PDupwindNthAnti1(&beta3[index]);
- CCTK_REAL const PDupwindNthSymm1beta3 = PDupwindNthSymm1(&beta3[index]);
- CCTK_REAL const PDupwindNthAnti2beta3 = PDupwindNthAnti2(&beta3[index]);
- CCTK_REAL const PDupwindNthSymm2beta3 = PDupwindNthSymm2(&beta3[index]);
- CCTK_REAL const PDupwindNthAnti3beta3 = PDupwindNthAnti3(&beta3[index]);
- CCTK_REAL const PDupwindNthSymm3beta3 = PDupwindNthSymm3(&beta3[index]);
- CCTK_REAL const PDstandardNth1gt11 = PDstandardNth1(&gt11[index]);
- CCTK_REAL const PDstandardNth2gt11 = PDstandardNth2(&gt11[index]);
- CCTK_REAL const PDstandardNth3gt11 = PDstandardNth3(&gt11[index]);
- CCTK_REAL const PDstandardNth1gt12 = PDstandardNth1(&gt12[index]);
- CCTK_REAL const PDstandardNth2gt12 = PDstandardNth2(&gt12[index]);
- CCTK_REAL const PDstandardNth3gt12 = PDstandardNth3(&gt12[index]);
- CCTK_REAL const PDstandardNth1gt13 = PDstandardNth1(&gt13[index]);
- CCTK_REAL const PDstandardNth2gt13 = PDstandardNth2(&gt13[index]);
- CCTK_REAL const PDstandardNth3gt13 = PDstandardNth3(&gt13[index]);
- CCTK_REAL const PDstandardNth1gt22 = PDstandardNth1(&gt22[index]);
- CCTK_REAL const PDstandardNth2gt22 = PDstandardNth2(&gt22[index]);
- CCTK_REAL const PDstandardNth3gt22 = PDstandardNth3(&gt22[index]);
- CCTK_REAL const PDstandardNth1gt23 = PDstandardNth1(&gt23[index]);
- CCTK_REAL const PDstandardNth2gt23 = PDstandardNth2(&gt23[index]);
- CCTK_REAL const PDstandardNth3gt23 = PDstandardNth3(&gt23[index]);
- CCTK_REAL const PDstandardNth1gt33 = PDstandardNth1(&gt33[index]);
- CCTK_REAL const PDstandardNth2gt33 = PDstandardNth2(&gt33[index]);
- CCTK_REAL const PDstandardNth3gt33 = PDstandardNth3(&gt33[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti1alpha = PDupwindNthAnti1(&alpha[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm1alpha = PDupwindNthSymm1(&alpha[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti2alpha = PDupwindNthAnti2(&alpha[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm2alpha = PDupwindNthSymm2(&alpha[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti3alpha = PDupwindNthAnti3(&alpha[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm3alpha = PDupwindNthSymm3(&alpha[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti1beta1 = PDupwindNthAnti1(&beta1[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm1beta1 = PDupwindNthSymm1(&beta1[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti2beta1 = PDupwindNthAnti2(&beta1[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm2beta1 = PDupwindNthSymm2(&beta1[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti3beta1 = PDupwindNthAnti3(&beta1[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm3beta1 = PDupwindNthSymm3(&beta1[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti1beta2 = PDupwindNthAnti1(&beta2[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm1beta2 = PDupwindNthSymm1(&beta2[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti2beta2 = PDupwindNthAnti2(&beta2[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm2beta2 = PDupwindNthSymm2(&beta2[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti3beta2 = PDupwindNthAnti3(&beta2[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm3beta2 = PDupwindNthSymm3(&beta2[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti1beta3 = PDupwindNthAnti1(&beta3[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm1beta3 = PDupwindNthSymm1(&beta3[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti2beta3 = PDupwindNthAnti2(&beta3[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm2beta3 = PDupwindNthSymm2(&beta3[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti3beta3 = PDupwindNthAnti3(&beta3[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm3beta3 = PDupwindNthSymm3(&beta3[index]);
+ CCTK_REAL_VEC const PDstandardNth1gt11 = PDstandardNth1(&gt11[index]);
+ CCTK_REAL_VEC const PDstandardNth2gt11 = PDstandardNth2(&gt11[index]);
+ CCTK_REAL_VEC const PDstandardNth3gt11 = PDstandardNth3(&gt11[index]);
+ CCTK_REAL_VEC const PDstandardNth1gt12 = PDstandardNth1(&gt12[index]);
+ CCTK_REAL_VEC const PDstandardNth2gt12 = PDstandardNth2(&gt12[index]);
+ CCTK_REAL_VEC const PDstandardNth3gt12 = PDstandardNth3(&gt12[index]);
+ CCTK_REAL_VEC const PDstandardNth1gt13 = PDstandardNth1(&gt13[index]);
+ CCTK_REAL_VEC const PDstandardNth2gt13 = PDstandardNth2(&gt13[index]);
+ CCTK_REAL_VEC const PDstandardNth3gt13 = PDstandardNth3(&gt13[index]);
+ CCTK_REAL_VEC const PDstandardNth1gt22 = PDstandardNth1(&gt22[index]);
+ CCTK_REAL_VEC const PDstandardNth2gt22 = PDstandardNth2(&gt22[index]);
+ CCTK_REAL_VEC const PDstandardNth3gt22 = PDstandardNth3(&gt22[index]);
+ CCTK_REAL_VEC const PDstandardNth1gt23 = PDstandardNth1(&gt23[index]);
+ CCTK_REAL_VEC const PDstandardNth2gt23 = PDstandardNth2(&gt23[index]);
+ CCTK_REAL_VEC const PDstandardNth3gt23 = PDstandardNth3(&gt23[index]);
+ CCTK_REAL_VEC const PDstandardNth1gt33 = PDstandardNth1(&gt33[index]);
+ CCTK_REAL_VEC const PDstandardNth2gt33 = PDstandardNth2(&gt33[index]);
+ CCTK_REAL_VEC const PDstandardNth3gt33 = PDstandardNth3(&gt33[index]);
/* Calculate temporaries and grid functions */
ptrdiff_t dir1 = Sign(beta1L);
@@ -187,154 +189,172 @@ static void ML_BSSN_convertFromADMBaseGamma_Body(cGH const * restrict const cctk
ptrdiff_t dir3 = Sign(beta3L);
- CCTK_REAL detgt = 1;
+ CCTK_REAL_VEC detgt = ToReal(1);
- CCTK_REAL gtu11 = INV(detgt)*(gt22L*gt33L - SQR(gt23L));
+ CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L)));
- CCTK_REAL gtu12 = (gt13L*gt23L - gt12L*gt33L)*INV(detgt);
+ CCTK_REAL_VEC gtu12 =
+ kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L)));
- CCTK_REAL gtu13 = (-(gt13L*gt22L) + gt12L*gt23L)*INV(detgt);
+ CCTK_REAL_VEC gtu13 =
+ kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L)));
- CCTK_REAL gtu22 = INV(detgt)*(gt11L*gt33L - SQR(gt13L));
+ CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L)));
- CCTK_REAL gtu23 = (gt12L*gt13L - gt11L*gt23L)*INV(detgt);
+ CCTK_REAL_VEC gtu23 =
+ kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L)));
- CCTK_REAL gtu33 = INV(detgt)*(gt11L*gt22L - SQR(gt12L));
+ CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L)));
- CCTK_REAL Gt111 = 0.5*(gtu11*PDstandardNth1gt11 +
- 2*(gtu12*PDstandardNth1gt12 + gtu13*PDstandardNth1gt13) -
- gtu12*PDstandardNth2gt11 - gtu13*PDstandardNth3gt11);
+ CCTK_REAL_VEC Gt111 =
+ kmul(ToReal(0.5),kmadd(gtu11,PDstandardNth1gt11,knmsub(gtu12,PDstandardNth2gt11,kmsub(kmadd(gtu12,PDstandardNth1gt12,kmul(gtu13,PDstandardNth1gt13)),ToReal(2),kmul(gtu13,PDstandardNth3gt11)))));
- CCTK_REAL Gt211 = 0.5*(gtu12*PDstandardNth1gt11 +
- 2*(gtu22*PDstandardNth1gt12 + gtu23*PDstandardNth1gt13) -
- gtu22*PDstandardNth2gt11 - gtu23*PDstandardNth3gt11);
+ CCTK_REAL_VEC Gt211 =
+ kmul(ToReal(0.5),kmadd(gtu12,PDstandardNth1gt11,knmsub(gtu22,PDstandardNth2gt11,kmsub(kmadd(gtu22,PDstandardNth1gt12,kmul(gtu23,PDstandardNth1gt13)),ToReal(2),kmul(gtu23,PDstandardNth3gt11)))));
- CCTK_REAL Gt311 = 0.5*(gtu13*PDstandardNth1gt11 +
- 2*(gtu23*PDstandardNth1gt12 + gtu33*PDstandardNth1gt13) -
- gtu23*PDstandardNth2gt11 - gtu33*PDstandardNth3gt11);
+ CCTK_REAL_VEC Gt311 =
+ kmul(ToReal(0.5),kmadd(gtu13,PDstandardNth1gt11,knmsub(gtu23,PDstandardNth2gt11,kmsub(kmadd(gtu23,PDstandardNth1gt12,kmul(gtu33,PDstandardNth1gt13)),ToReal(2),kmul(gtu33,PDstandardNth3gt11)))));
- CCTK_REAL Gt112 = 0.5*(gtu12*PDstandardNth1gt22 +
- gtu11*PDstandardNth2gt11 + gtu13*(PDstandardNth1gt23 +
- PDstandardNth2gt13 - PDstandardNth3gt12));
+ CCTK_REAL_VEC Gt112 =
+ kmul(kmadd(gtu12,PDstandardNth1gt22,kmadd(gtu11,PDstandardNth2gt11,kmul(gtu13,kadd(PDstandardNth1gt23,ksub(PDstandardNth2gt13,PDstandardNth3gt12))))),ToReal(0.5));
- CCTK_REAL Gt212 = 0.5*(gtu22*PDstandardNth1gt22 +
- gtu12*PDstandardNth2gt11 + gtu23*(PDstandardNth1gt23 +
- PDstandardNth2gt13 - PDstandardNth3gt12));
+ CCTK_REAL_VEC Gt212 =
+ kmul(kmadd(gtu22,PDstandardNth1gt22,kmadd(gtu12,PDstandardNth2gt11,kmul(gtu23,kadd(PDstandardNth1gt23,ksub(PDstandardNth2gt13,PDstandardNth3gt12))))),ToReal(0.5));
- CCTK_REAL Gt312 = 0.5*(gtu23*PDstandardNth1gt22 +
- gtu13*PDstandardNth2gt11 + gtu33*(PDstandardNth1gt23 +
- PDstandardNth2gt13 - PDstandardNth3gt12));
+ CCTK_REAL_VEC Gt312 =
+ kmul(kmadd(gtu23,PDstandardNth1gt22,kmadd(gtu13,PDstandardNth2gt11,kmul(gtu33,kadd(PDstandardNth1gt23,ksub(PDstandardNth2gt13,PDstandardNth3gt12))))),ToReal(0.5));
- CCTK_REAL Gt113 = 0.5*(gtu13*PDstandardNth1gt33 +
- gtu11*PDstandardNth3gt11 + gtu12*(PDstandardNth1gt23 -
- PDstandardNth2gt13 + PDstandardNth3gt12));
+ CCTK_REAL_VEC Gt113 =
+ kmul(kmadd(gtu13,PDstandardNth1gt33,kmadd(gtu11,PDstandardNth3gt11,kmul(gtu12,kadd(PDstandardNth1gt23,ksub(PDstandardNth3gt12,PDstandardNth2gt13))))),ToReal(0.5));
- CCTK_REAL Gt213 = 0.5*(gtu23*PDstandardNth1gt33 +
- gtu12*PDstandardNth3gt11 + gtu22*(PDstandardNth1gt23 -
- PDstandardNth2gt13 + PDstandardNth3gt12));
+ CCTK_REAL_VEC Gt213 =
+ kmul(kmadd(gtu23,PDstandardNth1gt33,kmadd(gtu12,PDstandardNth3gt11,kmul(gtu22,kadd(PDstandardNth1gt23,ksub(PDstandardNth3gt12,PDstandardNth2gt13))))),ToReal(0.5));
- CCTK_REAL Gt313 = 0.5*(gtu33*PDstandardNth1gt33 +
- gtu13*PDstandardNth3gt11 + gtu23*(PDstandardNth1gt23 -
- PDstandardNth2gt13 + PDstandardNth3gt12));
+ CCTK_REAL_VEC Gt313 =
+ kmul(kmadd(gtu33,PDstandardNth1gt33,kmadd(gtu13,PDstandardNth3gt11,kmul(gtu23,kadd(PDstandardNth1gt23,ksub(PDstandardNth3gt12,PDstandardNth2gt13))))),ToReal(0.5));
- CCTK_REAL Gt122 = 0.5*(gtu11*(-PDstandardNth1gt22 +
- 2*PDstandardNth2gt12) + gtu12*PDstandardNth2gt22 +
- gtu13*(2*PDstandardNth2gt23 - PDstandardNth3gt22));
+ CCTK_REAL_VEC Gt122 =
+ kmul(ToReal(0.5),kmadd(gtu12,PDstandardNth2gt22,kmadd(gtu11,kmsub(PDstandardNth2gt12,ToReal(2),PDstandardNth1gt22),kmul(gtu13,kmsub(PDstandardNth2gt23,ToReal(2),PDstandardNth3gt22)))));
- CCTK_REAL Gt222 = 0.5*(gtu12*(-PDstandardNth1gt22 +
- 2*PDstandardNth2gt12) + gtu22*PDstandardNth2gt22 +
- gtu23*(2*PDstandardNth2gt23 - PDstandardNth3gt22));
+ CCTK_REAL_VEC Gt222 =
+ kmul(ToReal(0.5),kmadd(gtu22,PDstandardNth2gt22,kmadd(gtu12,kmsub(PDstandardNth2gt12,ToReal(2),PDstandardNth1gt22),kmul(gtu23,kmsub(PDstandardNth2gt23,ToReal(2),PDstandardNth3gt22)))));
- CCTK_REAL Gt322 = 0.5*(gtu13*(-PDstandardNth1gt22 +
- 2*PDstandardNth2gt12) + gtu23*PDstandardNth2gt22 +
- gtu33*(2*PDstandardNth2gt23 - PDstandardNth3gt22));
+ CCTK_REAL_VEC Gt322 =
+ kmul(ToReal(0.5),kmadd(gtu23,PDstandardNth2gt22,kmadd(gtu13,kmsub(PDstandardNth2gt12,ToReal(2),PDstandardNth1gt22),kmul(gtu33,kmsub(PDstandardNth2gt23,ToReal(2),PDstandardNth3gt22)))));
- CCTK_REAL Gt123 = 0.5*(gtu13*PDstandardNth2gt33 +
- gtu11*(-PDstandardNth1gt23 + PDstandardNth2gt13 + PDstandardNth3gt12) +
- gtu12*PDstandardNth3gt22);
+ CCTK_REAL_VEC Gt123 =
+ kmul(kmadd(gtu13,PDstandardNth2gt33,kmadd(gtu12,PDstandardNth3gt22,kmul(gtu11,kadd(PDstandardNth2gt13,ksub(PDstandardNth3gt12,PDstandardNth1gt23))))),ToReal(0.5));
- CCTK_REAL Gt223 = 0.5*(gtu23*PDstandardNth2gt33 +
- gtu12*(-PDstandardNth1gt23 + PDstandardNth2gt13 + PDstandardNth3gt12) +
- gtu22*PDstandardNth3gt22);
+ CCTK_REAL_VEC Gt223 =
+ kmul(kmadd(gtu23,PDstandardNth2gt33,kmadd(gtu22,PDstandardNth3gt22,kmul(gtu12,kadd(PDstandardNth2gt13,ksub(PDstandardNth3gt12,PDstandardNth1gt23))))),ToReal(0.5));
- CCTK_REAL Gt323 = 0.5*(gtu33*PDstandardNth2gt33 +
- gtu13*(-PDstandardNth1gt23 + PDstandardNth2gt13 + PDstandardNth3gt12) +
- gtu23*PDstandardNth3gt22);
+ CCTK_REAL_VEC Gt323 =
+ kmul(kmadd(gtu33,PDstandardNth2gt33,kmadd(gtu23,PDstandardNth3gt22,kmul(gtu13,kadd(PDstandardNth2gt13,ksub(PDstandardNth3gt12,PDstandardNth1gt23))))),ToReal(0.5));
- CCTK_REAL Gt133 = 0.5*(gtu11*(-PDstandardNth1gt33 +
- 2*PDstandardNth3gt13) + gtu12*(-PDstandardNth2gt33 +
- 2*PDstandardNth3gt23) + gtu13*PDstandardNth3gt33);
+ CCTK_REAL_VEC Gt133 =
+ kmul(ToReal(0.5),kmadd(gtu13,PDstandardNth3gt33,kmadd(gtu11,kmsub(PDstandardNth3gt13,ToReal(2),PDstandardNth1gt33),kmul(gtu12,kmsub(PDstandardNth3gt23,ToReal(2),PDstandardNth2gt33)))));
- CCTK_REAL Gt233 = 0.5*(gtu12*(-PDstandardNth1gt33 +
- 2*PDstandardNth3gt13) + gtu22*(-PDstandardNth2gt33 +
- 2*PDstandardNth3gt23) + gtu23*PDstandardNth3gt33);
+ CCTK_REAL_VEC Gt233 =
+ kmul(ToReal(0.5),kmadd(gtu23,PDstandardNth3gt33,kmadd(gtu12,kmsub(PDstandardNth3gt13,ToReal(2),PDstandardNth1gt33),kmul(gtu22,kmsub(PDstandardNth3gt23,ToReal(2),PDstandardNth2gt33)))));
- CCTK_REAL Gt333 = 0.5*(gtu13*(-PDstandardNth1gt33 +
- 2*PDstandardNth3gt13) + gtu23*(-PDstandardNth2gt33 +
- 2*PDstandardNth3gt23) + gtu33*PDstandardNth3gt33);
+ CCTK_REAL_VEC Gt333 =
+ kmul(ToReal(0.5),kmadd(gtu33,PDstandardNth3gt33,kmadd(gtu13,kmsub(PDstandardNth3gt13,ToReal(2),PDstandardNth1gt33),kmul(gtu23,kmsub(PDstandardNth3gt23,ToReal(2),PDstandardNth2gt33)))));
- CCTK_REAL Xt1L = Gt111*gtu11 + Gt122*gtu22 + 2*(Gt112*gtu12 +
- Gt113*gtu13 + Gt123*gtu23) + Gt133*gtu33;
+ CCTK_REAL_VEC Xt1L =
+ kmadd(Gt111,gtu11,kmadd(Gt122,gtu22,kmadd(Gt133,gtu33,kmul(kmadd(Gt112,gtu12,kmadd(Gt113,gtu13,kmul(Gt123,gtu23))),ToReal(2)))));
- CCTK_REAL Xt2L = Gt211*gtu11 + Gt222*gtu22 + 2*(Gt212*gtu12 +
- Gt213*gtu13 + Gt223*gtu23) + Gt233*gtu33;
+ CCTK_REAL_VEC Xt2L =
+ kmadd(Gt211,gtu11,kmadd(Gt222,gtu22,kmadd(Gt233,gtu33,kmul(kmadd(Gt212,gtu12,kmadd(Gt213,gtu13,kmul(Gt223,gtu23))),ToReal(2)))));
- CCTK_REAL Xt3L = Gt311*gtu11 + Gt322*gtu22 + 2*(Gt312*gtu12 +
- Gt313*gtu13 + Gt323*gtu23) + Gt333*gtu33;
+ CCTK_REAL_VEC Xt3L =
+ kmadd(Gt311,gtu11,kmadd(Gt322,gtu22,kmadd(Gt333,gtu33,kmul(kmadd(Gt312,gtu12,kmadd(Gt313,gtu13,kmul(Gt323,gtu23))),ToReal(2)))));
- CCTK_REAL AL = IfThen(LapseACoeff !=
- 0,-(INV(ToReal(harmonicF))*pow(alphaL,-ToReal(harmonicN))*(dtalpL -
- (beta1L*PDupwindNthAnti1alpha + beta2L*PDupwindNthAnti2alpha +
- beta3L*PDupwindNthAnti3alpha + PDupwindNthSymm1alpha*Abs(beta1L) +
- PDupwindNthSymm2alpha*Abs(beta2L) +
- PDupwindNthSymm3alpha*Abs(beta3L))*ToReal(LapseAdvectionCoeff))),0);
+ CCTK_REAL_VEC AL = IfThen(LapseACoeff !=
+ 0,kneg(kmul(INV(ToReal(harmonicF)),kmul(kpow(alphaL,-harmonicN),knmsub(kmadd(beta1L,PDupwindNthAnti1alpha,kmadd(beta2L,PDupwindNthAnti2alpha,kmadd(beta3L,PDupwindNthAnti3alpha,kmadd(PDupwindNthSymm1alpha,kfabs(beta1L),kmadd(PDupwindNthSymm2alpha,kfabs(beta2L),kmul(PDupwindNthSymm3alpha,kfabs(beta3L))))))),ToReal(LapseAdvectionCoeff),dtalpL)))),ToReal(0));
- CCTK_REAL theta = fmin(1,exp(1 -
- rL*INV(ToReal(SpatialShiftGammaCoeffRadius))));
+ CCTK_REAL_VEC theta =
+ kfmin(ToReal(1),kexp(knmsub(rL,INV(ToReal(SpatialShiftGammaCoeffRadius)),ToReal(1))));
- CCTK_REAL B1L;
- CCTK_REAL B2L;
- CCTK_REAL B3L;
+ CCTK_REAL_VEC B1L;
+ CCTK_REAL_VEC B2L;
+ CCTK_REAL_VEC B3L;
if (ShiftBCoeff*ShiftGammaCoeff != 0)
{
- B1L = INV(theta)*INV(ToReal(ShiftGammaCoeff))*(dtbetaxL -
- (beta1L*PDupwindNthAnti1beta1 + beta2L*PDupwindNthAnti2beta1 +
- beta3L*PDupwindNthAnti3beta1 + PDupwindNthSymm1beta1*Abs(beta1L) +
- PDupwindNthSymm2beta1*Abs(beta2L) +
- PDupwindNthSymm3beta1*Abs(beta3L))*ToReal(ShiftAdvectionCoeff));
+ B1L =
+ kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,PDupwindNthAnti1beta1,kmadd(beta2L,PDupwindNthAnti2beta1,kmadd(beta3L,PDupwindNthAnti3beta1,kmadd(PDupwindNthSymm1beta1,kfabs(beta1L),kmadd(PDupwindNthSymm2beta1,kfabs(beta2L),kmul(PDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetaxL)));
- B2L = INV(theta)*INV(ToReal(ShiftGammaCoeff))*(dtbetayL -
- (beta1L*PDupwindNthAnti1beta2 + beta2L*PDupwindNthAnti2beta2 +
- beta3L*PDupwindNthAnti3beta2 + PDupwindNthSymm1beta2*Abs(beta1L) +
- PDupwindNthSymm2beta2*Abs(beta2L) +
- PDupwindNthSymm3beta2*Abs(beta3L))*ToReal(ShiftAdvectionCoeff));
+ B2L =
+ kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,PDupwindNthAnti1beta2,kmadd(beta2L,PDupwindNthAnti2beta2,kmadd(beta3L,PDupwindNthAnti3beta2,kmadd(PDupwindNthSymm1beta2,kfabs(beta1L),kmadd(PDupwindNthSymm2beta2,kfabs(beta2L),kmul(PDupwindNthSymm3beta2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetayL)));
- B3L = INV(theta)*INV(ToReal(ShiftGammaCoeff))*(dtbetazL -
- (beta1L*PDupwindNthAnti1beta3 + beta2L*PDupwindNthAnti2beta3 +
- beta3L*PDupwindNthAnti3beta3 + PDupwindNthSymm1beta3*Abs(beta1L) +
- PDupwindNthSymm2beta3*Abs(beta2L) +
- PDupwindNthSymm3beta3*Abs(beta3L))*ToReal(ShiftAdvectionCoeff));
+ B3L =
+ kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,PDupwindNthAnti1beta3,kmadd(beta2L,PDupwindNthAnti2beta3,kmadd(beta3L,PDupwindNthAnti3beta3,kmadd(PDupwindNthSymm1beta3,kfabs(beta1L),kmadd(PDupwindNthSymm2beta3,kfabs(beta2L),kmul(PDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetazL)));
}
else
{
- B1L = 0;
+ B1L = ToReal(0);
- B2L = 0;
+ B2L = ToReal(0);
- B3L = 0;
+ B3L = ToReal(0);
+ }
+
+ /* If necessary, store only a partial vector when it overlaps both the lower and upper loop bounds */
+
+ if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0))
+ {
+ ptrdiff_t const elt_count_lo = lc_imin-i;
+ ptrdiff_t const elt_count_hi = lc_imax-i;
+ vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi);
+ break;
+ }
+
+ /* If necessary, store only partial vectors after the first iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0))
+ {
+ ptrdiff_t const elt_count = lc_imin-i;
+ vec_store_nta_partial_hi(A[index],AL,elt_count);
+ vec_store_nta_partial_hi(B1[index],B1L,elt_count);
+ vec_store_nta_partial_hi(B2[index],B2L,elt_count);
+ vec_store_nta_partial_hi(B3[index],B3L,elt_count);
+ vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count);
+ vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count);
+ vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count);
+ continue;
+ }
+
+ /* If necessary, store only partial vectors after the last iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0))
+ {
+ ptrdiff_t const elt_count = lc_imax-i;
+ vec_store_nta_partial_lo(A[index],AL,elt_count);
+ vec_store_nta_partial_lo(B1[index],B1L,elt_count);
+ vec_store_nta_partial_lo(B2[index],B2L,elt_count);
+ vec_store_nta_partial_lo(B3[index],B3L,elt_count);
+ vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count);
+ vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count);
+ vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count);
+ break;
}
/* Copy local copies back to grid functions */
- A[index] = AL;
- B1[index] = B1L;
- B2[index] = B2L;
- B3[index] = B3L;
- Xt1[index] = Xt1L;
- Xt2[index] = Xt2L;
- Xt3[index] = Xt3L;
+ vec_store_nta(A[index],AL);
+ vec_store_nta(B1[index],B1L);
+ vec_store_nta(B2[index],B2L);
+ vec_store_nta(B3[index],B3L);
+ vec_store_nta(Xt1[index],Xt1L);
+ vec_store_nta(Xt2[index],Xt2L);
+ vec_store_nta(Xt3[index],Xt3L);
}
- LC_ENDLOOP3 (ML_BSSN_convertFromADMBaseGamma);
+ LC_ENDLOOP3VEC (ML_BSSN_convertFromADMBaseGamma);
}
extern "C" void ML_BSSN_convertFromADMBaseGamma(CCTK_ARGUMENTS)
diff --git a/ML_BSSN/src/ML_BSSN_convertToADMBase.cc b/ML_BSSN/src/ML_BSSN_convertToADMBase.cc
index 589ccc2..5219ef7 100644
--- a/ML_BSSN/src/ML_BSSN_convertToADMBase.cc
+++ b/ML_BSSN/src/ML_BSSN_convertToADMBase.cc
@@ -13,13 +13,14 @@
#include "GenericFD.h"
#include "Differencing.h"
#include "loopcontrol.h"
+#include "vectors.h"
/* Define macros used in calculations */
#define INITVALUE (42)
#define QAD(x) (SQR(SQR(x)))
-#define INV(x) ((1.0) / (x))
-#define SQR(x) ((x) * (x))
-#define CUB(x) ((x) * (x) * (x))
+#define INV(x) (kdiv(ToReal(1.0),x))
+#define SQR(x) (kmul(x,x))
+#define CUB(x) (kmul(x,SQR(x)))
static void ML_BSSN_convertToADMBase_Body(cGH const * restrict const cctkGH, int const dir, int const face, CCTK_REAL const normal[3], CCTK_REAL const tangentA[3], CCTK_REAL const tangentB[3], int const min[3], int const max[3], int const n_subblock_gfs, CCTK_REAL * restrict const subblock_gfs[])
{
@@ -52,76 +53,77 @@ static void ML_BSSN_convertToADMBase_Body(cGH const * restrict const cctkGH, int
ptrdiff_t const cdi = sizeof(CCTK_REAL) * di;
ptrdiff_t const cdj = sizeof(CCTK_REAL) * dj;
ptrdiff_t const cdk = sizeof(CCTK_REAL) * dk;
- CCTK_REAL const dx = ToReal(CCTK_DELTA_SPACE(0));
- CCTK_REAL const dy = ToReal(CCTK_DELTA_SPACE(1));
- CCTK_REAL const dz = ToReal(CCTK_DELTA_SPACE(2));
- CCTK_REAL const dt = ToReal(CCTK_DELTA_TIME);
- CCTK_REAL const dxi = INV(dx);
- CCTK_REAL const dyi = INV(dy);
- CCTK_REAL const dzi = INV(dz);
- CCTK_REAL const khalf = 0.5;
- CCTK_REAL const kthird = 1/3.0;
- CCTK_REAL const ktwothird = 2.0/3.0;
- CCTK_REAL const kfourthird = 4.0/3.0;
- CCTK_REAL const keightthird = 8.0/3.0;
- CCTK_REAL const hdxi = 0.5 * dxi;
- CCTK_REAL const hdyi = 0.5 * dyi;
- CCTK_REAL const hdzi = 0.5 * dzi;
+ CCTK_REAL_VEC const dx = ToReal(CCTK_DELTA_SPACE(0));
+ CCTK_REAL_VEC const dy = ToReal(CCTK_DELTA_SPACE(1));
+ CCTK_REAL_VEC const dz = ToReal(CCTK_DELTA_SPACE(2));
+ CCTK_REAL_VEC const dt = ToReal(CCTK_DELTA_TIME);
+ CCTK_REAL_VEC const dxi = INV(dx);
+ CCTK_REAL_VEC const dyi = INV(dy);
+ CCTK_REAL_VEC const dzi = INV(dz);
+ CCTK_REAL_VEC const khalf = ToReal(0.5);
+ CCTK_REAL_VEC const kthird = ToReal(1.0/3.0);
+ CCTK_REAL_VEC const ktwothird = ToReal(2.0/3.0);
+ CCTK_REAL_VEC const kfourthird = ToReal(4.0/3.0);
+ CCTK_REAL_VEC const keightthird = ToReal(8.0/3.0);
+ CCTK_REAL_VEC const hdxi = kmul(ToReal(0.5), dxi);
+ CCTK_REAL_VEC const hdyi = kmul(ToReal(0.5), dyi);
+ CCTK_REAL_VEC const hdzi = kmul(ToReal(0.5), dzi);
/* Initialize predefined quantities */
- CCTK_REAL const p1o12dx = 0.0833333333333333333333333333333*INV(dx);
- CCTK_REAL const p1o12dy = 0.0833333333333333333333333333333*INV(dy);
- CCTK_REAL const p1o12dz = 0.0833333333333333333333333333333*INV(dz);
- CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx)*INV(dy);
- CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx)*INV(dz);
- CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy)*INV(dz);
- CCTK_REAL const p1o24dx = 0.0416666666666666666666666666667*INV(dx);
- CCTK_REAL const p1o24dy = 0.0416666666666666666666666666667*INV(dy);
- CCTK_REAL const p1o24dz = 0.0416666666666666666666666666667*INV(dz);
- CCTK_REAL const p1o64dx = 0.015625*INV(dx);
- CCTK_REAL const p1o64dy = 0.015625*INV(dy);
- CCTK_REAL const p1o64dz = 0.015625*INV(dz);
- CCTK_REAL const p1odx = INV(dx);
- CCTK_REAL const p1ody = INV(dy);
- CCTK_REAL const p1odz = INV(dz);
- CCTK_REAL const pm1o12dx2 = -0.0833333333333333333333333333333*INV(SQR(dx));
- CCTK_REAL const pm1o12dy2 = -0.0833333333333333333333333333333*INV(SQR(dy));
- CCTK_REAL const pm1o12dz2 = -0.0833333333333333333333333333333*INV(SQR(dz));
+ CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o24dx = kmul(INV(dx),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o24dy = kmul(INV(dy),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o24dz = kmul(INV(dz),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625));
+ CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625));
+ CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625));
+ CCTK_REAL_VEC const p1odx = INV(dx);
+ CCTK_REAL_VEC const p1ody = INV(dy);
+ CCTK_REAL_VEC const p1odz = INV(dz);
+ CCTK_REAL_VEC const pm1o12dx2 = kmul(INV(SQR(dx)),ToReal(-0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const pm1o12dy2 = kmul(INV(SQR(dy)),ToReal(-0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const pm1o12dz2 = kmul(INV(SQR(dz)),ToReal(-0.0833333333333333333333333333333));
/* Loop over the grid points */
#pragma omp parallel
- LC_LOOP3 (ML_BSSN_convertToADMBase,
+ LC_LOOP3VEC (ML_BSSN_convertToADMBase,
i,j,k, min[0],min[1],min[2], max[0],max[1],max[2],
- cctk_lsh[0],cctk_lsh[1],cctk_lsh[2])
+ cctk_lsh[0],cctk_lsh[1],cctk_lsh[2],
+ CCTK_REAL_VEC_SIZE)
{
ptrdiff_t const index = di*i + dj*j + dk*k;
/* Assign local copies of grid functions */
- CCTK_REAL alphaL = alpha[index];
- CCTK_REAL At11L = At11[index];
- CCTK_REAL At12L = At12[index];
- CCTK_REAL At13L = At13[index];
- CCTK_REAL At22L = At22[index];
- CCTK_REAL At23L = At23[index];
- CCTK_REAL At33L = At33[index];
- CCTK_REAL beta1L = beta1[index];
- CCTK_REAL beta2L = beta2[index];
- CCTK_REAL beta3L = beta3[index];
- CCTK_REAL gt11L = gt11[index];
- CCTK_REAL gt12L = gt12[index];
- CCTK_REAL gt13L = gt13[index];
- CCTK_REAL gt22L = gt22[index];
- CCTK_REAL gt23L = gt23[index];
- CCTK_REAL gt33L = gt33[index];
- CCTK_REAL gxxL = gxx[index];
- CCTK_REAL gxyL = gxy[index];
- CCTK_REAL gxzL = gxz[index];
- CCTK_REAL gyyL = gyy[index];
- CCTK_REAL gyzL = gyz[index];
- CCTK_REAL gzzL = gzz[index];
- CCTK_REAL phiL = phi[index];
- CCTK_REAL trKL = trK[index];
+ CCTK_REAL_VEC alphaL = vec_load(alpha[index]);
+ CCTK_REAL_VEC At11L = vec_load(At11[index]);
+ CCTK_REAL_VEC At12L = vec_load(At12[index]);
+ CCTK_REAL_VEC At13L = vec_load(At13[index]);
+ CCTK_REAL_VEC At22L = vec_load(At22[index]);
+ CCTK_REAL_VEC At23L = vec_load(At23[index]);
+ CCTK_REAL_VEC At33L = vec_load(At33[index]);
+ CCTK_REAL_VEC beta1L = vec_load(beta1[index]);
+ CCTK_REAL_VEC beta2L = vec_load(beta2[index]);
+ CCTK_REAL_VEC beta3L = vec_load(beta3[index]);
+ CCTK_REAL_VEC gt11L = vec_load(gt11[index]);
+ CCTK_REAL_VEC gt12L = vec_load(gt12[index]);
+ CCTK_REAL_VEC gt13L = vec_load(gt13[index]);
+ CCTK_REAL_VEC gt22L = vec_load(gt22[index]);
+ CCTK_REAL_VEC gt23L = vec_load(gt23[index]);
+ CCTK_REAL_VEC gt33L = vec_load(gt33[index]);
+ CCTK_REAL_VEC gxxL = vec_load(gxx[index]);
+ CCTK_REAL_VEC gxyL = vec_load(gxy[index]);
+ CCTK_REAL_VEC gxzL = vec_load(gxz[index]);
+ CCTK_REAL_VEC gyyL = vec_load(gyy[index]);
+ CCTK_REAL_VEC gyzL = vec_load(gyz[index]);
+ CCTK_REAL_VEC gzzL = vec_load(gzz[index]);
+ CCTK_REAL_VEC phiL = vec_load(phi[index]);
+ CCTK_REAL_VEC trKL = vec_load(trK[index]);
/* Include user supplied include files */
@@ -129,65 +131,139 @@ static void ML_BSSN_convertToADMBase_Body(cGH const * restrict const cctkGH, int
/* Precompute derivatives */
/* Calculate temporaries and grid functions */
- CCTK_REAL e4phi = IfThen(conformalMethod,INV(SQR(phiL)),exp(4*phiL));
+ CCTK_REAL_VEC e4phi =
+ IfThen(conformalMethod,INV(SQR(phiL)),kexp(kmul(phiL,ToReal(4))));
- gxxL = e4phi*gt11L;
+ gxxL = kmul(e4phi,gt11L);
- gxyL = e4phi*gt12L;
+ gxyL = kmul(e4phi,gt12L);
- gxzL = e4phi*gt13L;
+ gxzL = kmul(e4phi,gt13L);
- gyyL = e4phi*gt22L;
+ gyyL = kmul(e4phi,gt22L);
- gyzL = e4phi*gt23L;
+ gyzL = kmul(e4phi,gt23L);
- gzzL = e4phi*gt33L;
+ gzzL = kmul(e4phi,gt33L);
- CCTK_REAL kxxL = At11L*e4phi +
- 0.333333333333333333333333333333*gxxL*trKL;
+ CCTK_REAL_VEC kxxL =
+ kmadd(At11L,e4phi,kmul(gxxL,kmul(trKL,ToReal(0.333333333333333333333333333333))));
- CCTK_REAL kxyL = At12L*e4phi +
- 0.333333333333333333333333333333*gxyL*trKL;
+ CCTK_REAL_VEC kxyL =
+ kmadd(At12L,e4phi,kmul(gxyL,kmul(trKL,ToReal(0.333333333333333333333333333333))));
- CCTK_REAL kxzL = At13L*e4phi +
- 0.333333333333333333333333333333*gxzL*trKL;
+ CCTK_REAL_VEC kxzL =
+ kmadd(At13L,e4phi,kmul(gxzL,kmul(trKL,ToReal(0.333333333333333333333333333333))));
- CCTK_REAL kyyL = At22L*e4phi +
- 0.333333333333333333333333333333*gyyL*trKL;
+ CCTK_REAL_VEC kyyL =
+ kmadd(At22L,e4phi,kmul(gyyL,kmul(trKL,ToReal(0.333333333333333333333333333333))));
- CCTK_REAL kyzL = At23L*e4phi +
- 0.333333333333333333333333333333*gyzL*trKL;
+ CCTK_REAL_VEC kyzL =
+ kmadd(At23L,e4phi,kmul(gyzL,kmul(trKL,ToReal(0.333333333333333333333333333333))));
- CCTK_REAL kzzL = At33L*e4phi +
- 0.333333333333333333333333333333*gzzL*trKL;
+ CCTK_REAL_VEC kzzL =
+ kmadd(At33L,e4phi,kmul(gzzL,kmul(trKL,ToReal(0.333333333333333333333333333333))));
- CCTK_REAL alpL = alphaL;
+ CCTK_REAL_VEC alpL = alphaL;
- CCTK_REAL betaxL = beta1L;
+ CCTK_REAL_VEC betaxL = beta1L;
- CCTK_REAL betayL = beta2L;
+ CCTK_REAL_VEC betayL = beta2L;
- CCTK_REAL betazL = beta3L;
+ CCTK_REAL_VEC betazL = beta3L;
+
+ /* If necessary, store only a partial vector when it overlaps both the lower and upper loop bounds */
+
+ if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0))
+ {
+ ptrdiff_t const elt_count_lo = lc_imin-i;
+ ptrdiff_t const elt_count_hi = lc_imax-i;
+ vec_store_nta_partial_mid(alp[index],alpL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(betax[index],betaxL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(betay[index],betayL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(betaz[index],betazL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gxx[index],gxxL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gxy[index],gxyL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gxz[index],gxzL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gyy[index],gyyL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gyz[index],gyzL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gzz[index],gzzL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(kxx[index],kxxL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(kxy[index],kxyL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(kxz[index],kxzL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(kyy[index],kyyL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(kyz[index],kyzL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(kzz[index],kzzL,elt_count_lo,elt_count_hi);
+ break;
+ }
+
+ /* If necessary, store only partial vectors after the first iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0))
+ {
+ ptrdiff_t const elt_count = lc_imin-i;
+ vec_store_nta_partial_hi(alp[index],alpL,elt_count);
+ vec_store_nta_partial_hi(betax[index],betaxL,elt_count);
+ vec_store_nta_partial_hi(betay[index],betayL,elt_count);
+ vec_store_nta_partial_hi(betaz[index],betazL,elt_count);
+ vec_store_nta_partial_hi(gxx[index],gxxL,elt_count);
+ vec_store_nta_partial_hi(gxy[index],gxyL,elt_count);
+ vec_store_nta_partial_hi(gxz[index],gxzL,elt_count);
+ vec_store_nta_partial_hi(gyy[index],gyyL,elt_count);
+ vec_store_nta_partial_hi(gyz[index],gyzL,elt_count);
+ vec_store_nta_partial_hi(gzz[index],gzzL,elt_count);
+ vec_store_nta_partial_hi(kxx[index],kxxL,elt_count);
+ vec_store_nta_partial_hi(kxy[index],kxyL,elt_count);
+ vec_store_nta_partial_hi(kxz[index],kxzL,elt_count);
+ vec_store_nta_partial_hi(kyy[index],kyyL,elt_count);
+ vec_store_nta_partial_hi(kyz[index],kyzL,elt_count);
+ vec_store_nta_partial_hi(kzz[index],kzzL,elt_count);
+ continue;
+ }
+
+ /* If necessary, store only partial vectors after the last iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0))
+ {
+ ptrdiff_t const elt_count = lc_imax-i;
+ vec_store_nta_partial_lo(alp[index],alpL,elt_count);
+ vec_store_nta_partial_lo(betax[index],betaxL,elt_count);
+ vec_store_nta_partial_lo(betay[index],betayL,elt_count);
+ vec_store_nta_partial_lo(betaz[index],betazL,elt_count);
+ vec_store_nta_partial_lo(gxx[index],gxxL,elt_count);
+ vec_store_nta_partial_lo(gxy[index],gxyL,elt_count);
+ vec_store_nta_partial_lo(gxz[index],gxzL,elt_count);
+ vec_store_nta_partial_lo(gyy[index],gyyL,elt_count);
+ vec_store_nta_partial_lo(gyz[index],gyzL,elt_count);
+ vec_store_nta_partial_lo(gzz[index],gzzL,elt_count);
+ vec_store_nta_partial_lo(kxx[index],kxxL,elt_count);
+ vec_store_nta_partial_lo(kxy[index],kxyL,elt_count);
+ vec_store_nta_partial_lo(kxz[index],kxzL,elt_count);
+ vec_store_nta_partial_lo(kyy[index],kyyL,elt_count);
+ vec_store_nta_partial_lo(kyz[index],kyzL,elt_count);
+ vec_store_nta_partial_lo(kzz[index],kzzL,elt_count);
+ break;
+ }
/* Copy local copies back to grid functions */
- alp[index] = alpL;
- betax[index] = betaxL;
- betay[index] = betayL;
- betaz[index] = betazL;
- gxx[index] = gxxL;
- gxy[index] = gxyL;
- gxz[index] = gxzL;
- gyy[index] = gyyL;
- gyz[index] = gyzL;
- gzz[index] = gzzL;
- kxx[index] = kxxL;
- kxy[index] = kxyL;
- kxz[index] = kxzL;
- kyy[index] = kyyL;
- kyz[index] = kyzL;
- kzz[index] = kzzL;
+ vec_store_nta(alp[index],alpL);
+ vec_store_nta(betax[index],betaxL);
+ vec_store_nta(betay[index],betayL);
+ vec_store_nta(betaz[index],betazL);
+ vec_store_nta(gxx[index],gxxL);
+ vec_store_nta(gxy[index],gxyL);
+ vec_store_nta(gxz[index],gxzL);
+ vec_store_nta(gyy[index],gyyL);
+ vec_store_nta(gyz[index],gyzL);
+ vec_store_nta(gzz[index],gzzL);
+ vec_store_nta(kxx[index],kxxL);
+ vec_store_nta(kxy[index],kxyL);
+ vec_store_nta(kxz[index],kxzL);
+ vec_store_nta(kyy[index],kyyL);
+ vec_store_nta(kyz[index],kyzL);
+ vec_store_nta(kzz[index],kzzL);
}
- LC_ENDLOOP3 (ML_BSSN_convertToADMBase);
+ LC_ENDLOOP3VEC (ML_BSSN_convertToADMBase);
}
extern "C" void ML_BSSN_convertToADMBase(CCTK_ARGUMENTS)
diff --git a/ML_BSSN/src/ML_BSSN_convertToADMBaseDtLapseShift.cc b/ML_BSSN/src/ML_BSSN_convertToADMBaseDtLapseShift.cc
index 53b6067..c310b24 100644
--- a/ML_BSSN/src/ML_BSSN_convertToADMBaseDtLapseShift.cc
+++ b/ML_BSSN/src/ML_BSSN_convertToADMBaseDtLapseShift.cc
@@ -13,13 +13,14 @@
#include "GenericFD.h"
#include "Differencing.h"
#include "loopcontrol.h"
+#include "vectors.h"
/* Define macros used in calculations */
#define INITVALUE (42)
#define QAD(x) (SQR(SQR(x)))
-#define INV(x) ((1.0) / (x))
-#define SQR(x) ((x) * (x))
-#define CUB(x) ((x) * (x) * (x))
+#define INV(x) (kdiv(ToReal(1.0),x))
+#define SQR(x) (kmul(x,x))
+#define CUB(x) (kmul(x,SQR(x)))
extern "C" void ML_BSSN_convertToADMBaseDtLapseShift_SelectBCs(CCTK_ARGUMENTS)
{
@@ -68,94 +69,95 @@ static void ML_BSSN_convertToADMBaseDtLapseShift_Body(cGH const * restrict const
ptrdiff_t const cdi = sizeof(CCTK_REAL) * di;
ptrdiff_t const cdj = sizeof(CCTK_REAL) * dj;
ptrdiff_t const cdk = sizeof(CCTK_REAL) * dk;
- CCTK_REAL const dx = ToReal(CCTK_DELTA_SPACE(0));
- CCTK_REAL const dy = ToReal(CCTK_DELTA_SPACE(1));
- CCTK_REAL const dz = ToReal(CCTK_DELTA_SPACE(2));
- CCTK_REAL const dt = ToReal(CCTK_DELTA_TIME);
- CCTK_REAL const dxi = INV(dx);
- CCTK_REAL const dyi = INV(dy);
- CCTK_REAL const dzi = INV(dz);
- CCTK_REAL const khalf = 0.5;
- CCTK_REAL const kthird = 1/3.0;
- CCTK_REAL const ktwothird = 2.0/3.0;
- CCTK_REAL const kfourthird = 4.0/3.0;
- CCTK_REAL const keightthird = 8.0/3.0;
- CCTK_REAL const hdxi = 0.5 * dxi;
- CCTK_REAL const hdyi = 0.5 * dyi;
- CCTK_REAL const hdzi = 0.5 * dzi;
+ CCTK_REAL_VEC const dx = ToReal(CCTK_DELTA_SPACE(0));
+ CCTK_REAL_VEC const dy = ToReal(CCTK_DELTA_SPACE(1));
+ CCTK_REAL_VEC const dz = ToReal(CCTK_DELTA_SPACE(2));
+ CCTK_REAL_VEC const dt = ToReal(CCTK_DELTA_TIME);
+ CCTK_REAL_VEC const dxi = INV(dx);
+ CCTK_REAL_VEC const dyi = INV(dy);
+ CCTK_REAL_VEC const dzi = INV(dz);
+ CCTK_REAL_VEC const khalf = ToReal(0.5);
+ CCTK_REAL_VEC const kthird = ToReal(1.0/3.0);
+ CCTK_REAL_VEC const ktwothird = ToReal(2.0/3.0);
+ CCTK_REAL_VEC const kfourthird = ToReal(4.0/3.0);
+ CCTK_REAL_VEC const keightthird = ToReal(8.0/3.0);
+ CCTK_REAL_VEC const hdxi = kmul(ToReal(0.5), dxi);
+ CCTK_REAL_VEC const hdyi = kmul(ToReal(0.5), dyi);
+ CCTK_REAL_VEC const hdzi = kmul(ToReal(0.5), dzi);
/* Initialize predefined quantities */
- CCTK_REAL const p1o12dx = 0.0833333333333333333333333333333*INV(dx);
- CCTK_REAL const p1o12dy = 0.0833333333333333333333333333333*INV(dy);
- CCTK_REAL const p1o12dz = 0.0833333333333333333333333333333*INV(dz);
- CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx)*INV(dy);
- CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx)*INV(dz);
- CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy)*INV(dz);
- CCTK_REAL const p1o24dx = 0.0416666666666666666666666666667*INV(dx);
- CCTK_REAL const p1o24dy = 0.0416666666666666666666666666667*INV(dy);
- CCTK_REAL const p1o24dz = 0.0416666666666666666666666666667*INV(dz);
- CCTK_REAL const p1o64dx = 0.015625*INV(dx);
- CCTK_REAL const p1o64dy = 0.015625*INV(dy);
- CCTK_REAL const p1o64dz = 0.015625*INV(dz);
- CCTK_REAL const p1odx = INV(dx);
- CCTK_REAL const p1ody = INV(dy);
- CCTK_REAL const p1odz = INV(dz);
- CCTK_REAL const pm1o12dx2 = -0.0833333333333333333333333333333*INV(SQR(dx));
- CCTK_REAL const pm1o12dy2 = -0.0833333333333333333333333333333*INV(SQR(dy));
- CCTK_REAL const pm1o12dz2 = -0.0833333333333333333333333333333*INV(SQR(dz));
+ CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o24dx = kmul(INV(dx),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o24dy = kmul(INV(dy),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o24dz = kmul(INV(dz),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625));
+ CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625));
+ CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625));
+ CCTK_REAL_VEC const p1odx = INV(dx);
+ CCTK_REAL_VEC const p1ody = INV(dy);
+ CCTK_REAL_VEC const p1odz = INV(dz);
+ CCTK_REAL_VEC const pm1o12dx2 = kmul(INV(SQR(dx)),ToReal(-0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const pm1o12dy2 = kmul(INV(SQR(dy)),ToReal(-0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const pm1o12dz2 = kmul(INV(SQR(dz)),ToReal(-0.0833333333333333333333333333333));
/* Loop over the grid points */
#pragma omp parallel
- LC_LOOP3 (ML_BSSN_convertToADMBaseDtLapseShift,
+ LC_LOOP3VEC (ML_BSSN_convertToADMBaseDtLapseShift,
i,j,k, min[0],min[1],min[2], max[0],max[1],max[2],
- cctk_lsh[0],cctk_lsh[1],cctk_lsh[2])
+ cctk_lsh[0],cctk_lsh[1],cctk_lsh[2],
+ CCTK_REAL_VEC_SIZE)
{
ptrdiff_t const index = di*i + dj*j + dk*k;
/* Assign local copies of grid functions */
- CCTK_REAL AL = A[index];
- CCTK_REAL alphaL = alpha[index];
- CCTK_REAL B1L = B1[index];
- CCTK_REAL B2L = B2[index];
- CCTK_REAL B3L = B3[index];
- CCTK_REAL beta1L = beta1[index];
- CCTK_REAL beta2L = beta2[index];
- CCTK_REAL beta3L = beta3[index];
- CCTK_REAL rL = r[index];
- CCTK_REAL trKL = trK[index];
- CCTK_REAL Xt1L = Xt1[index];
- CCTK_REAL Xt2L = Xt2[index];
- CCTK_REAL Xt3L = Xt3[index];
+ CCTK_REAL_VEC AL = vec_load(A[index]);
+ CCTK_REAL_VEC alphaL = vec_load(alpha[index]);
+ CCTK_REAL_VEC B1L = vec_load(B1[index]);
+ CCTK_REAL_VEC B2L = vec_load(B2[index]);
+ CCTK_REAL_VEC B3L = vec_load(B3[index]);
+ CCTK_REAL_VEC beta1L = vec_load(beta1[index]);
+ CCTK_REAL_VEC beta2L = vec_load(beta2[index]);
+ CCTK_REAL_VEC beta3L = vec_load(beta3[index]);
+ CCTK_REAL_VEC rL = vec_load(r[index]);
+ CCTK_REAL_VEC trKL = vec_load(trK[index]);
+ CCTK_REAL_VEC Xt1L = vec_load(Xt1[index]);
+ CCTK_REAL_VEC Xt2L = vec_load(Xt2[index]);
+ CCTK_REAL_VEC Xt3L = vec_load(Xt3[index]);
/* Include user supplied include files */
/* Precompute derivatives */
- CCTK_REAL const PDupwindNthAnti1alpha = PDupwindNthAnti1(&alpha[index]);
- CCTK_REAL const PDupwindNthSymm1alpha = PDupwindNthSymm1(&alpha[index]);
- CCTK_REAL const PDupwindNthAnti2alpha = PDupwindNthAnti2(&alpha[index]);
- CCTK_REAL const PDupwindNthSymm2alpha = PDupwindNthSymm2(&alpha[index]);
- CCTK_REAL const PDupwindNthAnti3alpha = PDupwindNthAnti3(&alpha[index]);
- CCTK_REAL const PDupwindNthSymm3alpha = PDupwindNthSymm3(&alpha[index]);
- CCTK_REAL const PDupwindNthAnti1beta1 = PDupwindNthAnti1(&beta1[index]);
- CCTK_REAL const PDupwindNthSymm1beta1 = PDupwindNthSymm1(&beta1[index]);
- CCTK_REAL const PDupwindNthAnti2beta1 = PDupwindNthAnti2(&beta1[index]);
- CCTK_REAL const PDupwindNthSymm2beta1 = PDupwindNthSymm2(&beta1[index]);
- CCTK_REAL const PDupwindNthAnti3beta1 = PDupwindNthAnti3(&beta1[index]);
- CCTK_REAL const PDupwindNthSymm3beta1 = PDupwindNthSymm3(&beta1[index]);
- CCTK_REAL const PDupwindNthAnti1beta2 = PDupwindNthAnti1(&beta2[index]);
- CCTK_REAL const PDupwindNthSymm1beta2 = PDupwindNthSymm1(&beta2[index]);
- CCTK_REAL const PDupwindNthAnti2beta2 = PDupwindNthAnti2(&beta2[index]);
- CCTK_REAL const PDupwindNthSymm2beta2 = PDupwindNthSymm2(&beta2[index]);
- CCTK_REAL const PDupwindNthAnti3beta2 = PDupwindNthAnti3(&beta2[index]);
- CCTK_REAL const PDupwindNthSymm3beta2 = PDupwindNthSymm3(&beta2[index]);
- CCTK_REAL const PDupwindNthAnti1beta3 = PDupwindNthAnti1(&beta3[index]);
- CCTK_REAL const PDupwindNthSymm1beta3 = PDupwindNthSymm1(&beta3[index]);
- CCTK_REAL const PDupwindNthAnti2beta3 = PDupwindNthAnti2(&beta3[index]);
- CCTK_REAL const PDupwindNthSymm2beta3 = PDupwindNthSymm2(&beta3[index]);
- CCTK_REAL const PDupwindNthAnti3beta3 = PDupwindNthAnti3(&beta3[index]);
- CCTK_REAL const PDupwindNthSymm3beta3 = PDupwindNthSymm3(&beta3[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti1alpha = PDupwindNthAnti1(&alpha[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm1alpha = PDupwindNthSymm1(&alpha[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti2alpha = PDupwindNthAnti2(&alpha[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm2alpha = PDupwindNthSymm2(&alpha[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti3alpha = PDupwindNthAnti3(&alpha[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm3alpha = PDupwindNthSymm3(&alpha[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti1beta1 = PDupwindNthAnti1(&beta1[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm1beta1 = PDupwindNthSymm1(&beta1[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti2beta1 = PDupwindNthAnti2(&beta1[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm2beta1 = PDupwindNthSymm2(&beta1[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti3beta1 = PDupwindNthAnti3(&beta1[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm3beta1 = PDupwindNthSymm3(&beta1[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti1beta2 = PDupwindNthAnti1(&beta2[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm1beta2 = PDupwindNthSymm1(&beta2[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti2beta2 = PDupwindNthAnti2(&beta2[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm2beta2 = PDupwindNthSymm2(&beta2[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti3beta2 = PDupwindNthAnti3(&beta2[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm3beta2 = PDupwindNthSymm3(&beta2[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti1beta3 = PDupwindNthAnti1(&beta3[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm1beta3 = PDupwindNthSymm1(&beta3[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti2beta3 = PDupwindNthAnti2(&beta3[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm2beta3 = PDupwindNthSymm2(&beta3[index]);
+ CCTK_REAL_VEC const PDupwindNthAnti3beta3 = PDupwindNthAnti3(&beta3[index]);
+ CCTK_REAL_VEC const PDupwindNthSymm3beta3 = PDupwindNthSymm3(&beta3[index]);
/* Calculate temporaries and grid functions */
ptrdiff_t dir1 = Sign(beta1L);
@@ -164,46 +166,74 @@ static void ML_BSSN_convertToADMBaseDtLapseShift_Body(cGH const * restrict const
ptrdiff_t dir3 = Sign(beta3L);
- CCTK_REAL eta = fmin(1,INV(rL)*ToReal(SpatialBetaDriverRadius));
-
- CCTK_REAL theta = fmin(1,exp(1 -
- rL*INV(ToReal(SpatialShiftGammaCoeffRadius))));
-
- CCTK_REAL dtalpL =
- -(pow(alphaL,ToReal(harmonicN))*ToReal(harmonicF)*(trKL + (AL -
- trKL)*ToReal(LapseACoeff))) + (beta1L*PDupwindNthAnti1alpha +
- beta2L*PDupwindNthAnti2alpha + beta3L*PDupwindNthAnti3alpha +
- PDupwindNthSymm1alpha*Abs(beta1L) + PDupwindNthSymm2alpha*Abs(beta2L) +
- PDupwindNthSymm3alpha*Abs(beta3L))*ToReal(LapseAdvectionCoeff);
-
- CCTK_REAL dtbetaxL = (beta1L*PDupwindNthAnti1beta1 +
- beta2L*PDupwindNthAnti2beta1 + beta3L*PDupwindNthAnti3beta1 +
- PDupwindNthSymm1beta1*Abs(beta1L) + PDupwindNthSymm2beta1*Abs(beta2L) +
- PDupwindNthSymm3beta1*Abs(beta3L))*ToReal(ShiftAdvectionCoeff) +
- theta*(Xt1L + beta1L*eta*ToReal(BetaDriver)*(-1 + ToReal(ShiftBCoeff))
- + (B1L - Xt1L)*ToReal(ShiftBCoeff))*ToReal(ShiftGammaCoeff);
-
- CCTK_REAL dtbetayL = (beta1L*PDupwindNthAnti1beta2 +
- beta2L*PDupwindNthAnti2beta2 + beta3L*PDupwindNthAnti3beta2 +
- PDupwindNthSymm1beta2*Abs(beta1L) + PDupwindNthSymm2beta2*Abs(beta2L) +
- PDupwindNthSymm3beta2*Abs(beta3L))*ToReal(ShiftAdvectionCoeff) +
- theta*(Xt2L + beta2L*eta*ToReal(BetaDriver)*(-1 + ToReal(ShiftBCoeff))
- + (B2L - Xt2L)*ToReal(ShiftBCoeff))*ToReal(ShiftGammaCoeff);
-
- CCTK_REAL dtbetazL = (beta1L*PDupwindNthAnti1beta3 +
- beta2L*PDupwindNthAnti2beta3 + beta3L*PDupwindNthAnti3beta3 +
- PDupwindNthSymm1beta3*Abs(beta1L) + PDupwindNthSymm2beta3*Abs(beta2L) +
- PDupwindNthSymm3beta3*Abs(beta3L))*ToReal(ShiftAdvectionCoeff) +
- theta*(Xt3L + beta3L*eta*ToReal(BetaDriver)*(-1 + ToReal(ShiftBCoeff))
- + (B3L - Xt3L)*ToReal(ShiftBCoeff))*ToReal(ShiftGammaCoeff);
+ CCTK_REAL_VEC eta =
+ kfmin(ToReal(1),kmul(INV(rL),ToReal(SpatialBetaDriverRadius)));
+
+ CCTK_REAL_VEC theta =
+ kfmin(ToReal(1),kexp(knmsub(rL,INV(ToReal(SpatialShiftGammaCoeffRadius)),ToReal(1))));
+
+ CCTK_REAL_VEC dtalpL =
+ kmsub(kmadd(beta1L,PDupwindNthAnti1alpha,kmadd(beta2L,PDupwindNthAnti2alpha,kmadd(beta3L,PDupwindNthAnti3alpha,kmadd(PDupwindNthSymm1alpha,kfabs(beta1L),kmadd(PDupwindNthSymm2alpha,kfabs(beta2L),kmul(PDupwindNthSymm3alpha,kfabs(beta3L))))))),ToReal(LapseAdvectionCoeff),kmul(kpow(alphaL,harmonicN),kmul(ToReal(harmonicF),kmadd(ksub(AL,trKL),ToReal(LapseACoeff),trKL))));
+
+ CCTK_REAL_VEC dtbetaxL =
+ kmadd(kmadd(beta1L,PDupwindNthAnti1beta1,kmadd(beta2L,PDupwindNthAnti2beta1,kmadd(beta3L,PDupwindNthAnti3beta1,kmadd(PDupwindNthSymm1beta1,kfabs(beta1L),kmadd(PDupwindNthSymm2beta1,kfabs(beta2L),kmul(PDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),kmul(theta,kmul(kadd(Xt1L,kmadd(beta1L,kmul(eta,ToReal(BetaDriver*(-1
+ +
+ ShiftBCoeff))),kmul(ksub(B1L,Xt1L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))));
+
+ CCTK_REAL_VEC dtbetayL =
+ kmadd(kmadd(beta1L,PDupwindNthAnti1beta2,kmadd(beta2L,PDupwindNthAnti2beta2,kmadd(beta3L,PDupwindNthAnti3beta2,kmadd(PDupwindNthSymm1beta2,kfabs(beta1L),kmadd(PDupwindNthSymm2beta2,kfabs(beta2L),kmul(PDupwindNthSymm3beta2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),kmul(theta,kmul(kadd(Xt2L,kmadd(beta2L,kmul(eta,ToReal(BetaDriver*(-1
+ +
+ ShiftBCoeff))),kmul(ksub(B2L,Xt2L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))));
+
+ CCTK_REAL_VEC dtbetazL =
+ kmadd(kmadd(beta1L,PDupwindNthAnti1beta3,kmadd(beta2L,PDupwindNthAnti2beta3,kmadd(beta3L,PDupwindNthAnti3beta3,kmadd(PDupwindNthSymm1beta3,kfabs(beta1L),kmadd(PDupwindNthSymm2beta3,kfabs(beta2L),kmul(PDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),kmul(theta,kmul(kadd(Xt3L,kmadd(beta3L,kmul(eta,ToReal(BetaDriver*(-1
+ +
+ ShiftBCoeff))),kmul(ksub(B3L,Xt3L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))));
+
+ /* If necessary, store only partial vectors after the first iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0))
+ {
+ ptrdiff_t const elt_count_lo = lc_imin-i;
+ ptrdiff_t const elt_count_hi = lc_imax-i;
+ vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi);
+ break;
+ }
+
+ /* If necessary, store only partial vectors after the first iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0))
+ {
+ ptrdiff_t const elt_count = lc_imin-i;
+ vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count);
+ vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count);
+ vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count);
+ vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count);
+ continue;
+ }
+
+ /* If necessary, store only partial vectors after the last iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0))
+ {
+ ptrdiff_t const elt_count = lc_imax-i;
+ vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count);
+ vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count);
+ vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count);
+ vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count);
+ break;
+ }
/* Copy local copies back to grid functions */
- dtalp[index] = dtalpL;
- dtbetax[index] = dtbetaxL;
- dtbetay[index] = dtbetayL;
- dtbetaz[index] = dtbetazL;
+ vec_store_nta(dtalp[index],dtalpL);
+ vec_store_nta(dtbetax[index],dtbetaxL);
+ vec_store_nta(dtbetay[index],dtbetayL);
+ vec_store_nta(dtbetaz[index],dtbetazL);
}
- LC_ENDLOOP3 (ML_BSSN_convertToADMBaseDtLapseShift);
+ LC_ENDLOOP3VEC (ML_BSSN_convertToADMBaseDtLapseShift);
}
extern "C" void ML_BSSN_convertToADMBaseDtLapseShift(CCTK_ARGUMENTS)
diff --git a/ML_BSSN/src/ML_BSSN_convertToADMBaseDtLapseShiftBoundary.cc b/ML_BSSN/src/ML_BSSN_convertToADMBaseDtLapseShiftBoundary.cc
index 0742cd4..1356fbf 100644
--- a/ML_BSSN/src/ML_BSSN_convertToADMBaseDtLapseShiftBoundary.cc
+++ b/ML_BSSN/src/ML_BSSN_convertToADMBaseDtLapseShiftBoundary.cc
@@ -13,13 +13,14 @@
#include "GenericFD.h"
#include "Differencing.h"
#include "loopcontrol.h"
+#include "vectors.h"
/* Define macros used in calculations */
#define INITVALUE (42)
#define QAD(x) (SQR(SQR(x)))
-#define INV(x) ((1.0) / (x))
-#define SQR(x) ((x) * (x))
-#define CUB(x) ((x) * (x) * (x))
+#define INV(x) (kdiv(ToReal(1.0),x))
+#define SQR(x) (kmul(x,x))
+#define CUB(x) (kmul(x,SQR(x)))
extern "C" void ML_BSSN_convertToADMBaseDtLapseShiftBoundary_SelectBCs(CCTK_ARGUMENTS)
{
@@ -67,65 +68,66 @@ static void ML_BSSN_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * restri
ptrdiff_t const cdi = sizeof(CCTK_REAL) * di;
ptrdiff_t const cdj = sizeof(CCTK_REAL) * dj;
ptrdiff_t const cdk = sizeof(CCTK_REAL) * dk;
- CCTK_REAL const dx = ToReal(CCTK_DELTA_SPACE(0));
- CCTK_REAL const dy = ToReal(CCTK_DELTA_SPACE(1));
- CCTK_REAL const dz = ToReal(CCTK_DELTA_SPACE(2));
- CCTK_REAL const dt = ToReal(CCTK_DELTA_TIME);
- CCTK_REAL const dxi = INV(dx);
- CCTK_REAL const dyi = INV(dy);
- CCTK_REAL const dzi = INV(dz);
- CCTK_REAL const khalf = 0.5;
- CCTK_REAL const kthird = 1/3.0;
- CCTK_REAL const ktwothird = 2.0/3.0;
- CCTK_REAL const kfourthird = 4.0/3.0;
- CCTK_REAL const keightthird = 8.0/3.0;
- CCTK_REAL const hdxi = 0.5 * dxi;
- CCTK_REAL const hdyi = 0.5 * dyi;
- CCTK_REAL const hdzi = 0.5 * dzi;
+ CCTK_REAL_VEC const dx = ToReal(CCTK_DELTA_SPACE(0));
+ CCTK_REAL_VEC const dy = ToReal(CCTK_DELTA_SPACE(1));
+ CCTK_REAL_VEC const dz = ToReal(CCTK_DELTA_SPACE(2));
+ CCTK_REAL_VEC const dt = ToReal(CCTK_DELTA_TIME);
+ CCTK_REAL_VEC const dxi = INV(dx);
+ CCTK_REAL_VEC const dyi = INV(dy);
+ CCTK_REAL_VEC const dzi = INV(dz);
+ CCTK_REAL_VEC const khalf = ToReal(0.5);
+ CCTK_REAL_VEC const kthird = ToReal(1.0/3.0);
+ CCTK_REAL_VEC const ktwothird = ToReal(2.0/3.0);
+ CCTK_REAL_VEC const kfourthird = ToReal(4.0/3.0);
+ CCTK_REAL_VEC const keightthird = ToReal(8.0/3.0);
+ CCTK_REAL_VEC const hdxi = kmul(ToReal(0.5), dxi);
+ CCTK_REAL_VEC const hdyi = kmul(ToReal(0.5), dyi);
+ CCTK_REAL_VEC const hdzi = kmul(ToReal(0.5), dzi);
/* Initialize predefined quantities */
- CCTK_REAL const p1o12dx = 0.0833333333333333333333333333333*INV(dx);
- CCTK_REAL const p1o12dy = 0.0833333333333333333333333333333*INV(dy);
- CCTK_REAL const p1o12dz = 0.0833333333333333333333333333333*INV(dz);
- CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx)*INV(dy);
- CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx)*INV(dz);
- CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy)*INV(dz);
- CCTK_REAL const p1o24dx = 0.0416666666666666666666666666667*INV(dx);
- CCTK_REAL const p1o24dy = 0.0416666666666666666666666666667*INV(dy);
- CCTK_REAL const p1o24dz = 0.0416666666666666666666666666667*INV(dz);
- CCTK_REAL const p1o64dx = 0.015625*INV(dx);
- CCTK_REAL const p1o64dy = 0.015625*INV(dy);
- CCTK_REAL const p1o64dz = 0.015625*INV(dz);
- CCTK_REAL const p1odx = INV(dx);
- CCTK_REAL const p1ody = INV(dy);
- CCTK_REAL const p1odz = INV(dz);
- CCTK_REAL const pm1o12dx2 = -0.0833333333333333333333333333333*INV(SQR(dx));
- CCTK_REAL const pm1o12dy2 = -0.0833333333333333333333333333333*INV(SQR(dy));
- CCTK_REAL const pm1o12dz2 = -0.0833333333333333333333333333333*INV(SQR(dz));
+ CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o24dx = kmul(INV(dx),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o24dy = kmul(INV(dy),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o24dz = kmul(INV(dz),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625));
+ CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625));
+ CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625));
+ CCTK_REAL_VEC const p1odx = INV(dx);
+ CCTK_REAL_VEC const p1ody = INV(dy);
+ CCTK_REAL_VEC const p1odz = INV(dz);
+ CCTK_REAL_VEC const pm1o12dx2 = kmul(INV(SQR(dx)),ToReal(-0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const pm1o12dy2 = kmul(INV(SQR(dy)),ToReal(-0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const pm1o12dz2 = kmul(INV(SQR(dz)),ToReal(-0.0833333333333333333333333333333));
/* Loop over the grid points */
#pragma omp parallel
- LC_LOOP3 (ML_BSSN_convertToADMBaseDtLapseShiftBoundary,
+ LC_LOOP3VEC (ML_BSSN_convertToADMBaseDtLapseShiftBoundary,
i,j,k, min[0],min[1],min[2], max[0],max[1],max[2],
- cctk_lsh[0],cctk_lsh[1],cctk_lsh[2])
+ cctk_lsh[0],cctk_lsh[1],cctk_lsh[2],
+ CCTK_REAL_VEC_SIZE)
{
ptrdiff_t const index = di*i + dj*j + dk*k;
/* Assign local copies of grid functions */
- CCTK_REAL AL = A[index];
- CCTK_REAL alphaL = alpha[index];
- CCTK_REAL B1L = B1[index];
- CCTK_REAL B2L = B2[index];
- CCTK_REAL B3L = B3[index];
- CCTK_REAL beta1L = beta1[index];
- CCTK_REAL beta2L = beta2[index];
- CCTK_REAL beta3L = beta3[index];
- CCTK_REAL rL = r[index];
- CCTK_REAL trKL = trK[index];
- CCTK_REAL Xt1L = Xt1[index];
- CCTK_REAL Xt2L = Xt2[index];
- CCTK_REAL Xt3L = Xt3[index];
+ CCTK_REAL_VEC AL = vec_load(A[index]);
+ CCTK_REAL_VEC alphaL = vec_load(alpha[index]);
+ CCTK_REAL_VEC B1L = vec_load(B1[index]);
+ CCTK_REAL_VEC B2L = vec_load(B2[index]);
+ CCTK_REAL_VEC B3L = vec_load(B3[index]);
+ CCTK_REAL_VEC beta1L = vec_load(beta1[index]);
+ CCTK_REAL_VEC beta2L = vec_load(beta2[index]);
+ CCTK_REAL_VEC beta3L = vec_load(beta3[index]);
+ CCTK_REAL_VEC rL = vec_load(r[index]);
+ CCTK_REAL_VEC trKL = vec_load(trK[index]);
+ CCTK_REAL_VEC Xt1L = vec_load(Xt1[index]);
+ CCTK_REAL_VEC Xt2L = vec_load(Xt2[index]);
+ CCTK_REAL_VEC Xt3L = vec_load(Xt3[index]);
/* Include user supplied include files */
@@ -133,34 +135,71 @@ static void ML_BSSN_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * restri
/* Precompute derivatives */
/* Calculate temporaries and grid functions */
- CCTK_REAL eta = fmin(1,INV(rL)*ToReal(SpatialBetaDriverRadius));
+ CCTK_REAL_VEC eta =
+ kfmin(ToReal(1),kmul(INV(rL),ToReal(SpatialBetaDriverRadius)));
- CCTK_REAL theta = fmin(1,exp(1 -
- rL*INV(ToReal(SpatialShiftGammaCoeffRadius))));
+ CCTK_REAL_VEC theta =
+ kfmin(ToReal(1),kexp(knmsub(rL,INV(ToReal(SpatialShiftGammaCoeffRadius)),ToReal(1))));
- CCTK_REAL dtalpL =
- -(pow(alphaL,ToReal(harmonicN))*ToReal(harmonicF)*(trKL + (AL -
- trKL)*ToReal(LapseACoeff)));
+ CCTK_REAL_VEC dtalpL =
+ kneg(kmul(kpow(alphaL,harmonicN),kmul(ToReal(harmonicF),kmadd(ksub(AL,trKL),ToReal(LapseACoeff),trKL))));
- CCTK_REAL dtbetaxL = theta*(Xt1L + beta1L*eta*ToReal(BetaDriver)*(-1 +
- ToReal(ShiftBCoeff)) + (B1L -
- Xt1L)*ToReal(ShiftBCoeff))*ToReal(ShiftGammaCoeff);
+ CCTK_REAL_VEC dtbetaxL =
+ kmul(theta,kmul(kadd(Xt1L,kmadd(beta1L,kmul(eta,ToReal(BetaDriver*(-1 +
+ ShiftBCoeff))),kmul(ksub(B1L,Xt1L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff)));
- CCTK_REAL dtbetayL = theta*(Xt2L + beta2L*eta*ToReal(BetaDriver)*(-1 +
- ToReal(ShiftBCoeff)) + (B2L -
- Xt2L)*ToReal(ShiftBCoeff))*ToReal(ShiftGammaCoeff);
+ CCTK_REAL_VEC dtbetayL =
+ kmul(theta,kmul(kadd(Xt2L,kmadd(beta2L,kmul(eta,ToReal(BetaDriver*(-1 +
+ ShiftBCoeff))),kmul(ksub(B2L,Xt2L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff)));
- CCTK_REAL dtbetazL = theta*(Xt3L + beta3L*eta*ToReal(BetaDriver)*(-1 +
- ToReal(ShiftBCoeff)) + (B3L -
- Xt3L)*ToReal(ShiftBCoeff))*ToReal(ShiftGammaCoeff);
+ CCTK_REAL_VEC dtbetazL =
+ kmul(theta,kmul(kadd(Xt3L,kmadd(beta3L,kmul(eta,ToReal(BetaDriver*(-1 +
+ ShiftBCoeff))),kmul(ksub(B3L,Xt3L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff)));
+
+ /* If necessary, store only partial vectors after the first iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0))
+ {
+ ptrdiff_t const elt_count_lo = lc_imin-i;
+ ptrdiff_t const elt_count_hi = lc_imax-i;
+ vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi);
+ break;
+ }
+
+ /* If necessary, store only partial vectors after the first iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0))
+ {
+ ptrdiff_t const elt_count = lc_imin-i;
+ vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count);
+ vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count);
+ vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count);
+ vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count);
+ continue;
+ }
+
+ /* If necessary, store only partial vectors after the last iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0))
+ {
+ ptrdiff_t const elt_count = lc_imax-i;
+ vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count);
+ vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count);
+ vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count);
+ vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count);
+ break;
+ }
/* Copy local copies back to grid functions */
- dtalp[index] = dtalpL;
- dtbetax[index] = dtbetaxL;
- dtbetay[index] = dtbetayL;
- dtbetaz[index] = dtbetazL;
+ vec_store_nta(dtalp[index],dtalpL);
+ vec_store_nta(dtbetax[index],dtbetaxL);
+ vec_store_nta(dtbetay[index],dtbetayL);
+ vec_store_nta(dtbetaz[index],dtbetazL);
}
- LC_ENDLOOP3 (ML_BSSN_convertToADMBaseDtLapseShiftBoundary);
+ LC_ENDLOOP3VEC (ML_BSSN_convertToADMBaseDtLapseShiftBoundary);
}
extern "C" void ML_BSSN_convertToADMBaseDtLapseShiftBoundary(CCTK_ARGUMENTS)
diff --git a/ML_BSSN/src/ML_BSSN_convertToADMBaseFakeDtLapseShift.cc b/ML_BSSN/src/ML_BSSN_convertToADMBaseFakeDtLapseShift.cc
index 5190707..36bd7fa 100644
--- a/ML_BSSN/src/ML_BSSN_convertToADMBaseFakeDtLapseShift.cc
+++ b/ML_BSSN/src/ML_BSSN_convertToADMBaseFakeDtLapseShift.cc
@@ -13,13 +13,14 @@
#include "GenericFD.h"
#include "Differencing.h"
#include "loopcontrol.h"
+#include "vectors.h"
/* Define macros used in calculations */
#define INITVALUE (42)
#define QAD(x) (SQR(SQR(x)))
-#define INV(x) ((1.0) / (x))
-#define SQR(x) ((x) * (x))
-#define CUB(x) ((x) * (x) * (x))
+#define INV(x) (kdiv(ToReal(1.0),x))
+#define SQR(x) (kmul(x,x))
+#define CUB(x) (kmul(x,SQR(x)))
static void ML_BSSN_convertToADMBaseFakeDtLapseShift_Body(cGH const * restrict const cctkGH, int const dir, int const face, CCTK_REAL const normal[3], CCTK_REAL const tangentA[3], CCTK_REAL const tangentB[3], int const min[3], int const max[3], int const n_subblock_gfs, CCTK_REAL * restrict const subblock_gfs[])
{
@@ -52,65 +53,66 @@ static void ML_BSSN_convertToADMBaseFakeDtLapseShift_Body(cGH const * restrict c
ptrdiff_t const cdi = sizeof(CCTK_REAL) * di;
ptrdiff_t const cdj = sizeof(CCTK_REAL) * dj;
ptrdiff_t const cdk = sizeof(CCTK_REAL) * dk;
- CCTK_REAL const dx = ToReal(CCTK_DELTA_SPACE(0));
- CCTK_REAL const dy = ToReal(CCTK_DELTA_SPACE(1));
- CCTK_REAL const dz = ToReal(CCTK_DELTA_SPACE(2));
- CCTK_REAL const dt = ToReal(CCTK_DELTA_TIME);
- CCTK_REAL const dxi = INV(dx);
- CCTK_REAL const dyi = INV(dy);
- CCTK_REAL const dzi = INV(dz);
- CCTK_REAL const khalf = 0.5;
- CCTK_REAL const kthird = 1/3.0;
- CCTK_REAL const ktwothird = 2.0/3.0;
- CCTK_REAL const kfourthird = 4.0/3.0;
- CCTK_REAL const keightthird = 8.0/3.0;
- CCTK_REAL const hdxi = 0.5 * dxi;
- CCTK_REAL const hdyi = 0.5 * dyi;
- CCTK_REAL const hdzi = 0.5 * dzi;
+ CCTK_REAL_VEC const dx = ToReal(CCTK_DELTA_SPACE(0));
+ CCTK_REAL_VEC const dy = ToReal(CCTK_DELTA_SPACE(1));
+ CCTK_REAL_VEC const dz = ToReal(CCTK_DELTA_SPACE(2));
+ CCTK_REAL_VEC const dt = ToReal(CCTK_DELTA_TIME);
+ CCTK_REAL_VEC const dxi = INV(dx);
+ CCTK_REAL_VEC const dyi = INV(dy);
+ CCTK_REAL_VEC const dzi = INV(dz);
+ CCTK_REAL_VEC const khalf = ToReal(0.5);
+ CCTK_REAL_VEC const kthird = ToReal(1.0/3.0);
+ CCTK_REAL_VEC const ktwothird = ToReal(2.0/3.0);
+ CCTK_REAL_VEC const kfourthird = ToReal(4.0/3.0);
+ CCTK_REAL_VEC const keightthird = ToReal(8.0/3.0);
+ CCTK_REAL_VEC const hdxi = kmul(ToReal(0.5), dxi);
+ CCTK_REAL_VEC const hdyi = kmul(ToReal(0.5), dyi);
+ CCTK_REAL_VEC const hdzi = kmul(ToReal(0.5), dzi);
/* Initialize predefined quantities */
- CCTK_REAL const p1o12dx = 0.0833333333333333333333333333333*INV(dx);
- CCTK_REAL const p1o12dy = 0.0833333333333333333333333333333*INV(dy);
- CCTK_REAL const p1o12dz = 0.0833333333333333333333333333333*INV(dz);
- CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx)*INV(dy);
- CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx)*INV(dz);
- CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy)*INV(dz);
- CCTK_REAL const p1o24dx = 0.0416666666666666666666666666667*INV(dx);
- CCTK_REAL const p1o24dy = 0.0416666666666666666666666666667*INV(dy);
- CCTK_REAL const p1o24dz = 0.0416666666666666666666666666667*INV(dz);
- CCTK_REAL const p1o64dx = 0.015625*INV(dx);
- CCTK_REAL const p1o64dy = 0.015625*INV(dy);
- CCTK_REAL const p1o64dz = 0.015625*INV(dz);
- CCTK_REAL const p1odx = INV(dx);
- CCTK_REAL const p1ody = INV(dy);
- CCTK_REAL const p1odz = INV(dz);
- CCTK_REAL const pm1o12dx2 = -0.0833333333333333333333333333333*INV(SQR(dx));
- CCTK_REAL const pm1o12dy2 = -0.0833333333333333333333333333333*INV(SQR(dy));
- CCTK_REAL const pm1o12dz2 = -0.0833333333333333333333333333333*INV(SQR(dz));
+ CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o24dx = kmul(INV(dx),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o24dy = kmul(INV(dy),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o24dz = kmul(INV(dz),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625));
+ CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625));
+ CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625));
+ CCTK_REAL_VEC const p1odx = INV(dx);
+ CCTK_REAL_VEC const p1ody = INV(dy);
+ CCTK_REAL_VEC const p1odz = INV(dz);
+ CCTK_REAL_VEC const pm1o12dx2 = kmul(INV(SQR(dx)),ToReal(-0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const pm1o12dy2 = kmul(INV(SQR(dy)),ToReal(-0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const pm1o12dz2 = kmul(INV(SQR(dz)),ToReal(-0.0833333333333333333333333333333));
/* Loop over the grid points */
#pragma omp parallel
- LC_LOOP3 (ML_BSSN_convertToADMBaseFakeDtLapseShift,
+ LC_LOOP3VEC (ML_BSSN_convertToADMBaseFakeDtLapseShift,
i,j,k, min[0],min[1],min[2], max[0],max[1],max[2],
- cctk_lsh[0],cctk_lsh[1],cctk_lsh[2])
+ cctk_lsh[0],cctk_lsh[1],cctk_lsh[2],
+ CCTK_REAL_VEC_SIZE)
{
ptrdiff_t const index = di*i + dj*j + dk*k;
/* Assign local copies of grid functions */
- CCTK_REAL AL = A[index];
- CCTK_REAL alphaL = alpha[index];
- CCTK_REAL B1L = B1[index];
- CCTK_REAL B2L = B2[index];
- CCTK_REAL B3L = B3[index];
- CCTK_REAL beta1L = beta1[index];
- CCTK_REAL beta2L = beta2[index];
- CCTK_REAL beta3L = beta3[index];
- CCTK_REAL rL = r[index];
- CCTK_REAL trKL = trK[index];
- CCTK_REAL Xt1L = Xt1[index];
- CCTK_REAL Xt2L = Xt2[index];
- CCTK_REAL Xt3L = Xt3[index];
+ CCTK_REAL_VEC AL = vec_load(A[index]);
+ CCTK_REAL_VEC alphaL = vec_load(alpha[index]);
+ CCTK_REAL_VEC B1L = vec_load(B1[index]);
+ CCTK_REAL_VEC B2L = vec_load(B2[index]);
+ CCTK_REAL_VEC B3L = vec_load(B3[index]);
+ CCTK_REAL_VEC beta1L = vec_load(beta1[index]);
+ CCTK_REAL_VEC beta2L = vec_load(beta2[index]);
+ CCTK_REAL_VEC beta3L = vec_load(beta3[index]);
+ CCTK_REAL_VEC rL = vec_load(r[index]);
+ CCTK_REAL_VEC trKL = vec_load(trK[index]);
+ CCTK_REAL_VEC Xt1L = vec_load(Xt1[index]);
+ CCTK_REAL_VEC Xt2L = vec_load(Xt2[index]);
+ CCTK_REAL_VEC Xt3L = vec_load(Xt3[index]);
/* Include user supplied include files */
@@ -118,34 +120,71 @@ static void ML_BSSN_convertToADMBaseFakeDtLapseShift_Body(cGH const * restrict c
/* Precompute derivatives */
/* Calculate temporaries and grid functions */
- CCTK_REAL eta = fmin(1,INV(rL)*ToReal(SpatialBetaDriverRadius));
+ CCTK_REAL_VEC eta =
+ kfmin(ToReal(1),kmul(INV(rL),ToReal(SpatialBetaDriverRadius)));
- CCTK_REAL theta = fmin(1,exp(1 -
- rL*INV(ToReal(SpatialShiftGammaCoeffRadius))));
+ CCTK_REAL_VEC theta =
+ kfmin(ToReal(1),kexp(knmsub(rL,INV(ToReal(SpatialShiftGammaCoeffRadius)),ToReal(1))));
- CCTK_REAL dtalpL =
- -(pow(alphaL,ToReal(harmonicN))*ToReal(harmonicF)*(trKL + (AL -
- trKL)*ToReal(LapseACoeff)));
+ CCTK_REAL_VEC dtalpL =
+ kneg(kmul(kpow(alphaL,harmonicN),kmul(ToReal(harmonicF),kmadd(ksub(AL,trKL),ToReal(LapseACoeff),trKL))));
- CCTK_REAL dtbetaxL = theta*(Xt1L + beta1L*eta*ToReal(BetaDriver)*(-1 +
- ToReal(ShiftBCoeff)) + (B1L -
- Xt1L)*ToReal(ShiftBCoeff))*ToReal(ShiftGammaCoeff);
+ CCTK_REAL_VEC dtbetaxL =
+ kmul(theta,kmul(kadd(Xt1L,kmadd(beta1L,kmul(eta,ToReal(BetaDriver*(-1 +
+ ShiftBCoeff))),kmul(ksub(B1L,Xt1L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff)));
- CCTK_REAL dtbetayL = theta*(Xt2L + beta2L*eta*ToReal(BetaDriver)*(-1 +
- ToReal(ShiftBCoeff)) + (B2L -
- Xt2L)*ToReal(ShiftBCoeff))*ToReal(ShiftGammaCoeff);
+ CCTK_REAL_VEC dtbetayL =
+ kmul(theta,kmul(kadd(Xt2L,kmadd(beta2L,kmul(eta,ToReal(BetaDriver*(-1 +
+ ShiftBCoeff))),kmul(ksub(B2L,Xt2L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff)));
- CCTK_REAL dtbetazL = theta*(Xt3L + beta3L*eta*ToReal(BetaDriver)*(-1 +
- ToReal(ShiftBCoeff)) + (B3L -
- Xt3L)*ToReal(ShiftBCoeff))*ToReal(ShiftGammaCoeff);
+ CCTK_REAL_VEC dtbetazL =
+ kmul(theta,kmul(kadd(Xt3L,kmadd(beta3L,kmul(eta,ToReal(BetaDriver*(-1 +
+ ShiftBCoeff))),kmul(ksub(B3L,Xt3L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff)));
+
+ /* If necessary, store only partial vectors after the first iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0))
+ {
+ ptrdiff_t const elt_count_lo = lc_imin-i;
+ ptrdiff_t const elt_count_hi = lc_imax-i;
+ vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi);
+ break;
+ }
+
+ /* If necessary, store only partial vectors after the first iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0))
+ {
+ ptrdiff_t const elt_count = lc_imin-i;
+ vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count);
+ vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count);
+ vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count);
+ vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count);
+ continue;
+ }
+
+ /* If necessary, store only partial vectors after the last iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0))
+ {
+ ptrdiff_t const elt_count = lc_imax-i;
+ vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count);
+ vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count);
+ vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count);
+ vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count);
+ break;
+ }
/* Copy local copies back to grid functions */
- dtalp[index] = dtalpL;
- dtbetax[index] = dtbetaxL;
- dtbetay[index] = dtbetayL;
- dtbetaz[index] = dtbetazL;
+ vec_store_nta(dtalp[index],dtalpL);
+ vec_store_nta(dtbetax[index],dtbetaxL);
+ vec_store_nta(dtbetay[index],dtbetayL);
+ vec_store_nta(dtbetaz[index],dtbetazL);
}
- LC_ENDLOOP3 (ML_BSSN_convertToADMBaseFakeDtLapseShift);
+ LC_ENDLOOP3VEC (ML_BSSN_convertToADMBaseFakeDtLapseShift);
}
extern "C" void ML_BSSN_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS)
diff --git a/ML_BSSN/src/ML_BSSN_enforce.cc b/ML_BSSN/src/ML_BSSN_enforce.cc
index 485b834..997a439 100644
--- a/ML_BSSN/src/ML_BSSN_enforce.cc
+++ b/ML_BSSN/src/ML_BSSN_enforce.cc
@@ -13,13 +13,14 @@
#include "GenericFD.h"
#include "Differencing.h"
#include "loopcontrol.h"
+#include "vectors.h"
/* Define macros used in calculations */
#define INITVALUE (42)
#define QAD(x) (SQR(SQR(x)))
-#define INV(x) ((1.0) / (x))
-#define SQR(x) ((x) * (x))
-#define CUB(x) ((x) * (x) * (x))
+#define INV(x) (kdiv(ToReal(1.0),x))
+#define SQR(x) (kmul(x,x))
+#define CUB(x) (kmul(x,SQR(x)))
static void ML_BSSN_enforce_Body(cGH const * restrict const cctkGH, int const dir, int const face, CCTK_REAL const normal[3], CCTK_REAL const tangentA[3], CCTK_REAL const tangentB[3], int const min[3], int const max[3], int const n_subblock_gfs, CCTK_REAL * restrict const subblock_gfs[])
{
@@ -52,65 +53,66 @@ static void ML_BSSN_enforce_Body(cGH const * restrict const cctkGH, int const di
ptrdiff_t const cdi = sizeof(CCTK_REAL) * di;
ptrdiff_t const cdj = sizeof(CCTK_REAL) * dj;
ptrdiff_t const cdk = sizeof(CCTK_REAL) * dk;
- CCTK_REAL const dx = ToReal(CCTK_DELTA_SPACE(0));
- CCTK_REAL const dy = ToReal(CCTK_DELTA_SPACE(1));
- CCTK_REAL const dz = ToReal(CCTK_DELTA_SPACE(2));
- CCTK_REAL const dt = ToReal(CCTK_DELTA_TIME);
- CCTK_REAL const dxi = INV(dx);
- CCTK_REAL const dyi = INV(dy);
- CCTK_REAL const dzi = INV(dz);
- CCTK_REAL const khalf = 0.5;
- CCTK_REAL const kthird = 1/3.0;
- CCTK_REAL const ktwothird = 2.0/3.0;
- CCTK_REAL const kfourthird = 4.0/3.0;
- CCTK_REAL const keightthird = 8.0/3.0;
- CCTK_REAL const hdxi = 0.5 * dxi;
- CCTK_REAL const hdyi = 0.5 * dyi;
- CCTK_REAL const hdzi = 0.5 * dzi;
+ CCTK_REAL_VEC const dx = ToReal(CCTK_DELTA_SPACE(0));
+ CCTK_REAL_VEC const dy = ToReal(CCTK_DELTA_SPACE(1));
+ CCTK_REAL_VEC const dz = ToReal(CCTK_DELTA_SPACE(2));
+ CCTK_REAL_VEC const dt = ToReal(CCTK_DELTA_TIME);
+ CCTK_REAL_VEC const dxi = INV(dx);
+ CCTK_REAL_VEC const dyi = INV(dy);
+ CCTK_REAL_VEC const dzi = INV(dz);
+ CCTK_REAL_VEC const khalf = ToReal(0.5);
+ CCTK_REAL_VEC const kthird = ToReal(1.0/3.0);
+ CCTK_REAL_VEC const ktwothird = ToReal(2.0/3.0);
+ CCTK_REAL_VEC const kfourthird = ToReal(4.0/3.0);
+ CCTK_REAL_VEC const keightthird = ToReal(8.0/3.0);
+ CCTK_REAL_VEC const hdxi = kmul(ToReal(0.5), dxi);
+ CCTK_REAL_VEC const hdyi = kmul(ToReal(0.5), dyi);
+ CCTK_REAL_VEC const hdzi = kmul(ToReal(0.5), dzi);
/* Initialize predefined quantities */
- CCTK_REAL const p1o12dx = 0.0833333333333333333333333333333*INV(dx);
- CCTK_REAL const p1o12dy = 0.0833333333333333333333333333333*INV(dy);
- CCTK_REAL const p1o12dz = 0.0833333333333333333333333333333*INV(dz);
- CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx)*INV(dy);
- CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx)*INV(dz);
- CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy)*INV(dz);
- CCTK_REAL const p1o24dx = 0.0416666666666666666666666666667*INV(dx);
- CCTK_REAL const p1o24dy = 0.0416666666666666666666666666667*INV(dy);
- CCTK_REAL const p1o24dz = 0.0416666666666666666666666666667*INV(dz);
- CCTK_REAL const p1o64dx = 0.015625*INV(dx);
- CCTK_REAL const p1o64dy = 0.015625*INV(dy);
- CCTK_REAL const p1o64dz = 0.015625*INV(dz);
- CCTK_REAL const p1odx = INV(dx);
- CCTK_REAL const p1ody = INV(dy);
- CCTK_REAL const p1odz = INV(dz);
- CCTK_REAL const pm1o12dx2 = -0.0833333333333333333333333333333*INV(SQR(dx));
- CCTK_REAL const pm1o12dy2 = -0.0833333333333333333333333333333*INV(SQR(dy));
- CCTK_REAL const pm1o12dz2 = -0.0833333333333333333333333333333*INV(SQR(dz));
+ CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o24dx = kmul(INV(dx),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o24dy = kmul(INV(dy),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o24dz = kmul(INV(dz),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625));
+ CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625));
+ CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625));
+ CCTK_REAL_VEC const p1odx = INV(dx);
+ CCTK_REAL_VEC const p1ody = INV(dy);
+ CCTK_REAL_VEC const p1odz = INV(dz);
+ CCTK_REAL_VEC const pm1o12dx2 = kmul(INV(SQR(dx)),ToReal(-0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const pm1o12dy2 = kmul(INV(SQR(dy)),ToReal(-0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const pm1o12dz2 = kmul(INV(SQR(dz)),ToReal(-0.0833333333333333333333333333333));
/* Loop over the grid points */
#pragma omp parallel
- LC_LOOP3 (ML_BSSN_enforce,
+ LC_LOOP3VEC (ML_BSSN_enforce,
i,j,k, min[0],min[1],min[2], max[0],max[1],max[2],
- cctk_lsh[0],cctk_lsh[1],cctk_lsh[2])
+ cctk_lsh[0],cctk_lsh[1],cctk_lsh[2],
+ CCTK_REAL_VEC_SIZE)
{
ptrdiff_t const index = di*i + dj*j + dk*k;
/* Assign local copies of grid functions */
- CCTK_REAL alphaL = alpha[index];
- CCTK_REAL At11L = At11[index];
- CCTK_REAL At12L = At12[index];
- CCTK_REAL At13L = At13[index];
- CCTK_REAL At22L = At22[index];
- CCTK_REAL At23L = At23[index];
- CCTK_REAL At33L = At33[index];
- CCTK_REAL gt11L = gt11[index];
- CCTK_REAL gt12L = gt12[index];
- CCTK_REAL gt13L = gt13[index];
- CCTK_REAL gt22L = gt22[index];
- CCTK_REAL gt23L = gt23[index];
- CCTK_REAL gt33L = gt33[index];
+ CCTK_REAL_VEC alphaL = vec_load(alpha[index]);
+ CCTK_REAL_VEC At11L = vec_load(At11[index]);
+ CCTK_REAL_VEC At12L = vec_load(At12[index]);
+ CCTK_REAL_VEC At13L = vec_load(At13[index]);
+ CCTK_REAL_VEC At22L = vec_load(At22[index]);
+ CCTK_REAL_VEC At23L = vec_load(At23[index]);
+ CCTK_REAL_VEC At33L = vec_load(At33[index]);
+ CCTK_REAL_VEC gt11L = vec_load(gt11[index]);
+ CCTK_REAL_VEC gt12L = vec_load(gt12[index]);
+ CCTK_REAL_VEC gt13L = vec_load(gt13[index]);
+ CCTK_REAL_VEC gt22L = vec_load(gt22[index]);
+ CCTK_REAL_VEC gt23L = vec_load(gt23[index]);
+ CCTK_REAL_VEC gt33L = vec_load(gt33[index]);
/* Include user supplied include files */
@@ -118,47 +120,102 @@ static void ML_BSSN_enforce_Body(cGH const * restrict const cctkGH, int const di
/* Precompute derivatives */
/* Calculate temporaries and grid functions */
- CCTK_REAL detgt = 1;
+ CCTK_REAL_VEC detgt = ToReal(1);
- CCTK_REAL gtu11 = INV(detgt)*(gt22L*gt33L - SQR(gt23L));
+ CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L)));
- CCTK_REAL gtu12 = (gt13L*gt23L - gt12L*gt33L)*INV(detgt);
+ CCTK_REAL_VEC gtu12 =
+ kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L)));
- CCTK_REAL gtu13 = (-(gt13L*gt22L) + gt12L*gt23L)*INV(detgt);
+ CCTK_REAL_VEC gtu13 =
+ kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L)));
- CCTK_REAL gtu22 = INV(detgt)*(gt11L*gt33L - SQR(gt13L));
+ CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L)));
- CCTK_REAL gtu23 = (gt12L*gt13L - gt11L*gt23L)*INV(detgt);
+ CCTK_REAL_VEC gtu23 =
+ kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L)));
- CCTK_REAL gtu33 = INV(detgt)*(gt11L*gt22L - SQR(gt12L));
+ CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L)));
- CCTK_REAL trAt = At11L*gtu11 + At22L*gtu22 + 2*(At12L*gtu12 +
- At13L*gtu13 + At23L*gtu23) + At33L*gtu33;
+ CCTK_REAL_VEC trAt =
+ kmadd(At11L,gtu11,kmadd(At22L,gtu22,kmadd(At33L,gtu33,kmul(kmadd(At12L,gtu12,kmadd(At13L,gtu13,kmul(At23L,gtu23))),ToReal(2)))));
- At11L = At11L - 0.333333333333333333333333333333*gt11L*trAt;
+ At11L =
+ kmadd(gt11L,kmul(trAt,ToReal(-0.333333333333333333333333333333)),At11L);
- At12L = At12L - 0.333333333333333333333333333333*gt12L*trAt;
+ At12L =
+ kmadd(gt12L,kmul(trAt,ToReal(-0.333333333333333333333333333333)),At12L);
- At13L = At13L - 0.333333333333333333333333333333*gt13L*trAt;
+ At13L =
+ kmadd(gt13L,kmul(trAt,ToReal(-0.333333333333333333333333333333)),At13L);
- At22L = At22L - 0.333333333333333333333333333333*gt22L*trAt;
+ At22L =
+ kmadd(gt22L,kmul(trAt,ToReal(-0.333333333333333333333333333333)),At22L);
- At23L = At23L - 0.333333333333333333333333333333*gt23L*trAt;
+ At23L =
+ kmadd(gt23L,kmul(trAt,ToReal(-0.333333333333333333333333333333)),At23L);
- At33L = At33L - 0.333333333333333333333333333333*gt33L*trAt;
+ At33L =
+ kmadd(gt33L,kmul(trAt,ToReal(-0.333333333333333333333333333333)),At33L);
- alphaL = fmax(alphaL,ToReal(MinimumLapse));
+ alphaL = kfmax(alphaL,ToReal(MinimumLapse));
+
+ /* If necessary, store only partial vectors when the first iteration is also the last */
+
+ if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0))
+ {
+ ptrdiff_t const elt_count_lo = lc_imin-i;
+ ptrdiff_t const elt_count_hi = lc_imax-i;
+ vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi);
+ break;
+ }
+
+ /* If necessary, store only partial vectors after the first iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0))
+ {
+ ptrdiff_t const elt_count = lc_imin-i;
+ vec_store_nta_partial_hi(alpha[index],alphaL,elt_count);
+ vec_store_nta_partial_hi(At11[index],At11L,elt_count);
+ vec_store_nta_partial_hi(At12[index],At12L,elt_count);
+ vec_store_nta_partial_hi(At13[index],At13L,elt_count);
+ vec_store_nta_partial_hi(At22[index],At22L,elt_count);
+ vec_store_nta_partial_hi(At23[index],At23L,elt_count);
+ vec_store_nta_partial_hi(At33[index],At33L,elt_count);
+ continue;
+ }
+
+ /* If necessary, store only partial vectors after the last iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0))
+ {
+ ptrdiff_t const elt_count = lc_imax-i;
+ vec_store_nta_partial_lo(alpha[index],alphaL,elt_count);
+ vec_store_nta_partial_lo(At11[index],At11L,elt_count);
+ vec_store_nta_partial_lo(At12[index],At12L,elt_count);
+ vec_store_nta_partial_lo(At13[index],At13L,elt_count);
+ vec_store_nta_partial_lo(At22[index],At22L,elt_count);
+ vec_store_nta_partial_lo(At23[index],At23L,elt_count);
+ vec_store_nta_partial_lo(At33[index],At33L,elt_count);
+ break;
+ }
/* Copy local copies back to grid functions */
- alpha[index] = alphaL;
- At11[index] = At11L;
- At12[index] = At12L;
- At13[index] = At13L;
- At22[index] = At22L;
- At23[index] = At23L;
- At33[index] = At33L;
+ vec_store_nta(alpha[index],alphaL);
+ vec_store_nta(At11[index],At11L);
+ vec_store_nta(At12[index],At12L);
+ vec_store_nta(At13[index],At13L);
+ vec_store_nta(At22[index],At22L);
+ vec_store_nta(At23[index],At23L);
+ vec_store_nta(At33[index],At33L);
}
- LC_ENDLOOP3 (ML_BSSN_enforce);
+ LC_ENDLOOP3VEC (ML_BSSN_enforce);
}
extern "C" void ML_BSSN_enforce(CCTK_ARGUMENTS)