aboutsummaryrefslogtreecommitdiff
path: root/ML_BSSN/src/ML_BSSN_RHSStaticBoundary.cc
diff options
context:
space:
mode:
Diffstat (limited to 'ML_BSSN/src/ML_BSSN_RHSStaticBoundary.cc')
-rw-r--r-- ML_BSSN/src/ML_BSSN_RHSStaticBoundary.cc 280
1 files changed, 191 insertions, 89 deletions
diff --git a/ML_BSSN/src/ML_BSSN_RHSStaticBoundary.cc b/ML_BSSN/src/ML_BSSN_RHSStaticBoundary.cc
index d5809e7..6d1c90d 100644
--- a/ML_BSSN/src/ML_BSSN_RHSStaticBoundary.cc
+++ b/ML_BSSN/src/ML_BSSN_RHSStaticBoundary.cc
@@ -13,13 +13,14 @@
#include "GenericFD.h"
#include "Differencing.h"
#include "loopcontrol.h"
+#include "vectors.h"
/* Define macros used in calculations */
#define INITVALUE (42)
#define QAD(x) (SQR(SQR(x)))
-#define INV(x) ((1.0) / (x))
-#define SQR(x) ((x) * (x))
-#define CUB(x) ((x) * (x) * (x))
+#define INV(x) (kdiv(ToReal(1.0),x))
+#define SQR(x) (kmul(x,x))
+#define CUB(x) (kmul(x,SQR(x)))
extern "C" void ML_BSSN_RHSStaticBoundary_SelectBCs(CCTK_ARGUMENTS)
{
@@ -88,47 +89,48 @@ static void ML_BSSN_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, in
ptrdiff_t const cdi = sizeof(CCTK_REAL) * di;
ptrdiff_t const cdj = sizeof(CCTK_REAL) * dj;
ptrdiff_t const cdk = sizeof(CCTK_REAL) * dk;
- CCTK_REAL const dx = ToReal(CCTK_DELTA_SPACE(0));
- CCTK_REAL const dy = ToReal(CCTK_DELTA_SPACE(1));
- CCTK_REAL const dz = ToReal(CCTK_DELTA_SPACE(2));
- CCTK_REAL const dt = ToReal(CCTK_DELTA_TIME);
- CCTK_REAL const dxi = INV(dx);
- CCTK_REAL const dyi = INV(dy);
- CCTK_REAL const dzi = INV(dz);
- CCTK_REAL const khalf = 0.5;
- CCTK_REAL const kthird = 1/3.0;
- CCTK_REAL const ktwothird = 2.0/3.0;
- CCTK_REAL const kfourthird = 4.0/3.0;
- CCTK_REAL const keightthird = 8.0/3.0;
- CCTK_REAL const hdxi = 0.5 * dxi;
- CCTK_REAL const hdyi = 0.5 * dyi;
- CCTK_REAL const hdzi = 0.5 * dzi;
+ CCTK_REAL_VEC const dx = ToReal(CCTK_DELTA_SPACE(0));
+ CCTK_REAL_VEC const dy = ToReal(CCTK_DELTA_SPACE(1));
+ CCTK_REAL_VEC const dz = ToReal(CCTK_DELTA_SPACE(2));
+ CCTK_REAL_VEC const dt = ToReal(CCTK_DELTA_TIME);
+ CCTK_REAL_VEC const dxi = INV(dx);
+ CCTK_REAL_VEC const dyi = INV(dy);
+ CCTK_REAL_VEC const dzi = INV(dz);
+ CCTK_REAL_VEC const khalf = ToReal(0.5);
+ CCTK_REAL_VEC const kthird = ToReal(1.0/3.0);
+ CCTK_REAL_VEC const ktwothird = ToReal(2.0/3.0);
+ CCTK_REAL_VEC const kfourthird = ToReal(4.0/3.0);
+ CCTK_REAL_VEC const keightthird = ToReal(8.0/3.0);
+ CCTK_REAL_VEC const hdxi = kmul(ToReal(0.5), dxi);
+ CCTK_REAL_VEC const hdyi = kmul(ToReal(0.5), dyi);
+ CCTK_REAL_VEC const hdzi = kmul(ToReal(0.5), dzi);
/* Initialize predefined quantities */
- CCTK_REAL const p1o12dx = 0.0833333333333333333333333333333*INV(dx);
- CCTK_REAL const p1o12dy = 0.0833333333333333333333333333333*INV(dy);
- CCTK_REAL const p1o12dz = 0.0833333333333333333333333333333*INV(dz);
- CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx)*INV(dy);
- CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx)*INV(dz);
- CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy)*INV(dz);
- CCTK_REAL const p1o24dx = 0.0416666666666666666666666666667*INV(dx);
- CCTK_REAL const p1o24dy = 0.0416666666666666666666666666667*INV(dy);
- CCTK_REAL const p1o24dz = 0.0416666666666666666666666666667*INV(dz);
- CCTK_REAL const p1o64dx = 0.015625*INV(dx);
- CCTK_REAL const p1o64dy = 0.015625*INV(dy);
- CCTK_REAL const p1o64dz = 0.015625*INV(dz);
- CCTK_REAL const p1odx = INV(dx);
- CCTK_REAL const p1ody = INV(dy);
- CCTK_REAL const p1odz = INV(dz);
- CCTK_REAL const pm1o12dx2 = -0.0833333333333333333333333333333*INV(SQR(dx));
- CCTK_REAL const pm1o12dy2 = -0.0833333333333333333333333333333*INV(SQR(dy));
- CCTK_REAL const pm1o12dz2 = -0.0833333333333333333333333333333*INV(SQR(dz));
+ CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444)));
+ CCTK_REAL_VEC const p1o24dx = kmul(INV(dx),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o24dy = kmul(INV(dy),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o24dz = kmul(INV(dz),ToReal(0.0416666666666666666666666666667));
+ CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625));
+ CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625));
+ CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625));
+ CCTK_REAL_VEC const p1odx = INV(dx);
+ CCTK_REAL_VEC const p1ody = INV(dy);
+ CCTK_REAL_VEC const p1odz = INV(dz);
+ CCTK_REAL_VEC const pm1o12dx2 = kmul(INV(SQR(dx)),ToReal(-0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const pm1o12dy2 = kmul(INV(SQR(dy)),ToReal(-0.0833333333333333333333333333333));
+ CCTK_REAL_VEC const pm1o12dz2 = kmul(INV(SQR(dz)),ToReal(-0.0833333333333333333333333333333));
/* Loop over the grid points */
#pragma omp parallel
- LC_LOOP3 (ML_BSSN_RHSStaticBoundary,
+ LC_LOOP3VEC (ML_BSSN_RHSStaticBoundary,
i,j,k, min[0],min[1],min[2], max[0],max[1],max[2],
- cctk_lsh[0],cctk_lsh[1],cctk_lsh[2])
+ cctk_lsh[0],cctk_lsh[1],cctk_lsh[2],
+ CCTK_REAL_VEC_SIZE)
{
ptrdiff_t const index = di*i + dj*j + dk*k;
@@ -141,84 +143,184 @@ static void ML_BSSN_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, in
/* Precompute derivatives */
/* Calculate temporaries and grid functions */
- CCTK_REAL phirhsL = 0;
+ CCTK_REAL_VEC phirhsL = ToReal(0);
- CCTK_REAL gt11rhsL = 0;
+ CCTK_REAL_VEC gt11rhsL = ToReal(0);
- CCTK_REAL gt12rhsL = 0;
+ CCTK_REAL_VEC gt12rhsL = ToReal(0);
- CCTK_REAL gt13rhsL = 0;
+ CCTK_REAL_VEC gt13rhsL = ToReal(0);
- CCTK_REAL gt22rhsL = 0;
+ CCTK_REAL_VEC gt22rhsL = ToReal(0);
- CCTK_REAL gt23rhsL = 0;
+ CCTK_REAL_VEC gt23rhsL = ToReal(0);
- CCTK_REAL gt33rhsL = 0;
+ CCTK_REAL_VEC gt33rhsL = ToReal(0);
- CCTK_REAL trKrhsL = 0;
+ CCTK_REAL_VEC trKrhsL = ToReal(0);
- CCTK_REAL At11rhsL = 0;
+ CCTK_REAL_VEC At11rhsL = ToReal(0);
- CCTK_REAL At12rhsL = 0;
+ CCTK_REAL_VEC At12rhsL = ToReal(0);
- CCTK_REAL At13rhsL = 0;
+ CCTK_REAL_VEC At13rhsL = ToReal(0);
- CCTK_REAL At22rhsL = 0;
+ CCTK_REAL_VEC At22rhsL = ToReal(0);
- CCTK_REAL At23rhsL = 0;
+ CCTK_REAL_VEC At23rhsL = ToReal(0);
- CCTK_REAL At33rhsL = 0;
+ CCTK_REAL_VEC At33rhsL = ToReal(0);
- CCTK_REAL Xt1rhsL = 0;
+ CCTK_REAL_VEC Xt1rhsL = ToReal(0);
- CCTK_REAL Xt2rhsL = 0;
+ CCTK_REAL_VEC Xt2rhsL = ToReal(0);
- CCTK_REAL Xt3rhsL = 0;
+ CCTK_REAL_VEC Xt3rhsL = ToReal(0);
- CCTK_REAL alpharhsL = 0;
+ CCTK_REAL_VEC alpharhsL = ToReal(0);
- CCTK_REAL ArhsL = 0;
+ CCTK_REAL_VEC ArhsL = ToReal(0);
- CCTK_REAL beta1rhsL = 0;
+ CCTK_REAL_VEC beta1rhsL = ToReal(0);
- CCTK_REAL beta2rhsL = 0;
+ CCTK_REAL_VEC beta2rhsL = ToReal(0);
- CCTK_REAL beta3rhsL = 0;
+ CCTK_REAL_VEC beta3rhsL = ToReal(0);
- CCTK_REAL B1rhsL = 0;
+ CCTK_REAL_VEC B1rhsL = ToReal(0);
- CCTK_REAL B2rhsL = 0;
+ CCTK_REAL_VEC B2rhsL = ToReal(0);
- CCTK_REAL B3rhsL = 0;
+ CCTK_REAL_VEC B3rhsL = ToReal(0);
+
+ /* If the vector overhangs both ends of the i-range (i < lc_imin and i+CCTK_REAL_VEC_SIZE > lc_imax), store only the part between elt_count_lo and elt_count_hi */
+
+ if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0))
+ {
+ ptrdiff_t const elt_count_lo = lc_imin-i;
+ ptrdiff_t const elt_count_hi = lc_imax-i;
+ vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi);
+ break;
+ }
+
+ /* If the vector starts before lc_imin, store only a partial vector (vec_store_nta_partial_hi) and continue with the next one */
+
+ if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0))
+ {
+ ptrdiff_t const elt_count = lc_imin-i;
+ vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count);
+ vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count);
+ vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count);
+ vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count);
+ vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count);
+ vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count);
+ vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count);
+ vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count);
+ vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count);
+ vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count);
+ vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count);
+ vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count);
+ vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count);
+ vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count);
+ vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count);
+ vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count);
+ vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count);
+ vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count);
+ vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count);
+ vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count);
+ vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count);
+ vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count);
+ vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count);
+ vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count);
+ vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count);
+ continue;
+ }
+
+ /* If the vector extends past lc_imax, store only a partial vector (vec_store_nta_partial_lo) and break out of the loop */
+
+ if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0))
+ {
+ ptrdiff_t const elt_count = lc_imax-i;
+ vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count);
+ vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count);
+ vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count);
+ vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count);
+ vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count);
+ vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count);
+ vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count);
+ vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count);
+ vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count);
+ vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count);
+ vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count);
+ vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count);
+ vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count);
+ vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count);
+ vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count);
+ vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count);
+ vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count);
+ vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count);
+ vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count);
+ vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count);
+ vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count);
+ vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count);
+ vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count);
+ vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count);
+ vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count);
+ break;
+ }
/* Copy local copies back to grid functions */
- alpharhs[index] = alpharhsL;
- Arhs[index] = ArhsL;
- At11rhs[index] = At11rhsL;
- At12rhs[index] = At12rhsL;
- At13rhs[index] = At13rhsL;
- At22rhs[index] = At22rhsL;
- At23rhs[index] = At23rhsL;
- At33rhs[index] = At33rhsL;
- B1rhs[index] = B1rhsL;
- B2rhs[index] = B2rhsL;
- B3rhs[index] = B3rhsL;
- beta1rhs[index] = beta1rhsL;
- beta2rhs[index] = beta2rhsL;
- beta3rhs[index] = beta3rhsL;
- gt11rhs[index] = gt11rhsL;
- gt12rhs[index] = gt12rhsL;
- gt13rhs[index] = gt13rhsL;
- gt22rhs[index] = gt22rhsL;
- gt23rhs[index] = gt23rhsL;
- gt33rhs[index] = gt33rhsL;
- phirhs[index] = phirhsL;
- trKrhs[index] = trKrhsL;
- Xt1rhs[index] = Xt1rhsL;
- Xt2rhs[index] = Xt2rhsL;
- Xt3rhs[index] = Xt3rhsL;
+ vec_store_nta(alpharhs[index],alpharhsL);
+ vec_store_nta(Arhs[index],ArhsL);
+ vec_store_nta(At11rhs[index],At11rhsL);
+ vec_store_nta(At12rhs[index],At12rhsL);
+ vec_store_nta(At13rhs[index],At13rhsL);
+ vec_store_nta(At22rhs[index],At22rhsL);
+ vec_store_nta(At23rhs[index],At23rhsL);
+ vec_store_nta(At33rhs[index],At33rhsL);
+ vec_store_nta(B1rhs[index],B1rhsL);
+ vec_store_nta(B2rhs[index],B2rhsL);
+ vec_store_nta(B3rhs[index],B3rhsL);
+ vec_store_nta(beta1rhs[index],beta1rhsL);
+ vec_store_nta(beta2rhs[index],beta2rhsL);
+ vec_store_nta(beta3rhs[index],beta3rhsL);
+ vec_store_nta(gt11rhs[index],gt11rhsL);
+ vec_store_nta(gt12rhs[index],gt12rhsL);
+ vec_store_nta(gt13rhs[index],gt13rhsL);
+ vec_store_nta(gt22rhs[index],gt22rhsL);
+ vec_store_nta(gt23rhs[index],gt23rhsL);
+ vec_store_nta(gt33rhs[index],gt33rhsL);
+ vec_store_nta(phirhs[index],phirhsL);
+ vec_store_nta(trKrhs[index],trKrhsL);
+ vec_store_nta(Xt1rhs[index],Xt1rhsL);
+ vec_store_nta(Xt2rhs[index],Xt2rhsL);
+ vec_store_nta(Xt3rhs[index],Xt3rhsL);
}
- LC_ENDLOOP3 (ML_BSSN_RHSStaticBoundary);
+ LC_ENDLOOP3VEC (ML_BSSN_RHSStaticBoundary);
}
extern "C" void ML_BSSN_RHSStaticBoundary(CCTK_ARGUMENTS)