diff options
author | Barry Wardell <barry.wardell@gmail.com> | 2012-04-27 10:59:10 +0100 |
---|---|---|
committer | Barry Wardell <barry.wardell@gmail.com> | 2012-04-27 10:59:10 +0100 |
commit | f71ebd54d52ffe0b4755ae64fcec82c48a1d76c7 (patch) | |
tree | bd516b6e2110d62fa5cf136b4b0fec593a3b9fd9 | |
parent | b5b0de9ec6f63188057c3b8b4bdb020e903738ae (diff) | |
parent | af04ce2e3d96d19b94dd4b416a6617b649c6da34 (diff) |
Merge branch 'master' into CCZ4
Conflicts:
m/McLachlan_BSSN.m
186 files changed, 8392 insertions, 12353 deletions
diff --git a/ML_ADM/param.ccl b/ML_ADM/param.ccl index 8c9efe8..5c2cbb2 100644 --- a/ML_ADM/param.ccl +++ b/ML_ADM/param.ccl @@ -64,6 +64,12 @@ CCTK_INT rhs_timelevels "Number of active RHS timelevels" STEERABLE=RECOVER } 1 restricted: +CCTK_INT other_timelevels "Number of active timelevels for non-evolved grid functions" STEERABLE=RECOVER +{ + 0:3 :: "" +} 1 + +restricted: CCTK_INT ML_ADM_Minkowski_calc_every "ML_ADM_Minkowski_calc_every" STEERABLE=ALWAYS { *:* :: "" diff --git a/ML_ADM/schedule.ccl b/ML_ADM/schedule.ccl index 93e7205..1745ad3 100644 --- a/ML_ADM/schedule.ccl +++ b/ML_ADM/schedule.ccl @@ -1,9 +1,15 @@ # File produced by Kranc -STORAGE: ML_Ham[1] +if (other_timelevels == 1) +{ + STORAGE: ML_Ham[1] +} -STORAGE: ML_mom[1] +if (other_timelevels == 1) +{ + STORAGE: ML_mom[1] +} if (timelevels == 1) { diff --git a/ML_ADM/src/ML_ADM_Minkowski.cc b/ML_ADM/src/ML_ADM_Minkowski.cc index 52b8daa..f220dd2 100644 --- a/ML_ADM/src/ML_ADM_Minkowski.cc +++ b/ML_ADM/src/ML_ADM_Minkowski.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" diff --git a/ML_ADM/src/ML_ADM_RHS.cc b/ML_ADM/src/ML_ADM_RHS.cc index 7e180d3..4aa5793 100644 --- a/ML_ADM/src/ML_ADM_RHS.cc +++ b/ML_ADM/src/ML_ADM_RHS.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -1305,19 +1306,25 @@ static void ML_ADM_RHS_Body(cGH const * restrict const cctkGH, int const dir, in CCTK_REAL_VEC gu11 = kmul(INV(detg),kmsub(g22L,g33L,SQR(g23L))); - CCTK_REAL_VEC gu12 = kmul(INV(detg),kmsub(g13L,g23L,kmul(g12L,g33L))); + CCTK_REAL_VEC gu12 = + kmul(INV(detg),kmsub(g13L,g23L,kmul(g12L,g33L))); - CCTK_REAL_VEC gu13 = kmul(INV(detg),kmsub(g12L,g23L,kmul(g13L,g22L))); + CCTK_REAL_VEC gu13 = + kmul(INV(detg),kmsub(g12L,g23L,kmul(g13L,g22L))); - CCTK_REAL_VEC gu21 = kmul(INV(detg),kmsub(g13L,g23L,kmul(g12L,g33L))); + CCTK_REAL_VEC gu21 = + kmul(INV(detg),kmsub(g13L,g23L,kmul(g12L,g33L))); CCTK_REAL_VEC gu22 = kmul(INV(detg),kmsub(g11L,g33L,SQR(g13L))); - CCTK_REAL_VEC gu23 = kmul(INV(detg),kmsub(g12L,g13L,kmul(g11L,g23L))); + CCTK_REAL_VEC gu23 = + kmul(INV(detg),kmsub(g12L,g13L,kmul(g11L,g23L))); - CCTK_REAL_VEC gu31 = kmul(INV(detg),kmsub(g12L,g23L,kmul(g13L,g22L))); + CCTK_REAL_VEC gu31 = + kmul(INV(detg),kmsub(g12L,g23L,kmul(g13L,g22L))); - CCTK_REAL_VEC gu32 = kmul(INV(detg),kmsub(g12L,g13L,kmul(g11L,g23L))); + CCTK_REAL_VEC gu32 = + kmul(INV(detg),kmsub(g12L,g13L,kmul(g11L,g23L))); CCTK_REAL_VEC gu33 = kmul(INV(detg),kmsub(g11L,g22L,SQR(g12L))); @@ -1394,31 +1401,31 @@ static void ML_ADM_RHS_Body(cGH const * restrict const cctkGH, int const dir, in kmul(ToReal(0.5),kmadd(gu12,JacPDstandardNth23g13,kmadd(gu21,JacPDstandardNth23g13,kmadd(kmadd(G111,G133,kmadd(G133,G212,kmadd(G112,G233,kmadd(G222,G233,kmadd(G113,G333,kmul(G223,G333)))))),ToReal(-2),kmadd(gu31,ksub(JacPDstandardNth13g33,JacPDstandardNth31g33),kmadd(gu32,ksub(JacPDstandardNth23g33,JacPDstandardNth32g33),knmsub(gu11,kadd(JacPDstandardNth33g11,JacPDstandardNth11g33),kmadd(gu12,ksub(JacPDstandardNth13g23,kadd(JacPDstandardNth33g12,JacPDstandardNth12g33)),kmadd(gu21,ksub(JacPDstandardNth13g23,kadd(JacPDstandardNth33g12,JacPDstandardNth21g33)),kmadd(kmadd(G133,G313,kmadd(G233,G323,kmul(gu11,JacPDstandardNth13g13))),ToReal(2),kmadd(SQR(G113),ToReal(2),kmadd(SQR(G223),ToReal(2),kmadd(gu22,ksub(kmsub(JacPDstandardNth23g23,ToReal(2),JacPDstandardNth33g22),JacPDstandardNth22g33),kmul(G123,kmul(G213,ToReal(4)))))))))))))))); CCTK_REAL_VEC Km11 = - kmadd(gu11,K11L,kmadd(gu12,K12L,kmul(gu13,K13L))); + kmadd(K11L,gu11,kmadd(K12L,gu12,kmul(K13L,gu13))); CCTK_REAL_VEC Km21 = - kmadd(gu21,K11L,kmadd(gu22,K12L,kmul(gu23,K13L))); + kmadd(K11L,gu21,kmadd(K12L,gu22,kmul(K13L,gu23))); CCTK_REAL_VEC Km31 = - kmadd(gu31,K11L,kmadd(gu32,K12L,kmul(gu33,K13L))); + kmadd(K11L,gu31,kmadd(K12L,gu32,kmul(K13L,gu33))); CCTK_REAL_VEC Km12 = - kmadd(gu11,K12L,kmadd(gu12,K22L,kmul(gu13,K23L))); + kmadd(K12L,gu11,kmadd(K22L,gu12,kmul(K23L,gu13))); CCTK_REAL_VEC Km22 = - kmadd(gu21,K12L,kmadd(gu22,K22L,kmul(gu23,K23L))); + kmadd(K12L,gu21,kmadd(K22L,gu22,kmul(K23L,gu23))); CCTK_REAL_VEC Km32 = - kmadd(gu31,K12L,kmadd(gu32,K22L,kmul(gu33,K23L))); + kmadd(K12L,gu31,kmadd(K22L,gu32,kmul(K23L,gu33))); CCTK_REAL_VEC Km13 = - kmadd(gu11,K13L,kmadd(gu12,K23L,kmul(gu13,K33L))); + kmadd(K13L,gu11,kmadd(K23L,gu12,kmul(K33L,gu13))); CCTK_REAL_VEC Km23 = - kmadd(gu21,K13L,kmadd(gu22,K23L,kmul(gu23,K33L))); + kmadd(K13L,gu21,kmadd(K23L,gu22,kmul(K33L,gu23))); CCTK_REAL_VEC Km33 = - kmadd(gu31,K13L,kmadd(gu32,K23L,kmul(gu33,K33L))); + kmadd(K13L,gu31,kmadd(K23L,gu32,kmul(K33L,gu33))); CCTK_REAL_VEC trK = kadd(Km11,kadd(Km22,Km33)); @@ -1441,22 +1448,22 @@ static void ML_ADM_RHS_Body(cGH const * restrict const cctkGH, int const dir, in kmadd(beta1L,JacPDstandardNth1g33,kmadd(beta2L,JacPDstandardNth2g33,kmadd(beta3L,JacPDstandardNth3g33,kmadd(alphaL,kmul(K33L,ToReal(-2)),kmul(kmadd(g13L,JacPDstandardNth3beta1,kmadd(g23L,JacPDstandardNth3beta2,kmul(g33L,JacPDstandardNth3beta3))),ToReal(2)))))); CCTK_REAL_VEC K11rhsL = - kmadd(G111,JacPDstandardNth1alpha,kmadd(beta1L,JacPDstandardNth1K11,kmadd(G211,JacPDstandardNth2alpha,kmadd(beta2L,JacPDstandardNth2K11,kmadd(G311,JacPDstandardNth3alpha,kmadd(beta3L,JacPDstandardNth3K11,kmadd(alphaL,kadd(R11,kmadd(kmadd(K12L,Km21,kmul(K13L,Km31)),ToReal(-2),kmul(K11L,kmadd(Km11,ToReal(-2),trK)))),kmsub(kmadd(JacPDstandardNth1beta1,K11L,kmadd(JacPDstandardNth1beta2,K12L,kmul(JacPDstandardNth1beta3,K13L))),ToReal(2),JacPDstandardNth11alpha)))))))); + kmadd(G111,JacPDstandardNth1alpha,kmadd(beta1L,JacPDstandardNth1K11,kmadd(G211,JacPDstandardNth2alpha,kmadd(beta2L,JacPDstandardNth2K11,kmadd(G311,JacPDstandardNth3alpha,kmadd(beta3L,JacPDstandardNth3K11,kmadd(alphaL,kadd(R11,kmadd(kmadd(K12L,Km21,kmul(K13L,Km31)),ToReal(-2),kmul(K11L,kmadd(Km11,ToReal(-2),trK)))),kmsub(kmadd(K11L,JacPDstandardNth1beta1,kmadd(K12L,JacPDstandardNth1beta2,kmul(K13L,JacPDstandardNth1beta3))),ToReal(2),JacPDstandardNth11alpha)))))))); CCTK_REAL_VEC K12rhsL = - kmadd(G112,JacPDstandardNth1alpha,kmadd(beta1L,JacPDstandardNth1K12,kmadd(G212,JacPDstandardNth2alpha,kmadd(beta2L,JacPDstandardNth2K12,kmadd(G312,JacPDstandardNth3alpha,kmadd(beta3L,JacPDstandardNth3K12,kmadd(JacPDstandardNth2beta1,K11L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth2beta2),K12L,kmadd(JacPDstandardNth2beta3,K13L,kmadd(JacPDstandardNth1beta2,K22L,kmadd(JacPDstandardNth1beta3,K23L,kmsub(alphaL,kadd(R12,kmadd(kmadd(K11L,Km12,kmul(K13L,Km32)),ToReal(-2),kmul(K12L,kmadd(Km22,ToReal(-2),trK)))),JacPDstandardNth12alpha)))))))))))); + kmadd(G112,JacPDstandardNth1alpha,kmadd(K22L,JacPDstandardNth1beta2,kmadd(K23L,JacPDstandardNth1beta3,kmadd(beta1L,JacPDstandardNth1K12,kmadd(G212,JacPDstandardNth2alpha,kmadd(K11L,JacPDstandardNth2beta1,kmadd(K12L,kadd(JacPDstandardNth1beta1,JacPDstandardNth2beta2),kmadd(K13L,JacPDstandardNth2beta3,kmadd(beta2L,JacPDstandardNth2K12,kmadd(G312,JacPDstandardNth3alpha,kmadd(beta3L,JacPDstandardNth3K12,kmsub(alphaL,kadd(R12,kmadd(kmadd(K11L,Km12,kmul(K13L,Km32)),ToReal(-2),kmul(K12L,kmadd(Km22,ToReal(-2),trK)))),JacPDstandardNth12alpha)))))))))))); CCTK_REAL_VEC K13rhsL = - kmadd(G113,JacPDstandardNth1alpha,kmadd(beta1L,JacPDstandardNth1K13,kmadd(G213,JacPDstandardNth2alpha,kmadd(beta2L,JacPDstandardNth2K13,kmadd(G313,JacPDstandardNth3alpha,kmadd(beta3L,JacPDstandardNth3K13,kmadd(JacPDstandardNth3beta1,K11L,kmadd(JacPDstandardNth3beta2,K12L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth3beta3),K13L,kmadd(JacPDstandardNth1beta2,K23L,kmadd(JacPDstandardNth1beta3,K33L,kmsub(alphaL,kadd(R13,kmadd(K13L,trK,kmul(kmadd(K11L,Km13,kmadd(K12L,Km23,kmul(K13L,Km33))),ToReal(-2)))),JacPDstandardNth13alpha)))))))))))); + kmadd(G113,JacPDstandardNth1alpha,kmadd(K23L,JacPDstandardNth1beta2,kmadd(K33L,JacPDstandardNth1beta3,kmadd(beta1L,JacPDstandardNth1K13,kmadd(G213,JacPDstandardNth2alpha,kmadd(beta2L,JacPDstandardNth2K13,kmadd(G313,JacPDstandardNth3alpha,kmadd(K11L,JacPDstandardNth3beta1,kmadd(K12L,JacPDstandardNth3beta2,kmadd(K13L,kadd(JacPDstandardNth1beta1,JacPDstandardNth3beta3),kmadd(beta3L,JacPDstandardNth3K13,kmsub(alphaL,kadd(R13,kmadd(K13L,trK,kmul(kmadd(K11L,Km13,kmadd(K12L,Km23,kmul(K13L,Km33))),ToReal(-2)))),JacPDstandardNth13alpha)))))))))))); CCTK_REAL_VEC K22rhsL = - kmadd(G122,JacPDstandardNth1alpha,kmadd(beta1L,JacPDstandardNth1K22,kmadd(G222,JacPDstandardNth2alpha,kmadd(beta2L,JacPDstandardNth2K22,kmadd(G322,JacPDstandardNth3alpha,kmadd(beta3L,JacPDstandardNth3K22,kmadd(alphaL,kadd(R22,kmadd(kmadd(K12L,Km12,kmul(K23L,Km32)),ToReal(-2),kmul(K22L,kmadd(Km22,ToReal(-2),trK)))),kmsub(kmadd(JacPDstandardNth2beta1,K12L,kmadd(JacPDstandardNth2beta2,K22L,kmul(JacPDstandardNth2beta3,K23L))),ToReal(2),JacPDstandardNth22alpha)))))))); + kmadd(G122,JacPDstandardNth1alpha,kmadd(beta1L,JacPDstandardNth1K22,kmadd(G222,JacPDstandardNth2alpha,kmadd(beta2L,JacPDstandardNth2K22,kmadd(G322,JacPDstandardNth3alpha,kmadd(beta3L,JacPDstandardNth3K22,kmadd(alphaL,kadd(R22,kmadd(kmadd(K12L,Km12,kmul(K23L,Km32)),ToReal(-2),kmul(K22L,kmadd(Km22,ToReal(-2),trK)))),kmsub(kmadd(K12L,JacPDstandardNth2beta1,kmadd(K22L,JacPDstandardNth2beta2,kmul(K23L,JacPDstandardNth2beta3))),ToReal(2),JacPDstandardNth22alpha)))))))); CCTK_REAL_VEC K23rhsL = - kmadd(G123,JacPDstandardNth1alpha,kmadd(beta1L,JacPDstandardNth1K23,kmadd(G223,JacPDstandardNth2alpha,kmadd(beta2L,JacPDstandardNth2K23,kmadd(G323,JacPDstandardNth3alpha,kmadd(beta3L,JacPDstandardNth3K23,kmadd(JacPDstandardNth3beta1,K12L,kmadd(JacPDstandardNth2beta1,K13L,kmadd(JacPDstandardNth3beta2,K22L,kmadd(kadd(JacPDstandardNth2beta2,JacPDstandardNth3beta3),K23L,kmadd(JacPDstandardNth2beta3,K33L,kmsub(alphaL,kadd(R23,kmadd(K23L,trK,kmul(kmadd(K12L,Km13,kmadd(K22L,Km23,kmul(K23L,Km33))),ToReal(-2)))),JacPDstandardNth23alpha)))))))))))); + kmadd(G123,JacPDstandardNth1alpha,kmadd(beta1L,JacPDstandardNth1K23,kmadd(G223,JacPDstandardNth2alpha,kmadd(K13L,JacPDstandardNth2beta1,kmadd(K33L,JacPDstandardNth2beta3,kmadd(beta2L,JacPDstandardNth2K23,kmadd(G323,JacPDstandardNth3alpha,kmadd(K12L,JacPDstandardNth3beta1,kmadd(K22L,JacPDstandardNth3beta2,kmadd(K23L,kadd(JacPDstandardNth2beta2,JacPDstandardNth3beta3),kmadd(beta3L,JacPDstandardNth3K23,kmsub(alphaL,kadd(R23,kmadd(K23L,trK,kmul(kmadd(K12L,Km13,kmadd(K22L,Km23,kmul(K23L,Km33))),ToReal(-2)))),JacPDstandardNth23alpha)))))))))))); CCTK_REAL_VEC K33rhsL = - kmadd(G133,JacPDstandardNth1alpha,kmadd(beta1L,JacPDstandardNth1K33,kmadd(G233,JacPDstandardNth2alpha,kmadd(beta2L,JacPDstandardNth2K33,kmadd(G333,JacPDstandardNth3alpha,kmadd(beta3L,JacPDstandardNth3K33,kmadd(alphaL,kadd(R33,kmadd(K33L,trK,kmul(kmadd(K13L,Km13,kmadd(K23L,Km23,kmul(K33L,Km33))),ToReal(-2)))),kmsub(kmadd(JacPDstandardNth3beta1,K13L,kmadd(JacPDstandardNth3beta2,K23L,kmul(JacPDstandardNth3beta3,K33L))),ToReal(2),JacPDstandardNth33alpha)))))))); + kmadd(G133,JacPDstandardNth1alpha,kmadd(beta1L,JacPDstandardNth1K33,kmadd(G233,JacPDstandardNth2alpha,kmadd(beta2L,JacPDstandardNth2K33,kmadd(G333,JacPDstandardNth3alpha,kmadd(beta3L,JacPDstandardNth3K33,kmadd(alphaL,kadd(R33,kmadd(K33L,trK,kmul(kmadd(K13L,Km13,kmadd(K23L,Km23,kmul(K33L,Km33))),ToReal(-2)))),kmsub(kmadd(K13L,JacPDstandardNth3beta1,kmadd(K23L,JacPDstandardNth3beta2,kmul(K33L,JacPDstandardNth3beta3))),ToReal(2),JacPDstandardNth33alpha)))))))); CCTK_REAL_VEC alpharhsL = ToReal(0); diff --git a/ML_ADM/src/ML_ADM_boundary.cc b/ML_ADM/src/ML_ADM_boundary.cc index a205016..d4d81cf 100644 --- a/ML_ADM/src/ML_ADM_boundary.cc +++ b/ML_ADM/src/ML_ADM_boundary.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" diff --git a/ML_ADM/src/ML_ADM_constraints.cc b/ML_ADM/src/ML_ADM_constraints.cc index 8385163..3175e41 100644 --- a/ML_ADM/src/ML_ADM_constraints.cc +++ b/ML_ADM/src/ML_ADM_constraints.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -1079,19 +1080,25 @@ static void ML_ADM_constraints_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC gu11 = kmul(INV(detg),kmsub(g22L,g33L,SQR(g23L))); - CCTK_REAL_VEC gu12 = kmul(INV(detg),kmsub(g13L,g23L,kmul(g12L,g33L))); + CCTK_REAL_VEC gu12 = + kmul(INV(detg),kmsub(g13L,g23L,kmul(g12L,g33L))); - CCTK_REAL_VEC gu13 = kmul(INV(detg),kmsub(g12L,g23L,kmul(g13L,g22L))); + CCTK_REAL_VEC gu13 = + kmul(INV(detg),kmsub(g12L,g23L,kmul(g13L,g22L))); - CCTK_REAL_VEC gu21 = kmul(INV(detg),kmsub(g13L,g23L,kmul(g12L,g33L))); + CCTK_REAL_VEC gu21 = + kmul(INV(detg),kmsub(g13L,g23L,kmul(g12L,g33L))); CCTK_REAL_VEC gu22 = kmul(INV(detg),kmsub(g11L,g33L,SQR(g13L))); - CCTK_REAL_VEC gu23 = kmul(INV(detg),kmsub(g12L,g13L,kmul(g11L,g23L))); + CCTK_REAL_VEC gu23 = + kmul(INV(detg),kmsub(g12L,g13L,kmul(g11L,g23L))); - CCTK_REAL_VEC gu31 = kmul(INV(detg),kmsub(g12L,g23L,kmul(g13L,g22L))); + CCTK_REAL_VEC gu31 = + kmul(INV(detg),kmsub(g12L,g23L,kmul(g13L,g22L))); - CCTK_REAL_VEC gu32 = kmul(INV(detg),kmsub(g12L,g13L,kmul(g11L,g23L))); + CCTK_REAL_VEC gu32 = + kmul(INV(detg),kmsub(g12L,g13L,kmul(g11L,g23L))); CCTK_REAL_VEC gu33 = kmul(INV(detg),kmsub(g11L,g22L,SQR(g12L))); @@ -1150,52 +1157,52 @@ static void ML_ADM_constraints_Body(cGH const * restrict const cctkGH, int const kmul(ToReal(0.5),kmadd(gu33,JacPDstandardNth3g33,kmadd(gu31,kmsub(JacPDstandardNth3g13,ToReal(2),JacPDstandardNth1g33),kmul(gu32,kmsub(JacPDstandardNth3g23,ToReal(2),JacPDstandardNth2g33))))); CCTK_REAL_VEC R11 = - kmul(ToReal(0.5),knmsub(gu22,kadd(JacPDstandardNth11g22,kmadd(JacPDstandardNth21g12,ToReal(-2),JacPDstandardNth22g11)),kmadd(gu12,ksub(JacPDstandardNth21g11,JacPDstandardNth12g11),kmadd(gu13,ksub(JacPDstandardNth31g11,JacPDstandardNth13g11),kmadd(gu23,kadd(JacPDstandardNth21g13,ksub(JacPDstandardNth31g12,kadd(JacPDstandardNth23g11,JacPDstandardNth11g23))),kmadd(gu32,kadd(JacPDstandardNth21g13,ksub(JacPDstandardNth31g12,kadd(JacPDstandardNth32g11,JacPDstandardNth11g23))),kmadd(ToReal(2),kmadd(gu23,kmadd(G112,kmul(g13L,G313),kmadd(G212,kmul(g23L,G313),kmadd(G312,kmul(G313,g33L),kmadd(g11L,kmsub(G112,G113,kmul(G111,G123)),kmadd(g12L,kmadd(G113,G212,kmsub(G112,G213,kmadd(G111,G223,kmul(G123,G211)))),kmadd(g22L,kmsub(G212,G213,kmul(G211,G223)),kmadd(g13L,kmsub(G113,G312,kmul(G111,G323)),kmsub(g23L,kmsub(G213,G312,kmul(G211,G323)),kmul(G311,kmadd(G123,g13L,kmadd(G323,g33L,kmul(G223,g23L)))))))))))),kmadd(gu32,kmadd(G112,kmul(g13L,G313),kmadd(G212,kmul(g23L,G313),kmadd(G312,kmul(G313,g33L),kmadd(g11L,kmsub(G112,G113,kmul(G111,G123)),kmadd(g12L,kmadd(G113,G212,kmsub(G112,G213,kmadd(G111,G223,kmul(G123,G211)))),kmadd(g22L,kmsub(G212,G213,kmul(G211,G223)),kmadd(g13L,kmsub(G113,G312,kmul(G111,G323)),kmsub(g23L,kmsub(G213,G312,kmul(G211,G323)),kmul(G311,kmadd(G123,g13L,kmadd(G323,g33L,kmul(G223,g23L)))))))))))),kmul(gu22,kmadd(g22L,SQR(G212),knmsub(g12L,kmadd(G122,G211,kmadd(G111,G222,kmul(G112,kmul(G212,ToReal(-2))))),kmadd(g11L,knmsub(G111,G122,SQR(G112)),knmsub(G222,kmadd(g23L,G311,kmul(G211,g22L)),kmadd(g33L,knmsub(G311,G322,SQR(G312)),kmadd(g13L,knmsub(G122,G311,kmsub(G112,kmul(G312,ToReal(2)),kmul(G111,G322))),kmul(g23L,kmsub(G212,kmul(G312,ToReal(2)),kmul(G211,G322)))))))))))),kmul(gu33,ksub(ksub(kmadd(JacPDstandardNth31g13,ToReal(2),kmul(ToReal(2),kmadd(g22L,SQR(G213),knmsub(g12L,kmadd(G133,G211,kmadd(G111,G233,kmul(G113,kmul(G213,ToReal(-2))))),kmadd(g11L,knmsub(G111,G133,SQR(G113)),knmsub(G233,kmadd(g23L,G311,kmul(G211,g22L)),kmadd(g33L,knmsub(G311,G333,SQR(G313)),kmadd(g13L,knmsub(G133,G311,kmsub(G113,kmul(G313,ToReal(2)),kmul(G111,G333))),kmul(g23L,kmsub(G213,kmul(G313,ToReal(2)),kmul(G211,G333))))))))))),JacPDstandardNth33g11),JacPDstandardNth11g33))))))))); + kmul(ToReal(0.5),knmsub(gu22,kadd(JacPDstandardNth11g22,kmadd(JacPDstandardNth21g12,ToReal(-2),JacPDstandardNth22g11)),kmadd(gu12,ksub(JacPDstandardNth21g11,JacPDstandardNth12g11),kmadd(gu13,ksub(JacPDstandardNth31g11,JacPDstandardNth13g11),kmadd(gu23,kadd(JacPDstandardNth21g13,ksub(JacPDstandardNth31g12,kadd(JacPDstandardNth23g11,JacPDstandardNth11g23))),kmadd(gu32,kadd(JacPDstandardNth21g13,ksub(JacPDstandardNth31g12,kadd(JacPDstandardNth32g11,JacPDstandardNth11g23))),kmadd(ToReal(2),kmadd(gu23,kmadd(g13L,kmul(G112,G313),kmadd(g23L,kmul(G212,G313),kmadd(g33L,kmul(G312,G313),kmadd(g11L,kmsub(G112,G113,kmul(G111,G123)),kmadd(g12L,kmadd(G113,G212,kmsub(G112,G213,kmadd(G111,G223,kmul(G123,G211)))),kmadd(g22L,kmsub(G212,G213,kmul(G211,G223)),knmsub(G311,kmadd(g13L,G123,kmadd(g33L,G323,kmul(g23L,G223))),kmadd(g13L,kmsub(G113,G312,kmul(G111,G323)),kmul(g23L,kmsub(G213,G312,kmul(G211,G323))))))))))),kmadd(gu32,kmadd(g13L,kmul(G112,G313),kmadd(g23L,kmul(G212,G313),kmadd(g33L,kmul(G312,G313),kmadd(g11L,kmsub(G112,G113,kmul(G111,G123)),kmadd(g12L,kmadd(G113,G212,kmsub(G112,G213,kmadd(G111,G223,kmul(G123,G211)))),kmadd(g22L,kmsub(G212,G213,kmul(G211,G223)),knmsub(G311,kmadd(g13L,G123,kmadd(g33L,G323,kmul(g23L,G223))),kmadd(g13L,kmsub(G113,G312,kmul(G111,G323)),kmul(g23L,kmsub(G213,G312,kmul(G211,G323))))))))))),kmul(gu22,kmadd(g22L,SQR(G212),knmsub(g12L,kmadd(G122,G211,kmadd(G111,G222,kmul(G112,kmul(G212,ToReal(-2))))),kmadd(g11L,knmsub(G111,G122,SQR(G112)),knmsub(G222,kmadd(g23L,G311,kmul(g22L,G211)),kmadd(g33L,knmsub(G311,G322,SQR(G312)),kmadd(g13L,knmsub(G122,G311,kmsub(G112,kmul(G312,ToReal(2)),kmul(G111,G322))),kmul(g23L,kmsub(G212,kmul(G312,ToReal(2)),kmul(G211,G322)))))))))))),kmul(gu33,ksub(ksub(kmadd(JacPDstandardNth31g13,ToReal(2),kmul(ToReal(2),kmadd(g22L,SQR(G213),knmsub(g12L,kmadd(G133,G211,kmadd(G111,G233,kmul(G113,kmul(G213,ToReal(-2))))),kmadd(g11L,knmsub(G111,G133,SQR(G113)),knmsub(G233,kmadd(g23L,G311,kmul(g22L,G211)),kmadd(g33L,knmsub(G311,G333,SQR(G313)),kmadd(g13L,knmsub(G133,G311,kmsub(G113,kmul(G313,ToReal(2)),kmul(G111,G333))),kmul(g23L,kmsub(G213,kmul(G313,ToReal(2)),kmul(G211,G333))))))))))),JacPDstandardNth33g11),JacPDstandardNth11g33))))))))); CCTK_REAL_VEC R12 = - kmul(ToReal(0.5),kmadd(gu12,kadd(JacPDstandardNth11g22,kmadd(JacPDstandardNth12g12,ToReal(-2),JacPDstandardNth22g11)),kmadd(gu22,ksub(JacPDstandardNth21g22,JacPDstandardNth12g22),kmadd(gu13,kadd(JacPDstandardNth11g23,ksub(JacPDstandardNth32g11,kadd(JacPDstandardNth13g12,JacPDstandardNth12g13))),kmadd(gu23,kadd(JacPDstandardNth21g23,ksub(JacPDstandardNth32g12,kadd(JacPDstandardNth23g12,JacPDstandardNth12g23))),kmadd(gu32,kadd(JacPDstandardNth22g13,ksub(JacPDstandardNth31g22,kadd(JacPDstandardNth32g12,JacPDstandardNth12g23))),kmadd(gu33,kadd(JacPDstandardNth31g23,ksub(JacPDstandardNth32g13,kadd(JacPDstandardNth33g12,JacPDstandardNth12g33))),kmul(kmadd(gu21,kmadd(G122,kmadd(G111,g11L,kmadd(g12L,G211,kmul(g13L,G311))),kmadd(G222,kmadd(G211,g22L,kmul(g23L,G311)),kmadd(G322,kmadd(G111,g13L,kmadd(G211,g23L,kmul(G311,g33L))),kmadd(kmadd(G112,kmul(g13L,G312),kmul(G212,kmul(g23L,G312))),ToReal(-2),kmsub(g12L,kmadd(G111,G222,kmul(G112,kmul(G212,ToReal(-2)))),kmadd(g11L,SQR(G112),kmadd(g33L,SQR(G312),kmul(g22L,SQR(G212))))))))),kmadd(gu31,kmadd(G111,kmul(g13L,G323),kmadd(G211,kmul(g23L,G323),kmadd(G311,kmadd(G123,g13L,kmadd(G223,g23L,kmul(G323,g33L))),kmadd(g11L,kmsub(G111,G123,kmul(G112,G113)),kmadd(g12L,kmadd(G123,G211,kmsub(G111,G223,kmadd(G112,G213,kmul(G113,G212)))),kmsub(g22L,kmsub(G211,G223,kmul(G212,G213)),kmadd(G312,kmadd(G113,g13L,kmadd(G313,g33L,kmul(G213,g23L))),kmul(G313,kmadd(G212,g23L,kmul(G112,g13L)))))))))),kmadd(gu23,kmadd(G113,kmul(g13L,G322),kmadd(G213,kmul(g23L,G322),kmadd(G313,kmul(G322,g33L),kmadd(g11L,kmsub(G113,G122,kmul(G112,G123)),kmadd(g12L,kmadd(G122,G213,kmsub(G113,G222,kmadd(G112,G223,kmul(G123,G212)))),kmadd(g22L,kmsub(G213,G222,kmul(G212,G223)),kmadd(g13L,kmsub(G122,G313,kmul(G112,G323)),kmsub(g23L,kmsub(G222,G313,kmul(G212,G323)),kmul(G312,kmadd(G123,g13L,kmadd(G323,g33L,kmul(G223,g23L)))))))))))),kmul(gu33,kmadd(G113,kmul(g13L,G323),kmadd(G213,kmul(g23L,G323),kmadd(G313,kmul(G323,g33L),kmadd(g11L,kmsub(G113,G123,kmul(G112,G133)),kmadd(g12L,kmadd(G123,G213,kmsub(G113,G223,kmadd(G112,G233,kmul(G133,G212)))),kmadd(g22L,kmsub(G213,G223,kmul(G212,G233)),kmadd(g13L,kmsub(G123,G313,kmul(G112,G333)),kmsub(g23L,kmsub(G223,G313,kmul(G212,G333)),kmul(G312,kmadd(G133,g13L,kmadd(G333,g33L,kmul(G233,g23L)))))))))))))))),ToReal(2))))))))); + kmul(ToReal(0.5),kmadd(gu12,kadd(JacPDstandardNth11g22,kmadd(JacPDstandardNth12g12,ToReal(-2),JacPDstandardNth22g11)),kmadd(gu22,ksub(JacPDstandardNth21g22,JacPDstandardNth12g22),kmadd(gu13,kadd(JacPDstandardNth11g23,ksub(JacPDstandardNth32g11,kadd(JacPDstandardNth13g12,JacPDstandardNth12g13))),kmadd(gu23,kadd(JacPDstandardNth21g23,ksub(JacPDstandardNth32g12,kadd(JacPDstandardNth23g12,JacPDstandardNth12g23))),kmadd(gu32,kadd(JacPDstandardNth22g13,ksub(JacPDstandardNth31g22,kadd(JacPDstandardNth32g12,JacPDstandardNth12g23))),kmadd(gu33,kadd(JacPDstandardNth31g23,ksub(JacPDstandardNth32g13,kadd(JacPDstandardNth33g12,JacPDstandardNth12g33))),kmul(kmadd(gu21,kmadd(G122,kmadd(g11L,G111,kmadd(g12L,G211,kmul(g13L,G311))),kmadd(G222,kmadd(g22L,G211,kmul(g23L,G311)),kmadd(kmadd(g13L,G111,kmadd(g23L,G211,kmul(g33L,G311))),G322,kmadd(kmadd(g13L,kmul(G112,G312),kmul(g23L,kmul(G212,G312))),ToReal(-2),kmsub(g12L,kmadd(G111,G222,kmul(G112,kmul(G212,ToReal(-2)))),kmadd(g11L,SQR(G112),kmadd(g33L,SQR(G312),kmul(g22L,SQR(G212))))))))),kmadd(gu31,kmadd(g13L,kmul(G111,G323),kmadd(g23L,kmul(G211,G323),kmadd(G311,kmadd(g13L,G123,kmadd(g23L,G223,kmul(g33L,G323))),kmadd(g11L,kmsub(G111,G123,kmul(G112,G113)),knmsub(G313,kmadd(g23L,G212,kmul(g13L,G112)),kmadd(g12L,kmadd(G123,G211,kmsub(G111,G223,kmadd(G112,G213,kmul(G113,G212)))),kmsub(g22L,kmsub(G211,G223,kmul(G212,G213)),kmul(G312,kmadd(g13L,G113,kmadd(g33L,G313,kmul(g23L,G213))))))))))),kmadd(gu23,kmadd(g13L,kmul(G113,G322),kmadd(g23L,kmul(G213,G322),kmadd(g33L,kmul(G313,G322),kmadd(g11L,kmsub(G113,G122,kmul(G112,G123)),kmadd(g12L,kmadd(G122,G213,kmsub(G113,G222,kmadd(G112,G223,kmul(G123,G212)))),kmadd(g22L,kmsub(G213,G222,kmul(G212,G223)),knmsub(G312,kmadd(g13L,G123,kmadd(g33L,G323,kmul(g23L,G223))),kmadd(g13L,kmsub(G122,G313,kmul(G112,G323)),kmul(g23L,kmsub(G222,G313,kmul(G212,G323))))))))))),kmul(gu33,kmadd(g13L,kmul(G113,G323),kmadd(g23L,kmul(G213,G323),kmadd(g33L,kmul(G313,G323),kmadd(g11L,kmsub(G113,G123,kmul(G112,G133)),kmadd(g12L,kmadd(G123,G213,kmsub(G113,G223,kmadd(G112,G233,kmul(G133,G212)))),kmadd(g22L,kmsub(G213,G223,kmul(G212,G233)),knmsub(G312,kmadd(g13L,G133,kmadd(g33L,G333,kmul(g23L,G233))),kmadd(g13L,kmsub(G123,G313,kmul(G112,G333)),kmul(g23L,kmsub(G223,G313,kmul(G212,G333))))))))))))))),ToReal(2))))))))); CCTK_REAL_VEC R13 = - kmul(ToReal(0.5),kmadd(gu13,kadd(JacPDstandardNth11g33,kmadd(JacPDstandardNth13g13,ToReal(-2),JacPDstandardNth33g11)),kmadd(gu12,kadd(JacPDstandardNth11g23,ksub(JacPDstandardNth23g11,kadd(JacPDstandardNth13g12,JacPDstandardNth12g13))),kmadd(gu33,ksub(JacPDstandardNth31g33,JacPDstandardNth13g33),kmadd(gu22,kadd(JacPDstandardNth21g23,ksub(JacPDstandardNth23g12,kadd(JacPDstandardNth22g13,JacPDstandardNth13g22))),kmadd(gu23,kadd(JacPDstandardNth21g33,ksub(JacPDstandardNth33g12,kadd(JacPDstandardNth23g13,JacPDstandardNth13g23))),kmadd(gu32,kadd(JacPDstandardNth23g13,ksub(JacPDstandardNth31g23,kadd(JacPDstandardNth32g13,JacPDstandardNth13g23))),kmul(kmadd(gu31,kmadd(G133,kmadd(G111,g11L,kmadd(g12L,G211,kmul(g13L,G311))),kmadd(G233,kmadd(G211,g22L,kmul(g23L,G311)),kmadd(G333,kmadd(G111,g13L,kmadd(G211,g23L,kmul(G311,g33L))),kmadd(kmadd(G113,kmul(g13L,G313),kmul(G213,kmul(g23L,G313))),ToReal(-2),kmsub(g12L,kmadd(G111,G233,kmul(G113,kmul(G213,ToReal(-2)))),kmadd(g11L,SQR(G113),kmadd(g33L,SQR(G313),kmul(g22L,SQR(G213))))))))),kmadd(gu21,kmadd(G111,kmul(g13L,G323),kmadd(G211,kmul(g23L,G323),kmadd(G311,kmadd(G123,g13L,kmadd(G223,g23L,kmul(G323,g33L))),kmadd(g11L,kmsub(G111,G123,kmul(G112,G113)),kmadd(g12L,kmadd(G123,G211,kmsub(G111,G223,kmadd(G112,G213,kmul(G113,G212)))),kmsub(g22L,kmsub(G211,G223,kmul(G212,G213)),kmadd(G312,kmadd(G113,g13L,kmadd(G313,g33L,kmul(G213,g23L))),kmul(G313,kmadd(G212,g23L,kmul(G112,g13L)))))))))),kmadd(gu22,kmadd(G112,kmul(g13L,G323),kmadd(G212,kmul(g23L,G323),kmadd(G312,kmadd(G123,g13L,kmadd(G223,g23L,kmul(G323,g33L))),kmadd(g11L,kmsub(G112,G123,kmul(G113,G122)),kmadd(g12L,kmadd(G123,G212,kmsub(G112,G223,kmadd(G113,G222,kmul(G122,G213)))),kmsub(g22L,kmsub(G212,G223,kmul(G213,G222)),kmadd(G313,kmadd(G122,g13L,kmadd(G322,g33L,kmul(G222,g23L))),kmul(G322,kmadd(G213,g23L,kmul(G113,g13L)))))))))),kmul(gu32,kmadd(G112,kmul(g13L,G333),kmadd(G212,kmul(g23L,G333),kmadd(G312,kmadd(G133,g13L,kmadd(G233,g23L,kmul(G333,g33L))),kmadd(g11L,kmsub(G112,G133,kmul(G113,G123)),kmadd(g12L,kmadd(G133,G212,kmsub(G112,G233,kmadd(G113,G223,kmul(G123,G213)))),kmsub(g22L,kmsub(G212,G233,kmul(G213,G223)),kmadd(G313,kmadd(G123,g13L,kmadd(G323,g33L,kmul(G223,g23L))),kmul(G323,kmadd(G213,g23L,kmul(G113,g13L)))))))))))))),ToReal(2))))))))); + kmul(ToReal(0.5),kmadd(gu13,kadd(JacPDstandardNth11g33,kmadd(JacPDstandardNth13g13,ToReal(-2),JacPDstandardNth33g11)),kmadd(gu12,kadd(JacPDstandardNth11g23,ksub(JacPDstandardNth23g11,kadd(JacPDstandardNth13g12,JacPDstandardNth12g13))),kmadd(gu33,ksub(JacPDstandardNth31g33,JacPDstandardNth13g33),kmadd(gu22,kadd(JacPDstandardNth21g23,ksub(JacPDstandardNth23g12,kadd(JacPDstandardNth22g13,JacPDstandardNth13g22))),kmadd(gu23,kadd(JacPDstandardNth21g33,ksub(JacPDstandardNth33g12,kadd(JacPDstandardNth23g13,JacPDstandardNth13g23))),kmadd(gu32,kadd(JacPDstandardNth23g13,ksub(JacPDstandardNth31g23,kadd(JacPDstandardNth32g13,JacPDstandardNth13g23))),kmul(kmadd(gu31,kmadd(G133,kmadd(g11L,G111,kmadd(g12L,G211,kmul(g13L,G311))),kmadd(G233,kmadd(g22L,G211,kmul(g23L,G311)),kmadd(kmadd(g13L,G111,kmadd(g23L,G211,kmul(g33L,G311))),G333,kmadd(kmadd(g13L,kmul(G113,G313),kmul(g23L,kmul(G213,G313))),ToReal(-2),kmsub(g12L,kmadd(G111,G233,kmul(G113,kmul(G213,ToReal(-2)))),kmadd(g11L,SQR(G113),kmadd(g33L,SQR(G313),kmul(g22L,SQR(G213))))))))),kmadd(gu21,kmadd(g13L,kmul(G111,G323),kmadd(g23L,kmul(G211,G323),kmadd(G311,kmadd(g13L,G123,kmadd(g23L,G223,kmul(g33L,G323))),kmadd(g11L,kmsub(G111,G123,kmul(G112,G113)),knmsub(G313,kmadd(g23L,G212,kmul(g13L,G112)),kmadd(g12L,kmadd(G123,G211,kmsub(G111,G223,kmadd(G112,G213,kmul(G113,G212)))),kmsub(g22L,kmsub(G211,G223,kmul(G212,G213)),kmul(G312,kmadd(g13L,G113,kmadd(g33L,G313,kmul(g23L,G213))))))))))),kmadd(gu22,kmadd(g13L,kmul(G112,G323),kmadd(g23L,kmul(G212,G323),kmadd(G312,kmadd(g13L,G123,kmadd(g23L,G223,kmul(g33L,G323))),kmadd(g11L,kmsub(G112,G123,kmul(G113,G122)),knmsub(G322,kmadd(g23L,G213,kmul(g13L,G113)),kmadd(g12L,kmadd(G123,G212,kmsub(G112,G223,kmadd(G113,G222,kmul(G122,G213)))),kmsub(g22L,kmsub(G212,G223,kmul(G213,G222)),kmul(G313,kmadd(g13L,G122,kmadd(g33L,G322,kmul(g23L,G222))))))))))),kmul(gu32,kmadd(g13L,kmul(G112,G333),kmadd(g23L,kmul(G212,G333),kmadd(G312,kmadd(g13L,G133,kmadd(g23L,G233,kmul(g33L,G333))),kmadd(g11L,kmsub(G112,G133,kmul(G113,G123)),knmsub(G323,kmadd(g23L,G213,kmul(g13L,G113)),kmadd(g12L,kmadd(G133,G212,kmsub(G112,G233,kmadd(G113,G223,kmul(G123,G213)))),kmsub(g22L,kmsub(G212,G233,kmul(G213,G223)),kmul(G313,kmadd(g13L,G123,kmadd(g33L,G323,kmul(g23L,G223))))))))))))))),ToReal(2))))))))); CCTK_REAL_VEC R22 = - kmul(ToReal(0.5),knmsub(gu11,kadd(JacPDstandardNth11g22,kmadd(JacPDstandardNth12g12,ToReal(-2),JacPDstandardNth22g11)),kmadd(gu21,ksub(JacPDstandardNth12g22,JacPDstandardNth21g22),kmadd(gu13,kadd(JacPDstandardNth12g23,ksub(JacPDstandardNth32g12,kadd(JacPDstandardNth22g13,JacPDstandardNth13g22))),kmadd(gu23,ksub(JacPDstandardNth32g22,JacPDstandardNth23g22),kmadd(gu31,kadd(JacPDstandardNth12g23,ksub(JacPDstandardNth32g12,kadd(JacPDstandardNth31g22,JacPDstandardNth22g13))),kmadd(ToReal(2),kmadd(gu13,kmadd(G112,kmul(g13L,G323),kmadd(G212,kmul(g23L,G323),kmadd(g11L,kmsub(G112,G123,kmul(G113,G122)),kmadd(g12L,kmadd(G123,G212,kmsub(G112,G223,kmadd(G113,G222,kmul(G122,G213)))),kmadd(g22L,kmsub(G212,G223,kmul(G213,G222)),kmadd(g13L,kmsub(G123,G312,kmadd(G113,G322,kmul(G122,G313))),kmadd(g23L,kmsub(G223,G312,kmadd(G213,G322,kmul(G222,G313))),kmul(g33L,kmsub(G312,G323,kmul(G313,G322)))))))))),kmadd(gu31,kmadd(G112,kmul(g13L,G323),kmadd(G212,kmul(g23L,G323),kmadd(g11L,kmsub(G112,G123,kmul(G113,G122)),kmadd(g12L,kmadd(G123,G212,kmsub(G112,G223,kmadd(G113,G222,kmul(G122,G213)))),kmadd(g22L,kmsub(G212,G223,kmul(G213,G222)),kmadd(g13L,kmsub(G123,G312,kmadd(G113,G322,kmul(G122,G313))),kmadd(g23L,kmsub(G223,G312,kmadd(G213,G322,kmul(G222,G313))),kmul(g33L,kmsub(G312,G323,kmul(G313,G322)))))))))),kmul(gu11,kmadd(g22L,SQR(G212),knmsub(g12L,kmadd(G122,G211,kmadd(G111,G222,kmul(G112,kmul(G212,ToReal(-2))))),kmadd(g11L,knmsub(G111,G122,SQR(G112)),knmsub(G222,kmadd(g23L,G311,kmul(G211,g22L)),kmadd(g33L,knmsub(G311,G322,SQR(G312)),kmadd(g13L,knmsub(G122,G311,kmsub(G112,kmul(G312,ToReal(2)),kmul(G111,G322))),kmul(g23L,kmsub(G212,kmul(G312,ToReal(2)),kmul(G211,G322)))))))))))),kmul(gu33,ksub(ksub(kmadd(JacPDstandardNth32g23,ToReal(2),kmul(ToReal(2),kmadd(g22L,SQR(G223),knmsub(g12L,kmadd(G133,G222,kmadd(G122,G233,kmul(G123,kmul(G223,ToReal(-2))))),kmadd(g11L,knmsub(G122,G133,SQR(G123)),knmsub(G233,kmadd(g23L,G322,kmul(G222,g22L)),kmadd(g33L,knmsub(G322,G333,SQR(G323)),kmadd(g13L,knmsub(G133,G322,kmsub(G123,kmul(G323,ToReal(2)),kmul(G122,G333))),kmul(g23L,kmsub(G223,kmul(G323,ToReal(2)),kmul(G222,G333))))))))))),JacPDstandardNth33g22),JacPDstandardNth22g33))))))))); + kmul(ToReal(0.5),knmsub(gu11,kadd(JacPDstandardNth11g22,kmadd(JacPDstandardNth12g12,ToReal(-2),JacPDstandardNth22g11)),kmadd(gu21,ksub(JacPDstandardNth12g22,JacPDstandardNth21g22),kmadd(gu13,kadd(JacPDstandardNth12g23,ksub(JacPDstandardNth32g12,kadd(JacPDstandardNth22g13,JacPDstandardNth13g22))),kmadd(gu23,ksub(JacPDstandardNth32g22,JacPDstandardNth23g22),kmadd(gu31,kadd(JacPDstandardNth12g23,ksub(JacPDstandardNth32g12,kadd(JacPDstandardNth31g22,JacPDstandardNth22g13))),kmadd(ToReal(2),kmadd(gu13,kmadd(g13L,kmul(G112,G323),kmadd(g23L,kmul(G212,G323),kmadd(g11L,kmsub(G112,G123,kmul(G113,G122)),kmadd(g12L,kmadd(G123,G212,kmsub(G112,G223,kmadd(G113,G222,kmul(G122,G213)))),kmadd(g22L,kmsub(G212,G223,kmul(G213,G222)),kmadd(g13L,kmsub(G123,G312,kmadd(G113,G322,kmul(G122,G313))),kmadd(g23L,kmsub(G223,G312,kmadd(G213,G322,kmul(G222,G313))),kmul(g33L,kmsub(G312,G323,kmul(G313,G322)))))))))),kmadd(gu31,kmadd(g13L,kmul(G112,G323),kmadd(g23L,kmul(G212,G323),kmadd(g11L,kmsub(G112,G123,kmul(G113,G122)),kmadd(g12L,kmadd(G123,G212,kmsub(G112,G223,kmadd(G113,G222,kmul(G122,G213)))),kmadd(g22L,kmsub(G212,G223,kmul(G213,G222)),kmadd(g13L,kmsub(G123,G312,kmadd(G113,G322,kmul(G122,G313))),kmadd(g23L,kmsub(G223,G312,kmadd(G213,G322,kmul(G222,G313))),kmul(g33L,kmsub(G312,G323,kmul(G313,G322)))))))))),kmul(gu11,kmadd(g22L,SQR(G212),knmsub(g12L,kmadd(G122,G211,kmadd(G111,G222,kmul(G112,kmul(G212,ToReal(-2))))),kmadd(g11L,knmsub(G111,G122,SQR(G112)),knmsub(G222,kmadd(g23L,G311,kmul(g22L,G211)),kmadd(g33L,knmsub(G311,G322,SQR(G312)),kmadd(g13L,knmsub(G122,G311,kmsub(G112,kmul(G312,ToReal(2)),kmul(G111,G322))),kmul(g23L,kmsub(G212,kmul(G312,ToReal(2)),kmul(G211,G322)))))))))))),kmul(gu33,ksub(ksub(kmadd(JacPDstandardNth32g23,ToReal(2),kmul(ToReal(2),kmadd(g22L,SQR(G223),knmsub(g12L,kmadd(G133,G222,kmadd(G122,G233,kmul(G123,kmul(G223,ToReal(-2))))),kmadd(g11L,knmsub(G122,G133,SQR(G123)),knmsub(G233,kmadd(g23L,G322,kmul(g22L,G222)),kmadd(g33L,knmsub(G322,G333,SQR(G323)),kmadd(g13L,knmsub(G133,G322,kmsub(G123,kmul(G323,ToReal(2)),kmul(G122,G333))),kmul(g23L,kmsub(G223,kmul(G323,ToReal(2)),kmul(G222,G333))))))))))),JacPDstandardNth33g22),JacPDstandardNth22g33))))))))); CCTK_REAL_VEC R23 = - kmul(ToReal(0.5),kmadd(gu23,kadd(JacPDstandardNth22g33,kmadd(JacPDstandardNth23g23,ToReal(-2),JacPDstandardNth33g22)),kmadd(gu11,kadd(JacPDstandardNth12g13,ksub(JacPDstandardNth13g12,kadd(JacPDstandardNth23g11,JacPDstandardNth11g23))),kmadd(gu21,kadd(JacPDstandardNth13g22,ksub(JacPDstandardNth22g13,kadd(JacPDstandardNth23g12,JacPDstandardNth21g23))),kmadd(gu13,kadd(JacPDstandardNth12g33,ksub(JacPDstandardNth33g12,kadd(JacPDstandardNth23g13,JacPDstandardNth13g23))),kmadd(gu33,ksub(JacPDstandardNth32g33,JacPDstandardNth23g33),kmadd(gu31,kadd(JacPDstandardNth13g23,ksub(JacPDstandardNth32g13,kadd(JacPDstandardNth31g23,JacPDstandardNth23g13))),kmul(kmadd(gu32,kmadd(G133,kmadd(g11L,G122,kmadd(g12L,G222,kmul(g13L,G322))),kmadd(G233,kmadd(G222,g22L,kmul(g23L,G322)),kmadd(G333,kmadd(G122,g13L,kmadd(G222,g23L,kmul(G322,g33L))),kmadd(kmadd(G123,kmul(g13L,G323),kmul(G223,kmul(g23L,G323))),ToReal(-2),kmsub(g12L,kmadd(G122,G233,kmul(G123,kmul(G223,ToReal(-2)))),kmadd(g11L,SQR(G123),kmadd(g33L,SQR(G323),kmul(g22L,SQR(G223))))))))),kmadd(gu11,kmadd(G112,kmul(g13L,G313),kmadd(G212,kmul(g23L,G313),kmadd(G312,kmul(G313,g33L),kmadd(g11L,kmsub(G112,G113,kmul(G111,G123)),kmadd(g12L,kmadd(G113,G212,kmsub(G112,G213,kmadd(G111,G223,kmul(G123,G211)))),kmadd(g22L,kmsub(G212,G213,kmul(G211,G223)),kmadd(g13L,kmsub(G113,G312,kmul(G111,G323)),kmsub(g23L,kmsub(G213,G312,kmul(G211,G323)),kmul(G311,kmadd(G123,g13L,kmadd(G323,g33L,kmul(G223,g23L)))))))))))),kmadd(gu12,kmadd(G113,kmul(g13L,G322),kmadd(G213,kmul(g23L,G322),kmadd(G313,kmul(G322,g33L),kmadd(g11L,kmsub(G113,G122,kmul(G112,G123)),kmadd(g12L,kmadd(G122,G213,kmsub(G113,G222,kmadd(G112,G223,kmul(G123,G212)))),kmadd(g22L,kmsub(G213,G222,kmul(G212,G223)),kmadd(g13L,kmsub(G122,G313,kmul(G112,G323)),kmsub(g23L,kmsub(G222,G313,kmul(G212,G323)),kmul(G312,kmadd(G123,g13L,kmadd(G323,g33L,kmul(G223,g23L)))))))))))),kmul(gu31,kmadd(G112,kmul(g13L,G333),kmadd(G212,kmul(g23L,G333),kmadd(G312,kmadd(G133,g13L,kmadd(G233,g23L,kmul(G333,g33L))),kmadd(g11L,kmsub(G112,G133,kmul(G113,G123)),kmadd(g12L,kmadd(G133,G212,kmsub(G112,G233,kmadd(G113,G223,kmul(G123,G213)))),kmsub(g22L,kmsub(G212,G233,kmul(G213,G223)),kmadd(G313,kmadd(G123,g13L,kmadd(G323,g33L,kmul(G223,g23L))),kmul(G323,kmadd(G213,g23L,kmul(G113,g13L)))))))))))))),ToReal(2))))))))); + kmul(ToReal(0.5),kmadd(gu23,kadd(JacPDstandardNth22g33,kmadd(JacPDstandardNth23g23,ToReal(-2),JacPDstandardNth33g22)),kmadd(gu11,kadd(JacPDstandardNth12g13,ksub(JacPDstandardNth13g12,kadd(JacPDstandardNth23g11,JacPDstandardNth11g23))),kmadd(gu21,kadd(JacPDstandardNth13g22,ksub(JacPDstandardNth22g13,kadd(JacPDstandardNth23g12,JacPDstandardNth21g23))),kmadd(gu13,kadd(JacPDstandardNth12g33,ksub(JacPDstandardNth33g12,kadd(JacPDstandardNth23g13,JacPDstandardNth13g23))),kmadd(gu33,ksub(JacPDstandardNth32g33,JacPDstandardNth23g33),kmadd(gu31,kadd(JacPDstandardNth13g23,ksub(JacPDstandardNth32g13,kadd(JacPDstandardNth31g23,JacPDstandardNth23g13))),kmul(kmadd(gu32,kmadd(G133,kmadd(g11L,G122,kmadd(g12L,G222,kmul(g13L,G322))),kmadd(G233,kmadd(g22L,G222,kmul(g23L,G322)),kmadd(kmadd(g13L,G122,kmadd(g23L,G222,kmul(g33L,G322))),G333,kmadd(kmadd(g13L,kmul(G123,G323),kmul(g23L,kmul(G223,G323))),ToReal(-2),kmsub(g12L,kmadd(G122,G233,kmul(G123,kmul(G223,ToReal(-2)))),kmadd(g11L,SQR(G123),kmadd(g33L,SQR(G323),kmul(g22L,SQR(G223))))))))),kmadd(gu31,kmadd(g13L,kmul(G112,G333),kmadd(g23L,kmul(G212,G333),kmadd(G312,kmadd(g13L,G133,kmadd(g23L,G233,kmul(g33L,G333))),kmadd(g11L,kmsub(G112,G133,kmul(G113,G123)),knmsub(G323,kmadd(g23L,G213,kmul(g13L,G113)),kmadd(g12L,kmadd(G133,G212,kmsub(G112,G233,kmadd(G113,G223,kmul(G123,G213)))),kmsub(g22L,kmsub(G212,G233,kmul(G213,G223)),kmul(G313,kmadd(g13L,G123,kmadd(g33L,G323,kmul(g23L,G223))))))))))),kmadd(gu11,kmadd(g13L,kmul(G112,G313),kmadd(g23L,kmul(G212,G313),kmadd(g33L,kmul(G312,G313),kmadd(g11L,kmsub(G112,G113,kmul(G111,G123)),kmadd(g12L,kmadd(G113,G212,kmsub(G112,G213,kmadd(G111,G223,kmul(G123,G211)))),kmadd(g22L,kmsub(G212,G213,kmul(G211,G223)),knmsub(G311,kmadd(g13L,G123,kmadd(g33L,G323,kmul(g23L,G223))),kmadd(g13L,kmsub(G113,G312,kmul(G111,G323)),kmul(g23L,kmsub(G213,G312,kmul(G211,G323))))))))))),kmul(gu12,kmadd(g13L,kmul(G113,G322),kmadd(g23L,kmul(G213,G322),kmadd(g33L,kmul(G313,G322),kmadd(g11L,kmsub(G113,G122,kmul(G112,G123)),kmadd(g12L,kmadd(G122,G213,kmsub(G113,G222,kmadd(G112,G223,kmul(G123,G212)))),kmadd(g22L,kmsub(G213,G222,kmul(G212,G223)),knmsub(G312,kmadd(g13L,G123,kmadd(g33L,G323,kmul(g23L,G223))),kmadd(g13L,kmsub(G122,G313,kmul(G112,G323)),kmul(g23L,kmsub(G222,G313,kmul(G212,G323))))))))))))))),ToReal(2))))))))); CCTK_REAL_VEC R33 = - kmul(ToReal(0.5),knmsub(gu11,kadd(JacPDstandardNth11g33,kmadd(JacPDstandardNth13g13,ToReal(-2),JacPDstandardNth33g11)),kmadd(gu31,ksub(JacPDstandardNth13g33,JacPDstandardNth31g33),kmadd(gu32,ksub(JacPDstandardNth23g33,JacPDstandardNth32g33),kmadd(gu12,kadd(JacPDstandardNth13g23,ksub(JacPDstandardNth23g13,kadd(JacPDstandardNth33g12,JacPDstandardNth12g33))),kmadd(gu21,kadd(JacPDstandardNth13g23,ksub(JacPDstandardNth23g13,kadd(JacPDstandardNth33g12,JacPDstandardNth21g33))),kmadd(ToReal(2),kmadd(gu12,kmadd(G113,kmul(g13L,G323),kmadd(G213,kmul(g23L,G323),kmadd(G313,kmul(G323,g33L),kmadd(g11L,kmsub(G113,G123,kmul(G112,G133)),kmadd(g12L,kmadd(G123,G213,kmsub(G113,G223,kmadd(G112,G233,kmul(G133,G212)))),kmadd(g22L,kmsub(G213,G223,kmul(G212,G233)),kmadd(g13L,kmsub(G123,G313,kmul(G112,G333)),kmsub(g23L,kmsub(G223,G313,kmul(G212,G333)),kmul(G312,kmadd(G133,g13L,kmadd(G333,g33L,kmul(G233,g23L)))))))))))),kmadd(gu21,kmadd(G113,kmul(g13L,G323),kmadd(G213,kmul(g23L,G323),kmadd(G313,kmul(G323,g33L),kmadd(g11L,kmsub(G113,G123,kmul(G112,G133)),kmadd(g12L,kmadd(G123,G213,kmsub(G113,G223,kmadd(G112,G233,kmul(G133,G212)))),kmadd(g22L,kmsub(G213,G223,kmul(G212,G233)),kmadd(g13L,kmsub(G123,G313,kmul(G112,G333)),kmsub(g23L,kmsub(G223,G313,kmul(G212,G333)),kmul(G312,kmadd(G133,g13L,kmadd(G333,g33L,kmul(G233,g23L)))))))))))),kmul(gu11,kmadd(g22L,SQR(G213),knmsub(g12L,kmadd(G133,G211,kmadd(G111,G233,kmul(G113,kmul(G213,ToReal(-2))))),kmadd(g11L,knmsub(G111,G133,SQR(G113)),knmsub(G233,kmadd(g23L,G311,kmul(G211,g22L)),kmadd(g33L,knmsub(G311,G333,SQR(G313)),kmadd(g13L,knmsub(G133,G311,kmsub(G113,kmul(G313,ToReal(2)),kmul(G111,G333))),kmul(g23L,kmsub(G213,kmul(G313,ToReal(2)),kmul(G211,G333)))))))))))),kmul(gu22,ksub(ksub(kmadd(JacPDstandardNth23g23,ToReal(2),kmul(ToReal(2),kmadd(g22L,SQR(G223),knmsub(g12L,kmadd(G133,G222,kmadd(G122,G233,kmul(G123,kmul(G223,ToReal(-2))))),kmadd(g11L,knmsub(G122,G133,SQR(G123)),knmsub(G233,kmadd(g23L,G322,kmul(G222,g22L)),kmadd(g33L,knmsub(G322,G333,SQR(G323)),kmadd(g13L,knmsub(G133,G322,kmsub(G123,kmul(G323,ToReal(2)),kmul(G122,G333))),kmul(g23L,kmsub(G223,kmul(G323,ToReal(2)),kmul(G222,G333))))))))))),JacPDstandardNth33g22),JacPDstandardNth22g33))))))))); + kmul(ToReal(0.5),knmsub(gu11,kadd(JacPDstandardNth11g33,kmadd(JacPDstandardNth13g13,ToReal(-2),JacPDstandardNth33g11)),kmadd(gu31,ksub(JacPDstandardNth13g33,JacPDstandardNth31g33),kmadd(gu32,ksub(JacPDstandardNth23g33,JacPDstandardNth32g33),kmadd(gu12,kadd(JacPDstandardNth13g23,ksub(JacPDstandardNth23g13,kadd(JacPDstandardNth33g12,JacPDstandardNth12g33))),kmadd(gu21,kadd(JacPDstandardNth13g23,ksub(JacPDstandardNth23g13,kadd(JacPDstandardNth33g12,JacPDstandardNth21g33))),kmadd(ToReal(2),kmadd(gu12,kmadd(g13L,kmul(G113,G323),kmadd(g23L,kmul(G213,G323),kmadd(g33L,kmul(G313,G323),kmadd(g11L,kmsub(G113,G123,kmul(G112,G133)),kmadd(g12L,kmadd(G123,G213,kmsub(G113,G223,kmadd(G112,G233,kmul(G133,G212)))),kmadd(g22L,kmsub(G213,G223,kmul(G212,G233)),knmsub(G312,kmadd(g13L,G133,kmadd(g33L,G333,kmul(g23L,G233))),kmadd(g13L,kmsub(G123,G313,kmul(G112,G333)),kmul(g23L,kmsub(G223,G313,kmul(G212,G333))))))))))),kmadd(gu21,kmadd(g13L,kmul(G113,G323),kmadd(g23L,kmul(G213,G323),kmadd(g33L,kmul(G313,G323),kmadd(g11L,kmsub(G113,G123,kmul(G112,G133)),kmadd(g12L,kmadd(G123,G213,kmsub(G113,G223,kmadd(G112,G233,kmul(G133,G212)))),kmadd(g22L,kmsub(G213,G223,kmul(G212,G233)),knmsub(G312,kmadd(g13L,G133,kmadd(g33L,G333,kmul(g23L,G233))),kmadd(g13L,kmsub(G123,G313,kmul(G112,G333)),kmul(g23L,kmsub(G223,G313,kmul(G212,G333))))))))))),kmul(gu11,kmadd(g22L,SQR(G213),knmsub(g12L,kmadd(G133,G211,kmadd(G111,G233,kmul(G113,kmul(G213,ToReal(-2))))),kmadd(g11L,knmsub(G111,G133,SQR(G113)),knmsub(G233,kmadd(g23L,G311,kmul(g22L,G211)),kmadd(g33L,knmsub(G311,G333,SQR(G313)),kmadd(g13L,knmsub(G133,G311,kmsub(G113,kmul(G313,ToReal(2)),kmul(G111,G333))),kmul(g23L,kmsub(G213,kmul(G313,ToReal(2)),kmul(G211,G333)))))))))))),kmul(gu22,ksub(ksub(kmadd(JacPDstandardNth23g23,ToReal(2),kmul(ToReal(2),kmadd(g22L,SQR(G223),knmsub(g12L,kmadd(G133,G222,kmadd(G122,G233,kmul(G123,kmul(G223,ToReal(-2))))),kmadd(g11L,knmsub(G122,G133,SQR(G123)),knmsub(G233,kmadd(g23L,G322,kmul(g22L,G222)),kmadd(g33L,knmsub(G322,G333,SQR(G323)),kmadd(g13L,knmsub(G133,G322,kmsub(G123,kmul(G323,ToReal(2)),kmul(G122,G333))),kmul(g23L,kmsub(G223,kmul(G323,ToReal(2)),kmul(G222,G333))))))))))),JacPDstandardNth33g22),JacPDstandardNth22g33))))))))); CCTK_REAL_VEC trR = kmadd(gu11,R11,kmadd(kadd(gu12,gu21),R12,kmadd(kadd(gu13,gu31),R13,kmadd(gu22,R22,kmadd(kadd(gu23,gu32),R23,kmul(gu33,R33)))))); CCTK_REAL_VEC Km11 = - kmadd(gu11,K11L,kmadd(gu12,K12L,kmul(gu13,K13L))); + kmadd(K11L,gu11,kmadd(K12L,gu12,kmul(K13L,gu13))); CCTK_REAL_VEC Km21 = - kmadd(gu21,K11L,kmadd(gu22,K12L,kmul(gu23,K13L))); + kmadd(K11L,gu21,kmadd(K12L,gu22,kmul(K13L,gu23))); CCTK_REAL_VEC Km31 = - kmadd(gu31,K11L,kmadd(gu32,K12L,kmul(gu33,K13L))); + kmadd(K11L,gu31,kmadd(K12L,gu32,kmul(K13L,gu33))); CCTK_REAL_VEC Km12 = - kmadd(gu11,K12L,kmadd(gu12,K22L,kmul(gu13,K23L))); + kmadd(K12L,gu11,kmadd(K22L,gu12,kmul(K23L,gu13))); CCTK_REAL_VEC Km22 = - kmadd(gu21,K12L,kmadd(gu22,K22L,kmul(gu23,K23L))); + kmadd(K12L,gu21,kmadd(K22L,gu22,kmul(K23L,gu23))); CCTK_REAL_VEC Km32 = - kmadd(gu31,K12L,kmadd(gu32,K22L,kmul(gu33,K23L))); + kmadd(K12L,gu31,kmadd(K22L,gu32,kmul(K23L,gu33))); CCTK_REAL_VEC Km13 = - kmadd(gu11,K13L,kmadd(gu12,K23L,kmul(gu13,K33L))); + kmadd(K13L,gu11,kmadd(K23L,gu12,kmul(K33L,gu13))); CCTK_REAL_VEC Km23 = - kmadd(gu21,K13L,kmadd(gu22,K23L,kmul(gu23,K33L))); + kmadd(K13L,gu21,kmadd(K23L,gu22,kmul(K33L,gu23))); CCTK_REAL_VEC Km33 = - kmadd(gu31,K13L,kmadd(gu32,K23L,kmul(gu33,K33L))); + kmadd(K13L,gu31,kmadd(K23L,gu32,kmul(K33L,gu33))); CCTK_REAL_VEC trK = kadd(Km11,kadd(Km22,Km33)); @@ -1203,13 +1210,13 @@ static void ML_ADM_constraints_Body(cGH const * restrict const cctkGH, int const kadd(trR,kadd(SQR(trK),kmsub(kmadd(Km12,Km21,kmadd(Km13,Km31,kmul(Km23,Km32))),ToReal(-2),kadd(kadd(SQR(Km33),SQR(Km22)),SQR(Km11))))); CCTK_REAL_VEC M1L = - kmadd(gu21,kadd(JacPDstandardNth2K11,kmadd(G211,K22L,kmadd(G311,K23L,ksub(knmsub(G112,K11L,kmsub(K12L,ksub(G111,G212),kmul(G312,K13L))),JacPDstandardNth1K12)))),kmadd(gu22,kadd(JacPDstandardNth2K12,kmadd(G212,K22L,kmadd(G312,K23L,ksub(knmsub(G122,K11L,kmsub(K12L,ksub(G112,G222),kmul(G322,K13L))),JacPDstandardNth1K22)))),kmadd(gu23,kadd(JacPDstandardNth2K13,kmadd(G213,K22L,kmadd(G313,K23L,ksub(knmsub(G123,K11L,kmsub(K12L,ksub(G113,G223),kmul(G323,K13L))),JacPDstandardNth1K23)))),kmadd(gu31,kadd(JacPDstandardNth3K11,kmadd(G211,K23L,kmadd(G311,K33L,ksub(knmsub(G113,K11L,kmsub(K13L,ksub(G111,G313),kmul(G213,K12L))),JacPDstandardNth1K13)))),kmadd(gu32,kadd(JacPDstandardNth3K12,kmadd(G212,K23L,kmadd(G312,K33L,ksub(knmsub(G123,K11L,kmsub(K13L,ksub(G112,G323),kmul(G223,K12L))),JacPDstandardNth1K23)))),kmul(gu33,kadd(JacPDstandardNth3K13,kmadd(G213,K23L,kmadd(G313,K33L,ksub(knmsub(G133,K11L,kmsub(K13L,ksub(G113,G333),kmul(G233,K12L))),JacPDstandardNth1K33)))))))))); + kmadd(gu21,kmadd(K22L,G211,kmadd(K23L,G311,kadd(JacPDstandardNth2K11,knmsub(K11L,G112,knmsub(K13L,G312,kmsub(K12L,ksub(G111,G212),JacPDstandardNth1K12)))))),kmadd(gu22,kmadd(K22L,G212,kmadd(K23L,G312,kadd(JacPDstandardNth2K12,knmsub(K11L,G122,knmsub(K13L,G322,kmsub(K12L,ksub(G112,G222),JacPDstandardNth1K22)))))),kmadd(gu23,kmadd(K22L,G213,kmadd(K23L,G313,kadd(JacPDstandardNth2K13,knmsub(K11L,G123,knmsub(K13L,G323,kmsub(K12L,ksub(G113,G223),JacPDstandardNth1K23)))))),kmadd(gu31,kmadd(K23L,G211,kmadd(K33L,G311,kadd(JacPDstandardNth3K11,knmsub(K11L,G113,knmsub(K12L,G213,kmsub(K13L,ksub(G111,G313),JacPDstandardNth1K13)))))),kmadd(gu32,kmadd(K23L,G212,kmadd(K33L,G312,kadd(JacPDstandardNth3K12,knmsub(K11L,G123,knmsub(K12L,G223,kmsub(K13L,ksub(G112,G323),JacPDstandardNth1K23)))))),kmul(gu33,kmadd(K23L,G213,kmadd(K33L,G313,kadd(JacPDstandardNth3K13,knmsub(K11L,G133,knmsub(K12L,G233,kmsub(K13L,ksub(G113,G333),JacPDstandardNth1K33)))))))))))); CCTK_REAL_VEC M2L = - kmadd(gu11,kadd(JacPDstandardNth1K12,kmadd(G112,K11L,kmadd(G312,K13L,ksub(knmsub(G211,K22L,kmsub(K12L,ksub(G212,G111),kmul(G311,K23L))),JacPDstandardNth2K11)))),kmadd(gu12,kadd(JacPDstandardNth1K22,kmadd(G122,K11L,kmadd(G322,K13L,ksub(knmsub(G212,K22L,kmsub(K12L,ksub(G222,G112),kmul(G312,K23L))),JacPDstandardNth2K12)))),kmadd(gu13,kadd(JacPDstandardNth1K23,kmadd(G123,K11L,kmadd(G323,K13L,ksub(knmsub(G213,K22L,kmsub(K12L,ksub(G223,G113),kmul(G313,K23L))),JacPDstandardNth2K13)))),kmadd(gu31,kadd(JacPDstandardNth3K12,kmadd(G112,K13L,kmadd(G312,K33L,ksub(knmsub(G113,K12L,kmsub(K23L,ksub(G212,G313),kmul(G213,K22L))),JacPDstandardNth2K13)))),kmadd(gu32,kadd(JacPDstandardNth3K22,kmadd(G122,K13L,kmadd(G322,K33L,ksub(knmsub(G123,K12L,kmsub(K23L,ksub(G222,G323),kmul(G223,K22L))),JacPDstandardNth2K23)))),kmul(gu33,kadd(JacPDstandardNth3K23,kmadd(G123,K13L,kmadd(G323,K33L,ksub(knmsub(G133,K12L,kmsub(K23L,ksub(G223,G333),kmul(G233,K22L))),JacPDstandardNth2K33)))))))))); + kmadd(gu11,kmadd(K11L,G112,kmadd(K13L,G312,kadd(JacPDstandardNth1K12,knmsub(K22L,G211,knmsub(K23L,G311,kmsub(K12L,ksub(G212,G111),JacPDstandardNth2K11)))))),kmadd(gu12,kmadd(K11L,G122,kmadd(K13L,G322,kadd(JacPDstandardNth1K22,knmsub(K22L,G212,knmsub(K23L,G312,kmsub(K12L,ksub(G222,G112),JacPDstandardNth2K12)))))),kmadd(gu13,kmadd(K11L,G123,kmadd(K13L,G323,kadd(JacPDstandardNth1K23,knmsub(K22L,G213,knmsub(K23L,G313,kmsub(K12L,ksub(G223,G113),JacPDstandardNth2K13)))))),kmadd(gu31,kmadd(K13L,G112,kmadd(K33L,G312,kadd(JacPDstandardNth3K12,knmsub(K12L,G113,knmsub(K22L,G213,kmsub(K23L,ksub(G212,G313),JacPDstandardNth2K13)))))),kmadd(gu32,kmadd(K13L,G122,kmadd(K33L,G322,kadd(JacPDstandardNth3K22,knmsub(K12L,G123,knmsub(K22L,G223,kmsub(K23L,ksub(G222,G323),JacPDstandardNth2K23)))))),kmul(gu33,kmadd(K13L,G123,kmadd(K33L,G323,kadd(JacPDstandardNth3K23,knmsub(K12L,G133,knmsub(K22L,G233,kmsub(K23L,ksub(G223,G333),JacPDstandardNth2K33)))))))))))); CCTK_REAL_VEC M3L = - kmadd(gu11,kadd(JacPDstandardNth1K13,kmadd(G113,K11L,kmadd(G213,K12L,ksub(knmsub(G211,K23L,kmsub(K13L,ksub(G313,G111),kmul(G311,K33L))),JacPDstandardNth3K11)))),kmadd(gu12,kadd(JacPDstandardNth1K23,kmadd(G123,K11L,kmadd(G223,K12L,ksub(knmsub(G212,K23L,kmsub(K13L,ksub(G323,G112),kmul(G312,K33L))),JacPDstandardNth3K12)))),kmadd(gu13,kadd(JacPDstandardNth1K33,kmadd(G133,K11L,kmadd(G233,K12L,ksub(knmsub(G213,K23L,kmsub(K13L,ksub(G333,G113),kmul(G313,K33L))),JacPDstandardNth3K13)))),kmadd(gu21,kadd(JacPDstandardNth2K13,kmadd(G113,K12L,kmadd(G213,K22L,ksub(knmsub(G112,K13L,kmsub(K23L,ksub(G313,G212),kmul(G312,K33L))),JacPDstandardNth3K12)))),kmadd(gu22,kadd(JacPDstandardNth2K23,kmadd(G123,K12L,kmadd(G223,K22L,ksub(knmsub(G122,K13L,kmsub(K23L,ksub(G323,G222),kmul(G322,K33L))),JacPDstandardNth3K22)))),kmul(gu23,kadd(JacPDstandardNth2K33,kmadd(G133,K12L,kmadd(G233,K22L,ksub(knmsub(G123,K13L,kmsub(K23L,ksub(G333,G223),kmul(G323,K33L))),JacPDstandardNth3K23)))))))))); + kmadd(gu11,kmadd(K11L,G113,kmadd(K12L,G213,kadd(JacPDstandardNth1K13,knmsub(K23L,G211,knmsub(K33L,G311,kmsub(K13L,ksub(G313,G111),JacPDstandardNth3K11)))))),kmadd(gu12,kmadd(K11L,G123,kmadd(K12L,G223,kadd(JacPDstandardNth1K23,knmsub(K23L,G212,knmsub(K33L,G312,kmsub(K13L,ksub(G323,G112),JacPDstandardNth3K12)))))),kmadd(gu13,kmadd(K11L,G133,kmadd(K12L,G233,kadd(JacPDstandardNth1K33,knmsub(K23L,G213,knmsub(K33L,G313,kmsub(K13L,ksub(G333,G113),JacPDstandardNth3K13)))))),kmadd(gu21,kmadd(K12L,G113,kmadd(K22L,G213,kadd(JacPDstandardNth2K13,knmsub(K13L,G112,knmsub(K33L,G312,kmsub(K23L,ksub(G313,G212),JacPDstandardNth3K12)))))),kmadd(gu22,kmadd(K12L,G123,kmadd(K22L,G223,kadd(JacPDstandardNth2K23,knmsub(K13L,G122,knmsub(K33L,G322,kmsub(K23L,ksub(G323,G222),JacPDstandardNth3K22)))))),kmul(gu23,kmadd(K12L,G133,kmadd(K22L,G233,kadd(JacPDstandardNth2K33,knmsub(K13L,G123,knmsub(K33L,G323,kmsub(K23L,ksub(G333,G223),JacPDstandardNth3K23)))))))))))); /* If necessary, store only partial vectors after the first iteration */ diff --git a/ML_ADM/src/ML_ADM_constraints_boundary.cc b/ML_ADM/src/ML_ADM_constraints_boundary.cc index 12367e2..7211b66 100644 --- a/ML_ADM/src/ML_ADM_constraints_boundary.cc +++ b/ML_ADM/src/ML_ADM_constraints_boundary.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" diff --git a/ML_ADM/src/ML_ADM_convertFromADMBase.cc b/ML_ADM/src/ML_ADM_convertFromADMBase.cc index 0ed6049..ccf06b3 100644 --- a/ML_ADM/src/ML_ADM_convertFromADMBase.cc +++ b/ML_ADM/src/ML_ADM_convertFromADMBase.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" diff --git a/ML_ADM/src/ML_ADM_convertToADMBase.cc b/ML_ADM/src/ML_ADM_convertToADMBase.cc index 02358a2..83608cf 100644 --- a/ML_ADM/src/ML_ADM_convertToADMBase.cc +++ b/ML_ADM/src/ML_ADM_convertToADMBase.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" diff --git a/ML_ADMConstraints/configuration.ccl b/ML_ADMConstraints/configuration.ccl index 8e2c3c5..0a66ec2 100644 --- a/ML_ADMConstraints/configuration.ccl +++ b/ML_ADMConstraints/configuration.ccl @@ -1,4 +1,6 @@ # File produced by Kranc REQUIRES GenericFD -REQUIRES LoopControl +OPTIONAL LoopControl +{ +} diff --git a/ML_ADMConstraints/param.ccl b/ML_ADMConstraints/param.ccl index edc0502..76915f6 100644 --- a/ML_ADMConstraints/param.ccl +++ b/ML_ADMConstraints/param.ccl @@ -41,6 +41,12 @@ CCTK_INT rhs_timelevels "Number of active RHS timelevels" STEERABLE=RECOVER } 1 restricted: +CCTK_INT other_timelevels "Number of active timelevels for non-evolved grid functions" STEERABLE=RECOVER +{ + 0:3 :: "" +} 1 + +restricted: CCTK_INT ML_ADMConstraints_calc_every "ML_ADMConstraints_calc_every" STEERABLE=ALWAYS { *:* :: "" diff --git a/ML_ADMConstraints/schedule.ccl b/ML_ADMConstraints/schedule.ccl index 9840015..095a226 100644 --- a/ML_ADMConstraints/schedule.ccl +++ b/ML_ADMConstraints/schedule.ccl @@ -1,9 +1,31 @@ # File produced by Kranc -STORAGE: ML_Ham[3] +if (timelevels == 1) +{ + STORAGE: ML_Ham[1] +} +if (timelevels == 2) +{ + STORAGE: ML_Ham[2] +} +if (timelevels == 3) +{ + STORAGE: ML_Ham[3] +} -STORAGE: ML_mom[3] +if (timelevels == 1) +{ + STORAGE: ML_mom[1] +} +if (timelevels == 2) +{ + STORAGE: ML_mom[2] +} +if (timelevels == 3) +{ + STORAGE: ML_mom[3] +} schedule ML_ADMConstraints_Startup at STARTUP { @@ -11,12 +33,6 @@ schedule ML_ADMConstraints_Startup at STARTUP OPTIONS: meta } "create banner" -schedule ML_ADMConstraints_RegisterVars in MoL_Register -{ - LANG: C - OPTIONS: meta -} "Register Variables for MoL" - schedule ML_ADMConstraints_RegisterSymmetries in SymmetryRegister { LANG: C @@ -31,6 +47,15 @@ schedule group ML_ADMConstraints_group in MoL_PseudoEvolution after MoL_PostStep schedule ML_ADMConstraints in ML_ADMConstraints_group { LANG: C + READS: ADMBase::curv + READS: ADMBase::lapse + READS: ADMBase::metric + READS: ADMBase::shift + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_ADMConstraints::ML_Ham + WRITES: ML_ADMConstraints::ML_mom } "ML_ADMConstraints" schedule ML_ADMConstraints_SelectBCs in ML_ADMConstraints_bc_group @@ -68,6 +93,12 @@ schedule ML_ADMConstraints_CheckBoundaries at BASEGRID OPTIONS: meta } "check boundaries treatment" +schedule ML_ADMConstraints_RegisterVars in MoL_Register +{ + LANG: C + OPTIONS: meta +} "Register Variables for MoL" + schedule group ApplyBCs as ML_ADMConstraints_ApplyBCs in MoL_PostStep after ML_ADMConstraints_SelectBoundConds { # no language specified diff --git a/ML_ADMConstraints/src/ML_ADMConstraints.cc b/ML_ADMConstraints/src/ML_ADMConstraints.cc index 54f0864..4c59931 100644 --- a/ML_ADMConstraints/src/ML_ADMConstraints.cc +++ b/ML_ADMConstraints/src/ML_ADMConstraints.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" /* Define macros used in calculations */ @@ -42,8 +43,6 @@ static void ML_ADMConstraints_Body(cGH const * restrict const cctkGH, int const DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -74,9 +73,9 @@ static void ML_ADMConstraints_Body(cGH const * restrict const cctkGH, int const CCTK_REAL const p1o12dx = 0.0833333333333333333333333333333*INV(dx); CCTK_REAL const p1o12dy = 0.0833333333333333333333333333333*INV(dy); CCTK_REAL const p1o12dz = 0.0833333333333333333333333333333*INV(dz); - CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx)*INV(dy); - CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx)*INV(dz); - CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy)*INV(dz); + CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx*dy); + CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx*dz); + CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy*dz); CCTK_REAL const pm1o12dx2 = -0.0833333333333333333333333333333*INV(SQR(dx)); CCTK_REAL const pm1o12dy2 = -0.0833333333333333333333333333333*INV(SQR(dy)); CCTK_REAL const pm1o12dz2 = -0.0833333333333333333333333333333*INV(SQR(dz)); @@ -91,7 +90,7 @@ static void ML_ADMConstraints_Body(cGH const * restrict const cctkGH, int const /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3 (ML_ADMConstraints, + CCTK_LOOP3(ML_ADMConstraints, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2]) { @@ -204,8 +203,8 @@ static void ML_ADMConstraints_Body(cGH const * restrict const cctkGH, int const CCTK_REAL const PDstandardNth2kzz = PDstandardNth2(&kzz[index]); /* Calculate temporaries and grid functions */ - CCTK_REAL detg = 2*gxyL*gxzL*gyzL + gzzL*(gxxL*gyyL - SQR(gxyL)) - - gyyL*SQR(gxzL) - gxxL*SQR(gyzL); + CCTK_REAL detg = 2*gxyL*gxzL*gyzL + gzzL*(gxxL*gyyL - + SQR(gxyL)) - gyyL*SQR(gxzL) - gxxL*SQR(gyzL); CCTK_REAL gu11 = INV(detg)*(gyyL*gzzL - SQR(gyzL)); @@ -285,156 +284,163 @@ static void ML_ADMConstraints_Body(cGH const * restrict const cctkGH, int const + gu32*(-PDstandardNth2gzz + 2*PDstandardNth3gyz) + gu33*PDstandardNth3gzz); - CCTK_REAL R11 = 0.5*(gu32*(4*((-(G123*G211) + G113*G212)*gxyL + - (-(G123*G311) + G113*G312)*gxzL + G112*(G113*gxxL + G213*gxyL + - G313*gxzL) - G111*(G123*gxxL + G223*gxyL + G323*gxzL) + (G212*G213 - - G211*G223)*gyyL + G212*G313*gyzL + (-(G223*G311) + G213*G312 - - G211*G323)*gyzL + (G312*G313 - G311*G323)*gzzL) + + CCTK_REAL R11 = 0.5*(gu32*(4*(gxyL*(-(G123*G211) + G113*G212) + + gyyL*(G212*G213 - G211*G223) + gxzL*G113*G312 + gyzL*G212*G313 + + gzzL*G312*G313 + G112*(gxxL*G113 + gxyL*G213 + gxzL*G313) - + G111*(gxxL*G123 + gxyL*G223 + gxzL*G323) + G311*(-(gxzL*G123) - + gyzL*G223 - gzzL*G323) + gyzL*(G213*G312 - G211*G323)) + 2*(-PDstandardNth11gyz + PDstandardNth12gxz + PDstandardNth13gxy - PDstandardNth23gxx)) + gu22*(-PDstandardNth11gyy + 2*PDstandardNth12gxy - - PDstandardNth22gxx + 2*(G122*(-(G211*gxyL) - G311*gxzL) + - 2*G112*(G212*gxyL + G312*gxzL) - G111*(G122*gxxL + G222*gxyL + - G322*gxzL) + (2*G212*G312 - G211*G322)*gyzL + G222*(-(G211*gyyL) - - G311*gyzL) + gxxL*SQR(G112) + gyyL*SQR(G212) + gzzL*(-(G311*G322) + - SQR(G312)))) + gu33*(-PDstandardNth11gzz + 2*PDstandardNth13gxz - - PDstandardNth33gxx + 2*(G133*(-(G211*gxyL) - G311*gxzL) + - 2*G113*(G213*gxyL + G313*gxzL) - G111*(G133*gxxL + G233*gxyL + - G333*gxzL) + (2*G213*G313 - G211*G333)*gyzL + G233*(-(G211*gyyL) - - G311*gyzL) + gxxL*SQR(G113) + gyyL*SQR(G213) + gzzL*(-(G311*G333) + - SQR(G313))))); + - PDstandardNth22gxx + 2*(2*gyzL*G212*G312 + 2*G112*(gxyL*G212 + + gxzL*G312) - G111*(gxxL*G122 + gxyL*G222 + gxzL*G322) + + G211*(-(gxyL*G122) - gyyL*G222 - gyzL*G322) + + G311*(-(gxzL*G122) - gyzL*G222 - gzzL*G322) + gxxL*SQR(G112) + + gyyL*SQR(G212) + gzzL*SQR(G312))) + gu33*(-PDstandardNth11gzz + + 2*PDstandardNth13gxz - PDstandardNth33gxx + 2*(2*gyzL*G213*G313 + + 2*G113*(gxyL*G213 + gxzL*G313) - G111*(gxxL*G133 + gxyL*G233 + + gxzL*G333) + G211*(-(gxyL*G133) - gyyL*G233 - gyzL*G333) + + G311*(-(gxzL*G133) - gyzL*G233 - gzzL*G333) + gxxL*SQR(G113) + + gyyL*SQR(G213) + gzzL*SQR(G313)))); CCTK_REAL R12 = 0.5*(gu21*(PDstandardNth11gyy - 2*PDstandardNth12gxy + PDstandardNth22gxx) + gu31*(PDstandardNth11gyz - PDstandardNth12gxz - PDstandardNth13gxy + PDstandardNth23gxx) + gu32*(-PDstandardNth12gyz + PDstandardNth13gyy + PDstandardNth22gxz - PDstandardNth23gxy) + gu33*(-PDstandardNth12gzz + PDstandardNth13gyz + PDstandardNth23gxz - - PDstandardNth33gxy) + 2*(gu31*((G123*G211 - G113*G212)*gxyL + - (G123*G311 - G113*G312)*gxzL - G112*(G113*gxxL + G213*gxyL + G313*gxzL) - + G111*(G123*gxxL + G223*gxyL + G323*gxzL) + (-(G212*G213) + - G211*G223)*gyyL + (G223*G311 - G213*G312 - G212*G313)*gyzL + - G211*G323*gyzL + (-(G312*G313) + G311*G323)*gzzL) + gu32*((-(G123*G212) - + G122*G213)*gxyL + (-(G123*G312) + G122*G313)*gxzL + G113*(G122*gxxL + - G222*gxyL + G322*gxzL) - G112*(G123*gxxL + G223*gxyL + G323*gxzL) + - (G213*G222 - G212*G223)*gyyL + G213*G322*gyzL + (-(G223*G312) + - G222*G313 - G212*G323)*gyzL + (G313*G322 - G312*G323)*gzzL) + - gu33*((-(G133*G212) + G123*G213)*gxyL + (-(G133*G312) + G123*G313)*gxzL - + G113*(G123*gxxL + G223*gxyL + G323*gxzL) - G112*(G133*gxxL + - G233*gxyL + G333*gxzL) + (G213*G223 - G212*G233)*gyyL + G213*G323*gyzL - + (-(G233*G312) + G223*G313 - G212*G333)*gyzL + (G313*G323 - - G312*G333)*gzzL) + gu21*(G122*(G211*gxyL + G311*gxzL) + G111*(G122*gxxL - + G222*gxyL + G322*gxzL) + G222*(G211*gyyL + G311*gyzL) - - 2*(G112*(G212*gxyL + G312*gxzL) + G212*G312*gyzL) + G322*(G211*gyzL + - G311*gzzL) - gxxL*SQR(G112) - gyyL*SQR(G212) - gzzL*SQR(G312)))); + PDstandardNth33gxy) + 2*((gxyL*(G123*G211 - G113*G212) + + gyyL*(-(G212*G213) + G211*G223) - G112*(gxxL*G113 + gxyL*G213 + + gxzL*G313) + G312*(-(gxzL*G113) - gyzL*G213 - gzzL*G313) + + G111*(gxxL*G123 + gxyL*G223 + gxzL*G323) + G311*(gxzL*G123 + + gyzL*G223 + gzzL*G323) + gyzL*(-(G212*G313) + G211*G323))*gu31 + + (gxyL*(-(G123*G212) + G122*G213) + gyyL*(G213*G222 - G212*G223) + + gxzL*G122*G313 + gyzL*G213*G322 + gzzL*G313*G322 + + G113*(gxxL*G122 + gxyL*G222 + gxzL*G322) - G112*(gxxL*G123 + + gxyL*G223 + gxzL*G323) + G312*(-(gxzL*G123) - gyzL*G223 - + gzzL*G323) + gyzL*(G222*G313 - G212*G323))*gu32 + + (gxyL*(-(G133*G212) + G123*G213) + gyyL*(G213*G223 - G212*G233) + + gxzL*G123*G313 + gyzL*G213*G323 + gzzL*G313*G323 + + G113*(gxxL*G123 + gxyL*G223 + gxzL*G323) - G112*(gxxL*G133 + + gxyL*G233 + gxzL*G333) + G312*(-(gxzL*G133) - gyzL*G233 - + gzzL*G333) + gyzL*(G223*G313 - G212*G333))*gu33 + + gu21*(-2*(gyzL*G212*G312 + G112*(gxyL*G212 + gxzL*G312)) + + G111*(gxxL*G122 + gxyL*G222 + gxzL*G322) + G211*(gxyL*G122 + + gyyL*G222 + gyzL*G322) + G311*(gxzL*G122 + gyzL*G222 + + gzzL*G322) - gxxL*SQR(G112) - gyyL*SQR(G212) - + gzzL*SQR(G312)))); CCTK_REAL R13 = 0.5*(gu21*(PDstandardNth11gyz - PDstandardNth12gxz - PDstandardNth13gxy + PDstandardNth23gxx) + gu22*(PDstandardNth12gyz - PDstandardNth13gyy - PDstandardNth22gxz + PDstandardNth23gxy) + gu31*(PDstandardNth11gzz - 2*PDstandardNth13gxz + PDstandardNth33gxx) + gu32*(PDstandardNth12gzz - PDstandardNth13gyz - PDstandardNth23gxz + - PDstandardNth33gxy) + 2*(gu21*((G123*G211 - G113*G212)*gxyL + - (G123*G311 - G113*G312)*gxzL - G112*(G113*gxxL + G213*gxyL + G313*gxzL) - + G111*(G123*gxxL + G223*gxyL + G323*gxzL) + (-(G212*G213) + - G211*G223)*gyyL + (G223*G311 - G213*G312 - G212*G313)*gyzL + - G211*G323*gyzL + (-(G312*G313) + G311*G323)*gzzL) + gu22*((G123*G212 - - G122*G213)*gxyL + (G123*G312 - G122*G313)*gxzL - G113*(G122*gxxL + - G222*gxyL + G322*gxzL) + G112*(G123*gxxL + G223*gxyL + G323*gxzL) + - (-(G213*G222) + G212*G223)*gyyL + (G223*G312 - G222*G313 - - G213*G322)*gyzL + G212*G323*gyzL + (-(G313*G322) + G312*G323)*gzzL) + - gu32*((G133*G212 - G123*G213)*gxyL + (G133*G312 - G123*G313)*gxzL - - G113*(G123*gxxL + G223*gxyL + G323*gxzL) + G112*(G133*gxxL + G233*gxyL - + G333*gxzL) + (-(G213*G223) + G212*G233)*gyyL + (G233*G312 - G223*G313 - - G213*G323)*gyzL + G212*G333*gyzL + (-(G313*G323) + G312*G333)*gzzL) + - gu31*(G133*(G211*gxyL + G311*gxzL) + G111*(G133*gxxL + G233*gxyL + - G333*gxzL) + G233*(G211*gyyL + G311*gyzL) - 2*(G113*(G213*gxyL + - G313*gxzL) + G213*G313*gyzL) + G333*(G211*gyzL + G311*gzzL) - - gxxL*SQR(G113) - gyyL*SQR(G213) - gzzL*SQR(G313)))); - - CCTK_REAL R22 = 0.5*(gu31*(4*((G123*G212 - G122*G213)*gxyL + - (G123*G312 - G122*G313)*gxzL - G113*(G122*gxxL + G222*gxyL + G322*gxzL) - + G112*(G123*gxxL + G223*gxyL + G323*gxzL) + (-(G213*G222) + - G212*G223)*gyyL + (G223*G312 - G222*G313 - G213*G322)*gyzL + - G212*G323*gyzL + (-(G313*G322) + G312*G323)*gzzL) + + PDstandardNth33gxy) + 2*((gxyL*(G123*G211 - G113*G212) + + gyyL*(-(G212*G213) + G211*G223) - G112*(gxxL*G113 + gxyL*G213 + + gxzL*G313) + G312*(-(gxzL*G113) - gyzL*G213 - gzzL*G313) + + G111*(gxxL*G123 + gxyL*G223 + gxzL*G323) + G311*(gxzL*G123 + + gyzL*G223 + gzzL*G323) + gyzL*(-(G212*G313) + G211*G323))*gu21 + + (gxyL*(G123*G212 - G122*G213) + gyyL*(-(G213*G222) + G212*G223) - + G113*(gxxL*G122 + gxyL*G222 + gxzL*G322) + G313*(-(gxzL*G122) - + gyzL*G222 - gzzL*G322) + G112*(gxxL*G123 + gxyL*G223 + + gxzL*G323) + G312*(gxzL*G123 + gyzL*G223 + gzzL*G323) + + gyzL*(-(G213*G322) + G212*G323))*gu22 + (gxyL*(G133*G212 - + G123*G213) + gyyL*(-(G213*G223) + G212*G233) - G113*(gxxL*G123 + + gxyL*G223 + gxzL*G323) + G313*(-(gxzL*G123) - gyzL*G223 - + gzzL*G323) + G112*(gxxL*G133 + gxyL*G233 + gxzL*G333) + + G312*(gxzL*G133 + gyzL*G233 + gzzL*G333) + gyzL*(-(G213*G323) + + G212*G333))*gu32 + gu31*(-2*(gyzL*G213*G313 + G113*(gxyL*G213 + + gxzL*G313)) + G111*(gxxL*G133 + gxyL*G233 + gxzL*G333) + + G211*(gxyL*G133 + gyyL*G233 + gyzL*G333) + G311*(gxzL*G133 + + gyzL*G233 + gzzL*G333) - gxxL*SQR(G113) - gyyL*SQR(G213) - + gzzL*SQR(G313)))); + + CCTK_REAL R22 = 0.5*(gu31*(4*(gxyL*(G123*G212 - G122*G213) + + gyyL*(-(G213*G222) + G212*G223) + gxzL*(G123*G312 - G122*G313) - + G113*(gxxL*G122 + gxyL*G222 + gxzL*G322) + gyzL*(G223*G312 - + G222*G313 - G213*G322) + gyzL*G212*G323 + G112*(gxxL*G123 + + gxyL*G223 + gxzL*G323) + gzzL*(-(G313*G322) + G312*G323)) + 2*(PDstandardNth12gyz - PDstandardNth13gyy - PDstandardNth22gxz + PDstandardNth23gxy)) + gu11*(-PDstandardNth11gyy + 2*PDstandardNth12gxy - - PDstandardNth22gxx + 2*(G122*(-(G211*gxyL) - G311*gxzL) + - 2*G112*(G212*gxyL + G312*gxzL) - G111*(G122*gxxL + G222*gxyL + - G322*gxzL) + (2*G212*G312 - G211*G322)*gyzL + G222*(-(G211*gyyL) - - G311*gyzL) + gxxL*SQR(G112) + gyyL*SQR(G212) + gzzL*(-(G311*G322) + - SQR(G312)))) + gu33*(-PDstandardNth22gzz + 2*PDstandardNth23gyz - - PDstandardNth33gyy + 2*(G133*(-(G222*gxyL) - G322*gxzL) + - 2*G123*(G223*gxyL + G323*gxzL) - G122*(G133*gxxL + G233*gxyL + - G333*gxzL) + (2*G223*G323 - G222*G333)*gyzL + G233*(-(G222*gyyL) - - G322*gyzL) + gxxL*SQR(G123) + gyyL*SQR(G223) + gzzL*(-(G322*G333) + - SQR(G323))))); + - PDstandardNth22gxx + 2*(2*gyzL*G212*G312 + 2*G112*(gxyL*G212 + + gxzL*G312) - G111*(gxxL*G122 + gxyL*G222 + gxzL*G322) + + G211*(-(gxyL*G122) - gyyL*G222 - gyzL*G322) + + G311*(-(gxzL*G122) - gyzL*G222 - gzzL*G322) + gxxL*SQR(G112) + + gyyL*SQR(G212) + gzzL*SQR(G312))) + gu33*(-PDstandardNth22gzz + + 2*PDstandardNth23gyz - PDstandardNth33gyy + 2*(2*gyzL*G223*G323 + + 2*G123*(gxyL*G223 + gxzL*G323) - G122*(gxxL*G133 + gxyL*G233 + + gxzL*G333) + G222*(-(gxyL*G133) - gyyL*G233 - gyzL*G333) + + G322*(-(gxzL*G133) - gyzL*G233 - gzzL*G333) + gxxL*SQR(G123) + + gyyL*SQR(G223) + gzzL*SQR(G323)))); CCTK_REAL R23 = 0.5*(gu11*(-PDstandardNth11gyz + PDstandardNth12gxz + PDstandardNth13gxy - PDstandardNth23gxx) + gu21*(-PDstandardNth12gyz + PDstandardNth13gyy + PDstandardNth22gxz - PDstandardNth23gxy) + gu31*(PDstandardNth12gzz - PDstandardNth13gyz - PDstandardNth23gxz + PDstandardNth33gxy) + gu32*(PDstandardNth22gzz - 2*PDstandardNth23gyz + - PDstandardNth33gyy) + 2*(gu11*((-(G123*G211) + G113*G212)*gxyL + - (-(G123*G311) + G113*G312)*gxzL + G112*(G113*gxxL + G213*gxyL + - G313*gxzL) - G111*(G123*gxxL + G223*gxyL + G323*gxzL) + (G212*G213 - - G211*G223)*gyyL + G212*G313*gyzL + (-(G223*G311) + G213*G312 - - G211*G323)*gyzL + (G312*G313 - G311*G323)*gzzL) + gu21*((-(G123*G212) + - G122*G213)*gxyL + (-(G123*G312) + G122*G313)*gxzL + G113*(G122*gxxL + - G222*gxyL + G322*gxzL) - G112*(G123*gxxL + G223*gxyL + G323*gxzL) + - (G213*G222 - G212*G223)*gyyL + G213*G322*gyzL + (-(G223*G312) + - G222*G313 - G212*G323)*gyzL + (G313*G322 - G312*G323)*gzzL) + - gu31*((G133*G212 - G123*G213)*gxyL + (G133*G312 - G123*G313)*gxzL - - G113*(G123*gxxL + G223*gxyL + G323*gxzL) + G112*(G133*gxxL + G233*gxyL - + G333*gxzL) + (-(G213*G223) + G212*G233)*gyyL + (G233*G312 - G223*G313 - - G213*G323)*gyzL + G212*G333*gyzL + (-(G313*G323) + G312*G333)*gzzL) + - gu32*(G133*(G222*gxyL + G322*gxzL) + G122*(G133*gxxL + G233*gxyL + - G333*gxzL) + G233*(G222*gyyL + G322*gyzL) - 2*(G123*(G223*gxyL + - G323*gxzL) + G223*G323*gyzL) + G333*(G222*gyzL + G322*gzzL) - - gxxL*SQR(G123) - gyyL*SQR(G223) - gzzL*SQR(G323)))); - - CCTK_REAL R33 = 0.5*(gu21*(4*((-(G133*G212) + G123*G213)*gxyL + - (-(G133*G312) + G123*G313)*gxzL + G113*(G123*gxxL + G223*gxyL + - G323*gxzL) - G112*(G133*gxxL + G233*gxyL + G333*gxzL) + (G213*G223 - - G212*G233)*gyyL + G213*G323*gyzL + (-(G233*G312) + G223*G313 - - G212*G333)*gyzL + (G313*G323 - G312*G333)*gzzL) + + PDstandardNth33gyy) + 2*((gxyL*(-(G123*G211) + G113*G212) + + gyyL*(G212*G213 - G211*G223) + gxzL*G113*G312 + gyzL*G212*G313 + + gzzL*G312*G313 + G112*(gxxL*G113 + gxyL*G213 + gxzL*G313) - + G111*(gxxL*G123 + gxyL*G223 + gxzL*G323) + G311*(-(gxzL*G123) - + gyzL*G223 - gzzL*G323) + gyzL*(G213*G312 - G211*G323))*gu11 + + (gxyL*(-(G123*G212) + G122*G213) + gyyL*(G213*G222 - G212*G223) + + gxzL*G122*G313 + gyzL*G213*G322 + gzzL*G313*G322 + + G113*(gxxL*G122 + gxyL*G222 + gxzL*G322) - G112*(gxxL*G123 + + gxyL*G223 + gxzL*G323) + G312*(-(gxzL*G123) - gyzL*G223 - + gzzL*G323) + gyzL*(G222*G313 - G212*G323))*gu21 + + (gxyL*(G133*G212 - G123*G213) + gyyL*(-(G213*G223) + G212*G233) - + G113*(gxxL*G123 + gxyL*G223 + gxzL*G323) + G313*(-(gxzL*G123) - + gyzL*G223 - gzzL*G323) + G112*(gxxL*G133 + gxyL*G233 + + gxzL*G333) + G312*(gxzL*G133 + gyzL*G233 + gzzL*G333) + + gyzL*(-(G213*G323) + G212*G333))*gu31 + gu32*(-2*(gyzL*G223*G323 + + G123*(gxyL*G223 + gxzL*G323)) + G122*(gxxL*G133 + gxyL*G233 + + gxzL*G333) + G222*(gxyL*G133 + gyyL*G233 + gyzL*G333) + + G322*(gxzL*G133 + gyzL*G233 + gzzL*G333) - gxxL*SQR(G123) - + gyyL*SQR(G223) - gzzL*SQR(G323)))); + + CCTK_REAL R33 = 0.5*(gu21*(4*(gxyL*(-(G133*G212) + G123*G213) + + gyyL*(G213*G223 - G212*G233) + gxzL*G123*G313 + gyzL*G213*G323 + + gzzL*G313*G323 + G113*(gxxL*G123 + gxyL*G223 + gxzL*G323) - + G112*(gxxL*G133 + gxyL*G233 + gxzL*G333) + G312*(-(gxzL*G133) - + gyzL*G233 - gzzL*G333) + gyzL*(G223*G313 - G212*G333)) + 2*(-PDstandardNth12gzz + PDstandardNth13gyz + PDstandardNth23gxz - PDstandardNth33gxy)) + gu11*(-PDstandardNth11gzz + 2*PDstandardNth13gxz - - PDstandardNth33gxx + 2*(G133*(-(G211*gxyL) - G311*gxzL) + - 2*G113*(G213*gxyL + G313*gxzL) - G111*(G133*gxxL + G233*gxyL + - G333*gxzL) + (2*G213*G313 - G211*G333)*gyzL + G233*(-(G211*gyyL) - - G311*gyzL) + gxxL*SQR(G113) + gyyL*SQR(G213) + gzzL*(-(G311*G333) + - SQR(G313)))) + gu22*(-PDstandardNth22gzz + 2*PDstandardNth23gyz - - PDstandardNth33gyy + 2*(G133*(-(G222*gxyL) - G322*gxzL) + - 2*G123*(G223*gxyL + G323*gxzL) - G122*(G133*gxxL + G233*gxyL + - G333*gxzL) + (2*G223*G323 - G222*G333)*gyzL + G233*(-(G222*gyyL) - - G322*gyzL) + gxxL*SQR(G123) + gyyL*SQR(G223) + gzzL*(-(G322*G333) + - SQR(G323))))); + - PDstandardNth33gxx + 2*(2*gyzL*G213*G313 + 2*G113*(gxyL*G213 + + gxzL*G313) - G111*(gxxL*G133 + gxyL*G233 + gxzL*G333) + + G211*(-(gxyL*G133) - gyyL*G233 - gyzL*G333) + + G311*(-(gxzL*G133) - gyzL*G233 - gzzL*G333) + gxxL*SQR(G113) + + gyyL*SQR(G213) + gzzL*SQR(G313))) + gu22*(-PDstandardNth22gzz + + 2*PDstandardNth23gyz - PDstandardNth33gyy + 2*(2*gyzL*G223*G323 + + 2*G123*(gxyL*G223 + gxzL*G323) - G122*(gxxL*G133 + gxyL*G233 + + gxzL*G333) + G222*(-(gxyL*G133) - gyyL*G233 - gyzL*G333) + + G322*(-(gxzL*G133) - gyzL*G233 - gzzL*G333) + gxxL*SQR(G123) + + gyyL*SQR(G223) + gzzL*SQR(G323)))); CCTK_REAL trR = gu11*R11 + gu22*R22 + 2*(gu21*R12 + gu31*R13 + gu32*R23) + gu33*R33; - CCTK_REAL Km11 = gu11*kxxL + gu21*kxyL + gu31*kxzL; + CCTK_REAL Km11 = kxxL*gu11 + kxyL*gu21 + kxzL*gu31; - CCTK_REAL Km21 = gu21*kxxL + gu22*kxyL + gu32*kxzL; + CCTK_REAL Km21 = kxxL*gu21 + kxyL*gu22 + kxzL*gu32; - CCTK_REAL Km31 = gu31*kxxL + gu32*kxyL + gu33*kxzL; + CCTK_REAL Km31 = kxxL*gu31 + kxyL*gu32 + kxzL*gu33; - CCTK_REAL Km12 = gu11*kxyL + gu21*kyyL + gu31*kyzL; + CCTK_REAL Km12 = kxyL*gu11 + kyyL*gu21 + kyzL*gu31; - CCTK_REAL Km22 = gu21*kxyL + gu22*kyyL + gu32*kyzL; + CCTK_REAL Km22 = kxyL*gu21 + kyyL*gu22 + kyzL*gu32; - CCTK_REAL Km32 = gu31*kxyL + gu32*kyyL + gu33*kyzL; + CCTK_REAL Km32 = kxyL*gu31 + kyyL*gu32 + kyzL*gu33; - CCTK_REAL Km13 = gu11*kxzL + gu21*kyzL + gu31*kzzL; + CCTK_REAL Km13 = kxzL*gu11 + kyzL*gu21 + kzzL*gu31; - CCTK_REAL Km23 = gu21*kxzL + gu22*kyzL + gu32*kzzL; + CCTK_REAL Km23 = kxzL*gu21 + kyzL*gu22 + kzzL*gu32; - CCTK_REAL Km33 = gu31*kxzL + gu32*kyzL + gu33*kzzL; + CCTK_REAL Km33 = kxzL*gu31 + kyzL*gu32 + kzzL*gu33; CCTK_REAL trK = Km11 + Km22 + Km33; CCTK_REAL rho = INV(SQR(alpL))*(eTttL - 2*(betayL*eTtyL + - betazL*eTtzL) + 2*(betaxL*(-eTtxL + betayL*eTxyL + betazL*eTxzL) + - betayL*betazL*eTyzL) + eTxxL*SQR(betaxL) + eTyyL*SQR(betayL) + - eTzzL*SQR(betazL)); + betazL*eTtzL) + 2*(betaxL*(-eTtxL + betayL*eTxyL + + betazL*eTxzL) + betayL*betazL*eTyzL) + eTxxL*SQR(betaxL) + + eTyyL*SQR(betayL) + eTzzL*SQR(betazL)); CCTK_REAL S1 = (-eTtxL + betaxL*eTxxL + betayL*eTxyL + betazL*eTxzL)*INV(alpL); @@ -445,45 +451,47 @@ static void ML_ADMConstraints_Body(cGH const * restrict const cctkGH, int const CCTK_REAL S3 = (-eTtzL + betaxL*eTxzL + betayL*eTyzL + betazL*eTzzL)*INV(alpL); - CCTK_REAL HL = -2*(Km12*Km21 + Km13*Km31 + Km23*Km32) - - 50.26548245743669181540229413247204614715*rho + trR - SQR(Km11) - - SQR(Km22) - SQR(Km33) + SQR(trK); - - CCTK_REAL M1L = gu21*(-(G112*kxxL) + (G111 - G212)*kxyL - G312*kxzL + - G211*kyyL + G311*kyzL - PDstandardNth1kxy + PDstandardNth2kxx) + - gu22*(-(G122*kxxL) + (G112 - G222)*kxyL - G322*kxzL + G212*kyyL + - G312*kyzL - PDstandardNth1kyy + PDstandardNth2kxy) + gu31*(-(G113*kxxL) - - G213*kxyL + (G111 - G313)*kxzL + G211*kyzL + G311*kzzL - - PDstandardNth1kxz + PDstandardNth3kxx) + gu32*(G113*kxyL + G112*kxzL + - G213*kyyL + (G212 + G313)*kyzL + G312*kzzL - 2*(G123*kxxL + G223*kxyL + - G323*kxzL + PDstandardNth1kyz) + PDstandardNth2kxz + PDstandardNth3kxy) - + gu33*(-(G133*kxxL) - G233*kxyL + (G113 - G333)*kxzL + G213*kyzL + - G313*kzzL - PDstandardNth1kzz + PDstandardNth3kxz) - - 25.13274122871834590770114706623602307358*S1; - - CCTK_REAL M2L = gu11*(G112*kxxL + (-G111 + G212)*kxyL + G312*kxzL - - G211*kyyL - G311*kyzL + PDstandardNth1kxy - PDstandardNth2kxx) + - gu21*(G122*kxxL + (-G112 + G222)*kxyL + G322*kxzL - G212*kyyL - - G312*kyzL + PDstandardNth1kyy - PDstandardNth2kxy) + gu31*(G123*kxxL + - (-2*G113 + G223)*kxyL + (G112 + G323)*kxzL + G212*kyzL + G312*kzzL + - PDstandardNth1kyz - 2*(G213*kyyL + G313*kyzL + PDstandardNth2kxz) + - PDstandardNth3kxy) + gu32*(-(G123*kxyL) + G122*kxzL - G223*kyyL + (G222 - - G323)*kyzL + G322*kzzL - PDstandardNth2kyz + PDstandardNth3kyy) + - gu33*(-(G133*kxyL) + G123*kxzL - G233*kyyL + (G223 - G333)*kyzL + - G323*kzzL - PDstandardNth2kzz + PDstandardNth3kyz) - - 25.13274122871834590770114706623602307358*S2; - - CCTK_REAL M3L = gu11*(G113*kxxL + G213*kxyL + (-G111 + G313)*kxzL - - G211*kyzL - G311*kzzL + PDstandardNth1kxz - PDstandardNth3kxx) + - gu21*(G123*kxxL + (G113 + G223)*kxyL + (-2*G112 + G323)*kxzL + - G213*kyyL + (-2*G212 + G313)*kyzL + PDstandardNth1kyz + - PDstandardNth2kxz - 2*(G312*kzzL + PDstandardNth3kxy)) + - gu31*(G133*kxxL + G233*kxyL + (-G113 + G333)*kxzL - G213*kyzL - - G313*kzzL + PDstandardNth1kzz - PDstandardNth3kxz) + gu22*(G123*kxyL - - G122*kxzL + G223*kyyL + (-G222 + G323)*kyzL - G322*kzzL + - PDstandardNth2kyz - PDstandardNth3kyy) + gu32*(G133*kxyL - G123*kxzL + - G233*kyyL + (-G223 + G333)*kyzL - G323*kzzL + PDstandardNth2kzz - - PDstandardNth3kyz) - 25.13274122871834590770114706623602307358*S3; + CCTK_REAL HL = -2*(Km12*Km21 + Km13*Km31 + Km23*Km32) - 16*Pi*rho + + trR - SQR(Km11) - SQR(Km22) - SQR(Km33) + SQR(trK); + + CCTK_REAL M1L = gu21*(-(kxxL*G112) + kyyL*G211 + kxyL*(G111 - + G212) + kyzL*G311 - kxzL*G312 - PDstandardNth1kxy + + PDstandardNth2kxx) + gu22*(-(kxxL*G122) + kyyL*G212 + kxyL*(G112 + - G222) + kyzL*G312 - kxzL*G322 - PDstandardNth1kyy + + PDstandardNth2kxy) + gu31*(-(kxxL*G113) + kyzL*G211 - kxyL*G213 + + kzzL*G311 + kxzL*(G111 - G313) - PDstandardNth1kxz + + PDstandardNth3kxx) + gu32*(kyyL*G213 + kxyL*(G113 - 2*G223) + + kzzL*G312 + kyzL*(G212 + G313) + kxzL*(G112 - 2*G323) - + 2*(kxxL*G123 + PDstandardNth1kyz) + PDstandardNth2kxz + + PDstandardNth3kxy) + gu33*(-(kxxL*G133) + kyzL*G213 - kxyL*G233 + + kzzL*G313 + kxzL*(G113 - G333) - PDstandardNth1kzz + + PDstandardNth3kxz) - 8*Pi*S1; + + CCTK_REAL M2L = gu11*(kxxL*G112 - kyyL*G211 + kxyL*(-G111 + + G212) - kyzL*G311 + kxzL*G312 + PDstandardNth1kxy - + PDstandardNth2kxx) + gu21*(kxxL*G122 - kyyL*G212 + kxyL*(-G112 + + G222) - kyzL*G312 + kxzL*G322 + PDstandardNth1kyy - + PDstandardNth2kxy) + gu31*(kxxL*G123 + kxyL*G223 + kzzL*G312 + + kyzL*(G212 - 2*G313) + kxzL*(G112 + G323) + PDstandardNth1kyz - + 2*(kxyL*G113 + kyyL*G213 + PDstandardNth2kxz) + PDstandardNth3kxy) + + gu32*(kxzL*G122 - kxyL*G123 - kyyL*G223 + kzzL*G322 + + kyzL*(G222 - G323) - PDstandardNth2kyz + PDstandardNth3kyy) + + gu33*(kxzL*G123 - kxyL*G133 - kyyL*G233 + kzzL*G323 + + kyzL*(G223 - G333) - PDstandardNth2kzz + PDstandardNth3kyz) - + 8*Pi*S2; + + CCTK_REAL M3L = gu11*(kxxL*G113 - kyzL*G211 + kxyL*G213 - + kzzL*G311 + kxzL*(-G111 + G313) + PDstandardNth1kxz - + PDstandardNth3kxx) + gu21*(kxxL*G123 + kyyL*G213 + kxyL*(G113 + + G223) + kyzL*G313 + kxzL*G323 + PDstandardNth1kyz + + PDstandardNth2kxz - 2*(kxzL*G112 + kyzL*G212 + kzzL*G312 + + PDstandardNth3kxy)) + gu31*(kxxL*G133 - kyzL*G213 + kxyL*G233 - + kzzL*G313 + kxzL*(-G113 + G333) + PDstandardNth1kzz - + PDstandardNth3kxz) + gu22*(-(kxzL*G122) + kxyL*G123 + kyyL*G223 - + kzzL*G322 + kyzL*(-G222 + G323) + PDstandardNth2kyz - + PDstandardNth3kyy) + gu32*(-(kxzL*G123) + kxyL*G133 + kyyL*G233 - + kzzL*G323 + kyzL*(-G223 + G333) + PDstandardNth2kzz - + PDstandardNth3kyz) - 8*Pi*S3; /* Copy local copies back to grid functions */ H[index] = HL; @@ -491,7 +499,7 @@ static void ML_ADMConstraints_Body(cGH const * restrict const cctkGH, int const M2[index] = M2L; M3[index] = M3L; } - LC_ENDLOOP3 (ML_ADMConstraints); + CCTK_ENDLOOP3(ML_ADMConstraints); } extern "C" void ML_ADMConstraints(CCTK_ARGUMENTS) @@ -510,12 +518,18 @@ extern "C" void ML_ADMConstraints(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::curv","ADMBase::lapse","ADMBase::metric","ADMBase::shift","ML_ADMConstraints::ML_Ham","ML_ADMConstraints::ML_mom"}; + const char *const groups[] = { + "ADMBase::curv", + "ADMBase::lapse", + "ADMBase::metric", + "ADMBase::shift", + "ML_ADMConstraints::ML_Ham", + "ML_ADMConstraints::ML_mom"}; GenericFD_AssertGroupStorage(cctkGH, "ML_ADMConstraints", 6, groups); GenericFD_EnsureStencilFits(cctkGH, "ML_ADMConstraints", 2, 2, 2); - GenericFD_LoopOverInterior(cctkGH, &ML_ADMConstraints_Body); + GenericFD_LoopOverInterior(cctkGH, ML_ADMConstraints_Body); if (verbose > 1) { diff --git a/ML_ADMConstraints/src/make.code.defn b/ML_ADMConstraints/src/make.code.defn index e9b2afb..d94f02e 100644 --- a/ML_ADMConstraints/src/make.code.defn +++ b/ML_ADMConstraints/src/make.code.defn @@ -1,3 +1,3 @@ # File produced by Kranc -SRCS = Startup.cc RegisterMoL.cc RegisterSymmetries.cc ML_ADMConstraints.cc Boundaries.cc +SRCS = Startup.cc RegisterSymmetries.cc RegisterMoL.cc ML_ADMConstraints.cc Boundaries.cc diff --git a/ML_ADMConstraints_MP/configuration.ccl b/ML_ADMConstraints_MP/configuration.ccl index 8e2c3c5..0a66ec2 100644 --- a/ML_ADMConstraints_MP/configuration.ccl +++ b/ML_ADMConstraints_MP/configuration.ccl @@ -1,4 +1,6 @@ # File produced by Kranc REQUIRES GenericFD -REQUIRES LoopControl +OPTIONAL LoopControl +{ +} diff --git a/ML_ADMConstraints_MP/param.ccl b/ML_ADMConstraints_MP/param.ccl index 4acc5aa..5749429 100644 --- a/ML_ADMConstraints_MP/param.ccl +++ b/ML_ADMConstraints_MP/param.ccl @@ -44,6 +44,12 @@ CCTK_INT rhs_timelevels "Number of active RHS timelevels" STEERABLE=RECOVER } 1 restricted: +CCTK_INT other_timelevels "Number of active timelevels for non-evolved grid functions" STEERABLE=RECOVER +{ + 0:3 :: "" +} 1 + +restricted: CCTK_INT ML_ADMConstraints_MP_calc_every "ML_ADMConstraints_MP_calc_every" STEERABLE=ALWAYS { *:* :: "" diff --git a/ML_ADMConstraints_MP/schedule.ccl b/ML_ADMConstraints_MP/schedule.ccl index 08dbab9..a20f509 100644 --- a/ML_ADMConstraints_MP/schedule.ccl +++ b/ML_ADMConstraints_MP/schedule.ccl @@ -1,9 +1,31 @@ # File produced by Kranc -STORAGE: ML_Ham[3] +if (timelevels == 1) +{ + STORAGE: ML_Ham[1] +} +if (timelevels == 2) +{ + STORAGE: ML_Ham[2] +} +if (timelevels == 3) +{ + STORAGE: ML_Ham[3] +} -STORAGE: ML_mom[3] +if (timelevels == 1) +{ + STORAGE: ML_mom[1] +} +if (timelevels == 2) +{ + STORAGE: ML_mom[2] +} +if (timelevels == 3) +{ + STORAGE: ML_mom[3] +} schedule ML_ADMConstraints_MP_Startup at STARTUP { @@ -11,12 +33,6 @@ schedule ML_ADMConstraints_MP_Startup at STARTUP OPTIONS: meta } "create banner" -schedule ML_ADMConstraints_MP_RegisterVars in MoL_Register -{ - LANG: C - OPTIONS: meta -} "Register Variables for MoL" - schedule ML_ADMConstraints_MP_RegisterSymmetries in SymmetryRegister { LANG: C @@ -31,6 +47,15 @@ schedule group ML_ADMConstraints_MP_group in MoL_PseudoEvolution after MoL_PostS schedule ML_ADMConstraints_MP in ML_ADMConstraints_MP_group { LANG: C + READS: ADMBase::curv + READS: ADMBase::lapse + READS: ADMBase::metric + READS: ADMBase::shift + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_ADMConstraints_MP::ML_Ham + WRITES: ML_ADMConstraints_MP::ML_mom } "ML_ADMConstraints_MP" schedule ML_ADMConstraints_MP_SelectBCs in ML_ADMConstraints_MP_bc_group @@ -68,6 +93,12 @@ schedule ML_ADMConstraints_MP_CheckBoundaries at BASEGRID OPTIONS: meta } "check boundaries treatment" +schedule ML_ADMConstraints_MP_RegisterVars in MoL_Register +{ + LANG: C + OPTIONS: meta +} "Register Variables for MoL" + schedule group ApplyBCs as ML_ADMConstraints_MP_ApplyBCs in MoL_PostStep after ML_ADMConstraints_MP_SelectBoundConds { # no language specified diff --git a/ML_ADMConstraints_MP/src/ML_ADMConstraints_MP.cc b/ML_ADMConstraints_MP/src/ML_ADMConstraints_MP.cc index 572a0d2..85f83c7 100644 --- a/ML_ADMConstraints_MP/src/ML_ADMConstraints_MP.cc +++ b/ML_ADMConstraints_MP/src/ML_ADMConstraints_MP.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" /* Define macros used in calculations */ @@ -42,8 +43,6 @@ static void ML_ADMConstraints_MP_Body(cGH const * restrict const cctkGH, int con DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -74,9 +73,9 @@ static void ML_ADMConstraints_MP_Body(cGH const * restrict const cctkGH, int con CCTK_REAL const p1o12dx = 0.0833333333333333333333333333333*INV(dx); CCTK_REAL const p1o12dy = 0.0833333333333333333333333333333*INV(dy); CCTK_REAL const p1o12dz = 0.0833333333333333333333333333333*INV(dz); - CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx)*INV(dy); - CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx)*INV(dz); - CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy)*INV(dz); + CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx*dy); + CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx*dz); + CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy*dz); CCTK_REAL const pm1o12dx2 = -0.0833333333333333333333333333333*INV(SQR(dx)); CCTK_REAL const pm1o12dy2 = -0.0833333333333333333333333333333*INV(SQR(dy)); CCTK_REAL const pm1o12dz2 = -0.0833333333333333333333333333333*INV(SQR(dz)); @@ -136,7 +135,7 @@ static void ML_ADMConstraints_MP_Body(cGH const * restrict const cctkGH, int con /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3 (ML_ADMConstraints_MP, + CCTK_LOOP3(ML_ADMConstraints_MP, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2]) { @@ -381,104 +380,104 @@ static void ML_ADMConstraints_MP_Body(cGH const * restrict const cctkGH, int con if (use_jacobian) { - JacPDstandardNth1gxx = J11L*PDstandardNth1gxx + J21L*PDstandardNth2gxx - + J31L*PDstandardNth3gxx; + JacPDstandardNth1gxx = J11L*PDstandardNth1gxx + + J21L*PDstandardNth2gxx + J31L*PDstandardNth3gxx; - JacPDstandardNth1gxy = J11L*PDstandardNth1gxy + J21L*PDstandardNth2gxy - + J31L*PDstandardNth3gxy; + JacPDstandardNth1gxy = J11L*PDstandardNth1gxy + + J21L*PDstandardNth2gxy + J31L*PDstandardNth3gxy; - JacPDstandardNth1gxz = J11L*PDstandardNth1gxz + J21L*PDstandardNth2gxz - + J31L*PDstandardNth3gxz; + JacPDstandardNth1gxz = J11L*PDstandardNth1gxz + + J21L*PDstandardNth2gxz + J31L*PDstandardNth3gxz; - JacPDstandardNth1gyy = J11L*PDstandardNth1gyy + J21L*PDstandardNth2gyy - + J31L*PDstandardNth3gyy; + JacPDstandardNth1gyy = J11L*PDstandardNth1gyy + + J21L*PDstandardNth2gyy + J31L*PDstandardNth3gyy; - JacPDstandardNth1gyz = J11L*PDstandardNth1gyz + J21L*PDstandardNth2gyz - + J31L*PDstandardNth3gyz; + JacPDstandardNth1gyz = J11L*PDstandardNth1gyz + + J21L*PDstandardNth2gyz + J31L*PDstandardNth3gyz; - JacPDstandardNth1gzz = J11L*PDstandardNth1gzz + J21L*PDstandardNth2gzz - + J31L*PDstandardNth3gzz; + JacPDstandardNth1gzz = J11L*PDstandardNth1gzz + + J21L*PDstandardNth2gzz + J31L*PDstandardNth3gzz; - JacPDstandardNth1kxy = J11L*PDstandardNth1kxy + J21L*PDstandardNth2kxy - + J31L*PDstandardNth3kxy; + JacPDstandardNth1kxy = J11L*PDstandardNth1kxy + + J21L*PDstandardNth2kxy + J31L*PDstandardNth3kxy; - JacPDstandardNth1kxz = J11L*PDstandardNth1kxz + J21L*PDstandardNth2kxz - + J31L*PDstandardNth3kxz; + JacPDstandardNth1kxz = J11L*PDstandardNth1kxz + + J21L*PDstandardNth2kxz + J31L*PDstandardNth3kxz; - JacPDstandardNth1kyy = J11L*PDstandardNth1kyy + J21L*PDstandardNth2kyy - + J31L*PDstandardNth3kyy; + JacPDstandardNth1kyy = J11L*PDstandardNth1kyy + + J21L*PDstandardNth2kyy + J31L*PDstandardNth3kyy; - JacPDstandardNth1kyz = J11L*PDstandardNth1kyz + J21L*PDstandardNth2kyz - + J31L*PDstandardNth3kyz; + JacPDstandardNth1kyz = J11L*PDstandardNth1kyz + + J21L*PDstandardNth2kyz + J31L*PDstandardNth3kyz; - JacPDstandardNth1kzz = J11L*PDstandardNth1kzz + J21L*PDstandardNth2kzz - + J31L*PDstandardNth3kzz; + JacPDstandardNth1kzz = J11L*PDstandardNth1kzz + + J21L*PDstandardNth2kzz + J31L*PDstandardNth3kzz; - JacPDstandardNth2gxx = J12L*PDstandardNth1gxx + J22L*PDstandardNth2gxx - + J32L*PDstandardNth3gxx; + JacPDstandardNth2gxx = J12L*PDstandardNth1gxx + + J22L*PDstandardNth2gxx + J32L*PDstandardNth3gxx; - JacPDstandardNth2gxy = J12L*PDstandardNth1gxy + J22L*PDstandardNth2gxy - + J32L*PDstandardNth3gxy; + JacPDstandardNth2gxy = J12L*PDstandardNth1gxy + + J22L*PDstandardNth2gxy + J32L*PDstandardNth3gxy; - JacPDstandardNth2gxz = J12L*PDstandardNth1gxz + J22L*PDstandardNth2gxz - + J32L*PDstandardNth3gxz; + JacPDstandardNth2gxz = J12L*PDstandardNth1gxz + + J22L*PDstandardNth2gxz + J32L*PDstandardNth3gxz; - JacPDstandardNth2gyy = J12L*PDstandardNth1gyy + J22L*PDstandardNth2gyy - + J32L*PDstandardNth3gyy; + JacPDstandardNth2gyy = J12L*PDstandardNth1gyy + + J22L*PDstandardNth2gyy + J32L*PDstandardNth3gyy; - JacPDstandardNth2gyz = J12L*PDstandardNth1gyz + J22L*PDstandardNth2gyz - + J32L*PDstandardNth3gyz; + JacPDstandardNth2gyz = J12L*PDstandardNth1gyz + + J22L*PDstandardNth2gyz + J32L*PDstandardNth3gyz; - JacPDstandardNth2gzz = J12L*PDstandardNth1gzz + J22L*PDstandardNth2gzz - + J32L*PDstandardNth3gzz; + JacPDstandardNth2gzz = J12L*PDstandardNth1gzz + + J22L*PDstandardNth2gzz + J32L*PDstandardNth3gzz; - JacPDstandardNth2kxx = J12L*PDstandardNth1kxx + J22L*PDstandardNth2kxx - + J32L*PDstandardNth3kxx; + JacPDstandardNth2kxx = J12L*PDstandardNth1kxx + + J22L*PDstandardNth2kxx + J32L*PDstandardNth3kxx; - JacPDstandardNth2kxy = J12L*PDstandardNth1kxy + J22L*PDstandardNth2kxy - + J32L*PDstandardNth3kxy; + JacPDstandardNth2kxy = J12L*PDstandardNth1kxy + + J22L*PDstandardNth2kxy + J32L*PDstandardNth3kxy; - JacPDstandardNth2kxz = J12L*PDstandardNth1kxz + J22L*PDstandardNth2kxz - + J32L*PDstandardNth3kxz; + JacPDstandardNth2kxz = J12L*PDstandardNth1kxz + + J22L*PDstandardNth2kxz + J32L*PDstandardNth3kxz; - JacPDstandardNth2kyz = J12L*PDstandardNth1kyz + J22L*PDstandardNth2kyz - + J32L*PDstandardNth3kyz; + JacPDstandardNth2kyz = J12L*PDstandardNth1kyz + + J22L*PDstandardNth2kyz + J32L*PDstandardNth3kyz; - JacPDstandardNth2kzz = J12L*PDstandardNth1kzz + J22L*PDstandardNth2kzz - + J32L*PDstandardNth3kzz; + JacPDstandardNth2kzz = J12L*PDstandardNth1kzz + + J22L*PDstandardNth2kzz + J32L*PDstandardNth3kzz; - JacPDstandardNth3gxx = J13L*PDstandardNth1gxx + J23L*PDstandardNth2gxx - + J33L*PDstandardNth3gxx; + JacPDstandardNth3gxx = J13L*PDstandardNth1gxx + + J23L*PDstandardNth2gxx + J33L*PDstandardNth3gxx; - JacPDstandardNth3gxy = J13L*PDstandardNth1gxy + J23L*PDstandardNth2gxy - + J33L*PDstandardNth3gxy; + JacPDstandardNth3gxy = J13L*PDstandardNth1gxy + + J23L*PDstandardNth2gxy + J33L*PDstandardNth3gxy; - JacPDstandardNth3gxz = J13L*PDstandardNth1gxz + J23L*PDstandardNth2gxz - + J33L*PDstandardNth3gxz; + JacPDstandardNth3gxz = J13L*PDstandardNth1gxz + + J23L*PDstandardNth2gxz + J33L*PDstandardNth3gxz; - JacPDstandardNth3gyy = J13L*PDstandardNth1gyy + J23L*PDstandardNth2gyy - + J33L*PDstandardNth3gyy; + JacPDstandardNth3gyy = J13L*PDstandardNth1gyy + + J23L*PDstandardNth2gyy + J33L*PDstandardNth3gyy; - JacPDstandardNth3gyz = J13L*PDstandardNth1gyz + J23L*PDstandardNth2gyz - + J33L*PDstandardNth3gyz; + JacPDstandardNth3gyz = J13L*PDstandardNth1gyz + + J23L*PDstandardNth2gyz + J33L*PDstandardNth3gyz; - JacPDstandardNth3gzz = J13L*PDstandardNth1gzz + J23L*PDstandardNth2gzz - + J33L*PDstandardNth3gzz; + JacPDstandardNth3gzz = J13L*PDstandardNth1gzz + + J23L*PDstandardNth2gzz + J33L*PDstandardNth3gzz; - JacPDstandardNth3kxx = J13L*PDstandardNth1kxx + J23L*PDstandardNth2kxx - + J33L*PDstandardNth3kxx; + JacPDstandardNth3kxx = J13L*PDstandardNth1kxx + + J23L*PDstandardNth2kxx + J33L*PDstandardNth3kxx; - JacPDstandardNth3kxy = J13L*PDstandardNth1kxy + J23L*PDstandardNth2kxy - + J33L*PDstandardNth3kxy; + JacPDstandardNth3kxy = J13L*PDstandardNth1kxy + + J23L*PDstandardNth2kxy + J33L*PDstandardNth3kxy; - JacPDstandardNth3kxz = J13L*PDstandardNth1kxz + J23L*PDstandardNth2kxz - + J33L*PDstandardNth3kxz; + JacPDstandardNth3kxz = J13L*PDstandardNth1kxz + + J23L*PDstandardNth2kxz + J33L*PDstandardNth3kxz; - JacPDstandardNth3kyy = J13L*PDstandardNth1kyy + J23L*PDstandardNth2kyy - + J33L*PDstandardNth3kyy; + JacPDstandardNth3kyy = J13L*PDstandardNth1kyy + + J23L*PDstandardNth2kyy + J33L*PDstandardNth3kyy; - JacPDstandardNth3kyz = J13L*PDstandardNth1kyz + J23L*PDstandardNth2kyz - + J33L*PDstandardNth3kyz; + JacPDstandardNth3kyz = J13L*PDstandardNth1kyz + + J23L*PDstandardNth2kyz + J33L*PDstandardNth3kyz; JacPDstandardNth11gyy = dJ111L*PDstandardNth1gyy + 2*(J11L*(J21L*PDstandardNth12gyy + J31L*PDstandardNth13gyy) + @@ -981,8 +980,8 @@ static void ML_ADMConstraints_MP_Body(cGH const * restrict const cctkGH, int con JacPDstandardNth32gzz = PDstandardNth23gzz; } - CCTK_REAL detg = 2*gxyL*gxzL*gyzL + gzzL*(gxxL*gyyL - SQR(gxyL)) - - gyyL*SQR(gxzL) - gxxL*SQR(gyzL); + CCTK_REAL detg = 2*gxyL*gxzL*gyzL + gzzL*(gxxL*gyyL - + SQR(gxyL)) - gyyL*SQR(gxzL) - gxxL*SQR(gyzL); CCTK_REAL gu11 = INV(detg)*(gyyL*gzzL - SQR(gyzL)); @@ -1070,25 +1069,25 @@ static void ML_ADMConstraints_MP_Body(cGH const * restrict const cctkGH, int con CCTK_REAL R11 = 0.5*(gu21*(-JacPDstandardNth12gxx + JacPDstandardNth21gxx) + gu31*(-JacPDstandardNth13gxx + - JacPDstandardNth31gxx) + gu32*(4*((-(G123*G211) + G113*G212)*gxyL + - (-(G123*G311) + G113*G312)*gxzL + G112*(G113*gxxL + G213*gxyL + - G313*gxzL) - G111*(G123*gxxL + G223*gxyL + G323*gxzL) + (G212*G213 - - G211*G223)*gyyL + G212*G313*gyzL + (-(G223*G311) + G213*G312 - - G211*G323)*gyzL + (G312*G313 - G311*G323)*gzzL) - JacPDstandardNth11gyz - + JacPDstandardNth21gxz - JacPDstandardNth23gxx + + JacPDstandardNth31gxx) + gu32*(4*(gxyL*(-(G123*G211) + G113*G212) + + gyyL*(G212*G213 - G211*G223) + gxzL*G113*G312 + gyzL*G212*G313 + + gzzL*G312*G313 + G112*(gxxL*G113 + gxyL*G213 + gxzL*G313) - + G111*(gxxL*G123 + gxyL*G223 + gxzL*G323) + G311*(-(gxzL*G123) - + gyzL*G223 - gzzL*G323) + gyzL*(G213*G312 - G211*G323)) - + JacPDstandardNth11gyz + JacPDstandardNth21gxz - JacPDstandardNth23gxx + JacPDstandardNth31gxy) + gu32*(-JacPDstandardNth11gyz + JacPDstandardNth21gxz + JacPDstandardNth31gxy - JacPDstandardNth32gxx) + gu22*(-JacPDstandardNth11gyy + 2*JacPDstandardNth21gxy - - JacPDstandardNth22gxx + 2*(G122*(-(G211*gxyL) - G311*gxzL) + - 2*G112*(G212*gxyL + G312*gxzL) - G111*(G122*gxxL + G222*gxyL + - G322*gxzL) + (2*G212*G312 - G211*G322)*gyzL + G222*(-(G211*gyyL) - - G311*gyzL) + gxxL*SQR(G112) + gyyL*SQR(G212) + gzzL*(-(G311*G322) + - SQR(G312)))) + gu33*(-JacPDstandardNth11gzz + 2*JacPDstandardNth31gxz - - JacPDstandardNth33gxx + 2*(G133*(-(G211*gxyL) - G311*gxzL) + - 2*G113*(G213*gxyL + G313*gxzL) - G111*(G133*gxxL + G233*gxyL + - G333*gxzL) + (2*G213*G313 - G211*G333)*gyzL + G233*(-(G211*gyyL) - - G311*gyzL) + gxxL*SQR(G113) + gyyL*SQR(G213) + gzzL*(-(G311*G333) + - SQR(G313))))); + JacPDstandardNth22gxx + 2*(2*gyzL*G212*G312 + 2*G112*(gxyL*G212 + + gxzL*G312) - G111*(gxxL*G122 + gxyL*G222 + gxzL*G322) + + G211*(-(gxyL*G122) - gyyL*G222 - gyzL*G322) + + G311*(-(gxzL*G122) - gyzL*G222 - gzzL*G322) + gxxL*SQR(G112) + + gyyL*SQR(G212) + gzzL*SQR(G312))) + gu33*(-JacPDstandardNth11gzz + + 2*JacPDstandardNth31gxz - JacPDstandardNth33gxx + 2*(2*gyzL*G213*G313 + + 2*G113*(gxyL*G213 + gxzL*G313) - G111*(gxxL*G133 + gxyL*G233 + + gxzL*G333) + G211*(-(gxyL*G133) - gyyL*G233 - gyzL*G333) + + G311*(-(gxzL*G133) - gyzL*G233 - gzzL*G333) + gxxL*SQR(G113) + + gyyL*SQR(G213) + gzzL*SQR(G313)))); CCTK_REAL R12 = 0.5*(gu22*(-JacPDstandardNth12gyy + JacPDstandardNth21gyy) + gu21*(JacPDstandardNth11gyy - @@ -1098,24 +1097,26 @@ static void ML_ADMConstraints_MP_Body(cGH const * restrict const cctkGH, int con + gu31*(JacPDstandardNth11gyz - JacPDstandardNth12gxz - JacPDstandardNth13gxy + JacPDstandardNth32gxx) + gu33*(-JacPDstandardNth12gzz + JacPDstandardNth31gyz + - JacPDstandardNth32gxz - JacPDstandardNth33gxy) + 2*(gu31*((G123*G211 - - G113*G212)*gxyL + (G123*G311 - G113*G312)*gxzL - G112*(G113*gxxL + - G213*gxyL + G313*gxzL) + G111*(G123*gxxL + G223*gxyL + G323*gxzL) + - (-(G212*G213) + G211*G223)*gyyL + (G223*G311 - G213*G312 - - G212*G313)*gyzL + G211*G323*gyzL + (-(G312*G313) + G311*G323)*gzzL) + - gu32*((-(G123*G212) + G122*G213)*gxyL + (-(G123*G312) + G122*G313)*gxzL - + G113*(G122*gxxL + G222*gxyL + G322*gxzL) - G112*(G123*gxxL + - G223*gxyL + G323*gxzL) + (G213*G222 - G212*G223)*gyyL + G213*G322*gyzL - + (-(G223*G312) + G222*G313 - G212*G323)*gyzL + (G313*G322 - - G312*G323)*gzzL) + gu33*((-(G133*G212) + G123*G213)*gxyL + - (-(G133*G312) + G123*G313)*gxzL + G113*(G123*gxxL + G223*gxyL + - G323*gxzL) - G112*(G133*gxxL + G233*gxyL + G333*gxzL) + (G213*G223 - - G212*G233)*gyyL + G213*G323*gyzL + (-(G233*G312) + G223*G313 - - G212*G333)*gyzL + (G313*G323 - G312*G333)*gzzL) + gu21*(G122*(G211*gxyL - + G311*gxzL) + G111*(G122*gxxL + G222*gxyL + G322*gxzL) + - G222*(G211*gyyL + G311*gyzL) - 2*(G112*(G212*gxyL + G312*gxzL) + - G212*G312*gyzL) + G322*(G211*gyzL + G311*gzzL) - gxxL*SQR(G112) - - gyyL*SQR(G212) - gzzL*SQR(G312)))); + JacPDstandardNth32gxz - JacPDstandardNth33gxy) + 2*((gxyL*(G123*G211 + - G113*G212) + gyyL*(-(G212*G213) + G211*G223) - G112*(gxxL*G113 + + gxyL*G213 + gxzL*G313) + G312*(-(gxzL*G113) - gyzL*G213 - + gzzL*G313) + G111*(gxxL*G123 + gxyL*G223 + gxzL*G323) + + G311*(gxzL*G123 + gyzL*G223 + gzzL*G323) + gyzL*(-(G212*G313) + + G211*G323))*gu31 + (gxyL*(-(G123*G212) + G122*G213) + + gyyL*(G213*G222 - G212*G223) + gxzL*G122*G313 + gyzL*G213*G322 + + gzzL*G313*G322 + G113*(gxxL*G122 + gxyL*G222 + gxzL*G322) - + G112*(gxxL*G123 + gxyL*G223 + gxzL*G323) + G312*(-(gxzL*G123) - + gyzL*G223 - gzzL*G323) + gyzL*(G222*G313 - G212*G323))*gu32 + + (gxyL*(-(G133*G212) + G123*G213) + gyyL*(G213*G223 - G212*G233) + + gxzL*G123*G313 + gyzL*G213*G323 + gzzL*G313*G323 + + G113*(gxxL*G123 + gxyL*G223 + gxzL*G323) - G112*(gxxL*G133 + + gxyL*G233 + gxzL*G333) + G312*(-(gxzL*G133) - gyzL*G233 - + gzzL*G333) + gyzL*(G223*G313 - G212*G333))*gu33 + + gu21*(-2*(gyzL*G212*G312 + G112*(gxyL*G212 + gxzL*G312)) + + G111*(gxxL*G122 + gxyL*G222 + gxzL*G322) + G211*(gxyL*G122 + + gyyL*G222 + gyzL*G322) + G311*(gxzL*G122 + gyzL*G222 + + gzzL*G322) - gxxL*SQR(G112) - gyyL*SQR(G212) - + gzzL*SQR(G312)))); CCTK_REAL R13 = 0.5*(gu21*(JacPDstandardNth11gyz - JacPDstandardNth12gxz - JacPDstandardNth13gxy + JacPDstandardNth23gxx) @@ -1125,45 +1126,47 @@ static void ML_ADMConstraints_MP_Body(cGH const * restrict const cctkGH, int con gu31*(JacPDstandardNth11gzz - 2*JacPDstandardNth13gxz + JacPDstandardNth33gxx) + gu32*(-2*JacPDstandardNth13gyz + JacPDstandardNth21gzz + JacPDstandardNth31gyz - JacPDstandardNth32gxz + - JacPDstandardNth33gxy) + 2*(gu21*((G123*G211 - G113*G212)*gxyL + - (G123*G311 - G113*G312)*gxzL - G112*(G113*gxxL + G213*gxyL + G313*gxzL) - + G111*(G123*gxxL + G223*gxyL + G323*gxzL) + (-(G212*G213) + - G211*G223)*gyyL + (G223*G311 - G213*G312 - G212*G313)*gyzL + - G211*G323*gyzL + (-(G312*G313) + G311*G323)*gzzL) + gu22*((G123*G212 - - G122*G213)*gxyL + (G123*G312 - G122*G313)*gxzL - G113*(G122*gxxL + - G222*gxyL + G322*gxzL) + G112*(G123*gxxL + G223*gxyL + G323*gxzL) + - (-(G213*G222) + G212*G223)*gyyL + (G223*G312 - G222*G313 - - G213*G322)*gyzL + G212*G323*gyzL + (-(G313*G322) + G312*G323)*gzzL) + - gu32*((G133*G212 - G123*G213)*gxyL + (G133*G312 - G123*G313)*gxzL - - G113*(G123*gxxL + G223*gxyL + G323*gxzL) + G112*(G133*gxxL + G233*gxyL - + G333*gxzL) + (-(G213*G223) + G212*G233)*gyyL + (G233*G312 - G223*G313 - - G213*G323)*gyzL + G212*G333*gyzL + (-(G313*G323) + G312*G333)*gzzL) + - gu31*(G133*(G211*gxyL + G311*gxzL) + G111*(G133*gxxL + G233*gxyL + - G333*gxzL) + G233*(G211*gyyL + G311*gyzL) - 2*(G113*(G213*gxyL + - G313*gxzL) + G213*G313*gyzL) + G333*(G211*gyzL + G311*gzzL) - - gxxL*SQR(G113) - gyyL*SQR(G213) - gzzL*SQR(G313)))); + JacPDstandardNth33gxy) + 2*((gxyL*(G123*G211 - G113*G212) + + gyyL*(-(G212*G213) + G211*G223) - G112*(gxxL*G113 + gxyL*G213 + + gxzL*G313) + G312*(-(gxzL*G113) - gyzL*G213 - gzzL*G313) + + G111*(gxxL*G123 + gxyL*G223 + gxzL*G323) + G311*(gxzL*G123 + + gyzL*G223 + gzzL*G323) + gyzL*(-(G212*G313) + G211*G323))*gu21 + + (gxyL*(G123*G212 - G122*G213) + gyyL*(-(G213*G222) + G212*G223) - + G113*(gxxL*G122 + gxyL*G222 + gxzL*G322) + G313*(-(gxzL*G122) - + gyzL*G222 - gzzL*G322) + G112*(gxxL*G123 + gxyL*G223 + + gxzL*G323) + G312*(gxzL*G123 + gyzL*G223 + gzzL*G323) + + gyzL*(-(G213*G322) + G212*G323))*gu22 + (gxyL*(G133*G212 - + G123*G213) + gyyL*(-(G213*G223) + G212*G233) - G113*(gxxL*G123 + + gxyL*G223 + gxzL*G323) + G313*(-(gxzL*G123) - gyzL*G223 - + gzzL*G323) + G112*(gxxL*G133 + gxyL*G233 + gxzL*G333) + + G312*(gxzL*G133 + gyzL*G233 + gzzL*G333) + gyzL*(-(G213*G323) + + G212*G333))*gu32 + gu31*(-2*(gyzL*G213*G313 + G113*(gxyL*G213 + + gxzL*G313)) + G111*(gxxL*G133 + gxyL*G233 + gxzL*G333) + + G211*(gxyL*G133 + gyyL*G233 + gyzL*G333) + G311*(gxzL*G133 + + gyzL*G233 + gzzL*G333) - gxxL*SQR(G113) - gyyL*SQR(G213) - + gzzL*SQR(G313)))); CCTK_REAL R22 = 0.5*(gu21*(JacPDstandardNth12gyy - - JacPDstandardNth21gyy) + gu31*(4*((G123*G212 - G122*G213)*gxyL + - (G123*G312 - G122*G313)*gxzL - G113*(G122*gxxL + G222*gxyL + G322*gxzL) - + G112*(G123*gxxL + G223*gxyL + G323*gxzL) + (-(G213*G222) + - G212*G223)*gyyL + (G223*G312 - G222*G313 - G213*G322)*gyzL + - G212*G323*gyzL + (-(G313*G322) + G312*G323)*gzzL) + + JacPDstandardNth21gyy) + gu31*(4*(gxyL*(G123*G212 - G122*G213) + + gyyL*(-(G213*G222) + G212*G223) + gxzL*(G123*G312 - G122*G313) - + G113*(gxxL*G122 + gxyL*G222 + gxzL*G322) + gyzL*(G223*G312 - + G222*G313 - G213*G322) + gyzL*G212*G323 + G112*(gxxL*G123 + + gxyL*G223 + gxzL*G323) + gzzL*(-(G313*G322) + G312*G323)) + JacPDstandardNth12gyz - JacPDstandardNth13gyy - JacPDstandardNth22gxz + JacPDstandardNth32gxy) + gu31*(JacPDstandardNth12gyz - JacPDstandardNth22gxz - JacPDstandardNth31gyy + JacPDstandardNth32gxy) + gu32*(-JacPDstandardNth23gyy + JacPDstandardNth32gyy) + gu11*(-JacPDstandardNth11gyy + 2*JacPDstandardNth12gxy - - JacPDstandardNth22gxx + 2*(G122*(-(G211*gxyL) - G311*gxzL) + - 2*G112*(G212*gxyL + G312*gxzL) - G111*(G122*gxxL + G222*gxyL + - G322*gxzL) + (2*G212*G312 - G211*G322)*gyzL + G222*(-(G211*gyyL) - - G311*gyzL) + gxxL*SQR(G112) + gyyL*SQR(G212) + gzzL*(-(G311*G322) + - SQR(G312)))) + gu33*(-JacPDstandardNth22gzz + 2*JacPDstandardNth32gyz - - JacPDstandardNth33gyy + 2*(G133*(-(G222*gxyL) - G322*gxzL) + - 2*G123*(G223*gxyL + G323*gxzL) - G122*(G133*gxxL + G233*gxyL + - G333*gxzL) + (2*G223*G323 - G222*G333)*gyzL + G233*(-(G222*gyyL) - - G322*gyzL) + gxxL*SQR(G123) + gyyL*SQR(G223) + gzzL*(-(G322*G333) + - SQR(G323))))); + JacPDstandardNth22gxx + 2*(2*gyzL*G212*G312 + 2*G112*(gxyL*G212 + + gxzL*G312) - G111*(gxxL*G122 + gxyL*G222 + gxzL*G322) + + G211*(-(gxyL*G122) - gyyL*G222 - gyzL*G322) + + G311*(-(gxzL*G122) - gyzL*G222 - gzzL*G322) + gxxL*SQR(G112) + + gyyL*SQR(G212) + gzzL*SQR(G312))) + gu33*(-JacPDstandardNth22gzz + + 2*JacPDstandardNth32gyz - JacPDstandardNth33gyy + 2*(2*gyzL*G223*G323 + + 2*G123*(gxyL*G223 + gxzL*G323) - G122*(gxxL*G133 + gxyL*G233 + + gxzL*G333) + G222*(-(gxyL*G133) - gyyL*G233 - gyzL*G333) + + G322*(-(gxzL*G133) - gyzL*G233 - gzzL*G333) + gxxL*SQR(G123) + + gyyL*SQR(G223) + gzzL*SQR(G323)))); CCTK_REAL R23 = 0.5*(gu11*(-JacPDstandardNth11gyz + JacPDstandardNth12gxz + JacPDstandardNth13gxy - JacPDstandardNth23gxx) @@ -1173,73 +1176,75 @@ static void ML_ADMConstraints_MP_Body(cGH const * restrict const cctkGH, int con gu31*(JacPDstandardNth12gzz - 2*JacPDstandardNth23gxz - JacPDstandardNth31gyz + JacPDstandardNth32gxz + JacPDstandardNth33gxy) + gu32*(JacPDstandardNth22gzz - 2*JacPDstandardNth23gyz + - JacPDstandardNth33gyy) + 2*(gu11*((-(G123*G211) + G113*G212)*gxyL + - (-(G123*G311) + G113*G312)*gxzL + G112*(G113*gxxL + G213*gxyL + - G313*gxzL) - G111*(G123*gxxL + G223*gxyL + G323*gxzL) + (G212*G213 - - G211*G223)*gyyL + G212*G313*gyzL + (-(G223*G311) + G213*G312 - - G211*G323)*gyzL + (G312*G313 - G311*G323)*gzzL) + gu21*((-(G123*G212) + - G122*G213)*gxyL + (-(G123*G312) + G122*G313)*gxzL + G113*(G122*gxxL + - G222*gxyL + G322*gxzL) - G112*(G123*gxxL + G223*gxyL + G323*gxzL) + - (G213*G222 - G212*G223)*gyyL + G213*G322*gyzL + (-(G223*G312) + - G222*G313 - G212*G323)*gyzL + (G313*G322 - G312*G323)*gzzL) + - gu31*((G133*G212 - G123*G213)*gxyL + (G133*G312 - G123*G313)*gxzL - - G113*(G123*gxxL + G223*gxyL + G323*gxzL) + G112*(G133*gxxL + G233*gxyL - + G333*gxzL) + (-(G213*G223) + G212*G233)*gyyL + (G233*G312 - G223*G313 - - G213*G323)*gyzL + G212*G333*gyzL + (-(G313*G323) + G312*G333)*gzzL) + - gu32*(G133*(G222*gxyL + G322*gxzL) + G122*(G133*gxxL + G233*gxyL + - G333*gxzL) + G233*(G222*gyyL + G322*gyzL) - 2*(G123*(G223*gxyL + - G323*gxzL) + G223*G323*gyzL) + G333*(G222*gyzL + G322*gzzL) - - gxxL*SQR(G123) - gyyL*SQR(G223) - gzzL*SQR(G323)))); + JacPDstandardNth33gyy) + 2*((gxyL*(-(G123*G211) + G113*G212) + + gyyL*(G212*G213 - G211*G223) + gxzL*G113*G312 + gyzL*G212*G313 + + gzzL*G312*G313 + G112*(gxxL*G113 + gxyL*G213 + gxzL*G313) - + G111*(gxxL*G123 + gxyL*G223 + gxzL*G323) + G311*(-(gxzL*G123) - + gyzL*G223 - gzzL*G323) + gyzL*(G213*G312 - G211*G323))*gu11 + + (gxyL*(-(G123*G212) + G122*G213) + gyyL*(G213*G222 - G212*G223) + + gxzL*G122*G313 + gyzL*G213*G322 + gzzL*G313*G322 + + G113*(gxxL*G122 + gxyL*G222 + gxzL*G322) - G112*(gxxL*G123 + + gxyL*G223 + gxzL*G323) + G312*(-(gxzL*G123) - gyzL*G223 - + gzzL*G323) + gyzL*(G222*G313 - G212*G323))*gu21 + + (gxyL*(G133*G212 - G123*G213) + gyyL*(-(G213*G223) + G212*G233) - + G113*(gxxL*G123 + gxyL*G223 + gxzL*G323) + G313*(-(gxzL*G123) - + gyzL*G223 - gzzL*G323) + G112*(gxxL*G133 + gxyL*G233 + + gxzL*G333) + G312*(gxzL*G133 + gyzL*G233 + gzzL*G333) + + gyzL*(-(G213*G323) + G212*G333))*gu31 + gu32*(-2*(gyzL*G223*G323 + + G123*(gxyL*G223 + gxzL*G323)) + G122*(gxxL*G133 + gxyL*G233 + + gxzL*G333) + G222*(gxyL*G133 + gyyL*G233 + gyzL*G333) + + G322*(gxzL*G133 + gyzL*G233 + gzzL*G333) - gxxL*SQR(G123) - + gyyL*SQR(G223) - gzzL*SQR(G323)))); CCTK_REAL R33 = 0.5*(gu31*(JacPDstandardNth13gzz - JacPDstandardNth31gzz) + gu32*(JacPDstandardNth23gzz - - JacPDstandardNth32gzz) + gu21*(4*((-(G133*G212) + G123*G213)*gxyL + - (-(G133*G312) + G123*G313)*gxzL + G113*(G123*gxxL + G223*gxyL + - G323*gxzL) - G112*(G133*gxxL + G233*gxyL + G333*gxzL) + (G213*G223 - - G212*G233)*gyyL + G213*G323*gyzL + (-(G233*G312) + G223*G313 - - G212*G333)*gyzL + (G313*G323 - G312*G333)*gzzL) - JacPDstandardNth12gzz - + JacPDstandardNth13gyz + JacPDstandardNth23gxz - + JacPDstandardNth32gzz) + gu21*(4*(gxyL*(-(G133*G212) + G123*G213) + + gyyL*(G213*G223 - G212*G233) + gxzL*G123*G313 + gyzL*G213*G323 + + gzzL*G313*G323 + G113*(gxxL*G123 + gxyL*G223 + gxzL*G323) - + G112*(gxxL*G133 + gxyL*G233 + gxzL*G333) + G312*(-(gxzL*G133) - + gyzL*G233 - gzzL*G333) + gyzL*(G223*G313 - G212*G333)) - + JacPDstandardNth12gzz + JacPDstandardNth13gyz + JacPDstandardNth23gxz - JacPDstandardNth33gxy) + gu21*(JacPDstandardNth13gyz - JacPDstandardNth21gzz + JacPDstandardNth23gxz - JacPDstandardNth33gxy) + gu11*(-JacPDstandardNth11gzz + 2*JacPDstandardNth13gxz - - JacPDstandardNth33gxx + 2*(G133*(-(G211*gxyL) - G311*gxzL) + - 2*G113*(G213*gxyL + G313*gxzL) - G111*(G133*gxxL + G233*gxyL + - G333*gxzL) + (2*G213*G313 - G211*G333)*gyzL + G233*(-(G211*gyyL) - - G311*gyzL) + gxxL*SQR(G113) + gyyL*SQR(G213) + gzzL*(-(G311*G333) + - SQR(G313)))) + gu22*(-JacPDstandardNth22gzz + 2*JacPDstandardNth23gyz - - JacPDstandardNth33gyy + 2*(G133*(-(G222*gxyL) - G322*gxzL) + - 2*G123*(G223*gxyL + G323*gxzL) - G122*(G133*gxxL + G233*gxyL + - G333*gxzL) + (2*G223*G323 - G222*G333)*gyzL + G233*(-(G222*gyyL) - - G322*gyzL) + gxxL*SQR(G123) + gyyL*SQR(G223) + gzzL*(-(G322*G333) + - SQR(G323))))); + JacPDstandardNth33gxx + 2*(2*gyzL*G213*G313 + 2*G113*(gxyL*G213 + + gxzL*G313) - G111*(gxxL*G133 + gxyL*G233 + gxzL*G333) + + G211*(-(gxyL*G133) - gyyL*G233 - gyzL*G333) + + G311*(-(gxzL*G133) - gyzL*G233 - gzzL*G333) + gxxL*SQR(G113) + + gyyL*SQR(G213) + gzzL*SQR(G313))) + gu22*(-JacPDstandardNth22gzz + + 2*JacPDstandardNth23gyz - JacPDstandardNth33gyy + 2*(2*gyzL*G223*G323 + + 2*G123*(gxyL*G223 + gxzL*G323) - G122*(gxxL*G133 + gxyL*G233 + + gxzL*G333) + G222*(-(gxyL*G133) - gyyL*G233 - gyzL*G333) + + G322*(-(gxzL*G133) - gyzL*G233 - gzzL*G333) + gxxL*SQR(G123) + + gyyL*SQR(G223) + gzzL*SQR(G323)))); CCTK_REAL trR = gu11*R11 + gu22*R22 + 2*(gu21*R12 + gu31*R13 + gu32*R23) + gu33*R33; - CCTK_REAL Km11 = gu11*kxxL + gu21*kxyL + gu31*kxzL; + CCTK_REAL Km11 = kxxL*gu11 + kxyL*gu21 + kxzL*gu31; - CCTK_REAL Km21 = gu21*kxxL + gu22*kxyL + gu32*kxzL; + CCTK_REAL Km21 = kxxL*gu21 + kxyL*gu22 + kxzL*gu32; - CCTK_REAL Km31 = gu31*kxxL + gu32*kxyL + gu33*kxzL; + CCTK_REAL Km31 = kxxL*gu31 + kxyL*gu32 + kxzL*gu33; - CCTK_REAL Km12 = gu11*kxyL + gu21*kyyL + gu31*kyzL; + CCTK_REAL Km12 = kxyL*gu11 + kyyL*gu21 + kyzL*gu31; - CCTK_REAL Km22 = gu21*kxyL + gu22*kyyL + gu32*kyzL; + CCTK_REAL Km22 = kxyL*gu21 + kyyL*gu22 + kyzL*gu32; - CCTK_REAL Km32 = gu31*kxyL + gu32*kyyL + gu33*kyzL; + CCTK_REAL Km32 = kxyL*gu31 + kyyL*gu32 + kyzL*gu33; - CCTK_REAL Km13 = gu11*kxzL + gu21*kyzL + gu31*kzzL; + CCTK_REAL Km13 = kxzL*gu11 + kyzL*gu21 + kzzL*gu31; - CCTK_REAL Km23 = gu21*kxzL + gu22*kyzL + gu32*kzzL; + CCTK_REAL Km23 = kxzL*gu21 + kyzL*gu22 + kzzL*gu32; - CCTK_REAL Km33 = gu31*kxzL + gu32*kyzL + gu33*kzzL; + CCTK_REAL Km33 = kxzL*gu31 + kyzL*gu32 + kzzL*gu33; CCTK_REAL trK = Km11 + Km22 + Km33; CCTK_REAL rho = INV(SQR(alpL))*(eTttL - 2*(betayL*eTtyL + - betazL*eTtzL) + 2*(betaxL*(-eTtxL + betayL*eTxyL + betazL*eTxzL) + - betayL*betazL*eTyzL) + eTxxL*SQR(betaxL) + eTyyL*SQR(betayL) + - eTzzL*SQR(betazL)); + betazL*eTtzL) + 2*(betaxL*(-eTtxL + betayL*eTxyL + + betazL*eTxzL) + betayL*betazL*eTyzL) + eTxxL*SQR(betaxL) + + eTyyL*SQR(betayL) + eTzzL*SQR(betazL)); CCTK_REAL S1 = (-eTtxL + betaxL*eTxxL + betayL*eTxyL + betazL*eTxzL)*INV(alpL); @@ -1250,49 +1255,47 @@ static void ML_ADMConstraints_MP_Body(cGH const * restrict const cctkGH, int con CCTK_REAL S3 = (-eTtzL + betaxL*eTxzL + betayL*eTyzL + betazL*eTzzL)*INV(alpL); - CCTK_REAL HL = -2*(Km12*Km21 + Km13*Km31 + Km23*Km32) - - 50.26548245743669181540229413247204614715*rho + trR - SQR(Km11) - - SQR(Km22) - SQR(Km33) + SQR(trK); + CCTK_REAL HL = -2*(Km12*Km21 + Km13*Km31 + Km23*Km32) - 16*Pi*rho + + trR - SQR(Km11) - SQR(Km22) - SQR(Km33) + SQR(trK); - CCTK_REAL M1L = gu21*(-JacPDstandardNth1kxy + JacPDstandardNth2kxx - - G112*kxxL + (G111 - G212)*kxyL - G312*kxzL + G211*kyyL + G311*kyzL) + - gu22*(-JacPDstandardNth1kyy + JacPDstandardNth2kxy - G122*kxxL + (G112 - - G222)*kxyL - G322*kxzL + G212*kyyL + G312*kyzL) + - gu31*(-JacPDstandardNth1kxz + JacPDstandardNth3kxx - G113*kxxL - - G213*kxyL + (G111 - G313)*kxzL + G211*kyzL + G311*kzzL) + - gu32*(JacPDstandardNth2kxz + JacPDstandardNth3kxy + G113*kxyL + - G112*kxzL - 2*(JacPDstandardNth1kyz + G123*kxxL + G223*kxyL + - G323*kxzL) + G213*kyyL + (G212 + G313)*kyzL + G312*kzzL) + - gu33*(-JacPDstandardNth1kzz + JacPDstandardNth3kxz - G133*kxxL - - G233*kxyL + (G113 - G333)*kxzL + G213*kyzL + G313*kzzL) - - 25.13274122871834590770114706623602307358*S1; + CCTK_REAL M1L = gu21*(-(kxxL*G112) + kyyL*G211 + kxyL*(G111 - + G212) + kyzL*G311 - kxzL*G312 - JacPDstandardNth1kxy + + JacPDstandardNth2kxx) + gu22*(-(kxxL*G122) + kyyL*G212 + + kxyL*(G112 - G222) + kyzL*G312 - kxzL*G322 - JacPDstandardNth1kyy + + JacPDstandardNth2kxy) + gu31*(-(kxxL*G113) + kyzL*G211 - + kxyL*G213 + kzzL*G311 + kxzL*(G111 - G313) - JacPDstandardNth1kxz + + JacPDstandardNth3kxx) + gu32*(kyyL*G213 + kxyL*(G113 - 2*G223) + + kzzL*G312 + kyzL*(G212 + G313) + kxzL*(G112 - 2*G323) - + 2*(kxxL*G123 + JacPDstandardNth1kyz) + JacPDstandardNth2kxz + + JacPDstandardNth3kxy) + gu33*(-(kxxL*G133) + kyzL*G213 - + kxyL*G233 + kzzL*G313 + kxzL*(G113 - G333) - JacPDstandardNth1kzz + + JacPDstandardNth3kxz) - 8*Pi*S1; - CCTK_REAL M2L = gu11*(JacPDstandardNth1kxy - JacPDstandardNth2kxx + - G112*kxxL + (-G111 + G212)*kxyL + G312*kxzL - G211*kyyL - G311*kyzL) + - gu21*(JacPDstandardNth1kyy - JacPDstandardNth2kxy + G122*kxxL + (-G112 - + G222)*kxyL + G322*kxzL - G212*kyyL - G312*kyzL) + - gu31*(JacPDstandardNth1kyz + JacPDstandardNth3kxy + G123*kxxL + - G223*kxyL + (G112 + G323)*kxzL + G212*kyzL - 2*(JacPDstandardNth2kxz + - G113*kxyL + G213*kyyL + G313*kyzL) + G312*kzzL) + - gu32*(-JacPDstandardNth2kyz + JacPDstandardNth3kyy - G123*kxyL + - G122*kxzL - G223*kyyL + (G222 - G323)*kyzL + G322*kzzL) + - gu33*(-JacPDstandardNth2kzz + JacPDstandardNth3kyz - G133*kxyL + - G123*kxzL - G233*kyyL + (G223 - G333)*kyzL + G323*kzzL) - - 25.13274122871834590770114706623602307358*S2; + CCTK_REAL M2L = gu11*(kxxL*G112 - kyyL*G211 + kxyL*(-G111 + + G212) - kyzL*G311 + kxzL*G312 + JacPDstandardNth1kxy - + JacPDstandardNth2kxx) + gu21*(kxxL*G122 - kyyL*G212 + kxyL*(-G112 + + G222) - kyzL*G312 + kxzL*G322 + JacPDstandardNth1kyy - + JacPDstandardNth2kxy) + gu31*(kxxL*G123 + kxyL*G223 + kzzL*G312 + + kyzL*(G212 - 2*G313) + kxzL*(G112 + G323) + JacPDstandardNth1kyz - + 2*(kxyL*G113 + kyyL*G213 + JacPDstandardNth2kxz) + + JacPDstandardNth3kxy) + gu32*(kxzL*G122 - kxyL*G123 - kyyL*G223 + + kzzL*G322 + kyzL*(G222 - G323) - JacPDstandardNth2kyz + + JacPDstandardNth3kyy) + gu33*(kxzL*G123 - kxyL*G133 - kyyL*G233 + + kzzL*G323 + kyzL*(G223 - G333) - JacPDstandardNth2kzz + + JacPDstandardNth3kyz) - 8*Pi*S2; - CCTK_REAL M3L = (G323*gu22 + G333*gu32)*kyzL + - gu11*(JacPDstandardNth1kxz - JacPDstandardNth3kxx + G113*kxxL + - G213*kxyL + (-G111 + G313)*kxzL - G211*kyzL - G311*kzzL) + - gu31*(JacPDstandardNth1kzz - JacPDstandardNth3kxz + G133*kxxL + - G233*kxyL + (-G113 + G333)*kxzL - G213*kyzL - G313*kzzL) + - gu22*(JacPDstandardNth2kyz - JacPDstandardNth3kyy + G123*kxyL - - G122*kxzL + G223*kyyL - G222*kyzL - G322*kzzL) + - gu32*(JacPDstandardNth2kzz - JacPDstandardNth3kyz + G133*kxyL - - G123*kxzL + G233*kyyL - G223*kyzL - G323*kzzL) + - gu21*(JacPDstandardNth1kyz + JacPDstandardNth2kxz + G123*kxxL + (G113 + - G223)*kxyL + G323*kxzL + G213*kyyL + G313*kyzL - - 2*(JacPDstandardNth3kxy + G112*kxzL + G212*kyzL + G312*kzzL)) - - 25.13274122871834590770114706623602307358*S3; + CCTK_REAL M3L = gu11*(kxxL*G113 - kyzL*G211 + kxyL*G213 - + kzzL*G311 + kxzL*(-G111 + G313) + JacPDstandardNth1kxz - + JacPDstandardNth3kxx) + gu21*(kxxL*G123 + kyyL*G213 + kxyL*(G113 + + G223) + kyzL*G313 + kxzL*G323 + JacPDstandardNth1kyz + + JacPDstandardNth2kxz - 2*(kxzL*G112 + kyzL*G212 + kzzL*G312 + + JacPDstandardNth3kxy)) + gu31*(kxxL*G133 - kyzL*G213 + kxyL*G233 + - kzzL*G313 + kxzL*(-G113 + G333) + JacPDstandardNth1kzz - + JacPDstandardNth3kxz) + gu22*(-(kxzL*G122) + kxyL*G123 + + kyyL*G223 - kzzL*G322 + kyzL*(-G222 + G323) + + JacPDstandardNth2kyz - JacPDstandardNth3kyy) + gu32*(-(kxzL*G123) + + kxyL*G133 + kyyL*G233 - kzzL*G323 + kyzL*(-G223 + G333) + + JacPDstandardNth2kzz - JacPDstandardNth3kyz) - 8*Pi*S3; /* Copy local copies back to grid functions */ H[index] = HL; @@ -1300,7 +1303,7 @@ static void ML_ADMConstraints_MP_Body(cGH const * restrict const cctkGH, int con M2[index] = M2L; M3[index] = M3L; } - LC_ENDLOOP3 (ML_ADMConstraints_MP); + CCTK_ENDLOOP3(ML_ADMConstraints_MP); } extern "C" void ML_ADMConstraints_MP(CCTK_ARGUMENTS) @@ -1319,12 +1322,18 @@ extern "C" void ML_ADMConstraints_MP(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::curv","ADMBase::lapse","ADMBase::metric","ADMBase::shift","ML_ADMConstraints_MP::ML_Ham","ML_ADMConstraints_MP::ML_mom"}; + const char *const groups[] = { + "ADMBase::curv", + "ADMBase::lapse", + "ADMBase::metric", + "ADMBase::shift", + "ML_ADMConstraints_MP::ML_Ham", + "ML_ADMConstraints_MP::ML_mom"}; GenericFD_AssertGroupStorage(cctkGH, "ML_ADMConstraints_MP", 6, groups); GenericFD_EnsureStencilFits(cctkGH, "ML_ADMConstraints_MP", 2, 2, 2); - GenericFD_LoopOverInterior(cctkGH, &ML_ADMConstraints_MP_Body); + GenericFD_LoopOverInterior(cctkGH, ML_ADMConstraints_MP_Body); if (verbose > 1) { diff --git a/ML_ADMConstraints_MP/src/make.code.defn b/ML_ADMConstraints_MP/src/make.code.defn index c8add04..da1ced9 100644 --- a/ML_ADMConstraints_MP/src/make.code.defn +++ b/ML_ADMConstraints_MP/src/make.code.defn @@ -1,3 +1,3 @@ # File produced by Kranc -SRCS = Startup.cc RegisterMoL.cc RegisterSymmetries.cc ML_ADMConstraints_MP.cc Boundaries.cc +SRCS = Startup.cc RegisterSymmetries.cc RegisterMoL.cc ML_ADMConstraints_MP.cc Boundaries.cc diff --git a/ML_ADMConstraints_O2/configuration.ccl b/ML_ADMConstraints_O2/configuration.ccl index 8e2c3c5..0a66ec2 100644 --- a/ML_ADMConstraints_O2/configuration.ccl +++ b/ML_ADMConstraints_O2/configuration.ccl @@ -1,4 +1,6 @@ # File produced by Kranc REQUIRES GenericFD -REQUIRES LoopControl +OPTIONAL LoopControl +{ +} diff --git a/ML_ADMConstraints_O2/param.ccl b/ML_ADMConstraints_O2/param.ccl index 79cb59b..62e7fd5 100644 --- a/ML_ADMConstraints_O2/param.ccl +++ b/ML_ADMConstraints_O2/param.ccl @@ -41,6 +41,12 @@ CCTK_INT rhs_timelevels "Number of active RHS timelevels" STEERABLE=RECOVER } 1 restricted: +CCTK_INT other_timelevels "Number of active timelevels for non-evolved grid functions" STEERABLE=RECOVER +{ + 0:3 :: "" +} 1 + +restricted: CCTK_INT ML_ADMConstraints_O2_calc_every "ML_ADMConstraints_O2_calc_every" STEERABLE=ALWAYS { *:* :: "" diff --git a/ML_ADMConstraints_O2/schedule.ccl b/ML_ADMConstraints_O2/schedule.ccl index f7f2fd9..c1489cb 100644 --- a/ML_ADMConstraints_O2/schedule.ccl +++ b/ML_ADMConstraints_O2/schedule.ccl @@ -1,9 +1,31 @@ # File produced by Kranc -STORAGE: ML_Ham[3] +if (timelevels == 1) +{ + STORAGE: ML_Ham[1] +} +if (timelevels == 2) +{ + STORAGE: ML_Ham[2] +} +if (timelevels == 3) +{ + STORAGE: ML_Ham[3] +} -STORAGE: ML_mom[3] +if (timelevels == 1) +{ + STORAGE: ML_mom[1] +} +if (timelevels == 2) +{ + STORAGE: ML_mom[2] +} +if (timelevels == 3) +{ + STORAGE: ML_mom[3] +} schedule ML_ADMConstraints_O2_Startup at STARTUP { @@ -11,12 +33,6 @@ schedule ML_ADMConstraints_O2_Startup at STARTUP OPTIONS: meta } "create banner" -schedule ML_ADMConstraints_O2_RegisterVars in MoL_Register -{ - LANG: C - OPTIONS: meta -} "Register Variables for MoL" - schedule ML_ADMConstraints_O2_RegisterSymmetries in SymmetryRegister { LANG: C @@ -31,6 +47,15 @@ schedule group ML_ADMConstraints_O2_group in MoL_PseudoEvolution after MoL_PostS schedule ML_ADMConstraints_O2 in ML_ADMConstraints_O2_group { LANG: C + READS: ADMBase::curv + READS: ADMBase::lapse + READS: ADMBase::metric + READS: ADMBase::shift + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_ADMConstraints_O2::ML_Ham + WRITES: ML_ADMConstraints_O2::ML_mom } "ML_ADMConstraints_O2" schedule ML_ADMConstraints_O2_SelectBCs in ML_ADMConstraints_O2_bc_group @@ -68,6 +93,12 @@ schedule ML_ADMConstraints_O2_CheckBoundaries at BASEGRID OPTIONS: meta } "check boundaries treatment" +schedule ML_ADMConstraints_O2_RegisterVars in MoL_Register +{ + LANG: C + OPTIONS: meta +} "Register Variables for MoL" + schedule group ApplyBCs as ML_ADMConstraints_O2_ApplyBCs in MoL_PostStep after ML_ADMConstraints_O2_SelectBoundConds { # no language specified diff --git a/ML_ADMConstraints_O2/src/ML_ADMConstraints_O2.cc b/ML_ADMConstraints_O2/src/ML_ADMConstraints_O2.cc index 1542371..d00d9c1 100644 --- a/ML_ADMConstraints_O2/src/ML_ADMConstraints_O2.cc +++ b/ML_ADMConstraints_O2/src/ML_ADMConstraints_O2.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" /* Define macros used in calculations */ @@ -42,8 +43,6 @@ static void ML_ADMConstraints_O2_Body(cGH const * restrict const cctkGH, int con DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -74,9 +73,9 @@ static void ML_ADMConstraints_O2_Body(cGH const * restrict const cctkGH, int con CCTK_REAL const p1o2dx = 0.5*INV(dx); CCTK_REAL const p1o2dy = 0.5*INV(dy); CCTK_REAL const p1o2dz = 0.5*INV(dz); - CCTK_REAL const p1o4dxdy = 0.25*INV(dx)*INV(dy); - CCTK_REAL const p1o4dxdz = 0.25*INV(dx)*INV(dz); - CCTK_REAL const p1o4dydz = 0.25*INV(dy)*INV(dz); + CCTK_REAL const p1o4dxdy = 0.25*INV(dx*dy); + CCTK_REAL const p1o4dxdz = 0.25*INV(dx*dz); + CCTK_REAL const p1o4dydz = 0.25*INV(dy*dz); CCTK_REAL const p1odx2 = INV(SQR(dx)); CCTK_REAL const p1ody2 = INV(SQR(dy)); CCTK_REAL const p1odz2 = INV(SQR(dz)); @@ -91,7 +90,7 @@ static void ML_ADMConstraints_O2_Body(cGH const * restrict const cctkGH, int con /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3 (ML_ADMConstraints_O2, + CCTK_LOOP3(ML_ADMConstraints_O2, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2]) { @@ -204,8 +203,8 @@ static void ML_ADMConstraints_O2_Body(cGH const * restrict const cctkGH, int con CCTK_REAL const PDstandardNth2kzz = PDstandardNth2(&kzz[index]); /* Calculate temporaries and grid functions */ - CCTK_REAL detg = 2*gxyL*gxzL*gyzL + gzzL*(gxxL*gyyL - SQR(gxyL)) - - gyyL*SQR(gxzL) - gxxL*SQR(gyzL); + CCTK_REAL detg = 2*gxyL*gxzL*gyzL + gzzL*(gxxL*gyyL - + SQR(gxyL)) - gyyL*SQR(gxzL) - gxxL*SQR(gyzL); CCTK_REAL gu11 = INV(detg)*(gyyL*gzzL - SQR(gyzL)); @@ -285,156 +284,163 @@ static void ML_ADMConstraints_O2_Body(cGH const * restrict const cctkGH, int con + gu32*(-PDstandardNth2gzz + 2*PDstandardNth3gyz) + gu33*PDstandardNth3gzz); - CCTK_REAL R11 = 0.5*(gu32*(4*((-(G123*G211) + G113*G212)*gxyL + - (-(G123*G311) + G113*G312)*gxzL + G112*(G113*gxxL + G213*gxyL + - G313*gxzL) - G111*(G123*gxxL + G223*gxyL + G323*gxzL) + (G212*G213 - - G211*G223)*gyyL + G212*G313*gyzL + (-(G223*G311) + G213*G312 - - G211*G323)*gyzL + (G312*G313 - G311*G323)*gzzL) + + CCTK_REAL R11 = 0.5*(gu32*(4*(gxyL*(-(G123*G211) + G113*G212) + + gyyL*(G212*G213 - G211*G223) + gxzL*G113*G312 + gyzL*G212*G313 + + gzzL*G312*G313 + G112*(gxxL*G113 + gxyL*G213 + gxzL*G313) - + G111*(gxxL*G123 + gxyL*G223 + gxzL*G323) + G311*(-(gxzL*G123) - + gyzL*G223 - gzzL*G323) + gyzL*(G213*G312 - G211*G323)) + 2*(-PDstandardNth11gyz + PDstandardNth12gxz + PDstandardNth13gxy - PDstandardNth23gxx)) + gu22*(-PDstandardNth11gyy + 2*PDstandardNth12gxy - - PDstandardNth22gxx + 2*(G122*(-(G211*gxyL) - G311*gxzL) + - 2*G112*(G212*gxyL + G312*gxzL) - G111*(G122*gxxL + G222*gxyL + - G322*gxzL) + (2*G212*G312 - G211*G322)*gyzL + G222*(-(G211*gyyL) - - G311*gyzL) + gxxL*SQR(G112) + gyyL*SQR(G212) + gzzL*(-(G311*G322) + - SQR(G312)))) + gu33*(-PDstandardNth11gzz + 2*PDstandardNth13gxz - - PDstandardNth33gxx + 2*(G133*(-(G211*gxyL) - G311*gxzL) + - 2*G113*(G213*gxyL + G313*gxzL) - G111*(G133*gxxL + G233*gxyL + - G333*gxzL) + (2*G213*G313 - G211*G333)*gyzL + G233*(-(G211*gyyL) - - G311*gyzL) + gxxL*SQR(G113) + gyyL*SQR(G213) + gzzL*(-(G311*G333) + - SQR(G313))))); + - PDstandardNth22gxx + 2*(2*gyzL*G212*G312 + 2*G112*(gxyL*G212 + + gxzL*G312) - G111*(gxxL*G122 + gxyL*G222 + gxzL*G322) + + G211*(-(gxyL*G122) - gyyL*G222 - gyzL*G322) + + G311*(-(gxzL*G122) - gyzL*G222 - gzzL*G322) + gxxL*SQR(G112) + + gyyL*SQR(G212) + gzzL*SQR(G312))) + gu33*(-PDstandardNth11gzz + + 2*PDstandardNth13gxz - PDstandardNth33gxx + 2*(2*gyzL*G213*G313 + + 2*G113*(gxyL*G213 + gxzL*G313) - G111*(gxxL*G133 + gxyL*G233 + + gxzL*G333) + G211*(-(gxyL*G133) - gyyL*G233 - gyzL*G333) + + G311*(-(gxzL*G133) - gyzL*G233 - gzzL*G333) + gxxL*SQR(G113) + + gyyL*SQR(G213) + gzzL*SQR(G313)))); CCTK_REAL R12 = 0.5*(gu21*(PDstandardNth11gyy - 2*PDstandardNth12gxy + PDstandardNth22gxx) + gu31*(PDstandardNth11gyz - PDstandardNth12gxz - PDstandardNth13gxy + PDstandardNth23gxx) + gu32*(-PDstandardNth12gyz + PDstandardNth13gyy + PDstandardNth22gxz - PDstandardNth23gxy) + gu33*(-PDstandardNth12gzz + PDstandardNth13gyz + PDstandardNth23gxz - - PDstandardNth33gxy) + 2*(gu31*((G123*G211 - G113*G212)*gxyL + - (G123*G311 - G113*G312)*gxzL - G112*(G113*gxxL + G213*gxyL + G313*gxzL) - + G111*(G123*gxxL + G223*gxyL + G323*gxzL) + (-(G212*G213) + - G211*G223)*gyyL + (G223*G311 - G213*G312 - G212*G313)*gyzL + - G211*G323*gyzL + (-(G312*G313) + G311*G323)*gzzL) + gu32*((-(G123*G212) - + G122*G213)*gxyL + (-(G123*G312) + G122*G313)*gxzL + G113*(G122*gxxL + - G222*gxyL + G322*gxzL) - G112*(G123*gxxL + G223*gxyL + G323*gxzL) + - (G213*G222 - G212*G223)*gyyL + G213*G322*gyzL + (-(G223*G312) + - G222*G313 - G212*G323)*gyzL + (G313*G322 - G312*G323)*gzzL) + - gu33*((-(G133*G212) + G123*G213)*gxyL + (-(G133*G312) + G123*G313)*gxzL - + G113*(G123*gxxL + G223*gxyL + G323*gxzL) - G112*(G133*gxxL + - G233*gxyL + G333*gxzL) + (G213*G223 - G212*G233)*gyyL + G213*G323*gyzL - + (-(G233*G312) + G223*G313 - G212*G333)*gyzL + (G313*G323 - - G312*G333)*gzzL) + gu21*(G122*(G211*gxyL + G311*gxzL) + G111*(G122*gxxL - + G222*gxyL + G322*gxzL) + G222*(G211*gyyL + G311*gyzL) - - 2*(G112*(G212*gxyL + G312*gxzL) + G212*G312*gyzL) + G322*(G211*gyzL + - G311*gzzL) - gxxL*SQR(G112) - gyyL*SQR(G212) - gzzL*SQR(G312)))); + PDstandardNth33gxy) + 2*((gxyL*(G123*G211 - G113*G212) + + gyyL*(-(G212*G213) + G211*G223) - G112*(gxxL*G113 + gxyL*G213 + + gxzL*G313) + G312*(-(gxzL*G113) - gyzL*G213 - gzzL*G313) + + G111*(gxxL*G123 + gxyL*G223 + gxzL*G323) + G311*(gxzL*G123 + + gyzL*G223 + gzzL*G323) + gyzL*(-(G212*G313) + G211*G323))*gu31 + + (gxyL*(-(G123*G212) + G122*G213) + gyyL*(G213*G222 - G212*G223) + + gxzL*G122*G313 + gyzL*G213*G322 + gzzL*G313*G322 + + G113*(gxxL*G122 + gxyL*G222 + gxzL*G322) - G112*(gxxL*G123 + + gxyL*G223 + gxzL*G323) + G312*(-(gxzL*G123) - gyzL*G223 - + gzzL*G323) + gyzL*(G222*G313 - G212*G323))*gu32 + + (gxyL*(-(G133*G212) + G123*G213) + gyyL*(G213*G223 - G212*G233) + + gxzL*G123*G313 + gyzL*G213*G323 + gzzL*G313*G323 + + G113*(gxxL*G123 + gxyL*G223 + gxzL*G323) - G112*(gxxL*G133 + + gxyL*G233 + gxzL*G333) + G312*(-(gxzL*G133) - gyzL*G233 - + gzzL*G333) + gyzL*(G223*G313 - G212*G333))*gu33 + + gu21*(-2*(gyzL*G212*G312 + G112*(gxyL*G212 + gxzL*G312)) + + G111*(gxxL*G122 + gxyL*G222 + gxzL*G322) + G211*(gxyL*G122 + + gyyL*G222 + gyzL*G322) + G311*(gxzL*G122 + gyzL*G222 + + gzzL*G322) - gxxL*SQR(G112) - gyyL*SQR(G212) - + gzzL*SQR(G312)))); CCTK_REAL R13 = 0.5*(gu21*(PDstandardNth11gyz - PDstandardNth12gxz - PDstandardNth13gxy + PDstandardNth23gxx) + gu22*(PDstandardNth12gyz - PDstandardNth13gyy - PDstandardNth22gxz + PDstandardNth23gxy) + gu31*(PDstandardNth11gzz - 2*PDstandardNth13gxz + PDstandardNth33gxx) + gu32*(PDstandardNth12gzz - PDstandardNth13gyz - PDstandardNth23gxz + - PDstandardNth33gxy) + 2*(gu21*((G123*G211 - G113*G212)*gxyL + - (G123*G311 - G113*G312)*gxzL - G112*(G113*gxxL + G213*gxyL + G313*gxzL) - + G111*(G123*gxxL + G223*gxyL + G323*gxzL) + (-(G212*G213) + - G211*G223)*gyyL + (G223*G311 - G213*G312 - G212*G313)*gyzL + - G211*G323*gyzL + (-(G312*G313) + G311*G323)*gzzL) + gu22*((G123*G212 - - G122*G213)*gxyL + (G123*G312 - G122*G313)*gxzL - G113*(G122*gxxL + - G222*gxyL + G322*gxzL) + G112*(G123*gxxL + G223*gxyL + G323*gxzL) + - (-(G213*G222) + G212*G223)*gyyL + (G223*G312 - G222*G313 - - G213*G322)*gyzL + G212*G323*gyzL + (-(G313*G322) + G312*G323)*gzzL) + - gu32*((G133*G212 - G123*G213)*gxyL + (G133*G312 - G123*G313)*gxzL - - G113*(G123*gxxL + G223*gxyL + G323*gxzL) + G112*(G133*gxxL + G233*gxyL - + G333*gxzL) + (-(G213*G223) + G212*G233)*gyyL + (G233*G312 - G223*G313 - - G213*G323)*gyzL + G212*G333*gyzL + (-(G313*G323) + G312*G333)*gzzL) + - gu31*(G133*(G211*gxyL + G311*gxzL) + G111*(G133*gxxL + G233*gxyL + - G333*gxzL) + G233*(G211*gyyL + G311*gyzL) - 2*(G113*(G213*gxyL + - G313*gxzL) + G213*G313*gyzL) + G333*(G211*gyzL + G311*gzzL) - - gxxL*SQR(G113) - gyyL*SQR(G213) - gzzL*SQR(G313)))); - - CCTK_REAL R22 = 0.5*(gu31*(4*((G123*G212 - G122*G213)*gxyL + - (G123*G312 - G122*G313)*gxzL - G113*(G122*gxxL + G222*gxyL + G322*gxzL) - + G112*(G123*gxxL + G223*gxyL + G323*gxzL) + (-(G213*G222) + - G212*G223)*gyyL + (G223*G312 - G222*G313 - G213*G322)*gyzL + - G212*G323*gyzL + (-(G313*G322) + G312*G323)*gzzL) + + PDstandardNth33gxy) + 2*((gxyL*(G123*G211 - G113*G212) + + gyyL*(-(G212*G213) + G211*G223) - G112*(gxxL*G113 + gxyL*G213 + + gxzL*G313) + G312*(-(gxzL*G113) - gyzL*G213 - gzzL*G313) + + G111*(gxxL*G123 + gxyL*G223 + gxzL*G323) + G311*(gxzL*G123 + + gyzL*G223 + gzzL*G323) + gyzL*(-(G212*G313) + G211*G323))*gu21 + + (gxyL*(G123*G212 - G122*G213) + gyyL*(-(G213*G222) + G212*G223) - + G113*(gxxL*G122 + gxyL*G222 + gxzL*G322) + G313*(-(gxzL*G122) - + gyzL*G222 - gzzL*G322) + G112*(gxxL*G123 + gxyL*G223 + + gxzL*G323) + G312*(gxzL*G123 + gyzL*G223 + gzzL*G323) + + gyzL*(-(G213*G322) + G212*G323))*gu22 + (gxyL*(G133*G212 - + G123*G213) + gyyL*(-(G213*G223) + G212*G233) - G113*(gxxL*G123 + + gxyL*G223 + gxzL*G323) + G313*(-(gxzL*G123) - gyzL*G223 - + gzzL*G323) + G112*(gxxL*G133 + gxyL*G233 + gxzL*G333) + + G312*(gxzL*G133 + gyzL*G233 + gzzL*G333) + gyzL*(-(G213*G323) + + G212*G333))*gu32 + gu31*(-2*(gyzL*G213*G313 + G113*(gxyL*G213 + + gxzL*G313)) + G111*(gxxL*G133 + gxyL*G233 + gxzL*G333) + + G211*(gxyL*G133 + gyyL*G233 + gyzL*G333) + G311*(gxzL*G133 + + gyzL*G233 + gzzL*G333) - gxxL*SQR(G113) - gyyL*SQR(G213) - + gzzL*SQR(G313)))); + + CCTK_REAL R22 = 0.5*(gu31*(4*(gxyL*(G123*G212 - G122*G213) + + gyyL*(-(G213*G222) + G212*G223) + gxzL*(G123*G312 - G122*G313) - + G113*(gxxL*G122 + gxyL*G222 + gxzL*G322) + gyzL*(G223*G312 - + G222*G313 - G213*G322) + gyzL*G212*G323 + G112*(gxxL*G123 + + gxyL*G223 + gxzL*G323) + gzzL*(-(G313*G322) + G312*G323)) + 2*(PDstandardNth12gyz - PDstandardNth13gyy - PDstandardNth22gxz + PDstandardNth23gxy)) + gu11*(-PDstandardNth11gyy + 2*PDstandardNth12gxy - - PDstandardNth22gxx + 2*(G122*(-(G211*gxyL) - G311*gxzL) + - 2*G112*(G212*gxyL + G312*gxzL) - G111*(G122*gxxL + G222*gxyL + - G322*gxzL) + (2*G212*G312 - G211*G322)*gyzL + G222*(-(G211*gyyL) - - G311*gyzL) + gxxL*SQR(G112) + gyyL*SQR(G212) + gzzL*(-(G311*G322) + - SQR(G312)))) + gu33*(-PDstandardNth22gzz + 2*PDstandardNth23gyz - - PDstandardNth33gyy + 2*(G133*(-(G222*gxyL) - G322*gxzL) + - 2*G123*(G223*gxyL + G323*gxzL) - G122*(G133*gxxL + G233*gxyL + - G333*gxzL) + (2*G223*G323 - G222*G333)*gyzL + G233*(-(G222*gyyL) - - G322*gyzL) + gxxL*SQR(G123) + gyyL*SQR(G223) + gzzL*(-(G322*G333) + - SQR(G323))))); + - PDstandardNth22gxx + 2*(2*gyzL*G212*G312 + 2*G112*(gxyL*G212 + + gxzL*G312) - G111*(gxxL*G122 + gxyL*G222 + gxzL*G322) + + G211*(-(gxyL*G122) - gyyL*G222 - gyzL*G322) + + G311*(-(gxzL*G122) - gyzL*G222 - gzzL*G322) + gxxL*SQR(G112) + + gyyL*SQR(G212) + gzzL*SQR(G312))) + gu33*(-PDstandardNth22gzz + + 2*PDstandardNth23gyz - PDstandardNth33gyy + 2*(2*gyzL*G223*G323 + + 2*G123*(gxyL*G223 + gxzL*G323) - G122*(gxxL*G133 + gxyL*G233 + + gxzL*G333) + G222*(-(gxyL*G133) - gyyL*G233 - gyzL*G333) + + G322*(-(gxzL*G133) - gyzL*G233 - gzzL*G333) + gxxL*SQR(G123) + + gyyL*SQR(G223) + gzzL*SQR(G323)))); CCTK_REAL R23 = 0.5*(gu11*(-PDstandardNth11gyz + PDstandardNth12gxz + PDstandardNth13gxy - PDstandardNth23gxx) + gu21*(-PDstandardNth12gyz + PDstandardNth13gyy + PDstandardNth22gxz - PDstandardNth23gxy) + gu31*(PDstandardNth12gzz - PDstandardNth13gyz - PDstandardNth23gxz + PDstandardNth33gxy) + gu32*(PDstandardNth22gzz - 2*PDstandardNth23gyz + - PDstandardNth33gyy) + 2*(gu11*((-(G123*G211) + G113*G212)*gxyL + - (-(G123*G311) + G113*G312)*gxzL + G112*(G113*gxxL + G213*gxyL + - G313*gxzL) - G111*(G123*gxxL + G223*gxyL + G323*gxzL) + (G212*G213 - - G211*G223)*gyyL + G212*G313*gyzL + (-(G223*G311) + G213*G312 - - G211*G323)*gyzL + (G312*G313 - G311*G323)*gzzL) + gu21*((-(G123*G212) + - G122*G213)*gxyL + (-(G123*G312) + G122*G313)*gxzL + G113*(G122*gxxL + - G222*gxyL + G322*gxzL) - G112*(G123*gxxL + G223*gxyL + G323*gxzL) + - (G213*G222 - G212*G223)*gyyL + G213*G322*gyzL + (-(G223*G312) + - G222*G313 - G212*G323)*gyzL + (G313*G322 - G312*G323)*gzzL) + - gu31*((G133*G212 - G123*G213)*gxyL + (G133*G312 - G123*G313)*gxzL - - G113*(G123*gxxL + G223*gxyL + G323*gxzL) + G112*(G133*gxxL + G233*gxyL - + G333*gxzL) + (-(G213*G223) + G212*G233)*gyyL + (G233*G312 - G223*G313 - - G213*G323)*gyzL + G212*G333*gyzL + (-(G313*G323) + G312*G333)*gzzL) + - gu32*(G133*(G222*gxyL + G322*gxzL) + G122*(G133*gxxL + G233*gxyL + - G333*gxzL) + G233*(G222*gyyL + G322*gyzL) - 2*(G123*(G223*gxyL + - G323*gxzL) + G223*G323*gyzL) + G333*(G222*gyzL + G322*gzzL) - - gxxL*SQR(G123) - gyyL*SQR(G223) - gzzL*SQR(G323)))); - - CCTK_REAL R33 = 0.5*(gu21*(4*((-(G133*G212) + G123*G213)*gxyL + - (-(G133*G312) + G123*G313)*gxzL + G113*(G123*gxxL + G223*gxyL + - G323*gxzL) - G112*(G133*gxxL + G233*gxyL + G333*gxzL) + (G213*G223 - - G212*G233)*gyyL + G213*G323*gyzL + (-(G233*G312) + G223*G313 - - G212*G333)*gyzL + (G313*G323 - G312*G333)*gzzL) + + PDstandardNth33gyy) + 2*((gxyL*(-(G123*G211) + G113*G212) + + gyyL*(G212*G213 - G211*G223) + gxzL*G113*G312 + gyzL*G212*G313 + + gzzL*G312*G313 + G112*(gxxL*G113 + gxyL*G213 + gxzL*G313) - + G111*(gxxL*G123 + gxyL*G223 + gxzL*G323) + G311*(-(gxzL*G123) - + gyzL*G223 - gzzL*G323) + gyzL*(G213*G312 - G211*G323))*gu11 + + (gxyL*(-(G123*G212) + G122*G213) + gyyL*(G213*G222 - G212*G223) + + gxzL*G122*G313 + gyzL*G213*G322 + gzzL*G313*G322 + + G113*(gxxL*G122 + gxyL*G222 + gxzL*G322) - G112*(gxxL*G123 + + gxyL*G223 + gxzL*G323) + G312*(-(gxzL*G123) - gyzL*G223 - + gzzL*G323) + gyzL*(G222*G313 - G212*G323))*gu21 + + (gxyL*(G133*G212 - G123*G213) + gyyL*(-(G213*G223) + G212*G233) - + G113*(gxxL*G123 + gxyL*G223 + gxzL*G323) + G313*(-(gxzL*G123) - + gyzL*G223 - gzzL*G323) + G112*(gxxL*G133 + gxyL*G233 + + gxzL*G333) + G312*(gxzL*G133 + gyzL*G233 + gzzL*G333) + + gyzL*(-(G213*G323) + G212*G333))*gu31 + gu32*(-2*(gyzL*G223*G323 + + G123*(gxyL*G223 + gxzL*G323)) + G122*(gxxL*G133 + gxyL*G233 + + gxzL*G333) + G222*(gxyL*G133 + gyyL*G233 + gyzL*G333) + + G322*(gxzL*G133 + gyzL*G233 + gzzL*G333) - gxxL*SQR(G123) - + gyyL*SQR(G223) - gzzL*SQR(G323)))); + + CCTK_REAL R33 = 0.5*(gu21*(4*(gxyL*(-(G133*G212) + G123*G213) + + gyyL*(G213*G223 - G212*G233) + gxzL*G123*G313 + gyzL*G213*G323 + + gzzL*G313*G323 + G113*(gxxL*G123 + gxyL*G223 + gxzL*G323) - + G112*(gxxL*G133 + gxyL*G233 + gxzL*G333) + G312*(-(gxzL*G133) - + gyzL*G233 - gzzL*G333) + gyzL*(G223*G313 - G212*G333)) + 2*(-PDstandardNth12gzz + PDstandardNth13gyz + PDstandardNth23gxz - PDstandardNth33gxy)) + gu11*(-PDstandardNth11gzz + 2*PDstandardNth13gxz - - PDstandardNth33gxx + 2*(G133*(-(G211*gxyL) - G311*gxzL) + - 2*G113*(G213*gxyL + G313*gxzL) - G111*(G133*gxxL + G233*gxyL + - G333*gxzL) + (2*G213*G313 - G211*G333)*gyzL + G233*(-(G211*gyyL) - - G311*gyzL) + gxxL*SQR(G113) + gyyL*SQR(G213) + gzzL*(-(G311*G333) + - SQR(G313)))) + gu22*(-PDstandardNth22gzz + 2*PDstandardNth23gyz - - PDstandardNth33gyy + 2*(G133*(-(G222*gxyL) - G322*gxzL) + - 2*G123*(G223*gxyL + G323*gxzL) - G122*(G133*gxxL + G233*gxyL + - G333*gxzL) + (2*G223*G323 - G222*G333)*gyzL + G233*(-(G222*gyyL) - - G322*gyzL) + gxxL*SQR(G123) + gyyL*SQR(G223) + gzzL*(-(G322*G333) + - SQR(G323))))); + - PDstandardNth33gxx + 2*(2*gyzL*G213*G313 + 2*G113*(gxyL*G213 + + gxzL*G313) - G111*(gxxL*G133 + gxyL*G233 + gxzL*G333) + + G211*(-(gxyL*G133) - gyyL*G233 - gyzL*G333) + + G311*(-(gxzL*G133) - gyzL*G233 - gzzL*G333) + gxxL*SQR(G113) + + gyyL*SQR(G213) + gzzL*SQR(G313))) + gu22*(-PDstandardNth22gzz + + 2*PDstandardNth23gyz - PDstandardNth33gyy + 2*(2*gyzL*G223*G323 + + 2*G123*(gxyL*G223 + gxzL*G323) - G122*(gxxL*G133 + gxyL*G233 + + gxzL*G333) + G222*(-(gxyL*G133) - gyyL*G233 - gyzL*G333) + + G322*(-(gxzL*G133) - gyzL*G233 - gzzL*G333) + gxxL*SQR(G123) + + gyyL*SQR(G223) + gzzL*SQR(G323)))); CCTK_REAL trR = gu11*R11 + gu22*R22 + 2*(gu21*R12 + gu31*R13 + gu32*R23) + gu33*R33; - CCTK_REAL Km11 = gu11*kxxL + gu21*kxyL + gu31*kxzL; + CCTK_REAL Km11 = kxxL*gu11 + kxyL*gu21 + kxzL*gu31; - CCTK_REAL Km21 = gu21*kxxL + gu22*kxyL + gu32*kxzL; + CCTK_REAL Km21 = kxxL*gu21 + kxyL*gu22 + kxzL*gu32; - CCTK_REAL Km31 = gu31*kxxL + gu32*kxyL + gu33*kxzL; + CCTK_REAL Km31 = kxxL*gu31 + kxyL*gu32 + kxzL*gu33; - CCTK_REAL Km12 = gu11*kxyL + gu21*kyyL + gu31*kyzL; + CCTK_REAL Km12 = kxyL*gu11 + kyyL*gu21 + kyzL*gu31; - CCTK_REAL Km22 = gu21*kxyL + gu22*kyyL + gu32*kyzL; + CCTK_REAL Km22 = kxyL*gu21 + kyyL*gu22 + kyzL*gu32; - CCTK_REAL Km32 = gu31*kxyL + gu32*kyyL + gu33*kyzL; + CCTK_REAL Km32 = kxyL*gu31 + kyyL*gu32 + kyzL*gu33; - CCTK_REAL Km13 = gu11*kxzL + gu21*kyzL + gu31*kzzL; + CCTK_REAL Km13 = kxzL*gu11 + kyzL*gu21 + kzzL*gu31; - CCTK_REAL Km23 = gu21*kxzL + gu22*kyzL + gu32*kzzL; + CCTK_REAL Km23 = kxzL*gu21 + kyzL*gu22 + kzzL*gu32; - CCTK_REAL Km33 = gu31*kxzL + gu32*kyzL + gu33*kzzL; + CCTK_REAL Km33 = kxzL*gu31 + kyzL*gu32 + kzzL*gu33; CCTK_REAL trK = Km11 + Km22 + Km33; CCTK_REAL rho = INV(SQR(alpL))*(eTttL - 2*(betayL*eTtyL + - betazL*eTtzL) + 2*(betaxL*(-eTtxL + betayL*eTxyL + betazL*eTxzL) + - betayL*betazL*eTyzL) + eTxxL*SQR(betaxL) + eTyyL*SQR(betayL) + - eTzzL*SQR(betazL)); + betazL*eTtzL) + 2*(betaxL*(-eTtxL + betayL*eTxyL + + betazL*eTxzL) + betayL*betazL*eTyzL) + eTxxL*SQR(betaxL) + + eTyyL*SQR(betayL) + eTzzL*SQR(betazL)); CCTK_REAL S1 = (-eTtxL + betaxL*eTxxL + betayL*eTxyL + betazL*eTxzL)*INV(alpL); @@ -445,45 +451,47 @@ static void ML_ADMConstraints_O2_Body(cGH const * restrict const cctkGH, int con CCTK_REAL S3 = (-eTtzL + betaxL*eTxzL + betayL*eTyzL + betazL*eTzzL)*INV(alpL); - CCTK_REAL HL = -2*(Km12*Km21 + Km13*Km31 + Km23*Km32) - - 50.26548245743669181540229413247204614715*rho + trR - SQR(Km11) - - SQR(Km22) - SQR(Km33) + SQR(trK); - - CCTK_REAL M1L = gu21*(-(G112*kxxL) + (G111 - G212)*kxyL - G312*kxzL + - G211*kyyL + G311*kyzL - PDstandardNth1kxy + PDstandardNth2kxx) + - gu22*(-(G122*kxxL) + (G112 - G222)*kxyL - G322*kxzL + G212*kyyL + - G312*kyzL - PDstandardNth1kyy + PDstandardNth2kxy) + gu31*(-(G113*kxxL) - - G213*kxyL + (G111 - G313)*kxzL + G211*kyzL + G311*kzzL - - PDstandardNth1kxz + PDstandardNth3kxx) + gu32*(G113*kxyL + G112*kxzL + - G213*kyyL + (G212 + G313)*kyzL + G312*kzzL - 2*(G123*kxxL + G223*kxyL + - G323*kxzL + PDstandardNth1kyz) + PDstandardNth2kxz + PDstandardNth3kxy) - + gu33*(-(G133*kxxL) - G233*kxyL + (G113 - G333)*kxzL + G213*kyzL + - G313*kzzL - PDstandardNth1kzz + PDstandardNth3kxz) - - 25.13274122871834590770114706623602307358*S1; - - CCTK_REAL M2L = gu11*(G112*kxxL + (-G111 + G212)*kxyL + G312*kxzL - - G211*kyyL - G311*kyzL + PDstandardNth1kxy - PDstandardNth2kxx) + - gu21*(G122*kxxL + (-G112 + G222)*kxyL + G322*kxzL - G212*kyyL - - G312*kyzL + PDstandardNth1kyy - PDstandardNth2kxy) + gu31*(G123*kxxL + - (-2*G113 + G223)*kxyL + (G112 + G323)*kxzL + G212*kyzL + G312*kzzL + - PDstandardNth1kyz - 2*(G213*kyyL + G313*kyzL + PDstandardNth2kxz) + - PDstandardNth3kxy) + gu32*(-(G123*kxyL) + G122*kxzL - G223*kyyL + (G222 - - G323)*kyzL + G322*kzzL - PDstandardNth2kyz + PDstandardNth3kyy) + - gu33*(-(G133*kxyL) + G123*kxzL - G233*kyyL + (G223 - G333)*kyzL + - G323*kzzL - PDstandardNth2kzz + PDstandardNth3kyz) - - 25.13274122871834590770114706623602307358*S2; - - CCTK_REAL M3L = gu11*(G113*kxxL + G213*kxyL + (-G111 + G313)*kxzL - - G211*kyzL - G311*kzzL + PDstandardNth1kxz - PDstandardNth3kxx) + - gu21*(G123*kxxL + (G113 + G223)*kxyL + (-2*G112 + G323)*kxzL + - G213*kyyL + (-2*G212 + G313)*kyzL + PDstandardNth1kyz + - PDstandardNth2kxz - 2*(G312*kzzL + PDstandardNth3kxy)) + - gu31*(G133*kxxL + G233*kxyL + (-G113 + G333)*kxzL - G213*kyzL - - G313*kzzL + PDstandardNth1kzz - PDstandardNth3kxz) + gu22*(G123*kxyL - - G122*kxzL + G223*kyyL + (-G222 + G323)*kyzL - G322*kzzL + - PDstandardNth2kyz - PDstandardNth3kyy) + gu32*(G133*kxyL - G123*kxzL + - G233*kyyL + (-G223 + G333)*kyzL - G323*kzzL + PDstandardNth2kzz - - PDstandardNth3kyz) - 25.13274122871834590770114706623602307358*S3; + CCTK_REAL HL = -2*(Km12*Km21 + Km13*Km31 + Km23*Km32) - 16*Pi*rho + + trR - SQR(Km11) - SQR(Km22) - SQR(Km33) + SQR(trK); + + CCTK_REAL M1L = gu21*(-(kxxL*G112) + kyyL*G211 + kxyL*(G111 - + G212) + kyzL*G311 - kxzL*G312 - PDstandardNth1kxy + + PDstandardNth2kxx) + gu22*(-(kxxL*G122) + kyyL*G212 + kxyL*(G112 + - G222) + kyzL*G312 - kxzL*G322 - PDstandardNth1kyy + + PDstandardNth2kxy) + gu31*(-(kxxL*G113) + kyzL*G211 - kxyL*G213 + + kzzL*G311 + kxzL*(G111 - G313) - PDstandardNth1kxz + + PDstandardNth3kxx) + gu32*(kyyL*G213 + kxyL*(G113 - 2*G223) + + kzzL*G312 + kyzL*(G212 + G313) + kxzL*(G112 - 2*G323) - + 2*(kxxL*G123 + PDstandardNth1kyz) + PDstandardNth2kxz + + PDstandardNth3kxy) + gu33*(-(kxxL*G133) + kyzL*G213 - kxyL*G233 + + kzzL*G313 + kxzL*(G113 - G333) - PDstandardNth1kzz + + PDstandardNth3kxz) - 8*Pi*S1; + + CCTK_REAL M2L = gu11*(kxxL*G112 - kyyL*G211 + kxyL*(-G111 + + G212) - kyzL*G311 + kxzL*G312 + PDstandardNth1kxy - + PDstandardNth2kxx) + gu21*(kxxL*G122 - kyyL*G212 + kxyL*(-G112 + + G222) - kyzL*G312 + kxzL*G322 + PDstandardNth1kyy - + PDstandardNth2kxy) + gu31*(kxxL*G123 + kxyL*G223 + kzzL*G312 + + kyzL*(G212 - 2*G313) + kxzL*(G112 + G323) + PDstandardNth1kyz - + 2*(kxyL*G113 + kyyL*G213 + PDstandardNth2kxz) + PDstandardNth3kxy) + + gu32*(kxzL*G122 - kxyL*G123 - kyyL*G223 + kzzL*G322 + + kyzL*(G222 - G323) - PDstandardNth2kyz + PDstandardNth3kyy) + + gu33*(kxzL*G123 - kxyL*G133 - kyyL*G233 + kzzL*G323 + + kyzL*(G223 - G333) - PDstandardNth2kzz + PDstandardNth3kyz) - + 8*Pi*S2; + + CCTK_REAL M3L = gu11*(kxxL*G113 - kyzL*G211 + kxyL*G213 - + kzzL*G311 + kxzL*(-G111 + G313) + PDstandardNth1kxz - + PDstandardNth3kxx) + gu21*(kxxL*G123 + kyyL*G213 + kxyL*(G113 + + G223) + kyzL*G313 + kxzL*G323 + PDstandardNth1kyz + + PDstandardNth2kxz - 2*(kxzL*G112 + kyzL*G212 + kzzL*G312 + + PDstandardNth3kxy)) + gu31*(kxxL*G133 - kyzL*G213 + kxyL*G233 - + kzzL*G313 + kxzL*(-G113 + G333) + PDstandardNth1kzz - + PDstandardNth3kxz) + gu22*(-(kxzL*G122) + kxyL*G123 + kyyL*G223 - + kzzL*G322 + kyzL*(-G222 + G323) + PDstandardNth2kyz - + PDstandardNth3kyy) + gu32*(-(kxzL*G123) + kxyL*G133 + kyyL*G233 - + kzzL*G323 + kyzL*(-G223 + G333) + PDstandardNth2kzz - + PDstandardNth3kyz) - 8*Pi*S3; /* Copy local copies back to grid functions */ H[index] = HL; @@ -491,7 +499,7 @@ static void ML_ADMConstraints_O2_Body(cGH const * restrict const cctkGH, int con M2[index] = M2L; M3[index] = M3L; } - LC_ENDLOOP3 (ML_ADMConstraints_O2); + CCTK_ENDLOOP3(ML_ADMConstraints_O2); } extern "C" void ML_ADMConstraints_O2(CCTK_ARGUMENTS) @@ -510,12 +518,18 @@ extern "C" void ML_ADMConstraints_O2(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::curv","ADMBase::lapse","ADMBase::metric","ADMBase::shift","ML_ADMConstraints_O2::ML_Ham","ML_ADMConstraints_O2::ML_mom"}; + const char *const groups[] = { + "ADMBase::curv", + "ADMBase::lapse", + "ADMBase::metric", + "ADMBase::shift", + "ML_ADMConstraints_O2::ML_Ham", + "ML_ADMConstraints_O2::ML_mom"}; GenericFD_AssertGroupStorage(cctkGH, "ML_ADMConstraints_O2", 6, groups); GenericFD_EnsureStencilFits(cctkGH, "ML_ADMConstraints_O2", 1, 1, 1); - GenericFD_LoopOverInterior(cctkGH, &ML_ADMConstraints_O2_Body); + GenericFD_LoopOverInterior(cctkGH, ML_ADMConstraints_O2_Body); if (verbose > 1) { diff --git a/ML_ADMConstraints_O2/src/make.code.defn b/ML_ADMConstraints_O2/src/make.code.defn index 85e9132..751150b 100644 --- a/ML_ADMConstraints_O2/src/make.code.defn +++ b/ML_ADMConstraints_O2/src/make.code.defn @@ -1,3 +1,3 @@ # File produced by Kranc -SRCS = Startup.cc RegisterMoL.cc RegisterSymmetries.cc ML_ADMConstraints_O2.cc Boundaries.cc +SRCS = Startup.cc RegisterSymmetries.cc RegisterMoL.cc ML_ADMConstraints_O2.cc Boundaries.cc diff --git a/ML_ADMQuantities/configuration.ccl b/ML_ADMQuantities/configuration.ccl index 8e2c3c5..0a66ec2 100644 --- a/ML_ADMQuantities/configuration.ccl +++ b/ML_ADMQuantities/configuration.ccl @@ -1,4 +1,6 @@ # File produced by Kranc REQUIRES GenericFD -REQUIRES LoopControl +OPTIONAL LoopControl +{ +} diff --git a/ML_ADMQuantities/param.ccl b/ML_ADMQuantities/param.ccl index f9a063c..cdd87d8 100644 --- a/ML_ADMQuantities/param.ccl +++ b/ML_ADMQuantities/param.ccl @@ -47,6 +47,12 @@ CCTK_INT rhs_timelevels "Number of active RHS timelevels" STEERABLE=RECOVER } 1 restricted: +CCTK_INT other_timelevels "Number of active timelevels for non-evolved grid functions" STEERABLE=RECOVER +{ + 0:3 :: "" +} 1 + +restricted: CCTK_INT ML_ADMQuantities_calc_every "ML_ADMQuantities_calc_every" STEERABLE=ALWAYS { *:* :: "" diff --git a/ML_ADMQuantities/schedule.ccl b/ML_ADMQuantities/schedule.ccl index 273f2d7..cd3e5dd 100644 --- a/ML_ADMQuantities/schedule.ccl +++ b/ML_ADMQuantities/schedule.ccl @@ -1,9 +1,31 @@ # File produced by Kranc -STORAGE: ML_Jadm[3] +if (timelevels == 1) +{ + STORAGE: ML_Jadm[1] +} +if (timelevels == 2) +{ + STORAGE: ML_Jadm[2] +} +if (timelevels == 3) +{ + STORAGE: ML_Jadm[3] +} -STORAGE: ML_Madm[3] +if (timelevels == 1) +{ + STORAGE: ML_Madm[1] +} +if (timelevels == 2) +{ + STORAGE: ML_Madm[2] +} +if (timelevels == 3) +{ + STORAGE: ML_Madm[3] +} schedule ML_ADMQuantities_Startup at STARTUP { @@ -11,12 +33,6 @@ schedule ML_ADMQuantities_Startup at STARTUP OPTIONS: meta } "create banner" -schedule ML_ADMQuantities_RegisterVars in MoL_Register -{ - LANG: C - OPTIONS: meta -} "Register Variables for MoL" - schedule ML_ADMQuantities_RegisterSymmetries in SymmetryRegister { LANG: C @@ -31,6 +47,20 @@ schedule group ML_ADMQuantities_group in MoL_PseudoEvolution after MoL_PostStep schedule ML_ADMQuantities in ML_ADMQuantities_group { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN::ML_curv + READS: ML_BSSN::ML_Gamma + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_log_confac + READS: ML_BSSN::ML_metric + READS: ML_BSSN::ML_shift + READS: ML_BSSN::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_ADMQuantities::ML_Jadm + WRITES: ML_ADMQuantities::ML_Madm } "ML_ADMQuantities" schedule ML_ADMQuantities_SelectBCs in ML_ADMQuantities_bc_group @@ -68,6 +98,12 @@ schedule ML_ADMQuantities_CheckBoundaries at BASEGRID OPTIONS: meta } "check boundaries treatment" +schedule ML_ADMQuantities_RegisterVars in MoL_Register +{ + LANG: C + OPTIONS: meta +} "Register Variables for MoL" + schedule group ApplyBCs as ML_ADMQuantities_ApplyBCs in MoL_PostStep after ML_ADMQuantities_SelectBoundConds { # no language specified diff --git a/ML_ADMQuantities/src/ML_ADMQuantities.cc b/ML_ADMQuantities/src/ML_ADMQuantities.cc index af799ea..4bf679f 100644 --- a/ML_ADMQuantities/src/ML_ADMQuantities.cc +++ b/ML_ADMQuantities/src/ML_ADMQuantities.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" /* Define macros used in calculations */ @@ -42,8 +43,6 @@ static void ML_ADMQuantities_Body(cGH const * restrict const cctkGH, int const d DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -74,9 +73,9 @@ static void ML_ADMQuantities_Body(cGH const * restrict const cctkGH, int const d CCTK_REAL const p1o12dx = 0.0833333333333333333333333333333*INV(dx); CCTK_REAL const p1o12dy = 0.0833333333333333333333333333333*INV(dy); CCTK_REAL const p1o12dz = 0.0833333333333333333333333333333*INV(dz); - CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx)*INV(dy); - CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx)*INV(dz); - CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy)*INV(dz); + CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx*dy); + CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx*dz); + CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy*dz); CCTK_REAL const p1odx = INV(dx); CCTK_REAL const p1ody = INV(dy); CCTK_REAL const p1odz = INV(dz); @@ -94,7 +93,7 @@ static void ML_ADMQuantities_Body(cGH const * restrict const cctkGH, int const d /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3 (ML_ADMQuantities, + CCTK_LOOP3(ML_ADMQuantities, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2]) { @@ -527,8 +526,8 @@ static void ML_ADMQuantities_Body(cGH const * restrict const cctkGH, int const d 2*gtu32*PDstandardNth23gt22 + 2*(Gt112*Gtlu121 + Gt122*Gtlu122 + Gt123*Gtlu123 + Gt312*Gtlu321 + Gt322*Gtlu322 + Gt323*Gtlu323 + gt12L*PDstandardNth2Xt1) + 2*gt22L*PDstandardNth2Xt2 + - 2*gt23L*PDstandardNth2Xt3 - gtu33*PDstandardNth33gt22 + 2*Gtl212*Xtn1 + - 2*Gtl222*Xtn2 + 2*Gtl223*Xtn3); + 2*gt23L*PDstandardNth2Xt3 - gtu33*PDstandardNth33gt22 + 2*Gtl212*Xtn1 + + 2*Gtl222*Xtn2 + 2*Gtl223*Xtn3); CCTK_REAL Rt23 = 0.5*(2*(Gt112*Gtlu131 + Gt122*Gtlu132 + Gt123*Gtlu133 + Gt113*Gtlu211 + Gt123*Gtlu212 + Gt133*Gtlu213 + Gt213*Gtlu221 + @@ -559,7 +558,8 @@ static void ML_ADMQuantities_Body(cGH const * restrict const cctkGH, int const d CCTK_REAL trRt = gtu11*Rt11 + gtu22*Rt22 + 2*(gtu21*Rt12 + gtu31*Rt13 + gtu32*Rt23) + gtu33*Rt33; - CCTK_REAL ephi = IfThen(conformalMethod,INV(sqrt(phiL)),exp(phiL)); + CCTK_REAL ephi = + IfThen(conformalMethod,INV(sqrt(phiL)),exp(phiL)); CCTK_REAL Atm11 = At11L*gtu11 + At12L*gtu21 + At13L*gtu31; @@ -580,9 +580,9 @@ static void ML_ADMQuantities_Body(cGH const * restrict const cctkGH, int const d CCTK_REAL Atm33 = At13L*gtu31 + At23L*gtu32 + At33L*gtu33; CCTK_REAL rho = INV(SQR(alphaL))*(eTttL - 2*(beta2L*eTtyL + - beta3L*eTtzL) + 2*(beta1L*(-eTtxL + beta2L*eTxyL + beta3L*eTxzL) + - beta2L*beta3L*eTyzL) + eTxxL*SQR(beta1L) + eTyyL*SQR(beta2L) + - eTzzL*SQR(beta3L)); + beta3L*eTtzL) + 2*(beta1L*(-eTtxL + beta2L*eTxyL + + beta3L*eTxzL) + beta2L*beta3L*eTyzL) + eTxxL*SQR(beta1L) + + eTyyL*SQR(beta2L) + eTzzL*SQR(beta3L)); CCTK_REAL S1 = (-eTtxL + beta1L*eTxxL + beta2L*eTxyL + beta3L*eTxzL)*INV(alphaL); @@ -593,68 +593,57 @@ static void ML_ADMQuantities_Body(cGH const * restrict const cctkGH, int const d CCTK_REAL S3 = (-eTtzL + beta1L*eTxzL + beta2L*eTyzL + beta3L*eTzzL)*INV(alphaL); - CCTK_REAL MadmL = - 0.01989436788648691697111047042156429525431*((-(Gt111*Gtlu111) - - Gt112*Gtlu112 - Gt113*Gtlu113 - Gt211*Gtlu121 - Gt212*Gtlu122 - - Gt213*Gtlu123 - Gt311*Gtlu131 - Gt312*Gtlu132 - Gt313*Gtlu133)*gtu11 + - (-(Gt122*Gtlu112) - Gt123*Gtlu113 - Gt222*Gtlu122 - Gt223*Gtlu123 - - Gt322*Gtlu132 - Gt323*Gtlu133 - Gt111*Gtlu211 + Gt112*(-Gtlu111 - - Gtlu212) - Gt113*Gtlu213 - Gt211*Gtlu221 + Gt212*(-Gtlu121 - Gtlu222) - - Gt213*Gtlu223 - Gt311*Gtlu231 + Gt312*(-Gtlu131 - Gtlu232) - - Gt313*Gtlu233)*gtu21 + (-(Gt112*Gtlu211) - Gt122*Gtlu212 - - Gt123*Gtlu213 - Gt212*Gtlu221 - Gt222*Gtlu222 - Gt223*Gtlu223 - - Gt312*Gtlu231 - Gt322*Gtlu232 - Gt323*Gtlu233)*gtu22 + - (-(Gt123*Gtlu112) - Gt133*Gtlu113 - Gt223*Gtlu122 - Gt233*Gtlu123 - - Gt323*Gtlu132 - Gt333*Gtlu133 - Gt111*Gtlu311 - Gt112*Gtlu312 + - Gt113*(-Gtlu111 - Gtlu313) - Gt211*Gtlu321 - Gt212*Gtlu322 + - Gt213*(-Gtlu121 - Gtlu323) - Gt311*Gtlu331 - Gt312*Gtlu332 + - Gt313*(-Gtlu131 - Gtlu333))*gtu31 + (-(Gt113*Gtlu211) - Gt133*Gtlu213 - - Gt213*Gtlu221 - Gt233*Gtlu223 - Gt313*Gtlu231 - Gt333*Gtlu233 - - Gt112*Gtlu311 - Gt122*Gtlu312 + Gt123*(-Gtlu212 - Gtlu313) - - Gt212*Gtlu321 - Gt222*Gtlu322 + Gt223*(-Gtlu222 - Gtlu323) - - Gt312*Gtlu331 - Gt322*Gtlu332 + Gt323*(-Gtlu232 - Gtlu333))*gtu32 + - (-(Gt113*Gtlu311) - Gt123*Gtlu312 - Gt133*Gtlu313 - Gt213*Gtlu321 - - Gt223*Gtlu322 - Gt233*Gtlu323 - Gt313*Gtlu331 - Gt323*Gtlu332 - - Gt333*Gtlu333)*gtu33 + trRt - ephi*trRt + pow(ephi,5)*(2*Atm12*Atm21 + - 2.*Atm13*Atm31 + 2.*Atm23*Atm32 + - 50.26548245743669181540229413247204614715*rho + SQR(Atm11) + SQR(Atm22) - + SQR(Atm33) - 0.6666666666666666666666666666666666666667*SQR(trKL))); + CCTK_REAL MadmL = -0.0625*INV(Pi)*((Gt111*Gtlu111 + Gt112*Gtlu112 + + Gt113*Gtlu113 + Gt211*Gtlu121 + Gt212*Gtlu122 + Gt213*Gtlu123 + + Gt311*Gtlu131 + Gt312*Gtlu132 + Gt313*Gtlu133)*gtu11 + (Gt122*Gtlu112 + + Gt123*Gtlu113 + Gt222*Gtlu122 + Gt223*Gtlu123 + Gt322*Gtlu132 + + Gt323*Gtlu133 + Gt111*Gtlu211 + Gt112*(Gtlu111 + Gtlu212) + + Gt113*Gtlu213 + Gt211*Gtlu221 + Gt212*(Gtlu121 + Gtlu222) + + Gt213*Gtlu223 + Gt311*Gtlu231 + Gt312*(Gtlu131 + Gtlu232) + + Gt313*Gtlu233)*gtu21 + (Gt112*Gtlu211 + Gt122*Gtlu212 + Gt123*Gtlu213 + + Gt212*Gtlu221 + Gt222*Gtlu222 + Gt223*Gtlu223 + Gt312*Gtlu231 + + Gt322*Gtlu232 + Gt323*Gtlu233)*gtu22 + (Gt123*Gtlu112 + Gt133*Gtlu113 + + Gt223*Gtlu122 + Gt233*Gtlu123 + Gt323*Gtlu132 + Gt333*Gtlu133 + + Gt111*Gtlu311 + Gt112*Gtlu312 + Gt113*(Gtlu111 + Gtlu313) + + Gt211*Gtlu321 + Gt212*Gtlu322 + Gt213*(Gtlu121 + Gtlu323) + + Gt311*Gtlu331 + Gt312*Gtlu332 + Gt313*(Gtlu131 + Gtlu333))*gtu31 + + (Gt113*Gtlu211 + Gt133*Gtlu213 + Gt213*Gtlu221 + Gt233*Gtlu223 + + Gt313*Gtlu231 + Gt333*Gtlu233 + Gt112*Gtlu311 + Gt122*Gtlu312 + + Gt123*(Gtlu212 + Gtlu313) + Gt212*Gtlu321 + Gt222*Gtlu322 + + Gt223*(Gtlu222 + Gtlu323) + Gt312*Gtlu331 + Gt322*Gtlu332 + + Gt323*(Gtlu232 + Gtlu333))*gtu32 + (Gt113*Gtlu311 + Gt123*Gtlu312 + + Gt133*Gtlu313 + Gt213*Gtlu321 + Gt223*Gtlu322 + Gt233*Gtlu323 + + Gt313*Gtlu331 + Gt323*Gtlu332 + Gt333*Gtlu333)*gtu33 + (-1 + ephi)*trRt + - pow(ephi,5)*(2*(Atm12*Atm21 + Atm13*Atm31 + Atm23*Atm32) + 16*Pi*rho + - 0.666666666666666666666666666667*SQR(trKL) + SQR(Atm11) + + SQR(Atm22) + SQR(Atm33))); CCTK_REAL Jadm1L = - ((-0.01989436788648691697111047042156429525431*(At11L*dgtu113 + - At22L*dgtu223 + At33L*dgtu333) + - 0.02652582384864922262814729389541906033908*PDstandardNth3trK + - 1.*S3)*yL - 0.03978873577297383394222094084312859050861*(Atm32 + - (At12L*dgtu213 + At13L*dgtu313 + At23L*dgtu323)*yL) + - (0.01989436788648691697111047042156429525431*(At11L*dgtu112 + - At22L*dgtu222 + At33L*dgtu332) - - 0.02652582384864922262814729389541906033908*PDstandardNth2trK - - 1.*S2)*zL + 0.03978873577297383394222094084312859050861*(Atm23 + - (At12L*dgtu212 + At13L*dgtu312 + At23L*dgtu322)*zL))*pow(ephi,6); + 0.0208333333333333333333333333333*(-4*zL*PDstandardNth2trK + + 4*yL*PDstandardNth3trK + 3*(At11L*(zL*dgtu112 - yL*dgtu113) + + At22L*(zL*dgtu222 - yL*dgtu223) + 2*(Atm23 + + zL*(At12L*dgtu212 + At13L*dgtu312 + At23L*dgtu322)) - 2*(Atm32 + + yL*(At12L*dgtu213 + At13L*dgtu313 + At23L*dgtu323)) + + At33L*(zL*dgtu332 - yL*dgtu333) + Pi*(-16*zL*S2 + + 16*yL*S3)))*INV(Pi)*pow(ephi,6); CCTK_REAL Jadm2L = - ((0.01989436788648691697111047042156429525431*(At11L*dgtu113 + - At22L*dgtu223 + At33L*dgtu333) - - 0.02652582384864922262814729389541906033908*PDstandardNth3trK - - 1.*S3)*xL + 0.03978873577297383394222094084312859050861*(Atm31 + - (At12L*dgtu213 + At13L*dgtu313 + At23L*dgtu323)*xL) + - (-0.01989436788648691697111047042156429525431*(At11L*dgtu111 + - At22L*dgtu221 + At33L*dgtu331) + - 0.02652582384864922262814729389541906033908*PDstandardNth1trK + - 1.*S1)*zL - 0.03978873577297383394222094084312859050861*(Atm13 + - (At12L*dgtu211 + At13L*dgtu311 + At23L*dgtu321)*zL))*pow(ephi,6); + 0.0208333333333333333333333333333*(4*zL*PDstandardNth1trK - + 4*xL*PDstandardNth3trK + 3*(At11L*(-(zL*dgtu111) + xL*dgtu113) + + At22L*(-(zL*dgtu221) + xL*dgtu223) - 2*(Atm13 + + zL*(At12L*dgtu211 + At13L*dgtu311 + At23L*dgtu321)) + 2*(Atm31 + + xL*(At12L*dgtu213 + At13L*dgtu313 + At23L*dgtu323)) + + At33L*(-(zL*dgtu331) + xL*dgtu333) + Pi*(16*zL*S1 - + 16*xL*S3)))*INV(Pi)*pow(ephi,6); CCTK_REAL Jadm3L = - ((-0.01989436788648691697111047042156429525431*(At11L*dgtu112 + - At22L*dgtu222 + At33L*dgtu332) + - 0.02652582384864922262814729389541906033908*PDstandardNth2trK + - 1.*S2)*xL - 0.03978873577297383394222094084312859050861*(Atm21 + - (At12L*dgtu212 + At13L*dgtu312 + At23L*dgtu322)*xL) + - (0.01989436788648691697111047042156429525431*(At11L*dgtu111 + - At22L*dgtu221 + At33L*dgtu331) - - 0.02652582384864922262814729389541906033908*PDstandardNth1trK - - 1.*S1)*yL + 0.03978873577297383394222094084312859050861*(Atm12 + - (At12L*dgtu211 + At13L*dgtu311 + At23L*dgtu321)*yL))*pow(ephi,6); + 0.0208333333333333333333333333333*(-4*yL*PDstandardNth1trK + + 4*xL*PDstandardNth2trK + 3*(At11L*(yL*dgtu111 - xL*dgtu112) + + At22L*(yL*dgtu221 - xL*dgtu222) + 2*(Atm12 + + yL*(At12L*dgtu211 + At13L*dgtu311 + At23L*dgtu321)) - 2*(Atm21 + + xL*(At12L*dgtu212 + At13L*dgtu312 + At23L*dgtu322)) + + At33L*(yL*dgtu331 - xL*dgtu332) + Pi*(-16*yL*S1 + + 16*xL*S2)))*INV(Pi)*pow(ephi,6); /* Copy local copies back to grid functions */ Jadm1[index] = Jadm1L; @@ -662,7 +651,7 @@ static void ML_ADMQuantities_Body(cGH const * restrict const cctkGH, int const d Jadm3[index] = Jadm3L; Madm[index] = MadmL; } - LC_ENDLOOP3 (ML_ADMQuantities); + CCTK_ENDLOOP3(ML_ADMQuantities); } extern "C" void ML_ADMQuantities(CCTK_ARGUMENTS) @@ -681,12 +670,23 @@ extern "C" void ML_ADMQuantities(CCTK_ARGUMENTS) return; } - const char *groups[] = {"grid::coordinates","Grid::coordinates","ML_BSSN::ML_curv","ML_BSSN::ML_Gamma","ML_BSSN::ML_lapse","ML_BSSN::ML_log_confac","ML_BSSN::ML_metric","ML_BSSN::ML_shift","ML_BSSN::ML_trace_curv","ML_ADMQuantities::ML_Jadm","ML_ADMQuantities::ML_Madm"}; + const char *const groups[] = { + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN::ML_curv", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_log_confac", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_trace_curv", + "ML_ADMQuantities::ML_Jadm", + "ML_ADMQuantities::ML_Madm"}; GenericFD_AssertGroupStorage(cctkGH, "ML_ADMQuantities", 11, groups); GenericFD_EnsureStencilFits(cctkGH, "ML_ADMQuantities", 2, 2, 2); - GenericFD_LoopOverInterior(cctkGH, &ML_ADMQuantities_Body); + GenericFD_LoopOverInterior(cctkGH, ML_ADMQuantities_Body); if (verbose > 1) { diff --git a/ML_ADMQuantities/src/make.code.defn b/ML_ADMQuantities/src/make.code.defn index 65932ec..c0f1ce4 100644 --- a/ML_ADMQuantities/src/make.code.defn +++ b/ML_ADMQuantities/src/make.code.defn @@ -1,3 +1,3 @@ # File produced by Kranc -SRCS = Startup.cc RegisterMoL.cc RegisterSymmetries.cc ML_ADMQuantities.cc Boundaries.cc +SRCS = Startup.cc RegisterSymmetries.cc RegisterMoL.cc ML_ADMQuantities.cc Boundaries.cc diff --git a/ML_ADMQuantities_MP/configuration.ccl b/ML_ADMQuantities_MP/configuration.ccl index 8e2c3c5..0a66ec2 100644 --- a/ML_ADMQuantities_MP/configuration.ccl +++ b/ML_ADMQuantities_MP/configuration.ccl @@ -1,4 +1,6 @@ # File produced by Kranc REQUIRES GenericFD -REQUIRES LoopControl +OPTIONAL LoopControl +{ +} diff --git a/ML_ADMQuantities_MP/param.ccl b/ML_ADMQuantities_MP/param.ccl index 62fb349..47f8215 100644 --- a/ML_ADMQuantities_MP/param.ccl +++ b/ML_ADMQuantities_MP/param.ccl @@ -50,6 +50,12 @@ CCTK_INT rhs_timelevels "Number of active RHS timelevels" STEERABLE=RECOVER } 1 restricted: +CCTK_INT other_timelevels "Number of active timelevels for non-evolved grid functions" STEERABLE=RECOVER +{ + 0:3 :: "" +} 1 + +restricted: CCTK_INT ML_ADMQuantities_MP_calc_every "ML_ADMQuantities_MP_calc_every" STEERABLE=ALWAYS { *:* :: "" diff --git a/ML_ADMQuantities_MP/schedule.ccl b/ML_ADMQuantities_MP/schedule.ccl index f764cba..6f7e000 100644 --- a/ML_ADMQuantities_MP/schedule.ccl +++ b/ML_ADMQuantities_MP/schedule.ccl @@ -1,9 +1,31 @@ # File produced by Kranc -STORAGE: ML_Jadm[3] +if (timelevels == 1) +{ + STORAGE: ML_Jadm[1] +} +if (timelevels == 2) +{ + STORAGE: ML_Jadm[2] +} +if (timelevels == 3) +{ + STORAGE: ML_Jadm[3] +} -STORAGE: ML_Madm[3] +if (timelevels == 1) +{ + STORAGE: ML_Madm[1] +} +if (timelevels == 2) +{ + STORAGE: ML_Madm[2] +} +if (timelevels == 3) +{ + STORAGE: ML_Madm[3] +} schedule ML_ADMQuantities_MP_Startup at STARTUP { @@ -11,12 +33,6 @@ schedule ML_ADMQuantities_MP_Startup at STARTUP OPTIONS: meta } "create banner" -schedule ML_ADMQuantities_MP_RegisterVars in MoL_Register -{ - LANG: C - OPTIONS: meta -} "Register Variables for MoL" - schedule ML_ADMQuantities_MP_RegisterSymmetries in SymmetryRegister { LANG: C @@ -31,6 +47,20 @@ schedule group ML_ADMQuantities_MP_group in MoL_PseudoEvolution after MoL_PostSt schedule ML_ADMQuantities_MP in ML_ADMQuantities_MP_group { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN::ML_curv + READS: ML_BSSN::ML_Gamma + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_log_confac + READS: ML_BSSN::ML_metric + READS: ML_BSSN::ML_shift + READS: ML_BSSN::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_ADMQuantities_MP::ML_Jadm + WRITES: ML_ADMQuantities_MP::ML_Madm } "ML_ADMQuantities_MP" schedule ML_ADMQuantities_MP_SelectBCs in ML_ADMQuantities_MP_bc_group @@ -68,6 +98,12 @@ schedule ML_ADMQuantities_MP_CheckBoundaries at BASEGRID OPTIONS: meta } "check boundaries treatment" +schedule ML_ADMQuantities_MP_RegisterVars in MoL_Register +{ + LANG: C + OPTIONS: meta +} "Register Variables for MoL" + schedule group ApplyBCs as ML_ADMQuantities_MP_ApplyBCs in MoL_PostStep after ML_ADMQuantities_MP_SelectBoundConds { # no language specified diff --git a/ML_ADMQuantities_MP/src/ML_ADMQuantities_MP.cc b/ML_ADMQuantities_MP/src/ML_ADMQuantities_MP.cc index 7ae000d..fd37c79 100644 --- a/ML_ADMQuantities_MP/src/ML_ADMQuantities_MP.cc +++ b/ML_ADMQuantities_MP/src/ML_ADMQuantities_MP.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" /* Define macros used in calculations */ @@ -42,8 +43,6 @@ static void ML_ADMQuantities_MP_Body(cGH const * restrict const cctkGH, int cons DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -74,9 +73,9 @@ static void ML_ADMQuantities_MP_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL const p1o12dx = 0.0833333333333333333333333333333*INV(dx); CCTK_REAL const p1o12dy = 0.0833333333333333333333333333333*INV(dy); CCTK_REAL const p1o12dz = 0.0833333333333333333333333333333*INV(dz); - CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx)*INV(dy); - CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx)*INV(dz); - CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy)*INV(dz); + CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx*dy); + CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx*dz); + CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy*dz); CCTK_REAL const p1odx = INV(dx); CCTK_REAL const p1ody = INV(dy); CCTK_REAL const p1odz = INV(dz); @@ -139,7 +138,7 @@ static void ML_ADMQuantities_MP_Body(cGH const * restrict const cctkGH, int cons /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3 (ML_ADMQuantities_MP, + CCTK_LOOP3(ML_ADMQuantities_MP, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2]) { @@ -410,17 +409,17 @@ static void ML_ADMQuantities_MP_Body(cGH const * restrict const cctkGH, int cons JacPDstandardNth1gt33 = J11L*PDstandardNth1gt33 + J21L*PDstandardNth2gt33 + J31L*PDstandardNth3gt33; - JacPDstandardNth1trK = J11L*PDstandardNth1trK + J21L*PDstandardNth2trK - + J31L*PDstandardNth3trK; + JacPDstandardNth1trK = J11L*PDstandardNth1trK + + J21L*PDstandardNth2trK + J31L*PDstandardNth3trK; - JacPDstandardNth1Xt1 = J11L*PDstandardNth1Xt1 + J21L*PDstandardNth2Xt1 - + J31L*PDstandardNth3Xt1; + JacPDstandardNth1Xt1 = J11L*PDstandardNth1Xt1 + + J21L*PDstandardNth2Xt1 + J31L*PDstandardNth3Xt1; - JacPDstandardNth1Xt2 = J11L*PDstandardNth1Xt2 + J21L*PDstandardNth2Xt2 - + J31L*PDstandardNth3Xt2; + JacPDstandardNth1Xt2 = J11L*PDstandardNth1Xt2 + + J21L*PDstandardNth2Xt2 + J31L*PDstandardNth3Xt2; - JacPDstandardNth1Xt3 = J11L*PDstandardNth1Xt3 + J21L*PDstandardNth2Xt3 - + J31L*PDstandardNth3Xt3; + JacPDstandardNth1Xt3 = J11L*PDstandardNth1Xt3 + + J21L*PDstandardNth2Xt3 + J31L*PDstandardNth3Xt3; JacPDstandardNth2gt11 = J12L*PDstandardNth1gt11 + J22L*PDstandardNth2gt11 + J32L*PDstandardNth3gt11; @@ -440,17 +439,17 @@ static void ML_ADMQuantities_MP_Body(cGH const * restrict const cctkGH, int cons JacPDstandardNth2gt33 = J12L*PDstandardNth1gt33 + J22L*PDstandardNth2gt33 + J32L*PDstandardNth3gt33; - JacPDstandardNth2trK = J12L*PDstandardNth1trK + J22L*PDstandardNth2trK - + J32L*PDstandardNth3trK; + JacPDstandardNth2trK = J12L*PDstandardNth1trK + + J22L*PDstandardNth2trK + J32L*PDstandardNth3trK; - JacPDstandardNth2Xt1 = J12L*PDstandardNth1Xt1 + J22L*PDstandardNth2Xt1 - + J32L*PDstandardNth3Xt1; + JacPDstandardNth2Xt1 = J12L*PDstandardNth1Xt1 + + J22L*PDstandardNth2Xt1 + J32L*PDstandardNth3Xt1; - JacPDstandardNth2Xt2 = J12L*PDstandardNth1Xt2 + J22L*PDstandardNth2Xt2 - + J32L*PDstandardNth3Xt2; + JacPDstandardNth2Xt2 = J12L*PDstandardNth1Xt2 + + J22L*PDstandardNth2Xt2 + J32L*PDstandardNth3Xt2; - JacPDstandardNth2Xt3 = J12L*PDstandardNth1Xt3 + J22L*PDstandardNth2Xt3 - + J32L*PDstandardNth3Xt3; + JacPDstandardNth2Xt3 = J12L*PDstandardNth1Xt3 + + J22L*PDstandardNth2Xt3 + J32L*PDstandardNth3Xt3; JacPDstandardNth3gt11 = J13L*PDstandardNth1gt11 + J23L*PDstandardNth2gt11 + J33L*PDstandardNth3gt11; @@ -470,17 +469,17 @@ static void ML_ADMQuantities_MP_Body(cGH const * restrict const cctkGH, int cons JacPDstandardNth3gt33 = J13L*PDstandardNth1gt33 + J23L*PDstandardNth2gt33 + J33L*PDstandardNth3gt33; - JacPDstandardNth3trK = J13L*PDstandardNth1trK + J23L*PDstandardNth2trK - + J33L*PDstandardNth3trK; + JacPDstandardNth3trK = J13L*PDstandardNth1trK + + J23L*PDstandardNth2trK + J33L*PDstandardNth3trK; - JacPDstandardNth3Xt1 = J13L*PDstandardNth1Xt1 + J23L*PDstandardNth2Xt1 - + J33L*PDstandardNth3Xt1; + JacPDstandardNth3Xt1 = J13L*PDstandardNth1Xt1 + + J23L*PDstandardNth2Xt1 + J33L*PDstandardNth3Xt1; - JacPDstandardNth3Xt2 = J13L*PDstandardNth1Xt2 + J23L*PDstandardNth2Xt2 - + J33L*PDstandardNth3Xt2; + JacPDstandardNth3Xt2 = J13L*PDstandardNth1Xt2 + + J23L*PDstandardNth2Xt2 + J33L*PDstandardNth3Xt2; - JacPDstandardNth3Xt3 = J13L*PDstandardNth1Xt3 + J23L*PDstandardNth2Xt3 - + J33L*PDstandardNth3Xt3; + JacPDstandardNth3Xt3 = J13L*PDstandardNth1Xt3 + + J23L*PDstandardNth2Xt3 + J33L*PDstandardNth3Xt3; JacPDstandardNth11gt11 = dJ111L*PDstandardNth1gt11 + 2*(J11L*(J21L*PDstandardNth12gt11 + J31L*PDstandardNth13gt11) + @@ -1342,8 +1341,8 @@ static void ML_ADMQuantities_MP_Body(cGH const * restrict const cctkGH, int cons JacPDstandardNth31gt13) + gtu32*(-JacPDstandardNth23gt13 - JacPDstandardNth32gt13) - gtu33*JacPDstandardNth33gt13 + gt11L*JacPDstandardNth3Xt1 + gt12L*JacPDstandardNth3Xt2 + - gt13L*JacPDstandardNth3Xt3 + Gtl113*Xtn1 + Gtl311*Xtn1 + Gtl123*Xtn2 + - Gtl312*Xtn2 + Gtl133*Xtn3 + Gtl313*Xtn3); + gt13L*JacPDstandardNth3Xt3 + Gtl113*Xtn1 + Gtl311*Xtn1 + Gtl123*Xtn2 + + Gtl312*Xtn2 + Gtl133*Xtn3 + Gtl313*Xtn3); CCTK_REAL Rt22 = 0.5*(6*(Gt212*Gtlu221 + Gt222*Gtlu222 + Gt223*Gtlu223) + 4*(Gt123*Gtlu213 + Gt312*Gtlu231 + Gt322*Gtlu232 + @@ -1371,8 +1370,9 @@ static void ML_ADMQuantities_MP_Body(cGH const * restrict const cctkGH, int cons gtu31*(-JacPDstandardNth13gt23 - JacPDstandardNth31gt23) + gtu32*(-JacPDstandardNth23gt23 - JacPDstandardNth32gt23) - gtu33*JacPDstandardNth33gt23 + gt12L*JacPDstandardNth3Xt1 + - gt22L*JacPDstandardNth3Xt2 + gt23L*JacPDstandardNth3Xt3 + Gtl213*Xtn1 + - Gtl312*Xtn1 + Gtl223*Xtn2 + Gtl322*Xtn2 + Gtl233*Xtn3 + Gtl323*Xtn3); + gt22L*JacPDstandardNth3Xt2 + gt23L*JacPDstandardNth3Xt3 + + Gtl213*Xtn1 + Gtl312*Xtn1 + Gtl223*Xtn2 + Gtl322*Xtn2 + Gtl233*Xtn3 + + Gtl323*Xtn3); CCTK_REAL Rt33 = 0.5*(4*(Gt133*Gtlu313 + Gt213*Gtlu321 + Gt223*Gtlu322 + Gt233*Gtlu323) + 6*(Gt313*Gtlu331 + Gt323*Gtlu332 + Gt333*Gtlu333) - @@ -1389,7 +1389,8 @@ static void ML_ADMQuantities_MP_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL trRt = gtu11*Rt11 + gtu22*Rt22 + 2*(gtu21*Rt12 + gtu31*Rt13 + gtu32*Rt23) + gtu33*Rt33; - CCTK_REAL ephi = IfThen(conformalMethod,INV(sqrt(phiL)),exp(phiL)); + CCTK_REAL ephi = + IfThen(conformalMethod,INV(sqrt(phiL)),exp(phiL)); CCTK_REAL Atm11 = At11L*gtu11 + At12L*gtu21 + At13L*gtu31; @@ -1410,9 +1411,9 @@ static void ML_ADMQuantities_MP_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL Atm33 = At13L*gtu31 + At23L*gtu32 + At33L*gtu33; CCTK_REAL rho = INV(SQR(alphaL))*(eTttL - 2*(beta2L*eTtyL + - beta3L*eTtzL) + 2*(beta1L*(-eTtxL + beta2L*eTxyL + beta3L*eTxzL) + - beta2L*beta3L*eTyzL) + eTxxL*SQR(beta1L) + eTyyL*SQR(beta2L) + - eTzzL*SQR(beta3L)); + beta3L*eTtzL) + 2*(beta1L*(-eTtxL + beta2L*eTxyL + + beta3L*eTxzL) + beta2L*beta3L*eTyzL) + eTxxL*SQR(beta1L) + + eTyyL*SQR(beta2L) + eTzzL*SQR(beta3L)); CCTK_REAL S1 = (-eTtxL + beta1L*eTxxL + beta2L*eTxyL + beta3L*eTxzL)*INV(alphaL); @@ -1423,68 +1424,57 @@ static void ML_ADMQuantities_MP_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL S3 = (-eTtzL + beta1L*eTxzL + beta2L*eTyzL + beta3L*eTzzL)*INV(alphaL); - CCTK_REAL MadmL = - 0.01989436788648691697111047042156429525431*((-(Gt111*Gtlu111) - - Gt112*Gtlu112 - Gt113*Gtlu113 - Gt211*Gtlu121 - Gt212*Gtlu122 - - Gt213*Gtlu123 - Gt311*Gtlu131 - Gt312*Gtlu132 - Gt313*Gtlu133)*gtu11 + - (-(Gt122*Gtlu112) - Gt123*Gtlu113 - Gt222*Gtlu122 - Gt223*Gtlu123 - - Gt322*Gtlu132 - Gt323*Gtlu133 - Gt111*Gtlu211 + Gt112*(-Gtlu111 - - Gtlu212) - Gt113*Gtlu213 - Gt211*Gtlu221 + Gt212*(-Gtlu121 - Gtlu222) - - Gt213*Gtlu223 - Gt311*Gtlu231 + Gt312*(-Gtlu131 - Gtlu232) - - Gt313*Gtlu233)*gtu21 + (-(Gt112*Gtlu211) - Gt122*Gtlu212 - - Gt123*Gtlu213 - Gt212*Gtlu221 - Gt222*Gtlu222 - Gt223*Gtlu223 - - Gt312*Gtlu231 - Gt322*Gtlu232 - Gt323*Gtlu233)*gtu22 + - (-(Gt123*Gtlu112) - Gt133*Gtlu113 - Gt223*Gtlu122 - Gt233*Gtlu123 - - Gt323*Gtlu132 - Gt333*Gtlu133 - Gt111*Gtlu311 - Gt112*Gtlu312 + - Gt113*(-Gtlu111 - Gtlu313) - Gt211*Gtlu321 - Gt212*Gtlu322 + - Gt213*(-Gtlu121 - Gtlu323) - Gt311*Gtlu331 - Gt312*Gtlu332 + - Gt313*(-Gtlu131 - Gtlu333))*gtu31 + (-(Gt113*Gtlu211) - Gt133*Gtlu213 - - Gt213*Gtlu221 - Gt233*Gtlu223 - Gt313*Gtlu231 - Gt333*Gtlu233 - - Gt112*Gtlu311 - Gt122*Gtlu312 + Gt123*(-Gtlu212 - Gtlu313) - - Gt212*Gtlu321 - Gt222*Gtlu322 + Gt223*(-Gtlu222 - Gtlu323) - - Gt312*Gtlu331 - Gt322*Gtlu332 + Gt323*(-Gtlu232 - Gtlu333))*gtu32 + - (-(Gt113*Gtlu311) - Gt123*Gtlu312 - Gt133*Gtlu313 - Gt213*Gtlu321 - - Gt223*Gtlu322 - Gt233*Gtlu323 - Gt313*Gtlu331 - Gt323*Gtlu332 - - Gt333*Gtlu333)*gtu33 + trRt - ephi*trRt + pow(ephi,5)*(2*Atm12*Atm21 + - 2.*Atm13*Atm31 + 2.*Atm23*Atm32 + - 50.26548245743669181540229413247204614715*rho + SQR(Atm11) + SQR(Atm22) - + SQR(Atm33) - 0.6666666666666666666666666666666666666667*SQR(trKL))); + CCTK_REAL MadmL = -0.0625*INV(Pi)*((Gt111*Gtlu111 + Gt112*Gtlu112 + + Gt113*Gtlu113 + Gt211*Gtlu121 + Gt212*Gtlu122 + Gt213*Gtlu123 + + Gt311*Gtlu131 + Gt312*Gtlu132 + Gt313*Gtlu133)*gtu11 + (Gt122*Gtlu112 + + Gt123*Gtlu113 + Gt222*Gtlu122 + Gt223*Gtlu123 + Gt322*Gtlu132 + + Gt323*Gtlu133 + Gt111*Gtlu211 + Gt112*(Gtlu111 + Gtlu212) + + Gt113*Gtlu213 + Gt211*Gtlu221 + Gt212*(Gtlu121 + Gtlu222) + + Gt213*Gtlu223 + Gt311*Gtlu231 + Gt312*(Gtlu131 + Gtlu232) + + Gt313*Gtlu233)*gtu21 + (Gt112*Gtlu211 + Gt122*Gtlu212 + Gt123*Gtlu213 + + Gt212*Gtlu221 + Gt222*Gtlu222 + Gt223*Gtlu223 + Gt312*Gtlu231 + + Gt322*Gtlu232 + Gt323*Gtlu233)*gtu22 + (Gt123*Gtlu112 + Gt133*Gtlu113 + + Gt223*Gtlu122 + Gt233*Gtlu123 + Gt323*Gtlu132 + Gt333*Gtlu133 + + Gt111*Gtlu311 + Gt112*Gtlu312 + Gt113*(Gtlu111 + Gtlu313) + + Gt211*Gtlu321 + Gt212*Gtlu322 + Gt213*(Gtlu121 + Gtlu323) + + Gt311*Gtlu331 + Gt312*Gtlu332 + Gt313*(Gtlu131 + Gtlu333))*gtu31 + + (Gt113*Gtlu211 + Gt133*Gtlu213 + Gt213*Gtlu221 + Gt233*Gtlu223 + + Gt313*Gtlu231 + Gt333*Gtlu233 + Gt112*Gtlu311 + Gt122*Gtlu312 + + Gt123*(Gtlu212 + Gtlu313) + Gt212*Gtlu321 + Gt222*Gtlu322 + + Gt223*(Gtlu222 + Gtlu323) + Gt312*Gtlu331 + Gt322*Gtlu332 + + Gt323*(Gtlu232 + Gtlu333))*gtu32 + (Gt113*Gtlu311 + Gt123*Gtlu312 + + Gt133*Gtlu313 + Gt213*Gtlu321 + Gt223*Gtlu322 + Gt233*Gtlu323 + + Gt313*Gtlu331 + Gt323*Gtlu332 + Gt333*Gtlu333)*gtu33 + (-1 + ephi)*trRt + - pow(ephi,5)*(2*(Atm12*Atm21 + Atm13*Atm31 + Atm23*Atm32) + 16*Pi*rho + - 0.666666666666666666666666666667*SQR(trKL) + SQR(Atm11) + + SQR(Atm22) + SQR(Atm33))); CCTK_REAL Jadm1L = - ((-0.01989436788648691697111047042156429525431*(At11L*dgtu113 + - At22L*dgtu223 + At33L*dgtu333) + - 0.02652582384864922262814729389541906033908*JacPDstandardNth3trK + - 1.*S3)*yL - 0.03978873577297383394222094084312859050861*(Atm32 + - (At12L*dgtu213 + At13L*dgtu313 + At23L*dgtu323)*yL) + - (0.01989436788648691697111047042156429525431*(At11L*dgtu112 + - At22L*dgtu222 + At33L*dgtu332) - - 0.02652582384864922262814729389541906033908*JacPDstandardNth2trK - - 1.*S2)*zL + 0.03978873577297383394222094084312859050861*(Atm23 + - (At12L*dgtu212 + At13L*dgtu312 + At23L*dgtu322)*zL))*pow(ephi,6); + 0.0208333333333333333333333333333*(At11L*(3*zL*dgtu112 - + 3*yL*dgtu113) + At22L*(3*zL*dgtu222 - 3*yL*dgtu223) + 6*(Atm23 + + zL*(At12L*dgtu212 + At13L*dgtu312 + At23L*dgtu322)) - + 6*(Atm32 + yL*(At12L*dgtu213 + At13L*dgtu313 + At23L*dgtu323)) + + At33L*(3*zL*dgtu332 - 3*yL*dgtu333) + + zL*(-4*JacPDstandardNth2trK - 48*Pi*S2) + + yL*(4*JacPDstandardNth3trK + 48*Pi*S3))*INV(Pi)*pow(ephi,6); CCTK_REAL Jadm2L = - ((0.01989436788648691697111047042156429525431*(At11L*dgtu113 + - At22L*dgtu223 + At33L*dgtu333) - - 0.02652582384864922262814729389541906033908*JacPDstandardNth3trK - - 1.*S3)*xL + 0.03978873577297383394222094084312859050861*(Atm31 + - (At12L*dgtu213 + At13L*dgtu313 + At23L*dgtu323)*xL) + - (-0.01989436788648691697111047042156429525431*(At11L*dgtu111 + - At22L*dgtu221 + At33L*dgtu331) + - 0.02652582384864922262814729389541906033908*JacPDstandardNth1trK + - 1.*S1)*zL - 0.03978873577297383394222094084312859050861*(Atm13 + - (At12L*dgtu211 + At13L*dgtu311 + At23L*dgtu321)*zL))*pow(ephi,6); + 0.0208333333333333333333333333333*(At11L*(-3*zL*dgtu111 + + 3*xL*dgtu113) + At22L*(-3*zL*dgtu221 + 3*xL*dgtu223) - 6*(Atm13 + + zL*(At12L*dgtu211 + At13L*dgtu311 + At23L*dgtu321)) + + 6*(Atm31 + xL*(At12L*dgtu213 + At13L*dgtu313 + At23L*dgtu323)) + + At33L*(-3*zL*dgtu331 + 3*xL*dgtu333) + + zL*(4*JacPDstandardNth1trK + 48*Pi*S1) + + xL*(-4*JacPDstandardNth3trK - 48*Pi*S3))*INV(Pi)*pow(ephi,6); CCTK_REAL Jadm3L = - ((-0.01989436788648691697111047042156429525431*(At11L*dgtu112 + - At22L*dgtu222 + At33L*dgtu332) + - 0.02652582384864922262814729389541906033908*JacPDstandardNth2trK + - 1.*S2)*xL - 0.03978873577297383394222094084312859050861*(Atm21 + - (At12L*dgtu212 + At13L*dgtu312 + At23L*dgtu322)*xL) + - (0.01989436788648691697111047042156429525431*(At11L*dgtu111 + - At22L*dgtu221 + At33L*dgtu331) - - 0.02652582384864922262814729389541906033908*JacPDstandardNth1trK - - 1.*S1)*yL + 0.03978873577297383394222094084312859050861*(Atm12 + - (At12L*dgtu211 + At13L*dgtu311 + At23L*dgtu321)*yL))*pow(ephi,6); + 0.0208333333333333333333333333333*(At11L*(3*yL*dgtu111 - + 3*xL*dgtu112) + At22L*(3*yL*dgtu221 - 3*xL*dgtu222) + 6*(Atm12 + + yL*(At12L*dgtu211 + At13L*dgtu311 + At23L*dgtu321)) - + 6*(Atm21 + xL*(At12L*dgtu212 + At13L*dgtu312 + At23L*dgtu322)) + + At33L*(3*yL*dgtu331 - 3*xL*dgtu332) + + yL*(-4*JacPDstandardNth1trK - 48*Pi*S1) + + xL*(4*JacPDstandardNth2trK + 48*Pi*S2))*INV(Pi)*pow(ephi,6); /* Copy local copies back to grid functions */ Jadm1[index] = Jadm1L; @@ -1492,7 +1482,7 @@ static void ML_ADMQuantities_MP_Body(cGH const * restrict const cctkGH, int cons Jadm3[index] = Jadm3L; Madm[index] = MadmL; } - LC_ENDLOOP3 (ML_ADMQuantities_MP); + CCTK_ENDLOOP3(ML_ADMQuantities_MP); } extern "C" void ML_ADMQuantities_MP(CCTK_ARGUMENTS) @@ -1511,12 +1501,23 @@ extern "C" void ML_ADMQuantities_MP(CCTK_ARGUMENTS) return; } - const char *groups[] = {"grid::coordinates","Grid::coordinates","ML_BSSN::ML_curv","ML_BSSN::ML_Gamma","ML_BSSN::ML_lapse","ML_BSSN::ML_log_confac","ML_BSSN::ML_metric","ML_BSSN::ML_shift","ML_BSSN::ML_trace_curv","ML_ADMQuantities_MP::ML_Jadm","ML_ADMQuantities_MP::ML_Madm"}; + const char *const groups[] = { + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN::ML_curv", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_log_confac", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_trace_curv", + "ML_ADMQuantities_MP::ML_Jadm", + "ML_ADMQuantities_MP::ML_Madm"}; GenericFD_AssertGroupStorage(cctkGH, "ML_ADMQuantities_MP", 11, groups); GenericFD_EnsureStencilFits(cctkGH, "ML_ADMQuantities_MP", 2, 2, 2); - GenericFD_LoopOverInterior(cctkGH, &ML_ADMQuantities_MP_Body); + GenericFD_LoopOverInterior(cctkGH, ML_ADMQuantities_MP_Body); if (verbose > 1) { diff --git a/ML_ADMQuantities_MP/src/make.code.defn b/ML_ADMQuantities_MP/src/make.code.defn index 559bbd8..4ac9711 100644 --- a/ML_ADMQuantities_MP/src/make.code.defn +++ b/ML_ADMQuantities_MP/src/make.code.defn @@ -1,3 +1,3 @@ # File produced by Kranc -SRCS = Startup.cc RegisterMoL.cc RegisterSymmetries.cc ML_ADMQuantities_MP.cc Boundaries.cc +SRCS = Startup.cc RegisterSymmetries.cc RegisterMoL.cc ML_ADMQuantities_MP.cc Boundaries.cc diff --git a/ML_ADMQuantities_O2/configuration.ccl b/ML_ADMQuantities_O2/configuration.ccl index 8e2c3c5..0a66ec2 100644 --- a/ML_ADMQuantities_O2/configuration.ccl +++ b/ML_ADMQuantities_O2/configuration.ccl @@ -1,4 +1,6 @@ # File produced by Kranc REQUIRES GenericFD -REQUIRES LoopControl +OPTIONAL LoopControl +{ +} diff --git a/ML_ADMQuantities_O2/param.ccl b/ML_ADMQuantities_O2/param.ccl index 063c99c..84adaea 100644 --- a/ML_ADMQuantities_O2/param.ccl +++ b/ML_ADMQuantities_O2/param.ccl @@ -47,6 +47,12 @@ CCTK_INT rhs_timelevels "Number of active RHS timelevels" STEERABLE=RECOVER } 1 restricted: +CCTK_INT other_timelevels "Number of active timelevels for non-evolved grid functions" STEERABLE=RECOVER +{ + 0:3 :: "" +} 1 + +restricted: CCTK_INT ML_ADMQuantities_O2_calc_every "ML_ADMQuantities_O2_calc_every" STEERABLE=ALWAYS { *:* :: "" diff --git a/ML_ADMQuantities_O2/schedule.ccl b/ML_ADMQuantities_O2/schedule.ccl index fe37d31..0c73b79 100644 --- a/ML_ADMQuantities_O2/schedule.ccl +++ b/ML_ADMQuantities_O2/schedule.ccl @@ -1,9 +1,31 @@ # File produced by Kranc -STORAGE: ML_Jadm[3] +if (timelevels == 1) +{ + STORAGE: ML_Jadm[1] +} +if (timelevels == 2) +{ + STORAGE: ML_Jadm[2] +} +if (timelevels == 3) +{ + STORAGE: ML_Jadm[3] +} -STORAGE: ML_Madm[3] +if (timelevels == 1) +{ + STORAGE: ML_Madm[1] +} +if (timelevels == 2) +{ + STORAGE: ML_Madm[2] +} +if (timelevels == 3) +{ + STORAGE: ML_Madm[3] +} schedule ML_ADMQuantities_O2_Startup at STARTUP { @@ -11,12 +33,6 @@ schedule ML_ADMQuantities_O2_Startup at STARTUP OPTIONS: meta } "create banner" -schedule ML_ADMQuantities_O2_RegisterVars in MoL_Register -{ - LANG: C - OPTIONS: meta -} "Register Variables for MoL" - schedule ML_ADMQuantities_O2_RegisterSymmetries in SymmetryRegister { LANG: C @@ -31,6 +47,20 @@ schedule group ML_ADMQuantities_O2_group in MoL_PseudoEvolution after MoL_PostSt schedule ML_ADMQuantities_O2 in ML_ADMQuantities_O2_group { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN::ML_curv + READS: ML_BSSN::ML_Gamma + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_log_confac + READS: ML_BSSN::ML_metric + READS: ML_BSSN::ML_shift + READS: ML_BSSN::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_ADMQuantities_O2::ML_Jadm + WRITES: ML_ADMQuantities_O2::ML_Madm } "ML_ADMQuantities_O2" schedule ML_ADMQuantities_O2_SelectBCs in ML_ADMQuantities_O2_bc_group @@ -68,6 +98,12 @@ schedule ML_ADMQuantities_O2_CheckBoundaries at BASEGRID OPTIONS: meta } "check boundaries treatment" +schedule ML_ADMQuantities_O2_RegisterVars in MoL_Register +{ + LANG: C + OPTIONS: meta +} "Register Variables for MoL" + schedule group ApplyBCs as ML_ADMQuantities_O2_ApplyBCs in MoL_PostStep after ML_ADMQuantities_O2_SelectBoundConds { # no language specified diff --git a/ML_ADMQuantities_O2/src/ML_ADMQuantities_O2.cc b/ML_ADMQuantities_O2/src/ML_ADMQuantities_O2.cc index 3d0042e..84f71dd 100644 --- a/ML_ADMQuantities_O2/src/ML_ADMQuantities_O2.cc +++ b/ML_ADMQuantities_O2/src/ML_ADMQuantities_O2.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" /* Define macros used in calculations */ @@ -42,8 +43,6 @@ static void ML_ADMQuantities_O2_Body(cGH const * restrict const cctkGH, int cons DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -74,9 +73,9 @@ static void ML_ADMQuantities_O2_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL const p1o2dx = 0.5*INV(dx); CCTK_REAL const p1o2dy = 0.5*INV(dy); CCTK_REAL const p1o2dz = 0.5*INV(dz); - CCTK_REAL const p1o4dxdy = 0.25*INV(dx)*INV(dy); - CCTK_REAL const p1o4dxdz = 0.25*INV(dx)*INV(dz); - CCTK_REAL const p1o4dydz = 0.25*INV(dy)*INV(dz); + CCTK_REAL const p1o4dxdy = 0.25*INV(dx*dy); + CCTK_REAL const p1o4dxdz = 0.25*INV(dx*dz); + CCTK_REAL const p1o4dydz = 0.25*INV(dy*dz); CCTK_REAL const p1odx = INV(dx); CCTK_REAL const p1odx2 = INV(SQR(dx)); CCTK_REAL const p1ody = INV(dy); @@ -97,7 +96,7 @@ static void ML_ADMQuantities_O2_Body(cGH const * restrict const cctkGH, int cons /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3 (ML_ADMQuantities_O2, + CCTK_LOOP3(ML_ADMQuantities_O2, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2]) { @@ -530,8 +529,8 @@ static void ML_ADMQuantities_O2_Body(cGH const * restrict const cctkGH, int cons 2*gtu32*PDstandardNth23gt22 + 2*(Gt112*Gtlu121 + Gt122*Gtlu122 + Gt123*Gtlu123 + Gt312*Gtlu321 + Gt322*Gtlu322 + Gt323*Gtlu323 + gt12L*PDstandardNth2Xt1) + 2*gt22L*PDstandardNth2Xt2 + - 2*gt23L*PDstandardNth2Xt3 - gtu33*PDstandardNth33gt22 + 2*Gtl212*Xtn1 + - 2*Gtl222*Xtn2 + 2*Gtl223*Xtn3); + 2*gt23L*PDstandardNth2Xt3 - gtu33*PDstandardNth33gt22 + 2*Gtl212*Xtn1 + + 2*Gtl222*Xtn2 + 2*Gtl223*Xtn3); CCTK_REAL Rt23 = 0.5*(2*(Gt112*Gtlu131 + Gt122*Gtlu132 + Gt123*Gtlu133 + Gt113*Gtlu211 + Gt123*Gtlu212 + Gt133*Gtlu213 + Gt213*Gtlu221 + @@ -562,7 +561,8 @@ static void ML_ADMQuantities_O2_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL trRt = gtu11*Rt11 + gtu22*Rt22 + 2*(gtu21*Rt12 + gtu31*Rt13 + gtu32*Rt23) + gtu33*Rt33; - CCTK_REAL ephi = IfThen(conformalMethod,INV(sqrt(phiL)),exp(phiL)); + CCTK_REAL ephi = + IfThen(conformalMethod,INV(sqrt(phiL)),exp(phiL)); CCTK_REAL Atm11 = At11L*gtu11 + At12L*gtu21 + At13L*gtu31; @@ -583,9 +583,9 @@ static void ML_ADMQuantities_O2_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL Atm33 = At13L*gtu31 + At23L*gtu32 + At33L*gtu33; CCTK_REAL rho = INV(SQR(alphaL))*(eTttL - 2*(beta2L*eTtyL + - beta3L*eTtzL) + 2*(beta1L*(-eTtxL + beta2L*eTxyL + beta3L*eTxzL) + - beta2L*beta3L*eTyzL) + eTxxL*SQR(beta1L) + eTyyL*SQR(beta2L) + - eTzzL*SQR(beta3L)); + beta3L*eTtzL) + 2*(beta1L*(-eTtxL + beta2L*eTxyL + + beta3L*eTxzL) + beta2L*beta3L*eTyzL) + eTxxL*SQR(beta1L) + + eTyyL*SQR(beta2L) + eTzzL*SQR(beta3L)); CCTK_REAL S1 = (-eTtxL + beta1L*eTxxL + beta2L*eTxyL + beta3L*eTxzL)*INV(alphaL); @@ -596,68 +596,57 @@ static void ML_ADMQuantities_O2_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL S3 = (-eTtzL + beta1L*eTxzL + beta2L*eTyzL + beta3L*eTzzL)*INV(alphaL); - CCTK_REAL MadmL = - 0.01989436788648691697111047042156429525431*((-(Gt111*Gtlu111) - - Gt112*Gtlu112 - Gt113*Gtlu113 - Gt211*Gtlu121 - Gt212*Gtlu122 - - Gt213*Gtlu123 - Gt311*Gtlu131 - Gt312*Gtlu132 - Gt313*Gtlu133)*gtu11 + - (-(Gt122*Gtlu112) - Gt123*Gtlu113 - Gt222*Gtlu122 - Gt223*Gtlu123 - - Gt322*Gtlu132 - Gt323*Gtlu133 - Gt111*Gtlu211 + Gt112*(-Gtlu111 - - Gtlu212) - Gt113*Gtlu213 - Gt211*Gtlu221 + Gt212*(-Gtlu121 - Gtlu222) - - Gt213*Gtlu223 - Gt311*Gtlu231 + Gt312*(-Gtlu131 - Gtlu232) - - Gt313*Gtlu233)*gtu21 + (-(Gt112*Gtlu211) - Gt122*Gtlu212 - - Gt123*Gtlu213 - Gt212*Gtlu221 - Gt222*Gtlu222 - Gt223*Gtlu223 - - Gt312*Gtlu231 - Gt322*Gtlu232 - Gt323*Gtlu233)*gtu22 + - (-(Gt123*Gtlu112) - Gt133*Gtlu113 - Gt223*Gtlu122 - Gt233*Gtlu123 - - Gt323*Gtlu132 - Gt333*Gtlu133 - Gt111*Gtlu311 - Gt112*Gtlu312 + - Gt113*(-Gtlu111 - Gtlu313) - Gt211*Gtlu321 - Gt212*Gtlu322 + - Gt213*(-Gtlu121 - Gtlu323) - Gt311*Gtlu331 - Gt312*Gtlu332 + - Gt313*(-Gtlu131 - Gtlu333))*gtu31 + (-(Gt113*Gtlu211) - Gt133*Gtlu213 - - Gt213*Gtlu221 - Gt233*Gtlu223 - Gt313*Gtlu231 - Gt333*Gtlu233 - - Gt112*Gtlu311 - Gt122*Gtlu312 + Gt123*(-Gtlu212 - Gtlu313) - - Gt212*Gtlu321 - Gt222*Gtlu322 + Gt223*(-Gtlu222 - Gtlu323) - - Gt312*Gtlu331 - Gt322*Gtlu332 + Gt323*(-Gtlu232 - Gtlu333))*gtu32 + - (-(Gt113*Gtlu311) - Gt123*Gtlu312 - Gt133*Gtlu313 - Gt213*Gtlu321 - - Gt223*Gtlu322 - Gt233*Gtlu323 - Gt313*Gtlu331 - Gt323*Gtlu332 - - Gt333*Gtlu333)*gtu33 + trRt - ephi*trRt + pow(ephi,5)*(2*Atm12*Atm21 + - 2.*Atm13*Atm31 + 2.*Atm23*Atm32 + - 50.26548245743669181540229413247204614715*rho + SQR(Atm11) + SQR(Atm22) - + SQR(Atm33) - 0.6666666666666666666666666666666666666667*SQR(trKL))); + CCTK_REAL MadmL = -0.0625*INV(Pi)*((Gt111*Gtlu111 + Gt112*Gtlu112 + + Gt113*Gtlu113 + Gt211*Gtlu121 + Gt212*Gtlu122 + Gt213*Gtlu123 + + Gt311*Gtlu131 + Gt312*Gtlu132 + Gt313*Gtlu133)*gtu11 + (Gt122*Gtlu112 + + Gt123*Gtlu113 + Gt222*Gtlu122 + Gt223*Gtlu123 + Gt322*Gtlu132 + + Gt323*Gtlu133 + Gt111*Gtlu211 + Gt112*(Gtlu111 + Gtlu212) + + Gt113*Gtlu213 + Gt211*Gtlu221 + Gt212*(Gtlu121 + Gtlu222) + + Gt213*Gtlu223 + Gt311*Gtlu231 + Gt312*(Gtlu131 + Gtlu232) + + Gt313*Gtlu233)*gtu21 + (Gt112*Gtlu211 + Gt122*Gtlu212 + Gt123*Gtlu213 + + Gt212*Gtlu221 + Gt222*Gtlu222 + Gt223*Gtlu223 + Gt312*Gtlu231 + + Gt322*Gtlu232 + Gt323*Gtlu233)*gtu22 + (Gt123*Gtlu112 + Gt133*Gtlu113 + + Gt223*Gtlu122 + Gt233*Gtlu123 + Gt323*Gtlu132 + Gt333*Gtlu133 + + Gt111*Gtlu311 + Gt112*Gtlu312 + Gt113*(Gtlu111 + Gtlu313) + + Gt211*Gtlu321 + Gt212*Gtlu322 + Gt213*(Gtlu121 + Gtlu323) + + Gt311*Gtlu331 + Gt312*Gtlu332 + Gt313*(Gtlu131 + Gtlu333))*gtu31 + + (Gt113*Gtlu211 + Gt133*Gtlu213 + Gt213*Gtlu221 + Gt233*Gtlu223 + + Gt313*Gtlu231 + Gt333*Gtlu233 + Gt112*Gtlu311 + Gt122*Gtlu312 + + Gt123*(Gtlu212 + Gtlu313) + Gt212*Gtlu321 + Gt222*Gtlu322 + + Gt223*(Gtlu222 + Gtlu323) + Gt312*Gtlu331 + Gt322*Gtlu332 + + Gt323*(Gtlu232 + Gtlu333))*gtu32 + (Gt113*Gtlu311 + Gt123*Gtlu312 + + Gt133*Gtlu313 + Gt213*Gtlu321 + Gt223*Gtlu322 + Gt233*Gtlu323 + + Gt313*Gtlu331 + Gt323*Gtlu332 + Gt333*Gtlu333)*gtu33 + (-1 + ephi)*trRt + - pow(ephi,5)*(2*(Atm12*Atm21 + Atm13*Atm31 + Atm23*Atm32) + 16*Pi*rho + - 0.666666666666666666666666666667*SQR(trKL) + SQR(Atm11) + + SQR(Atm22) + SQR(Atm33))); CCTK_REAL Jadm1L = - ((-0.01989436788648691697111047042156429525431*(At11L*dgtu113 + - At22L*dgtu223 + At33L*dgtu333) + - 0.02652582384864922262814729389541906033908*PDstandardNth3trK + - 1.*S3)*yL - 0.03978873577297383394222094084312859050861*(Atm32 + - (At12L*dgtu213 + At13L*dgtu313 + At23L*dgtu323)*yL) + - (0.01989436788648691697111047042156429525431*(At11L*dgtu112 + - At22L*dgtu222 + At33L*dgtu332) - - 0.02652582384864922262814729389541906033908*PDstandardNth2trK - - 1.*S2)*zL + 0.03978873577297383394222094084312859050861*(Atm23 + - (At12L*dgtu212 + At13L*dgtu312 + At23L*dgtu322)*zL))*pow(ephi,6); + 0.0208333333333333333333333333333*(-4*zL*PDstandardNth2trK + + 4*yL*PDstandardNth3trK + 3*(At11L*(zL*dgtu112 - yL*dgtu113) + + At22L*(zL*dgtu222 - yL*dgtu223) + 2*(Atm23 + + zL*(At12L*dgtu212 + At13L*dgtu312 + At23L*dgtu322)) - 2*(Atm32 + + yL*(At12L*dgtu213 + At13L*dgtu313 + At23L*dgtu323)) + + At33L*(zL*dgtu332 - yL*dgtu333) + Pi*(-16*zL*S2 + + 16*yL*S3)))*INV(Pi)*pow(ephi,6); CCTK_REAL Jadm2L = - ((0.01989436788648691697111047042156429525431*(At11L*dgtu113 + - At22L*dgtu223 + At33L*dgtu333) - - 0.02652582384864922262814729389541906033908*PDstandardNth3trK - - 1.*S3)*xL + 0.03978873577297383394222094084312859050861*(Atm31 + - (At12L*dgtu213 + At13L*dgtu313 + At23L*dgtu323)*xL) + - (-0.01989436788648691697111047042156429525431*(At11L*dgtu111 + - At22L*dgtu221 + At33L*dgtu331) + - 0.02652582384864922262814729389541906033908*PDstandardNth1trK + - 1.*S1)*zL - 0.03978873577297383394222094084312859050861*(Atm13 + - (At12L*dgtu211 + At13L*dgtu311 + At23L*dgtu321)*zL))*pow(ephi,6); + 0.0208333333333333333333333333333*(4*zL*PDstandardNth1trK - + 4*xL*PDstandardNth3trK + 3*(At11L*(-(zL*dgtu111) + xL*dgtu113) + + At22L*(-(zL*dgtu221) + xL*dgtu223) - 2*(Atm13 + + zL*(At12L*dgtu211 + At13L*dgtu311 + At23L*dgtu321)) + 2*(Atm31 + + xL*(At12L*dgtu213 + At13L*dgtu313 + At23L*dgtu323)) + + At33L*(-(zL*dgtu331) + xL*dgtu333) + Pi*(16*zL*S1 - + 16*xL*S3)))*INV(Pi)*pow(ephi,6); CCTK_REAL Jadm3L = - ((-0.01989436788648691697111047042156429525431*(At11L*dgtu112 + - At22L*dgtu222 + At33L*dgtu332) + - 0.02652582384864922262814729389541906033908*PDstandardNth2trK + - 1.*S2)*xL - 0.03978873577297383394222094084312859050861*(Atm21 + - (At12L*dgtu212 + At13L*dgtu312 + At23L*dgtu322)*xL) + - (0.01989436788648691697111047042156429525431*(At11L*dgtu111 + - At22L*dgtu221 + At33L*dgtu331) - - 0.02652582384864922262814729389541906033908*PDstandardNth1trK - - 1.*S1)*yL + 0.03978873577297383394222094084312859050861*(Atm12 + - (At12L*dgtu211 + At13L*dgtu311 + At23L*dgtu321)*yL))*pow(ephi,6); + 0.0208333333333333333333333333333*(-4*yL*PDstandardNth1trK + + 4*xL*PDstandardNth2trK + 3*(At11L*(yL*dgtu111 - xL*dgtu112) + + At22L*(yL*dgtu221 - xL*dgtu222) + 2*(Atm12 + + yL*(At12L*dgtu211 + At13L*dgtu311 + At23L*dgtu321)) - 2*(Atm21 + + xL*(At12L*dgtu212 + At13L*dgtu312 + At23L*dgtu322)) + + At33L*(yL*dgtu331 - xL*dgtu332) + Pi*(-16*yL*S1 + + 16*xL*S2)))*INV(Pi)*pow(ephi,6); /* Copy local copies back to grid functions */ Jadm1[index] = Jadm1L; @@ -665,7 +654,7 @@ static void ML_ADMQuantities_O2_Body(cGH const * restrict const cctkGH, int cons Jadm3[index] = Jadm3L; Madm[index] = MadmL; } - LC_ENDLOOP3 (ML_ADMQuantities_O2); + CCTK_ENDLOOP3(ML_ADMQuantities_O2); } extern "C" void ML_ADMQuantities_O2(CCTK_ARGUMENTS) @@ -684,12 +673,23 @@ extern "C" void ML_ADMQuantities_O2(CCTK_ARGUMENTS) return; } - const char *groups[] = {"grid::coordinates","Grid::coordinates","ML_BSSN::ML_curv","ML_BSSN::ML_Gamma","ML_BSSN::ML_lapse","ML_BSSN::ML_log_confac","ML_BSSN::ML_metric","ML_BSSN::ML_shift","ML_BSSN::ML_trace_curv","ML_ADMQuantities_O2::ML_Jadm","ML_ADMQuantities_O2::ML_Madm"}; + const char *const groups[] = { + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN::ML_curv", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_log_confac", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_trace_curv", + "ML_ADMQuantities_O2::ML_Jadm", + "ML_ADMQuantities_O2::ML_Madm"}; GenericFD_AssertGroupStorage(cctkGH, "ML_ADMQuantities_O2", 11, groups); GenericFD_EnsureStencilFits(cctkGH, "ML_ADMQuantities_O2", 1, 1, 1); - GenericFD_LoopOverInterior(cctkGH, &ML_ADMQuantities_O2_Body); + GenericFD_LoopOverInterior(cctkGH, ML_ADMQuantities_O2_Body); if (verbose > 1) { diff --git a/ML_ADMQuantities_O2/src/make.code.defn b/ML_ADMQuantities_O2/src/make.code.defn index f8be0a5..f708d22 100644 --- a/ML_ADMQuantities_O2/src/make.code.defn +++ b/ML_ADMQuantities_O2/src/make.code.defn @@ -1,3 +1,3 @@ # File produced by Kranc -SRCS = Startup.cc RegisterMoL.cc RegisterSymmetries.cc ML_ADMQuantities_O2.cc Boundaries.cc +SRCS = Startup.cc RegisterSymmetries.cc RegisterMoL.cc ML_ADMQuantities_O2.cc Boundaries.cc diff --git a/ML_BSSN/param.ccl b/ML_BSSN/param.ccl index acf1f14..ab31945 100644 --- a/ML_BSSN/param.ccl +++ b/ML_BSSN/param.ccl @@ -90,7 +90,7 @@ CCTK_REAL BetaDriver "BetaDriver" } 0 restricted: -CCTK_REAL LapseAdvectionCoeff "Factor in front of the shift advection terms in 1+log" +CCTK_REAL LapseAdvectionCoeff "Factor in front of the lapse advection terms in 1+log" { "*:*" :: "" } 1 @@ -237,6 +237,12 @@ CCTK_INT rhs_timelevels "Number of active RHS timelevels" STEERABLE=RECOVER } 1 restricted: +CCTK_INT other_timelevels "Number of active timelevels for non-evolved grid functions" STEERABLE=RECOVER +{ + 0:3 :: "" +} 1 + +restricted: CCTK_INT ML_BSSN_Minkowski_calc_every "ML_BSSN_Minkowski_calc_every" STEERABLE=ALWAYS { *:* :: "" diff --git a/ML_BSSN/schedule.ccl b/ML_BSSN/schedule.ccl index 9c0fddb..3e657c9 100644 --- a/ML_BSSN/schedule.ccl +++ b/ML_BSSN/schedule.ccl @@ -1,15 +1,30 @@ # File produced by Kranc -STORAGE: ML_cons_detg[1] +if (other_timelevels == 1) +{ + STORAGE: ML_cons_detg[1] +} -STORAGE: ML_cons_Gamma[1] +if (other_timelevels == 1) +{ + STORAGE: ML_cons_Gamma[1] +} -STORAGE: ML_cons_traceA[1] +if (other_timelevels == 1) +{ + STORAGE: ML_cons_traceA[1] +} -STORAGE: ML_Ham[1] +if (other_timelevels == 1) +{ + STORAGE: ML_Ham[1] +} -STORAGE: ML_mom[1] +if (other_timelevels == 1) +{ + STORAGE: ML_mom[1] +} if (timelevels == 1) { @@ -251,12 +266,6 @@ schedule ML_BSSN_Startup at STARTUP OPTIONS: meta } "create banner" -schedule ML_BSSN_RegisterVars in MoL_Register -{ - LANG: C - OPTIONS: meta -} "Register Variables for MoL" - schedule ML_BSSN_RegisterSymmetries in SymmetryRegister { LANG: C @@ -269,6 +278,15 @@ if (CCTK_EQUALS(my_initial_data, "Minkowski")) schedule ML_BSSN_Minkowski IN ADMBase_InitialData { LANG: C + WRITES: ML_BSSN::ML_curv + WRITES: ML_BSSN::ML_dtlapse + WRITES: ML_BSSN::ML_dtshift + WRITES: ML_BSSN::ML_Gamma + WRITES: ML_BSSN::ML_lapse + WRITES: ML_BSSN::ML_log_confac + WRITES: ML_BSSN::ML_metric + WRITES: ML_BSSN::ML_shift + WRITES: ML_BSSN::ML_trace_curv } "ML_BSSN_Minkowski" } @@ -278,6 +296,18 @@ if (CCTK_EQUALS(my_initial_data, "ADMBase")) schedule ML_BSSN_convertFromADMBase AT initial AFTER ADMBase_PostInitial { LANG: C + READS: ADMBase::curv + READS: ADMBase::lapse + READS: ADMBase::metric + READS: ADMBase::shift + READS: ML_BSSN::ML_log_confac + READS: ML_BSSN::ML_trace_curv + WRITES: ML_BSSN::ML_curv + WRITES: ML_BSSN::ML_lapse + WRITES: ML_BSSN::ML_log_confac + WRITES: ML_BSSN::ML_metric + WRITES: ML_BSSN::ML_shift + WRITES: ML_BSSN::ML_trace_curv } "ML_BSSN_convertFromADMBase" } @@ -287,6 +317,9 @@ if (CCTK_EQUALS(my_initial_data, "ADMBase")) schedule ML_BSSN_InitGamma AT initial BEFORE ML_BSSN_convertFromADMBaseGamma { LANG: C + WRITES: ML_BSSN::ML_dtlapse + WRITES: ML_BSSN::ML_dtshift + WRITES: ML_BSSN::ML_Gamma } "ML_BSSN_InitGamma" } @@ -299,17 +332,62 @@ if (CCTK_EQUALS(my_initial_data, "ADMBase")) SYNC: ML_dtlapse SYNC: ML_dtshift SYNC: ML_Gamma + READS: ADMBase::dtlapse + READS: ADMBase::dtshift + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_metric + READS: ML_BSSN::ML_shift + WRITES: ML_BSSN::ML_dtlapse + WRITES: ML_BSSN::ML_dtshift + WRITES: ML_BSSN::ML_Gamma } "ML_BSSN_convertFromADMBaseGamma" } schedule ML_BSSN_RHS1 IN ML_BSSN_evolCalcGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN::ML_curv + READS: ML_BSSN::ML_dtlapse + READS: ML_BSSN::ML_dtshift + READS: ML_BSSN::ML_Gamma + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_log_confac + READS: ML_BSSN::ML_metric + READS: ML_BSSN::ML_shift + READS: ML_BSSN::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN::ML_dtlapserhs + WRITES: ML_BSSN::ML_dtshiftrhs + WRITES: ML_BSSN::ML_Gammarhs + WRITES: ML_BSSN::ML_lapserhs + WRITES: ML_BSSN::ML_log_confacrhs + WRITES: ML_BSSN::ML_metricrhs + WRITES: ML_BSSN::ML_shiftrhs + WRITES: ML_BSSN::ML_trace_curvrhs } "ML_BSSN_RHS1" schedule ML_BSSN_RHS2 IN ML_BSSN_evolCalcGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN::ML_curv + READS: ML_BSSN::ML_Gamma + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_log_confac + READS: ML_BSSN::ML_metric + READS: ML_BSSN::ML_shift + READS: ML_BSSN::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN::ML_curvrhs } "ML_BSSN_RHS2" @@ -318,17 +396,80 @@ if (CCTK_EQUALS(apply_dissipation, "always")) schedule ML_BSSN_Dissipation IN ML_BSSN_evolCalcGroup AFTER (ML_BSSN_RHS1 ML_BSSN_RHS2) { LANG: C + READS: ML_BSSN::ML_curv + READS: ML_BSSN::ML_curvrhs + READS: ML_BSSN::ML_dtlapse + READS: ML_BSSN::ML_dtlapserhs + READS: ML_BSSN::ML_dtshift + READS: ML_BSSN::ML_dtshiftrhs + READS: ML_BSSN::ML_Gamma + READS: ML_BSSN::ML_Gammarhs + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_lapserhs + READS: ML_BSSN::ML_log_confac + READS: ML_BSSN::ML_log_confacrhs + READS: ML_BSSN::ML_metric + READS: ML_BSSN::ML_metricrhs + READS: ML_BSSN::ML_shift + READS: ML_BSSN::ML_shiftrhs + READS: ML_BSSN::ML_trace_curv + READS: ML_BSSN::ML_trace_curvrhs + WRITES: ML_BSSN::ML_curvrhs + WRITES: ML_BSSN::ML_dtlapserhs + WRITES: ML_BSSN::ML_dtshiftrhs + WRITES: ML_BSSN::ML_Gammarhs + WRITES: ML_BSSN::ML_lapserhs + WRITES: ML_BSSN::ML_log_confacrhs + WRITES: ML_BSSN::ML_metricrhs + WRITES: ML_BSSN::ML_shiftrhs + WRITES: ML_BSSN::ML_trace_curvrhs } "ML_BSSN_Dissipation" } schedule ML_BSSN_Advect IN ML_BSSN_evolCalcGroup AFTER (ML_BSSN_RHS1 ML_BSSN_RHS2) { LANG: C + READS: ML_BSSN::ML_curv + READS: ML_BSSN::ML_curvrhs + READS: ML_BSSN::ML_dtlapse + READS: ML_BSSN::ML_dtlapserhs + READS: ML_BSSN::ML_dtshift + READS: ML_BSSN::ML_dtshiftrhs + READS: ML_BSSN::ML_Gamma + READS: ML_BSSN::ML_Gammarhs + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_lapserhs + READS: ML_BSSN::ML_log_confac + READS: ML_BSSN::ML_log_confacrhs + READS: ML_BSSN::ML_metric + READS: ML_BSSN::ML_metricrhs + READS: ML_BSSN::ML_shift + READS: ML_BSSN::ML_shiftrhs + READS: ML_BSSN::ML_trace_curv + READS: ML_BSSN::ML_trace_curvrhs + WRITES: ML_BSSN::ML_curvrhs + WRITES: ML_BSSN::ML_dtlapserhs + WRITES: ML_BSSN::ML_dtshiftrhs + WRITES: ML_BSSN::ML_Gammarhs + WRITES: ML_BSSN::ML_lapserhs + WRITES: ML_BSSN::ML_log_confacrhs + WRITES: ML_BSSN::ML_metricrhs + WRITES: ML_BSSN::ML_shiftrhs + WRITES: ML_BSSN::ML_trace_curvrhs } "ML_BSSN_Advect" schedule ML_BSSN_InitRHS AT analysis BEFORE ML_BSSN_evolCalcGroup { LANG: C + WRITES: ML_BSSN::ML_curvrhs + WRITES: ML_BSSN::ML_dtlapserhs + WRITES: ML_BSSN::ML_dtshiftrhs + WRITES: ML_BSSN::ML_Gammarhs + WRITES: ML_BSSN::ML_lapserhs + WRITES: ML_BSSN::ML_log_confacrhs + WRITES: ML_BSSN::ML_metricrhs + WRITES: ML_BSSN::ML_shiftrhs + WRITES: ML_BSSN::ML_trace_curvrhs } "ML_BSSN_InitRHS" @@ -337,12 +478,26 @@ if (CCTK_EQUALS(my_rhs_boundary_condition, "static")) schedule ML_BSSN_RHSStaticBoundary IN MoL_CalcRHS { LANG: C + WRITES: ML_BSSN::ML_curvrhs + WRITES: ML_BSSN::ML_dtlapserhs + WRITES: ML_BSSN::ML_dtshiftrhs + WRITES: ML_BSSN::ML_Gammarhs + WRITES: ML_BSSN::ML_lapserhs + WRITES: ML_BSSN::ML_log_confacrhs + WRITES: ML_BSSN::ML_metricrhs + WRITES: ML_BSSN::ML_shiftrhs + WRITES: ML_BSSN::ML_trace_curvrhs } "ML_BSSN_RHSStaticBoundary" } schedule ML_BSSN_enforce IN MoL_PostStepModify { LANG: C + READS: ML_BSSN::ML_curv + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_metric + WRITES: ML_BSSN::ML_curv + WRITES: ML_BSSN::ML_lapse } "ML_BSSN_enforce" @@ -351,12 +506,32 @@ if (CCTK_EQUALS(my_boundary_condition, "Minkowski")) schedule ML_BSSN_boundary IN MoL_PostStep { LANG: C + WRITES: ML_BSSN::ML_curv + WRITES: ML_BSSN::ML_dtlapse + WRITES: ML_BSSN::ML_dtshift + WRITES: ML_BSSN::ML_Gamma + WRITES: ML_BSSN::ML_lapse + WRITES: ML_BSSN::ML_log_confac + WRITES: ML_BSSN::ML_metric + WRITES: ML_BSSN::ML_shift + WRITES: ML_BSSN::ML_trace_curv } "ML_BSSN_boundary" } schedule ML_BSSN_convertToADMBase IN ML_BSSN_convertToADMBaseGroup { LANG: C + READS: ADMBase::metric + READS: ML_BSSN::ML_curv + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_log_confac + READS: ML_BSSN::ML_metric + READS: ML_BSSN::ML_shift + READS: ML_BSSN::ML_trace_curv + WRITES: ADMBase::curv + WRITES: ADMBase::lapse + WRITES: ADMBase::metric + WRITES: ADMBase::shift } "ML_BSSN_convertToADMBase" @@ -367,6 +542,18 @@ if (CCTK_EQUALS(dt_lapse_shift_method, "correct")) LANG: C SYNC: ADMBase::dtlapse SYNC: ADMBase::dtshift + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN::ML_dtlapse + READS: ML_BSSN::ML_dtshift + READS: ML_BSSN::ML_Gamma + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_log_confac + READS: ML_BSSN::ML_metric + READS: ML_BSSN::ML_shift + READS: ML_BSSN::ML_trace_curv + WRITES: ADMBase::dtlapse + WRITES: ADMBase::dtshift } "ML_BSSN_convertToADMBaseDtLapseShift" } @@ -376,6 +563,17 @@ if (CCTK_EQUALS(dt_lapse_shift_method, "correct")) schedule ML_BSSN_convertToADMBaseDtLapseShiftBoundary IN ML_BSSN_convertToADMBaseGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN::ML_dtlapse + READS: ML_BSSN::ML_dtshift + READS: ML_BSSN::ML_Gamma + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_metric + READS: ML_BSSN::ML_shift + READS: ML_BSSN::ML_trace_curv + WRITES: ADMBase::dtlapse + WRITES: ADMBase::dtshift } "ML_BSSN_convertToADMBaseDtLapseShiftBoundary" } @@ -385,6 +583,17 @@ if (CCTK_EQUALS(dt_lapse_shift_method, "noLapseShiftAdvection")) schedule ML_BSSN_convertToADMBaseFakeDtLapseShift IN ML_BSSN_convertToADMBaseGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN::ML_dtlapse + READS: ML_BSSN::ML_dtshift + READS: ML_BSSN::ML_Gamma + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_metric + READS: ML_BSSN::ML_shift + READS: ML_BSSN::ML_trace_curv + WRITES: ADMBase::dtlapse + WRITES: ADMBase::dtshift } "ML_BSSN_convertToADMBaseFakeDtLapseShift" } @@ -396,6 +605,17 @@ schedule group ML_BSSN_constraints1_group in MoL_PseudoEvolution after MoL_PostS schedule ML_BSSN_constraints1 in ML_BSSN_constraints1_group { LANG: C + READS: ML_BSSN::ML_curv + READS: ML_BSSN::ML_Gamma + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_log_confac + READS: ML_BSSN::ML_metric + READS: ML_BSSN::ML_shift + READS: ML_BSSN::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN::ML_Ham } "ML_BSSN_constraints1" schedule ML_BSSN_constraints1_SelectBCs in ML_BSSN_constraints1_bc_group @@ -428,6 +648,20 @@ schedule group ML_BSSN_constraints2_group in MoL_PseudoEvolution after MoL_PostS schedule ML_BSSN_constraints2 in ML_BSSN_constraints2_group { LANG: C + READS: ML_BSSN::ML_curv + READS: ML_BSSN::ML_Gamma + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_log_confac + READS: ML_BSSN::ML_metric + READS: ML_BSSN::ML_shift + READS: ML_BSSN::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN::ML_cons_detg + WRITES: ML_BSSN::ML_cons_Gamma + WRITES: ML_BSSN::ML_cons_traceA + WRITES: ML_BSSN::ML_mom } "ML_BSSN_constraints2" schedule ML_BSSN_constraints2_SelectBCs in ML_BSSN_constraints2_bc_group @@ -476,6 +710,12 @@ schedule ML_BSSN_CheckBoundaries at BASEGRID OPTIONS: meta } "check boundaries treatment" +schedule ML_BSSN_RegisterVars in MoL_Register +{ + LANG: C + OPTIONS: meta +} "Register Variables for MoL" + schedule group ApplyBCs as ML_BSSN_ApplyBCs in MoL_PostStep after ML_BSSN_SelectBoundConds { # no language specified diff --git a/ML_BSSN/src/ML_BSSN_Advect.cc b/ML_BSSN/src/ML_BSSN_Advect.cc index a34660b..2543f21 100644 --- a/ML_BSSN/src/ML_BSSN_Advect.cc +++ b/ML_BSSN/src/ML_BSSN_Advect.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -64,8 +65,6 @@ static void ML_BSSN_Advect_Body(cGH const * restrict const cctkGH, int const dir DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -102,9 +101,9 @@ static void ML_BSSN_Advect_Body(cGH const * restrict const cctkGH, int const dir CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -123,14 +122,14 @@ static void ML_BSSN_Advect_Body(cGH const * restrict const cctkGH, int const dir CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -144,9 +143,9 @@ static void ML_BSSN_Advect_Body(cGH const * restrict const cctkGH, int const dir CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -227,7 +226,7 @@ static void ML_BSSN_Advect_Body(cGH const * restrict const cctkGH, int const dir /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_Advect, + LC_LOOP3VEC(ML_BSSN_Advect, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1986,7 +1985,7 @@ static void ML_BSSN_Advect_Body(cGH const * restrict const cctkGH, int const dir } phirhsL = - kmadd(beta1L,JacPDupwindNthAnti1phi,kmadd(beta2L,JacPDupwindNthAnti2phi,kmadd(beta3L,JacPDupwindNthAnti3phi,kadd(phirhsL,kmadd(JacPDupwindNthSymm1phi,kfabs(beta1L),kmadd(JacPDupwindNthSymm2phi,kfabs(beta2L),kmul(JacPDupwindNthSymm3phi,kfabs(beta3L)))))))); + kadd(phirhsL,kmadd(beta1L,JacPDupwindNthAnti1phi,kmadd(beta2L,JacPDupwindNthAnti2phi,kmadd(beta3L,JacPDupwindNthAnti3phi,kmadd(JacPDupwindNthSymm1phi,kfabs(beta1L),kmadd(JacPDupwindNthSymm2phi,kfabs(beta2L),kmul(JacPDupwindNthSymm3phi,kfabs(beta3L)))))))); gt11rhsL = kadd(gt11rhsL,kmadd(beta1L,JacPDupwindNthAnti1gt11,kmadd(beta2L,JacPDupwindNthAnti2gt11,kmadd(beta3L,JacPDupwindNthAnti3gt11,kmadd(JacPDupwindNthSymm1gt11,kfabs(beta1L),kmadd(JacPDupwindNthSymm2gt11,kfabs(beta2L),kmul(JacPDupwindNthSymm3gt11,kfabs(beta3L)))))))); @@ -2007,16 +2006,16 @@ static void ML_BSSN_Advect_Body(cGH const * restrict const cctkGH, int const dir kadd(gt33rhsL,kmadd(beta1L,JacPDupwindNthAnti1gt33,kmadd(beta2L,JacPDupwindNthAnti2gt33,kmadd(beta3L,JacPDupwindNthAnti3gt33,kmadd(JacPDupwindNthSymm1gt33,kfabs(beta1L),kmadd(JacPDupwindNthSymm2gt33,kfabs(beta2L),kmul(JacPDupwindNthSymm3gt33,kfabs(beta3L)))))))); Xt1rhsL = - kmadd(beta1L,JacPDupwindNthAnti1Xt1,kmadd(beta2L,JacPDupwindNthAnti2Xt1,kmadd(beta3L,JacPDupwindNthAnti3Xt1,kadd(Xt1rhsL,kmadd(JacPDupwindNthSymm1Xt1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt1,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt1,kfabs(beta3L)))))))); + kadd(Xt1rhsL,kmadd(beta1L,JacPDupwindNthAnti1Xt1,kmadd(beta2L,JacPDupwindNthAnti2Xt1,kmadd(beta3L,JacPDupwindNthAnti3Xt1,kmadd(JacPDupwindNthSymm1Xt1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt1,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt1,kfabs(beta3L)))))))); Xt2rhsL = - kmadd(beta1L,JacPDupwindNthAnti1Xt2,kmadd(beta2L,JacPDupwindNthAnti2Xt2,kmadd(beta3L,JacPDupwindNthAnti3Xt2,kadd(Xt2rhsL,kmadd(JacPDupwindNthSymm1Xt2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt2,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt2,kfabs(beta3L)))))))); + kadd(Xt2rhsL,kmadd(beta1L,JacPDupwindNthAnti1Xt2,kmadd(beta2L,JacPDupwindNthAnti2Xt2,kmadd(beta3L,JacPDupwindNthAnti3Xt2,kmadd(JacPDupwindNthSymm1Xt2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt2,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt2,kfabs(beta3L)))))))); Xt3rhsL = - kmadd(beta1L,JacPDupwindNthAnti1Xt3,kmadd(beta2L,JacPDupwindNthAnti2Xt3,kmadd(beta3L,JacPDupwindNthAnti3Xt3,kadd(Xt3rhsL,kmadd(JacPDupwindNthSymm1Xt3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt3,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt3,kfabs(beta3L)))))))); + kadd(Xt3rhsL,kmadd(beta1L,JacPDupwindNthAnti1Xt3,kmadd(beta2L,JacPDupwindNthAnti2Xt3,kmadd(beta3L,JacPDupwindNthAnti3Xt3,kmadd(JacPDupwindNthSymm1Xt3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt3,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt3,kfabs(beta3L)))))))); trKrhsL = - kmadd(beta1L,JacPDupwindNthAnti1trK,kmadd(beta2L,JacPDupwindNthAnti2trK,kmadd(beta3L,JacPDupwindNthAnti3trK,kadd(trKrhsL,kmadd(JacPDupwindNthSymm1trK,kfabs(beta1L),kmadd(JacPDupwindNthSymm2trK,kfabs(beta2L),kmul(JacPDupwindNthSymm3trK,kfabs(beta3L)))))))); + kadd(trKrhsL,kmadd(beta1L,JacPDupwindNthAnti1trK,kmadd(beta2L,JacPDupwindNthAnti2trK,kmadd(beta3L,JacPDupwindNthAnti3trK,kmadd(JacPDupwindNthSymm1trK,kfabs(beta1L),kmadd(JacPDupwindNthSymm2trK,kfabs(beta2L),kmul(JacPDupwindNthSymm3trK,kfabs(beta3L)))))))); At11rhsL = kadd(At11rhsL,kmadd(beta1L,JacPDupwindNthAnti1At11,kmadd(beta2L,JacPDupwindNthAnti2At11,kmadd(beta3L,JacPDupwindNthAnti3At11,kmadd(JacPDupwindNthSymm1At11,kfabs(beta1L),kmadd(JacPDupwindNthSymm2At11,kfabs(beta2L),kmul(JacPDupwindNthSymm3At11,kfabs(beta3L)))))))); @@ -2040,7 +2039,7 @@ static void ML_BSSN_Advect_Body(cGH const * restrict const cctkGH, int const dir kmadd(kmadd(beta1L,JacPDupwindNthAnti1alpha,kmadd(beta2L,JacPDupwindNthAnti2alpha,kmadd(beta3L,JacPDupwindNthAnti3alpha,kmadd(JacPDupwindNthSymm1alpha,kfabs(beta1L),kmadd(JacPDupwindNthSymm2alpha,kfabs(beta2L),kmul(JacPDupwindNthSymm3alpha,kfabs(beta3L))))))),ToReal(LapseAdvectionCoeff),alpharhsL); ArhsL = - kmadd(kmadd(beta1L,JacPDupwindNthAnti1A,kmadd(beta2L,JacPDupwindNthAnti2A,kmadd(beta3L,JacPDupwindNthAnti3A,kmadd(JacPDupwindNthSymm1A,kfabs(beta1L),kmadd(JacPDupwindNthSymm2A,kfabs(beta2L),kmul(JacPDupwindNthSymm3A,kfabs(beta3L))))))),ToReal(LapseAdvectionCoeff),ArhsL); + kmadd(ToReal(LapseACoeff),kmsub(kmadd(beta1L,JacPDupwindNthAnti1A,kmadd(beta2L,JacPDupwindNthAnti2A,kmadd(beta3L,JacPDupwindNthAnti3A,kmadd(JacPDupwindNthSymm1A,kfabs(beta1L),kmadd(JacPDupwindNthSymm2A,kfabs(beta2L),kmul(JacPDupwindNthSymm3A,kfabs(beta3L))))))),ToReal(LapseAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1trK,kmadd(beta2L,JacPDupwindNthAnti2trK,kmadd(beta3L,JacPDupwindNthAnti3trK,kmadd(JacPDupwindNthSymm1trK,kfabs(beta1L),kmadd(JacPDupwindNthSymm2trK,kfabs(beta2L),kmul(JacPDupwindNthSymm3trK,kfabs(beta3L))))))),kadd(ToReal(-1),ToReal(LapseAdvectionCoeff)))),ArhsL); beta1rhsL = kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta1,kmadd(beta2L,JacPDupwindNthAnti2beta1,kmadd(beta3L,JacPDupwindNthAnti3beta1,kmadd(JacPDupwindNthSymm1beta1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta1,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),beta1rhsL); @@ -2052,140 +2051,43 @@ static void ML_BSSN_Advect_Body(cGH const * restrict const cctkGH, int const dir kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),beta3rhsL); B1rhsL = - kadd(B1rhsL,kmadd(kmadd(beta1L,ksub(JacPDupwindNthAnti1B1,JacPDupwindNthAnti1Xt1),kmadd(beta2L,ksub(JacPDupwindNthAnti2B1,JacPDupwindNthAnti2Xt1),kmadd(beta3L,ksub(JacPDupwindNthAnti3B1,JacPDupwindNthAnti3Xt1),kmadd(kfabs(beta1L),ksub(JacPDupwindNthSymm1B1,JacPDupwindNthSymm1Xt1),kmadd(kfabs(beta2L),ksub(JacPDupwindNthSymm2B1,JacPDupwindNthSymm2Xt1),kmul(kfabs(beta3L),ksub(JacPDupwindNthSymm3B1,JacPDupwindNthSymm3Xt1))))))),ToReal(ShiftAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1Xt1,kmadd(beta2L,JacPDupwindNthAnti2Xt1,kmadd(beta3L,JacPDupwindNthAnti3Xt1,kmadd(JacPDupwindNthSymm1Xt1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt1,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt1,kfabs(beta3L))))))),ToReal(ShiftBCoeff)))); + kmadd(kmsub(kmadd(beta1L,JacPDupwindNthAnti1B1,kmadd(beta2L,JacPDupwindNthAnti2B1,kmadd(beta3L,JacPDupwindNthAnti3B1,kmadd(JacPDupwindNthSymm1B1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2B1,kfabs(beta2L),kmul(JacPDupwindNthSymm3B1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1Xt1,kmadd(beta2L,JacPDupwindNthAnti2Xt1,kmadd(beta3L,JacPDupwindNthAnti3Xt1,kmadd(JacPDupwindNthSymm1Xt1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt1,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt1,kfabs(beta3L))))))),kadd(ToReal(-1),ToReal(ShiftAdvectionCoeff)))),ToReal(ShiftBCoeff),B1rhsL); B2rhsL = - kadd(B2rhsL,kmadd(kmadd(beta1L,ksub(JacPDupwindNthAnti1B2,JacPDupwindNthAnti1Xt2),kmadd(beta2L,ksub(JacPDupwindNthAnti2B2,JacPDupwindNthAnti2Xt2),kmadd(beta3L,ksub(JacPDupwindNthAnti3B2,JacPDupwindNthAnti3Xt2),kmadd(kfabs(beta1L),ksub(JacPDupwindNthSymm1B2,JacPDupwindNthSymm1Xt2),kmadd(kfabs(beta2L),ksub(JacPDupwindNthSymm2B2,JacPDupwindNthSymm2Xt2),kmul(kfabs(beta3L),ksub(JacPDupwindNthSymm3B2,JacPDupwindNthSymm3Xt2))))))),ToReal(ShiftAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1Xt2,kmadd(beta2L,JacPDupwindNthAnti2Xt2,kmadd(beta3L,JacPDupwindNthAnti3Xt2,kmadd(JacPDupwindNthSymm1Xt2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt2,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt2,kfabs(beta3L))))))),ToReal(ShiftBCoeff)))); + kmadd(kmsub(kmadd(beta1L,JacPDupwindNthAnti1B2,kmadd(beta2L,JacPDupwindNthAnti2B2,kmadd(beta3L,JacPDupwindNthAnti3B2,kmadd(JacPDupwindNthSymm1B2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2B2,kfabs(beta2L),kmul(JacPDupwindNthSymm3B2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1Xt2,kmadd(beta2L,JacPDupwindNthAnti2Xt2,kmadd(beta3L,JacPDupwindNthAnti3Xt2,kmadd(JacPDupwindNthSymm1Xt2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt2,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt2,kfabs(beta3L))))))),kadd(ToReal(-1),ToReal(ShiftAdvectionCoeff)))),ToReal(ShiftBCoeff),B2rhsL); B3rhsL = - kadd(B3rhsL,kmadd(kmadd(beta1L,ksub(JacPDupwindNthAnti1B3,JacPDupwindNthAnti1Xt3),kmadd(beta2L,ksub(JacPDupwindNthAnti2B3,JacPDupwindNthAnti2Xt3),kmadd(beta3L,ksub(JacPDupwindNthAnti3B3,JacPDupwindNthAnti3Xt3),kmadd(kfabs(beta1L),ksub(JacPDupwindNthSymm1B3,JacPDupwindNthSymm1Xt3),kmadd(kfabs(beta2L),ksub(JacPDupwindNthSymm2B3,JacPDupwindNthSymm2Xt3),kmul(kfabs(beta3L),ksub(JacPDupwindNthSymm3B3,JacPDupwindNthSymm3Xt3))))))),ToReal(ShiftAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1Xt3,kmadd(beta2L,JacPDupwindNthAnti2Xt3,kmadd(beta3L,JacPDupwindNthAnti3Xt3,kmadd(JacPDupwindNthSymm1Xt3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt3,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt3,kfabs(beta3L))))))),ToReal(ShiftBCoeff)))); - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ + kmadd(kmsub(kmadd(beta1L,JacPDupwindNthAnti1B3,kmadd(beta2L,JacPDupwindNthAnti2B3,kmadd(beta3L,JacPDupwindNthAnti3B3,kmadd(JacPDupwindNthSymm1B3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2B3,kfabs(beta2L),kmul(JacPDupwindNthSymm3B3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1Xt3,kmadd(beta2L,JacPDupwindNthAnti2Xt3,kmadd(beta3L,JacPDupwindNthAnti3Xt3,kmadd(JacPDupwindNthSymm1Xt3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt3,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt3,kfabs(beta3L))))))),kadd(ToReal(-1),ToReal(ShiftAdvectionCoeff)))),ToReal(ShiftBCoeff),B3rhsL); - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_Advect); + LC_ENDLOOP3VEC(ML_BSSN_Advect); } extern "C" void ML_BSSN_Advect(CCTK_ARGUMENTS) @@ -2204,7 +2106,25 @@ extern "C" void ML_BSSN_Advect(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN::ML_curv","ML_BSSN::ML_curvrhs","ML_BSSN::ML_dtlapse","ML_BSSN::ML_dtlapserhs","ML_BSSN::ML_dtshift","ML_BSSN::ML_dtshiftrhs","ML_BSSN::ML_Gamma","ML_BSSN::ML_Gammarhs","ML_BSSN::ML_lapse","ML_BSSN::ML_lapserhs","ML_BSSN::ML_log_confac","ML_BSSN::ML_log_confacrhs","ML_BSSN::ML_metric","ML_BSSN::ML_metricrhs","ML_BSSN::ML_shift","ML_BSSN::ML_shiftrhs","ML_BSSN::ML_trace_curv","ML_BSSN::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN::ML_curv", + "ML_BSSN::ML_curvrhs", + "ML_BSSN::ML_dtlapse", + "ML_BSSN::ML_dtlapserhs", + "ML_BSSN::ML_dtshift", + "ML_BSSN::ML_dtshiftrhs", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_Gammarhs", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_lapserhs", + "ML_BSSN::ML_log_confac", + "ML_BSSN::ML_log_confacrhs", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_metricrhs", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_shiftrhs", + "ML_BSSN::ML_trace_curv", + "ML_BSSN::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_Advect", 18, groups); switch(fdOrder) @@ -2226,7 +2146,7 @@ extern "C" void ML_BSSN_Advect(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_Advect_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_Advect_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_Dissipation.cc b/ML_BSSN/src/ML_BSSN_Dissipation.cc index 12b950e..98378a0 100644 --- a/ML_BSSN/src/ML_BSSN_Dissipation.cc +++ b/ML_BSSN/src/ML_BSSN_Dissipation.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -64,8 +65,6 @@ static void ML_BSSN_Dissipation_Body(cGH const * restrict const cctkGH, int cons DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -102,9 +101,9 @@ static void ML_BSSN_Dissipation_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -123,14 +122,14 @@ static void ML_BSSN_Dissipation_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -144,9 +143,9 @@ static void ML_BSSN_Dissipation_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -227,7 +226,7 @@ static void ML_BSSN_Dissipation_Body(cGH const * restrict const cctkGH, int cons /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_Dissipation, + LC_LOOP3VEC(ML_BSSN_Dissipation, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1161,7 +1160,7 @@ static void ML_BSSN_Dissipation_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC epsdiss3 = ToReal(EpsDiss); phirhsL = - kmadd(epsdiss1,JacPDdissipationNth1phi,kmadd(epsdiss2,JacPDdissipationNth2phi,kmadd(epsdiss3,JacPDdissipationNth3phi,phirhsL))); + kadd(phirhsL,kmadd(epsdiss1,JacPDdissipationNth1phi,kmadd(epsdiss2,JacPDdissipationNth2phi,kmul(epsdiss3,JacPDdissipationNth3phi)))); gt11rhsL = kadd(gt11rhsL,kmadd(epsdiss1,JacPDdissipationNth1gt11,kmadd(epsdiss2,JacPDdissipationNth2gt11,kmul(epsdiss3,JacPDdissipationNth3gt11)))); @@ -1182,16 +1181,16 @@ static void ML_BSSN_Dissipation_Body(cGH const * restrict const cctkGH, int cons kadd(gt33rhsL,kmadd(epsdiss1,JacPDdissipationNth1gt33,kmadd(epsdiss2,JacPDdissipationNth2gt33,kmul(epsdiss3,JacPDdissipationNth3gt33)))); Xt1rhsL = - kmadd(epsdiss1,JacPDdissipationNth1Xt1,kmadd(epsdiss2,JacPDdissipationNth2Xt1,kmadd(epsdiss3,JacPDdissipationNth3Xt1,Xt1rhsL))); + kadd(Xt1rhsL,kmadd(epsdiss1,JacPDdissipationNth1Xt1,kmadd(epsdiss2,JacPDdissipationNth2Xt1,kmul(epsdiss3,JacPDdissipationNth3Xt1)))); Xt2rhsL = - kmadd(epsdiss1,JacPDdissipationNth1Xt2,kmadd(epsdiss2,JacPDdissipationNth2Xt2,kmadd(epsdiss3,JacPDdissipationNth3Xt2,Xt2rhsL))); + kadd(Xt2rhsL,kmadd(epsdiss1,JacPDdissipationNth1Xt2,kmadd(epsdiss2,JacPDdissipationNth2Xt2,kmul(epsdiss3,JacPDdissipationNth3Xt2)))); Xt3rhsL = - kmadd(epsdiss1,JacPDdissipationNth1Xt3,kmadd(epsdiss2,JacPDdissipationNth2Xt3,kmadd(epsdiss3,JacPDdissipationNth3Xt3,Xt3rhsL))); + kadd(Xt3rhsL,kmadd(epsdiss1,JacPDdissipationNth1Xt3,kmadd(epsdiss2,JacPDdissipationNth2Xt3,kmul(epsdiss3,JacPDdissipationNth3Xt3)))); trKrhsL = - kmadd(epsdiss1,JacPDdissipationNth1trK,kmadd(epsdiss2,JacPDdissipationNth2trK,kmadd(epsdiss3,JacPDdissipationNth3trK,trKrhsL))); + kadd(trKrhsL,kmadd(epsdiss1,JacPDdissipationNth1trK,kmadd(epsdiss2,JacPDdissipationNth2trK,kmul(epsdiss3,JacPDdissipationNth3trK)))); At11rhsL = kadd(At11rhsL,kmadd(epsdiss1,JacPDdissipationNth1At11,kmadd(epsdiss2,JacPDdissipationNth2At11,kmul(epsdiss3,JacPDdissipationNth3At11)))); @@ -1235,132 +1234,35 @@ static void ML_BSSN_Dissipation_Body(cGH const * restrict const cctkGH, int cons B3rhsL = kadd(B3rhsL,kmadd(epsdiss1,JacPDdissipationNth1B3,kmadd(epsdiss2,JacPDdissipationNth2B3,kmul(epsdiss3,JacPDdissipationNth3B3)))); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_Dissipation); + LC_ENDLOOP3VEC(ML_BSSN_Dissipation); } extern "C" void ML_BSSN_Dissipation(CCTK_ARGUMENTS) @@ -1379,7 +1281,25 @@ extern "C" void ML_BSSN_Dissipation(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN::ML_curv","ML_BSSN::ML_curvrhs","ML_BSSN::ML_dtlapse","ML_BSSN::ML_dtlapserhs","ML_BSSN::ML_dtshift","ML_BSSN::ML_dtshiftrhs","ML_BSSN::ML_Gamma","ML_BSSN::ML_Gammarhs","ML_BSSN::ML_lapse","ML_BSSN::ML_lapserhs","ML_BSSN::ML_log_confac","ML_BSSN::ML_log_confacrhs","ML_BSSN::ML_metric","ML_BSSN::ML_metricrhs","ML_BSSN::ML_shift","ML_BSSN::ML_shiftrhs","ML_BSSN::ML_trace_curv","ML_BSSN::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN::ML_curv", + "ML_BSSN::ML_curvrhs", + "ML_BSSN::ML_dtlapse", + "ML_BSSN::ML_dtlapserhs", + "ML_BSSN::ML_dtshift", + "ML_BSSN::ML_dtshiftrhs", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_Gammarhs", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_lapserhs", + "ML_BSSN::ML_log_confac", + "ML_BSSN::ML_log_confacrhs", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_metricrhs", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_shiftrhs", + "ML_BSSN::ML_trace_curv", + "ML_BSSN::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_Dissipation", 18, groups); switch(fdOrder) @@ -1401,7 +1321,7 @@ extern "C" void ML_BSSN_Dissipation(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_Dissipation_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_Dissipation_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_InitGamma.cc b/ML_BSSN/src/ML_BSSN_InitGamma.cc index 8c3057b..647de3b 100644 --- a/ML_BSSN/src/ML_BSSN_InitGamma.cc +++ b/ML_BSSN/src/ML_BSSN_InitGamma.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_InitGamma_Body(cGH const * restrict const cctkGH, int const DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_InitGamma_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_InitGamma_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_InitGamma_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_InitGamma_Body(cGH const * restrict const cctkGH, int const /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_InitGamma, + LC_LOOP3VEC(ML_BSSN_InitGamma, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -237,60 +236,17 @@ static void ML_BSSN_InitGamma_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC B3L = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_InitGamma); + LC_ENDLOOP3VEC(ML_BSSN_InitGamma); } extern "C" void ML_BSSN_InitGamma(CCTK_ARGUMENTS) @@ -309,7 +265,10 @@ extern "C" void ML_BSSN_InitGamma(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN::ML_dtlapse","ML_BSSN::ML_dtshift","ML_BSSN::ML_Gamma"}; + const char *const groups[] = { + "ML_BSSN::ML_dtlapse", + "ML_BSSN::ML_dtshift", + "ML_BSSN::ML_Gamma"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_InitGamma", 3, groups); switch(fdOrder) @@ -327,7 +286,7 @@ extern "C" void ML_BSSN_InitGamma(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_InitGamma_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_InitGamma_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_InitRHS.cc b/ML_BSSN/src/ML_BSSN_InitRHS.cc index d895b38..0f36ec8 100644 --- a/ML_BSSN/src/ML_BSSN_InitRHS.cc +++ b/ML_BSSN/src/ML_BSSN_InitRHS.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_InitRHS_Body(cGH const * restrict const cctkGH, int const di DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_InitRHS_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_InitRHS_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_InitRHS_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_InitRHS_Body(cGH const * restrict const cctkGH, int const di /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_InitRHS, + LC_LOOP3VEC(ML_BSSN_InitRHS, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -273,132 +272,35 @@ static void ML_BSSN_InitRHS_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC B3rhsL = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_InitRHS); + LC_ENDLOOP3VEC(ML_BSSN_InitRHS); } extern "C" void ML_BSSN_InitRHS(CCTK_ARGUMENTS) @@ -417,7 +319,16 @@ extern "C" void ML_BSSN_InitRHS(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN::ML_curvrhs","ML_BSSN::ML_dtlapserhs","ML_BSSN::ML_dtshiftrhs","ML_BSSN::ML_Gammarhs","ML_BSSN::ML_lapserhs","ML_BSSN::ML_log_confacrhs","ML_BSSN::ML_metricrhs","ML_BSSN::ML_shiftrhs","ML_BSSN::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN::ML_curvrhs", + "ML_BSSN::ML_dtlapserhs", + "ML_BSSN::ML_dtshiftrhs", + "ML_BSSN::ML_Gammarhs", + "ML_BSSN::ML_lapserhs", + "ML_BSSN::ML_log_confacrhs", + "ML_BSSN::ML_metricrhs", + "ML_BSSN::ML_shiftrhs", + "ML_BSSN::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_InitRHS", 9, groups); switch(fdOrder) @@ -435,7 +346,7 @@ extern "C" void ML_BSSN_InitRHS(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_InitRHS_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_InitRHS_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_Minkowski.cc b/ML_BSSN/src/ML_BSSN_Minkowski.cc index 906fe72..f1033d0 100644 --- a/ML_BSSN/src/ML_BSSN_Minkowski.cc +++ b/ML_BSSN/src/ML_BSSN_Minkowski.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_Minkowski_Body(cGH const * restrict const cctkGH, int const DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_Minkowski_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_Minkowski_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_Minkowski_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_Minkowski_Body(cGH const * restrict const cctkGH, int const /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_Minkowski, + LC_LOOP3VEC(ML_BSSN_Minkowski, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -273,132 +272,35 @@ static void ML_BSSN_Minkowski_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC B3L = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(beta1[index],beta1L,elt_count); - vec_store_nta_partial_hi(beta2[index],beta2L,elt_count); - vec_store_nta_partial_hi(beta3[index],beta3L,elt_count); - vec_store_nta_partial_hi(gt11[index],gt11L,elt_count); - vec_store_nta_partial_hi(gt12[index],gt12L,elt_count); - vec_store_nta_partial_hi(gt13[index],gt13L,elt_count); - vec_store_nta_partial_hi(gt22[index],gt22L,elt_count); - vec_store_nta_partial_hi(gt23[index],gt23L,elt_count); - vec_store_nta_partial_hi(gt33[index],gt33L,elt_count); - vec_store_nta_partial_hi(phi[index],phiL,elt_count); - vec_store_nta_partial_hi(trK[index],trKL,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(beta1[index],beta1L,elt_count); - vec_store_nta_partial_lo(beta2[index],beta2L,elt_count); - vec_store_nta_partial_lo(beta3[index],beta3L,elt_count); - vec_store_nta_partial_lo(gt11[index],gt11L,elt_count); - vec_store_nta_partial_lo(gt12[index],gt12L,elt_count); - vec_store_nta_partial_lo(gt13[index],gt13L,elt_count); - vec_store_nta_partial_lo(gt22[index],gt22L,elt_count); - vec_store_nta_partial_lo(gt23[index],gt23L,elt_count); - vec_store_nta_partial_lo(gt33[index],gt33L,elt_count); - vec_store_nta_partial_lo(phi[index],phiL,elt_count); - vec_store_nta_partial_lo(trK[index],trKL,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(beta1[index],beta1L); - vec_store_nta(beta2[index],beta2L); - vec_store_nta(beta3[index],beta3L); - vec_store_nta(gt11[index],gt11L); - vec_store_nta(gt12[index],gt12L); - vec_store_nta(gt13[index],gt13L); - vec_store_nta(gt22[index],gt22L); - vec_store_nta(gt23[index],gt23L); - vec_store_nta(gt33[index],gt33L); - vec_store_nta(phi[index],phiL); - vec_store_nta(trK[index],trKL); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(beta1[index],beta1L); + vec_store_nta_partial(beta2[index],beta2L); + vec_store_nta_partial(beta3[index],beta3L); + vec_store_nta_partial(gt11[index],gt11L); + vec_store_nta_partial(gt12[index],gt12L); + vec_store_nta_partial(gt13[index],gt13L); + vec_store_nta_partial(gt22[index],gt22L); + vec_store_nta_partial(gt23[index],gt23L); + vec_store_nta_partial(gt33[index],gt33L); + vec_store_nta_partial(phi[index],phiL); + vec_store_nta_partial(trK[index],trKL); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_Minkowski); + LC_ENDLOOP3VEC(ML_BSSN_Minkowski); } extern "C" void ML_BSSN_Minkowski(CCTK_ARGUMENTS) @@ -417,7 +319,16 @@ extern "C" void ML_BSSN_Minkowski(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN::ML_curv","ML_BSSN::ML_dtlapse","ML_BSSN::ML_dtshift","ML_BSSN::ML_Gamma","ML_BSSN::ML_lapse","ML_BSSN::ML_log_confac","ML_BSSN::ML_metric","ML_BSSN::ML_shift","ML_BSSN::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN::ML_curv", + "ML_BSSN::ML_dtlapse", + "ML_BSSN::ML_dtshift", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_log_confac", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_Minkowski", 9, groups); switch(fdOrder) @@ -435,7 +346,7 @@ extern "C" void ML_BSSN_Minkowski(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_Minkowski_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_Minkowski_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_RHS1.cc b/ML_BSSN/src/ML_BSSN_RHS1.cc index 5b5f80a..3775bb7 100644 --- a/ML_BSSN/src/ML_BSSN_RHS1.cc +++ b/ML_BSSN/src/ML_BSSN_RHS1.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -61,8 +62,6 @@ static void ML_BSSN_RHS1_Body(cGH const * restrict const cctkGH, int const dir, DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -99,9 +98,9 @@ static void ML_BSSN_RHS1_Body(cGH const * restrict const cctkGH, int const dir, CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -120,14 +119,14 @@ static void ML_BSSN_RHS1_Body(cGH const * restrict const cctkGH, int const dir, CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -141,9 +140,9 @@ static void ML_BSSN_RHS1_Body(cGH const * restrict const cctkGH, int const dir, CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -224,7 +223,7 @@ static void ML_BSSN_RHS1_Body(cGH const * restrict const cctkGH, int const dir, /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_RHS1, + LC_LOOP3VEC(ML_BSSN_RHS1, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1088,7 +1087,8 @@ static void ML_BSSN_RHS1_Body(cGH const * restrict const cctkGH, int const dir, CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -1096,12 +1096,14 @@ static void ML_BSSN_RHS1_Body(cGH const * restrict const cctkGH, int const dir, CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC Gtl111 = kmul(JacPDstandardNth1gt11,ToReal(0.5)); @@ -1307,13 +1309,13 @@ static void ML_BSSN_RHS1_Body(cGH const * restrict const cctkGH, int const dir, kmul(ToReal(-0.666666666666666666666666666667),kmadd(kmadd(gt13L,JacPDstandardNth3beta1,kmul(gt23L,JacPDstandardNth3beta2)),ToReal(-3),kmadd(gt33L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth3beta3,ToReal(-2),JacPDstandardNth2beta2)),kmul(alphaL,kmul(At33L,ToReal(3)))))); CCTK_REAL_VEC dotXt1 = - kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu11,JacPDstandardNth1alpha,kmadd(Atu12,JacPDstandardNth2alpha,kmul(Atu13,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth2beta1,Xtn2,kmul(JacPDstandardNth3beta1,Xtn3)),ToReal(-3),kmadd(Xtn1,kmsub(kadd(JacPDstandardNth2beta2,JacPDstandardNth3beta3),ToReal(2),JacPDstandardNth1beta1),kmadd(kmadd(gtu12,JacPDstandardNth12beta1,kmadd(gtu13,JacPDstandardNth13beta1,kmadd(gtu22,JacPDstandardNth22beta1,kmadd(gtu23,kadd(JacPDstandardNth23beta1,JacPDstandardNth32beta1),kmul(gtu33,JacPDstandardNth33beta1))))),ToReal(3),kmadd(gtu11,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth11beta1,ToReal(4),JacPDstandardNth13beta3)),kmadd(gtu12,kadd(JacPDstandardNth22beta2,kmadd(JacPDstandardNth21beta1,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu13,kadd(JacPDstandardNth32beta2,kmadd(JacPDstandardNth31beta1,ToReal(4),JacPDstandardNth33beta3)),kmul(alphaL,kmadd(kmadd(gtu11,S1,kmadd(gtu12,S2,kmul(gtu13,S3))),ToReal(-150.7964473723100754462068823974161384415),kmadd(kmadd(gtu11,JacPDstandardNth1trK,kmadd(gtu12,JacPDstandardNth2trK,kmul(gtu13,JacPDstandardNth3trK))),ToReal(-4),kmadd(ToReal(6),kmadd(Atu22,Gt122,kmadd(Atu33,Gt133,kmul(Atu11,kmadd(cdphi1,ToReal(6),Gt111)))),kmadd(kmadd(Atu23,Gt123,kmul(Atu12,kmadd(cdphi2,ToReal(3),Gt112))),ToReal(12),kmul(Atu13,kmadd(Gt113,ToReal(12),kmul(cdphi3,ToReal(36))))))))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu11,JacPDstandardNth1alpha,kmadd(Atu12,JacPDstandardNth2alpha,kmul(Atu13,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth2beta1,Xtn2,kmul(JacPDstandardNth3beta1,Xtn3)),ToReal(-3),kmadd(Xtn1,kmsub(JacPDstandardNth3beta3,ToReal(2),JacPDstandardNth1beta1),kmadd(kmadd(gtu12,JacPDstandardNth12beta1,kmadd(gtu13,JacPDstandardNth13beta1,kmadd(gtu22,JacPDstandardNth22beta1,kmadd(gtu23,kadd(JacPDstandardNth23beta1,JacPDstandardNth32beta1),kmul(gtu33,JacPDstandardNth33beta1))))),ToReal(3),kmadd(gtu11,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth11beta1,ToReal(4),JacPDstandardNth13beta3)),kmadd(gtu12,kadd(JacPDstandardNth22beta2,kmadd(JacPDstandardNth21beta1,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu13,kadd(JacPDstandardNth32beta2,kmadd(JacPDstandardNth31beta1,ToReal(4),JacPDstandardNth33beta3)),kmul(ToReal(2),kmadd(JacPDstandardNth2beta2,Xtn1,kmul(alphaL,kmadd(kmadd(gtu11,JacPDstandardNth1trK,kmadd(gtu12,JacPDstandardNth2trK,kmul(gtu13,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu23,Gt123,kmul(Atu12,kmadd(cdphi2,ToReal(3),Gt112))),ToReal(6),kmadd(ToReal(3),kmadd(Atu22,Gt122,kmadd(Atu33,Gt133,kmul(Atu11,kmadd(cdphi1,ToReal(6),Gt111)))),kmadd(Atu13,kmadd(Gt113,ToReal(6),kmul(cdphi3,ToReal(18))),kmul(kmadd(gtu11,S1,kmadd(gtu12,S2,kmul(gtu13,S3))),kmul(ToReal(-24),ToReal(Pi)))))))))))))))))); CCTK_REAL_VEC dotXt2 = - kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu12,JacPDstandardNth1alpha,kmadd(Atu22,JacPDstandardNth2alpha,kmul(Atu23,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta2,Xtn1,kmul(JacPDstandardNth3beta2,Xtn3)),ToReal(-3),kmadd(Xtn2,kmsub(kadd(JacPDstandardNth1beta1,JacPDstandardNth3beta3),ToReal(2),JacPDstandardNth2beta2),kmadd(kmadd(gtu11,JacPDstandardNth11beta2,kmadd(gtu23,JacPDstandardNth23beta2,kmadd(gtu13,kadd(JacPDstandardNth13beta2,JacPDstandardNth31beta2),kmul(gtu33,JacPDstandardNth33beta2)))),ToReal(3),kmadd(gtu12,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth13beta3,kmadd(JacPDstandardNth21beta2,ToReal(3),kmul(JacPDstandardNth12beta2,ToReal(4))))),kmadd(gtu22,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth22beta2,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu23,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth32beta2,ToReal(4),JacPDstandardNth33beta3)),kmul(alphaL,kmadd(kmadd(gtu12,S1,kmadd(gtu22,S2,kmul(gtu23,S3))),ToReal(-150.7964473723100754462068823974161384415),kmadd(kmadd(gtu12,JacPDstandardNth1trK,kmadd(gtu22,JacPDstandardNth2trK,kmul(gtu23,JacPDstandardNth3trK))),ToReal(-4),kmadd(ToReal(6),kmadd(Atu11,Gt211,kmadd(Atu33,Gt233,kmul(Atu22,kmadd(cdphi2,ToReal(6),Gt222)))),kmadd(kmadd(Atu13,Gt213,kmul(Atu12,kmadd(cdphi1,ToReal(3),Gt212))),ToReal(12),kmul(Atu23,kmadd(Gt223,ToReal(12),kmul(cdphi3,ToReal(36))))))))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu12,JacPDstandardNth1alpha,kmadd(Atu22,JacPDstandardNth2alpha,kmul(Atu23,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta2,Xtn1,kmul(JacPDstandardNth3beta2,Xtn3)),ToReal(-3),kmadd(Xtn2,kmsub(JacPDstandardNth3beta3,ToReal(2),JacPDstandardNth2beta2),kmadd(kmadd(gtu11,JacPDstandardNth11beta2,kmadd(gtu23,JacPDstandardNth23beta2,kmadd(gtu13,kadd(JacPDstandardNth13beta2,JacPDstandardNth31beta2),kmul(gtu33,JacPDstandardNth33beta2)))),ToReal(3),kmadd(gtu12,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth13beta3,kmadd(JacPDstandardNth21beta2,ToReal(3),kmul(JacPDstandardNth12beta2,ToReal(4))))),kmadd(gtu22,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth22beta2,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu23,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth32beta2,ToReal(4),JacPDstandardNth33beta3)),kmul(ToReal(2),kmadd(JacPDstandardNth1beta1,Xtn2,kmul(alphaL,kmadd(kmadd(gtu12,JacPDstandardNth1trK,kmadd(gtu22,JacPDstandardNth2trK,kmul(gtu23,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu13,Gt213,kmul(Atu12,kmadd(cdphi1,ToReal(3),Gt212))),ToReal(6),kmadd(ToReal(3),kmadd(Atu11,Gt211,kmadd(Atu33,Gt233,kmul(Atu22,kmadd(cdphi2,ToReal(6),Gt222)))),kmadd(Atu23,kmadd(Gt223,ToReal(6),kmul(cdphi3,ToReal(18))),kmul(kmadd(gtu12,S1,kmadd(gtu22,S2,kmul(gtu23,S3))),kmul(ToReal(-24),ToReal(Pi)))))))))))))))))); CCTK_REAL_VEC dotXt3 = - kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu13,JacPDstandardNth1alpha,kmadd(Atu23,JacPDstandardNth2alpha,kmul(Atu33,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta3,Xtn1,kmul(JacPDstandardNth2beta3,Xtn2)),ToReal(-3),kmadd(Xtn3,kmsub(kadd(JacPDstandardNth1beta1,JacPDstandardNth2beta2),ToReal(2),JacPDstandardNth3beta3),kmadd(kmadd(gtu11,JacPDstandardNth11beta3,kmadd(gtu12,kadd(JacPDstandardNth12beta3,JacPDstandardNth21beta3),kmadd(gtu22,JacPDstandardNth22beta3,kmul(gtu23,JacPDstandardNth32beta3)))),ToReal(3),kmadd(gtu13,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth31beta3,ToReal(3),kmul(JacPDstandardNth13beta3,ToReal(4))))),kmadd(gtu23,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth23beta3,ToReal(4),JacPDstandardNth22beta2)),kmadd(gtu33,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth33beta3,ToReal(4),JacPDstandardNth32beta2)),kmul(alphaL,kmadd(kmadd(gtu13,S1,kmadd(gtu23,S2,kmul(gtu33,S3))),ToReal(-150.7964473723100754462068823974161384415),kmadd(kmadd(gtu13,JacPDstandardNth1trK,kmadd(gtu23,JacPDstandardNth2trK,kmul(gtu33,JacPDstandardNth3trK))),ToReal(-4),kmadd(kmadd(Atu11,Gt311,kmul(Atu22,Gt322)),ToReal(6),kmadd(kmadd(Atu12,Gt312,kmadd(Atu13,kmadd(cdphi1,ToReal(3),Gt313),kmul(Atu23,kmadd(cdphi2,ToReal(3),Gt323)))),ToReal(12),kmul(Atu33,kmadd(Gt333,ToReal(6),kmul(cdphi3,ToReal(36))))))))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu13,JacPDstandardNth1alpha,kmadd(Atu23,JacPDstandardNth2alpha,kmul(Atu33,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta3,Xtn1,kmul(JacPDstandardNth2beta3,Xtn2)),ToReal(-3),kmadd(Xtn3,kmsub(JacPDstandardNth2beta2,ToReal(2),JacPDstandardNth3beta3),kmadd(kmadd(gtu11,JacPDstandardNth11beta3,kmadd(gtu12,kadd(JacPDstandardNth12beta3,JacPDstandardNth21beta3),kmadd(gtu22,JacPDstandardNth22beta3,kmul(gtu23,JacPDstandardNth32beta3)))),ToReal(3),kmadd(gtu13,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth31beta3,ToReal(3),kmul(JacPDstandardNth13beta3,ToReal(4))))),kmadd(gtu23,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth23beta3,ToReal(4),JacPDstandardNth22beta2)),kmadd(gtu33,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth33beta3,ToReal(4),JacPDstandardNth32beta2)),kmul(ToReal(2),kmadd(JacPDstandardNth1beta1,Xtn3,kmul(alphaL,kmadd(kmadd(gtu13,JacPDstandardNth1trK,kmadd(gtu23,JacPDstandardNth2trK,kmul(gtu33,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu11,Gt311,kmul(Atu22,Gt322)),ToReal(3),kmadd(kmadd(Atu12,Gt312,kmadd(Atu13,kmadd(cdphi1,ToReal(3),Gt313),kmul(Atu23,kmadd(cdphi2,ToReal(3),Gt323)))),ToReal(6),kmadd(Atu33,kmadd(Gt333,ToReal(3),kmul(cdphi3,ToReal(18))),kmul(kmadd(gtu13,S1,kmadd(gtu23,S2,kmul(gtu33,S3))),kmul(ToReal(-24),ToReal(Pi)))))))))))))))))); CCTK_REAL_VEC Xt1rhsL = dotXt1; @@ -1322,18 +1324,18 @@ static void ML_BSSN_RHS1_Body(cGH const * restrict const cctkGH, int const dir, CCTK_REAL_VEC Xt3rhsL = dotXt3; CCTK_REAL_VEC dottrK = - kmsub(alphaL,kadd(SQR(Atm11),kadd(SQR(Atm22),kadd(SQR(Atm33),kmadd(SQR(trKL),ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(2),kmul(kadd(rho,trS),ToReal(12.56637061435917295385057353311801153679))))))),kmul(em4phi,knmsub(JacPDstandardNth1alpha,Xtn1,knmsub(JacPDstandardNth2alpha,Xtn2,knmsub(JacPDstandardNth3alpha,Xtn3,kmadd(gtu11,kmadd(cdphi1,kmul(JacPDstandardNth1alpha,ToReal(2)),JacPDstandardNth11alpha),kmadd(gtu12,kadd(JacPDstandardNth12alpha,kadd(JacPDstandardNth21alpha,kmadd(cdphi2,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth2alpha,ToReal(2)))))),kmadd(gtu22,kmadd(cdphi2,kmul(JacPDstandardNth2alpha,ToReal(2)),JacPDstandardNth22alpha),kmadd(gtu13,kadd(JacPDstandardNth13alpha,kadd(JacPDstandardNth31alpha,kmadd(cdphi3,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmadd(gtu23,kadd(JacPDstandardNth23alpha,kadd(JacPDstandardNth32alpha,kmadd(cdphi3,kmul(JacPDstandardNth2alpha,ToReal(2)),kmul(cdphi2,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmul(gtu33,kmadd(cdphi3,kmul(JacPDstandardNth3alpha,ToReal(2)),JacPDstandardNth33alpha)))))))))))); + kmsub(alphaL,kadd(SQR(Atm11),kadd(SQR(Atm22),kadd(SQR(Atm33),kmadd(SQR(trKL),ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(2),kmul(kadd(rho,trS),kmul(ToReal(4),ToReal(Pi)))))))),kmul(em4phi,knmsub(JacPDstandardNth1alpha,Xtn1,knmsub(JacPDstandardNth2alpha,Xtn2,knmsub(JacPDstandardNth3alpha,Xtn3,kmadd(gtu11,kmadd(cdphi1,kmul(JacPDstandardNth1alpha,ToReal(2)),JacPDstandardNth11alpha),kmadd(gtu12,kadd(JacPDstandardNth12alpha,kadd(JacPDstandardNth21alpha,kmadd(cdphi2,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth2alpha,ToReal(2)))))),kmadd(gtu22,kmadd(cdphi2,kmul(JacPDstandardNth2alpha,ToReal(2)),JacPDstandardNth22alpha),kmadd(gtu13,kadd(JacPDstandardNth13alpha,kadd(JacPDstandardNth31alpha,kmadd(cdphi3,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmadd(gtu23,kadd(JacPDstandardNth23alpha,kadd(JacPDstandardNth32alpha,kmadd(cdphi3,kmul(JacPDstandardNth2alpha,ToReal(2)),kmul(cdphi2,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmul(gtu33,kmadd(cdphi3,kmul(JacPDstandardNth3alpha,ToReal(2)),JacPDstandardNth33alpha)))))))))))); CCTK_REAL_VEC trKrhsL = dottrK; CCTK_REAL_VEC alpharhsL = - kneg(kmul(kpow(alphaL,harmonicN),kmul(ToReal(harmonicF),kmadd(ksub(AL,trKL),ToReal(LapseACoeff),trKL)))); + kneg(kmul(kpow(alphaL,harmonicN),kmul(ToReal(harmonicF),kmadd(AL,ToReal(LapseACoeff),kmul(kmadd(kadd(alphaL,ToReal(-1)),ToReal(AlphaDriver),trKL),ksub(ToReal(1),ToReal(LapseACoeff))))))); CCTK_REAL_VEC ArhsL = kmul(knmsub(AL,ToReal(AlphaDriver),dottrK),ToReal(LapseACoeff)); CCTK_REAL_VEC eta = - kfmin(ToReal(1),kmul(INV(rL),ToReal(SpatialBetaDriverRadius))); + kmul(INV(kfmax(rL,ToReal(SpatialBetaDriverRadius))),ToReal(SpatialBetaDriverRadius)); CCTK_REAL_VEC theta = kfmin(ToReal(1),kexp(knmsub(rL,INV(ToReal(SpatialShiftGammaCoeffRadius)),ToReal(1)))); @@ -1345,27 +1347,24 @@ static void ML_BSSN_RHS1_Body(cGH const * restrict const cctkGH, int const dir, if (harmonicShift) { beta1rhsL = - kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(kmadd(gtu11,JacPDstandardNth1alpha,kmadd(gtu12,JacPDstandardNth2alpha,kmul(gtu13,JacPDstandardNth3alpha))),kmul(phiL,ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth1gt11,SQR(gtu11),kmul(JacPDstandardNth1gt22,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu13,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu13,JacPDstandardNth1gt33,kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu22,JacPDstandardNth3gt22))))),kmadd(gtu11,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu12,JacPDstandardNth2gt11,kmadd(gtu13,JacPDstandardNth3gt11,kmadd(gtu23,kmul(JacPDstandardNth1gt23,ToReal(-2)),knmsub(gtu22,JacPDstandardNth1gt22,kmadd(kmadd(gtu12,JacPDstandardNth1gt12,kmadd(gtu13,JacPDstandardNth1gt13,kmul(gtu22,JacPDstandardNth2gt12))),ToReal(2),kmadd(gtu23,kmul(JacPDstandardNth2gt13,ToReal(2)),kmadd(gtu23,kmul(JacPDstandardNth3gt12,ToReal(2)),kmul(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33))))))))))),kmul(gtu12,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu23,kmul(JacPDstandardNth3gt22,ToReal(2)),kmadd(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33),kmul(gtu13,kmul(JacPDstandardNth1gt23,ToReal(4))))))))))))))))); + kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu11,JacPDstandardNth1alpha,kmadd(gtu12,JacPDstandardNth2alpha,kmul(gtu13,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth1gt11,SQR(gtu11),kmul(JacPDstandardNth1gt22,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu13,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu13,JacPDstandardNth1gt33,kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu22,JacPDstandardNth3gt22))))),kmadd(gtu11,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu12,JacPDstandardNth2gt11,kmadd(gtu13,JacPDstandardNth3gt11,kmadd(gtu23,kmul(JacPDstandardNth1gt23,ToReal(-2)),knmsub(gtu22,JacPDstandardNth1gt22,kmadd(kmadd(gtu12,JacPDstandardNth1gt12,kmadd(gtu13,JacPDstandardNth1gt13,kmul(gtu22,JacPDstandardNth2gt12))),ToReal(2),kmadd(gtu23,kmul(JacPDstandardNth2gt13,ToReal(2)),kmadd(gtu23,kmul(JacPDstandardNth3gt12,ToReal(2)),kmul(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33))))))))))),kmul(gtu12,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu23,kmul(JacPDstandardNth3gt22,ToReal(2)),kmadd(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33),kmul(gtu13,kmul(JacPDstandardNth1gt23,ToReal(4))))))))))))))))); beta2rhsL = - kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(kmadd(gtu12,JacPDstandardNth1alpha,kmadd(gtu22,JacPDstandardNth2alpha,kmul(gtu23,JacPDstandardNth3alpha))),kmul(phiL,ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth2gt22,SQR(gtu22),kmul(JacPDstandardNth2gt11,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu23,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt13,kmadd(gtu13,JacPDstandardNth1gt33,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu11,JacPDstandardNth3gt11))))),kmadd(gtu22,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu23,JacPDstandardNth3gt22,kmadd(kmadd(gtu23,JacPDstandardNth2gt23,kmul(gtu13,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth3gt12,JacPDstandardNth2gt13)))),ToReal(2),kmadd(gtu11,kmsub(JacPDstandardNth1gt12,ToReal(2),JacPDstandardNth2gt11),kmul(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33))))))),kmul(gtu12,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu13,kmul(JacPDstandardNth3gt11,ToReal(2)),kmadd(gtu22,kmadd(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth2gt13,ToReal(4)))))))))))))))))); + kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu12,JacPDstandardNth1alpha,kmadd(gtu22,JacPDstandardNth2alpha,kmul(gtu23,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth2gt22,SQR(gtu22),kmul(JacPDstandardNth2gt11,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu23,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt13,kmadd(gtu13,JacPDstandardNth1gt33,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu11,JacPDstandardNth3gt11))))),kmadd(gtu22,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu23,JacPDstandardNth3gt22,kmadd(kmadd(gtu23,JacPDstandardNth2gt23,kmul(gtu13,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth3gt12,JacPDstandardNth2gt13)))),ToReal(2),kmadd(gtu11,kmsub(JacPDstandardNth1gt12,ToReal(2),JacPDstandardNth2gt11),kmul(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33))))))),kmul(gtu12,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu13,kmul(JacPDstandardNth3gt11,ToReal(2)),kmadd(gtu22,kmadd(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth2gt13,ToReal(4)))))))))))))))))); beta3rhsL = - kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(kmadd(gtu13,JacPDstandardNth1alpha,kmadd(gtu23,JacPDstandardNth2alpha,kmul(gtu33,JacPDstandardNth3alpha))),kmul(phiL,ToReal(-2)),kmul(alphaL,kmadd(phiL,kmul(kmadd(JacPDstandardNth3gt11,SQR(gtu13),kmul(JacPDstandardNth3gt22,SQR(gtu23))),ToReal(2)),kmadd(gtu23,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu33,JacPDstandardNth2gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt12,kmadd(gtu12,JacPDstandardNth1gt22,kmul(gtu33,JacPDstandardNth3gt23))),ToReal(2),kmul(gtu11,JacPDstandardNth2gt11)))))),kmadd(gtu33,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,knmsub(gtu22,JacPDstandardNth3gt22,kmadd(kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu12,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth2gt13,JacPDstandardNth3gt12)))),ToReal(2),kmul(gtu11,kmsub(JacPDstandardNth1gt13,ToReal(2),JacPDstandardNth3gt11))))))),kmul(gtu13,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu12,kmul(JacPDstandardNth2gt11,ToReal(2)),kmadd(gtu22,kmsub(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmadd(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth3gt12,ToReal(4)))))))))))))))))); + kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu13,JacPDstandardNth1alpha,kmadd(gtu23,JacPDstandardNth2alpha,kmul(gtu33,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmul(kmadd(JacPDstandardNth3gt11,SQR(gtu13),kmul(JacPDstandardNth3gt22,SQR(gtu23))),ToReal(2)),kmadd(gtu23,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu33,JacPDstandardNth2gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt12,kmadd(gtu12,JacPDstandardNth1gt22,kmul(gtu33,JacPDstandardNth3gt23))),ToReal(2),kmul(gtu11,JacPDstandardNth2gt11)))))),kmadd(gtu33,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,knmsub(gtu22,JacPDstandardNth3gt22,kmadd(kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu12,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth2gt13,JacPDstandardNth3gt12)))),ToReal(2),kmul(gtu11,kmsub(JacPDstandardNth1gt13,ToReal(2),JacPDstandardNth3gt11))))))),kmul(gtu13,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu12,kmul(JacPDstandardNth2gt11,ToReal(2)),kmadd(gtu22,kmsub(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmadd(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth3gt12,ToReal(4)))))))))))))))))); } else { beta1rhsL = - kmul(theta,kmul(kadd(Xt1L,kmadd(beta1L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B1L,Xt1L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt1L,kmadd(ksub(B1L,Xt1L),ToReal(ShiftBCoeff),kmul(beta1L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); beta2rhsL = - kmul(theta,kmul(kadd(Xt2L,kmadd(beta2L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B2L,Xt2L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt2L,kmadd(ksub(B2L,Xt2L),ToReal(ShiftBCoeff),kmul(beta2L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); beta3rhsL = - kmul(theta,kmul(kadd(Xt3L,kmadd(beta3L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B3L,Xt3L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); } CCTK_REAL_VEC B1rhsL = @@ -1377,108 +1376,29 @@ static void ML_BSSN_RHS1_Body(cGH const * restrict const cctkGH, int const dir, CCTK_REAL_VEC B3rhsL = kmul(knmsub(B3L,kmul(eta,ToReal(BetaDriver)),dotXt3),ToReal(ShiftBCoeff)); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_RHS1); + LC_ENDLOOP3VEC(ML_BSSN_RHS1); } extern "C" void ML_BSSN_RHS1(CCTK_ARGUMENTS) @@ -1497,7 +1417,26 @@ extern "C" void ML_BSSN_RHS1(CCTK_ARGUMENTS) return; } - const char *groups[] = {"grid::coordinates","Grid::coordinates","ML_BSSN::ML_curv","ML_BSSN::ML_dtlapse","ML_BSSN::ML_dtlapserhs","ML_BSSN::ML_dtshift","ML_BSSN::ML_dtshiftrhs","ML_BSSN::ML_Gamma","ML_BSSN::ML_Gammarhs","ML_BSSN::ML_lapse","ML_BSSN::ML_lapserhs","ML_BSSN::ML_log_confac","ML_BSSN::ML_log_confacrhs","ML_BSSN::ML_metric","ML_BSSN::ML_metricrhs","ML_BSSN::ML_shift","ML_BSSN::ML_shiftrhs","ML_BSSN::ML_trace_curv","ML_BSSN::ML_trace_curvrhs"}; + const char *const groups[] = { + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN::ML_curv", + "ML_BSSN::ML_dtlapse", + "ML_BSSN::ML_dtlapserhs", + "ML_BSSN::ML_dtshift", + "ML_BSSN::ML_dtshiftrhs", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_Gammarhs", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_lapserhs", + "ML_BSSN::ML_log_confac", + "ML_BSSN::ML_log_confacrhs", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_metricrhs", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_shiftrhs", + "ML_BSSN::ML_trace_curv", + "ML_BSSN::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_RHS1", 19, groups); switch(fdOrder) @@ -1519,7 +1458,7 @@ extern "C" void ML_BSSN_RHS1(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_RHS1_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_RHS1_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_RHS2.cc b/ML_BSSN/src/ML_BSSN_RHS2.cc index 32a0097..fa76e9b 100644 --- a/ML_BSSN/src/ML_BSSN_RHS2.cc +++ b/ML_BSSN/src/ML_BSSN_RHS2.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -40,8 +41,6 @@ static void ML_BSSN_RHS2_Body(cGH const * restrict const cctkGH, int const dir, DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -78,9 +77,9 @@ static void ML_BSSN_RHS2_Body(cGH const * restrict const cctkGH, int const dir, CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -99,14 +98,14 @@ static void ML_BSSN_RHS2_Body(cGH const * restrict const cctkGH, int const dir, CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -120,9 +119,9 @@ static void ML_BSSN_RHS2_Body(cGH const * restrict const cctkGH, int const dir, CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -203,7 +202,7 @@ static void ML_BSSN_RHS2_Body(cGH const * restrict const cctkGH, int const dir, /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_RHS2, + LC_LOOP3VEC(ML_BSSN_RHS2, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1420,7 +1419,8 @@ static void ML_BSSN_RHS2_Body(cGH const * restrict const cctkGH, int const dir, CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -1428,12 +1428,14 @@ static void ML_BSSN_RHS2_Body(cGH const * restrict const cctkGH, int const dir, CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC Gtl111 = kmul(JacPDstandardNth1gt11,ToReal(0.5)); @@ -1676,16 +1678,16 @@ static void ML_BSSN_RHS2_Body(cGH const * restrict const cctkGH, int const dir, kmul(ToReal(-2),kadd(cdphi211,kmadd(SQR(cdphi1),kmul(kmadd(gt11L,gtu11,ToReal(-1)),ToReal(2)),kmul(gt11L,kmadd(cdphi211,gtu11,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu33,SQR(cdphi3))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmul(kmadd(cdphi1,kmadd(cdphi2,gtu12,kmul(cdphi3,gtu13)),kmul(cdphi2,kmul(cdphi3,gtu23))),ToReal(4)))))))))); CCTK_REAL_VEC Rphi12 = - kmul(ToReal(-2),kadd(cdphi212,kmadd(gt12L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(cdphi3,kmul(gt12L,kmul(gtu13,ToReal(4))),kmul(cdphi2,kmadd(gt12L,kmul(gtu12,ToReal(4)),ToReal(-2)))))))); + kmul(ToReal(-2),kadd(cdphi212,kmadd(gt12L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(gt12L,kmul(cdphi3,kmul(gtu13,ToReal(4))),kmul(cdphi2,kmadd(gt12L,kmul(gtu12,ToReal(4)),ToReal(-2)))))))); CCTK_REAL_VEC Rphi13 = - kmul(ToReal(-2),kadd(cdphi213,kmadd(gt13L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(cdphi2,kmul(gt13L,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt13L,kmul(gtu13,ToReal(4)),ToReal(-2)))))))); + kmul(ToReal(-2),kadd(cdphi213,kmadd(gt13L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(gt13L,kmul(cdphi2,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt13L,kmul(gtu13,ToReal(4)),ToReal(-2)))))))); CCTK_REAL_VEC Rphi22 = kmul(ToReal(-2),kadd(cdphi222,kmadd(SQR(cdphi2),kmul(kmadd(gt22L,gtu22,ToReal(-1)),ToReal(2)),kmul(gt22L,kmadd(cdphi222,gtu22,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu33,SQR(cdphi3))))),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmul(kmadd(cdphi1,kmul(cdphi3,gtu13),kmul(cdphi2,kmadd(cdphi1,gtu12,kmul(cdphi3,gtu23)))),ToReal(4)))))))))); CCTK_REAL_VEC Rphi23 = - kmul(ToReal(-2),kadd(cdphi223,kmadd(gt23L,kmadd(cdphi222,gtu22,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu22,SQR(cdphi2))))),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi1,kmul(cdphi3,kmul(gtu13,ToReal(4)))))))),kmul(cdphi2,kmadd(cdphi1,kmul(gt23L,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt23L,kmul(gtu23,ToReal(4)),ToReal(-2)))))))); + kmul(ToReal(-2),kadd(cdphi223,kmadd(gt23L,kmadd(cdphi222,gtu22,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu22,SQR(cdphi2))))),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi1,kmul(cdphi3,kmul(gtu13,ToReal(4)))))))),kmul(cdphi2,kmadd(gt23L,kmul(cdphi1,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt23L,kmul(gtu23,ToReal(4)),ToReal(-2)))))))); CCTK_REAL_VEC Rphi33 = kmul(ToReal(-2),kadd(cdphi233,kmadd(SQR(cdphi3),kmul(kmadd(gt33L,gtu33,ToReal(-1)),ToReal(2)),kmul(gt33L,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi213,gtu13,kmul(cdphi223,gtu23)),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(cdphi3,kmul(kmadd(cdphi1,gtu13,kmul(cdphi2,gtu23)),ToReal(4)),kmul(gtu12,kmadd(cdphi212,ToReal(2),kmul(cdphi1,kmul(cdphi2,ToReal(4)))))))))))))); @@ -1722,17 +1724,17 @@ static void ML_BSSN_RHS2_Body(cGH const * restrict const cctkGH, int const dir, CCTK_REAL_VEC em4phi = INV(e4phi); - CCTK_REAL_VEC g11 = kmul(e4phi,gt11L); + CCTK_REAL_VEC g11 = kmul(gt11L,e4phi); - CCTK_REAL_VEC g12 = kmul(e4phi,gt12L); + CCTK_REAL_VEC g12 = kmul(gt12L,e4phi); - CCTK_REAL_VEC g13 = kmul(e4phi,gt13L); + CCTK_REAL_VEC g13 = kmul(gt13L,e4phi); - CCTK_REAL_VEC g22 = kmul(e4phi,gt22L); + CCTK_REAL_VEC g22 = kmul(gt22L,e4phi); - CCTK_REAL_VEC g23 = kmul(e4phi,gt23L); + CCTK_REAL_VEC g23 = kmul(gt23L,e4phi); - CCTK_REAL_VEC g33 = kmul(e4phi,gt33L); + CCTK_REAL_VEC g33 = kmul(gt33L,e4phi); CCTK_REAL_VEC gu11 = kmul(em4phi,gtu11); @@ -1783,73 +1785,33 @@ static void ML_BSSN_RHS2_Body(cGH const * restrict const cctkGH, int const dir, kmadd(Ats11,gu11,kmadd(Ats22,gu22,kmadd(Ats33,gu33,kmul(kmadd(Ats12,gu12,kmadd(Ats13,gu13,kmul(Ats23,gu23))),ToReal(2))))); CCTK_REAL_VEC At11rhsL = - kmadd(em4phi,kmadd(g11,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),Ats11),kmadd(At11L,kmadd(kadd(JacPDstandardNth2beta2,JacPDstandardNth3beta3),ToReal(-0.6666666666666666666666666666666666666667),kmul(JacPDstandardNth1beta1,ToReal(1.333333333333333333333333333333333333333))),kmadd(kmadd(At12L,JacPDstandardNth1beta2,kmul(At13L,JacPDstandardNth1beta3)),ToReal(2.),kmul(alphaL,kmadd(kmadd(At12L,Atm21,kmul(At13L,Atm31)),ToReal(-2.),kmadd(At11L,kmadd(Atm11,ToReal(-2.),trKL),kmul(em4phi,kmadd(eTxxL,ToReal(-25.13274122871834590770114706623602307358),kmul(g11,kmul(trS,ToReal(8.377580409572781969233715688745341024526))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats11,ToReal(3),kmul(g11,trAts)),kmadd(At11L,kmadd(kadd(JacPDstandardNth2beta2,JacPDstandardNth3beta3),ToReal(-2),kmul(JacPDstandardNth1beta1,ToReal(4))),kmsub(kmadd(At12L,JacPDstandardNth1beta2,kmul(At13L,JacPDstandardNth1beta3)),ToReal(6),kmul(alphaL,kmadd(kmadd(At12L,Atm21,kmul(At13L,Atm31)),ToReal(6),kmadd(At11L,kmadd(trKL,ToReal(-3),kmul(Atm11,ToReal(6))),kmul(em4phi,kmul(kmadd(g11,kmul(trS,ToReal(-8)),kmul(eTxxL,ToReal(24))),ToReal(Pi)))))))))); CCTK_REAL_VEC At12rhsL = - kmadd(At22L,JacPDstandardNth1beta2,kmadd(At23L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth2beta1,kmadd(At13L,JacPDstandardNth2beta3,kmadd(em4phi,kmadd(g12,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),Ats12),kmadd(At12L,kmadd(JacPDstandardNth3beta3,ToReal(-0.6666666666666666666666666666666666666667),kmul(kadd(JacPDstandardNth1beta1,JacPDstandardNth2beta2),ToReal(0.3333333333333333333333333333333333333333))),kmul(alphaL,kmadd(At12L,trKL,kmadd(kmadd(At11L,Atm12,kmadd(At12L,Atm22,kmul(At13L,Atm32))),ToReal(-2.),kmul(em4phi,kmadd(eTxyL,ToReal(-25.13274122871834590770114706623602307358),kmul(g12,kmul(trS,ToReal(8.377580409572781969233715688745341024526)))))))))))))); + kmadd(ToReal(0.333333333333333333333333333333),kmadd(At12L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth3beta3,ToReal(-2),JacPDstandardNth2beta2)),kmsub(kmadd(Ats12,em4phi,kmadd(At22L,JacPDstandardNth1beta2,kmadd(At23L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth2beta1,kmul(At13L,JacPDstandardNth2beta3))))),ToReal(3),kmul(em4phi,kmul(g12,trAts)))),kmul(alphaL,kmadd(kmadd(At11L,Atm12,kmul(At13L,Atm32)),ToReal(-2),kmadd(At12L,kmadd(Atm22,ToReal(-2),trKL),kmul(em4phi,kmadd(g12,kmul(trS,ToReal(8.37758040957278196923371568875)),kmul(eTxyL,kmul(ToReal(-8),ToReal(Pi))))))))); CCTK_REAL_VEC At13rhsL = - kmadd(At23L,JacPDstandardNth1beta2,kmadd(At33L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth3beta1,kmadd(At12L,JacPDstandardNth3beta2,kmadd(em4phi,kmadd(g13,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),Ats13),kmadd(At13L,kmadd(JacPDstandardNth2beta2,ToReal(-0.6666666666666666666666666666666666666667),kmul(kadd(JacPDstandardNth1beta1,JacPDstandardNth3beta3),ToReal(0.3333333333333333333333333333333333333333))),kmul(alphaL,kmadd(At13L,trKL,kmadd(kmadd(At11L,Atm13,kmadd(At12L,Atm23,kmul(At13L,Atm33))),ToReal(-2.),kmul(em4phi,kmadd(eTxzL,ToReal(-25.13274122871834590770114706623602307358),kmul(g13,kmul(trS,ToReal(8.377580409572781969233715688745341024526)))))))))))))); + kmadd(ToReal(0.333333333333333333333333333333),kmadd(At13L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth2beta2,ToReal(-2),JacPDstandardNth3beta3)),kmsub(kmadd(Ats13,em4phi,kmadd(At23L,JacPDstandardNth1beta2,kmadd(At33L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth3beta1,kmul(At12L,JacPDstandardNth3beta2))))),ToReal(3),kmul(em4phi,kmul(g13,trAts)))),kmul(alphaL,kmadd(kmadd(At11L,Atm13,kmul(At12L,Atm23)),ToReal(-2),kmadd(At13L,kmadd(Atm33,ToReal(-2),trKL),kmul(em4phi,kmadd(g13,kmul(trS,ToReal(8.37758040957278196923371568875)),kmul(eTxzL,kmul(ToReal(-8),ToReal(Pi))))))))); CCTK_REAL_VEC At22rhsL = - kmadd(em4phi,kmadd(g22,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),Ats22),kmadd(At22L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth3beta3),ToReal(-0.6666666666666666666666666666666666666667),kmul(JacPDstandardNth2beta2,ToReal(1.333333333333333333333333333333333333333))),kmadd(kmadd(At12L,JacPDstandardNth2beta1,kmul(At23L,JacPDstandardNth2beta3)),ToReal(2.),kmul(alphaL,kmadd(At22L,trKL,kmadd(kmadd(At12L,Atm12,kmadd(At22L,Atm22,kmul(At23L,Atm32))),ToReal(-2.),kmul(em4phi,kmadd(eTyyL,ToReal(-25.13274122871834590770114706623602307358),kmul(g22,kmul(trS,ToReal(8.377580409572781969233715688745341024526))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats22,ToReal(3),kmul(g22,trAts)),kmadd(At22L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth3beta3),ToReal(-2),kmul(JacPDstandardNth2beta2,ToReal(4))),kmsub(kmadd(At12L,JacPDstandardNth2beta1,kmul(At23L,JacPDstandardNth2beta3)),ToReal(6),kmul(alphaL,kmadd(kmadd(At12L,Atm12,kmul(At23L,Atm32)),ToReal(6),kmadd(At22L,kmadd(trKL,ToReal(-3),kmul(Atm22,ToReal(6))),kmul(em4phi,kmul(kmadd(g22,kmul(trS,ToReal(-8)),kmul(eTyyL,ToReal(24))),ToReal(Pi)))))))))); CCTK_REAL_VEC At23rhsL = - kmadd(At13L,JacPDstandardNth2beta1,kmadd(At33L,JacPDstandardNth2beta3,kmadd(At12L,JacPDstandardNth3beta1,kmadd(At22L,JacPDstandardNth3beta2,kmadd(em4phi,kmadd(g23,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),Ats23),kmadd(At23L,kmadd(JacPDstandardNth1beta1,ToReal(-0.6666666666666666666666666666666666666667),kmul(kadd(JacPDstandardNth2beta2,JacPDstandardNth3beta3),ToReal(0.3333333333333333333333333333333333333333))),kmul(alphaL,kmadd(At23L,trKL,kmadd(kmadd(At12L,Atm13,kmadd(At22L,Atm23,kmul(At23L,Atm33))),ToReal(-2.),kmul(em4phi,kmadd(eTyzL,ToReal(-25.13274122871834590770114706623602307358),kmul(g23,kmul(trS,ToReal(8.377580409572781969233715688745341024526)))))))))))))); + kmadd(ToReal(0.333333333333333333333333333333),kmadd(At23L,kadd(JacPDstandardNth2beta2,kmadd(JacPDstandardNth1beta1,ToReal(-2),JacPDstandardNth3beta3)),kmsub(kmadd(Ats23,em4phi,kmadd(At13L,JacPDstandardNth2beta1,kmadd(At33L,JacPDstandardNth2beta3,kmadd(At12L,JacPDstandardNth3beta1,kmul(At22L,JacPDstandardNth3beta2))))),ToReal(3),kmul(em4phi,kmul(g23,trAts)))),kmul(alphaL,kmadd(kmadd(At12L,Atm13,kmul(At22L,Atm23)),ToReal(-2),kmadd(At23L,kmadd(Atm33,ToReal(-2),trKL),kmul(em4phi,kmadd(g23,kmul(trS,ToReal(8.37758040957278196923371568875)),kmul(eTyzL,kmul(ToReal(-8),ToReal(Pi))))))))); CCTK_REAL_VEC At33rhsL = - kmadd(em4phi,kmadd(g33,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),Ats33),kmadd(At33L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth2beta2),ToReal(-0.6666666666666666666666666666666666666667),kmul(JacPDstandardNth3beta3,ToReal(1.333333333333333333333333333333333333333))),kmadd(kmadd(At13L,JacPDstandardNth3beta1,kmul(At23L,JacPDstandardNth3beta2)),ToReal(2.),kmul(alphaL,kmadd(At33L,trKL,kmadd(kmadd(At13L,Atm13,kmadd(At23L,Atm23,kmul(At33L,Atm33))),ToReal(-2.),kmul(em4phi,kmadd(eTzzL,ToReal(-25.13274122871834590770114706623602307358),kmul(g33,kmul(trS,ToReal(8.377580409572781969233715688745341024526))))))))))); - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - break; - } - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); + kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats33,ToReal(3),kmul(g33,trAts)),kmadd(At33L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth2beta2),ToReal(-2),kmul(JacPDstandardNth3beta3,ToReal(4))),kmsub(kmadd(At13L,JacPDstandardNth3beta1,kmul(At23L,JacPDstandardNth3beta2)),ToReal(6),kmul(alphaL,kmadd(kmadd(At13L,Atm13,kmul(At23L,Atm23)),ToReal(6),kmadd(At33L,kmadd(trKL,ToReal(-3),kmul(Atm33,ToReal(6))),kmul(em4phi,kmul(kmadd(g33,kmul(trS,ToReal(-8)),kmul(eTzzL,ToReal(24))),ToReal(Pi)))))))))); + + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_RHS2); + LC_ENDLOOP3VEC(ML_BSSN_RHS2); } extern "C" void ML_BSSN_RHS2(CCTK_ARGUMENTS) @@ -1868,7 +1830,15 @@ extern "C" void ML_BSSN_RHS2(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN::ML_curv","ML_BSSN::ML_curvrhs","ML_BSSN::ML_Gamma","ML_BSSN::ML_lapse","ML_BSSN::ML_log_confac","ML_BSSN::ML_metric","ML_BSSN::ML_shift","ML_BSSN::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN::ML_curv", + "ML_BSSN::ML_curvrhs", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_log_confac", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_RHS2", 8, groups); switch(fdOrder) @@ -1890,7 +1860,7 @@ extern "C" void ML_BSSN_RHS2(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_RHS2_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_RHS2_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_RHSStaticBoundary.cc b/ML_BSSN/src/ML_BSSN_RHSStaticBoundary.cc index 5bc0579..ba11ca7 100644 --- a/ML_BSSN/src/ML_BSSN_RHSStaticBoundary.cc +++ b/ML_BSSN/src/ML_BSSN_RHSStaticBoundary.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -64,8 +65,6 @@ static void ML_BSSN_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, in DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -102,9 +101,9 @@ static void ML_BSSN_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, in CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -123,14 +122,14 @@ static void ML_BSSN_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, in CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -144,9 +143,9 @@ static void ML_BSSN_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, in CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -227,7 +226,7 @@ static void ML_BSSN_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, in /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_RHSStaticBoundary, + LC_LOOP3VEC(ML_BSSN_RHSStaticBoundary, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -309,132 +308,35 @@ static void ML_BSSN_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, in CCTK_REAL_VEC B3rhsL = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_RHSStaticBoundary); + LC_ENDLOOP3VEC(ML_BSSN_RHSStaticBoundary); } extern "C" void ML_BSSN_RHSStaticBoundary(CCTK_ARGUMENTS) @@ -453,7 +355,16 @@ extern "C" void ML_BSSN_RHSStaticBoundary(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN::ML_curvrhs","ML_BSSN::ML_dtlapserhs","ML_BSSN::ML_dtshiftrhs","ML_BSSN::ML_Gammarhs","ML_BSSN::ML_lapserhs","ML_BSSN::ML_log_confacrhs","ML_BSSN::ML_metricrhs","ML_BSSN::ML_shiftrhs","ML_BSSN::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN::ML_curvrhs", + "ML_BSSN::ML_dtlapserhs", + "ML_BSSN::ML_dtshiftrhs", + "ML_BSSN::ML_Gammarhs", + "ML_BSSN::ML_lapserhs", + "ML_BSSN::ML_log_confacrhs", + "ML_BSSN::ML_metricrhs", + "ML_BSSN::ML_shiftrhs", + "ML_BSSN::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_RHSStaticBoundary", 9, groups); switch(fdOrder) @@ -471,7 +382,7 @@ extern "C" void ML_BSSN_RHSStaticBoundary(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverBoundary(cctkGH, &ML_BSSN_RHSStaticBoundary_Body); + GenericFD_LoopOverBoundary(cctkGH, ML_BSSN_RHSStaticBoundary_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_boundary.cc b/ML_BSSN/src/ML_BSSN_boundary.cc index af2287a..140da41 100644 --- a/ML_BSSN/src/ML_BSSN_boundary.cc +++ b/ML_BSSN/src/ML_BSSN_boundary.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -64,8 +65,6 @@ static void ML_BSSN_boundary_Body(cGH const * restrict const cctkGH, int const d DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -102,9 +101,9 @@ static void ML_BSSN_boundary_Body(cGH const * restrict const cctkGH, int const d CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -123,14 +122,14 @@ static void ML_BSSN_boundary_Body(cGH const * restrict const cctkGH, int const d CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -144,9 +143,9 @@ static void ML_BSSN_boundary_Body(cGH const * restrict const cctkGH, int const d CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -227,7 +226,7 @@ static void ML_BSSN_boundary_Body(cGH const * restrict const cctkGH, int const d /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_boundary, + LC_LOOP3VEC(ML_BSSN_boundary, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -309,132 +308,35 @@ static void ML_BSSN_boundary_Body(cGH const * restrict const cctkGH, int const d CCTK_REAL_VEC B3L = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(beta1[index],beta1L,elt_count); - vec_store_nta_partial_hi(beta2[index],beta2L,elt_count); - vec_store_nta_partial_hi(beta3[index],beta3L,elt_count); - vec_store_nta_partial_hi(gt11[index],gt11L,elt_count); - vec_store_nta_partial_hi(gt12[index],gt12L,elt_count); - vec_store_nta_partial_hi(gt13[index],gt13L,elt_count); - vec_store_nta_partial_hi(gt22[index],gt22L,elt_count); - vec_store_nta_partial_hi(gt23[index],gt23L,elt_count); - vec_store_nta_partial_hi(gt33[index],gt33L,elt_count); - vec_store_nta_partial_hi(phi[index],phiL,elt_count); - vec_store_nta_partial_hi(trK[index],trKL,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(beta1[index],beta1L,elt_count); - vec_store_nta_partial_lo(beta2[index],beta2L,elt_count); - vec_store_nta_partial_lo(beta3[index],beta3L,elt_count); - vec_store_nta_partial_lo(gt11[index],gt11L,elt_count); - vec_store_nta_partial_lo(gt12[index],gt12L,elt_count); - vec_store_nta_partial_lo(gt13[index],gt13L,elt_count); - vec_store_nta_partial_lo(gt22[index],gt22L,elt_count); - vec_store_nta_partial_lo(gt23[index],gt23L,elt_count); - vec_store_nta_partial_lo(gt33[index],gt33L,elt_count); - vec_store_nta_partial_lo(phi[index],phiL,elt_count); - vec_store_nta_partial_lo(trK[index],trKL,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(beta1[index],beta1L); - vec_store_nta(beta2[index],beta2L); - vec_store_nta(beta3[index],beta3L); - vec_store_nta(gt11[index],gt11L); - vec_store_nta(gt12[index],gt12L); - vec_store_nta(gt13[index],gt13L); - vec_store_nta(gt22[index],gt22L); - vec_store_nta(gt23[index],gt23L); - vec_store_nta(gt33[index],gt33L); - vec_store_nta(phi[index],phiL); - vec_store_nta(trK[index],trKL); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(beta1[index],beta1L); + vec_store_nta_partial(beta2[index],beta2L); + vec_store_nta_partial(beta3[index],beta3L); + vec_store_nta_partial(gt11[index],gt11L); + vec_store_nta_partial(gt12[index],gt12L); + vec_store_nta_partial(gt13[index],gt13L); + vec_store_nta_partial(gt22[index],gt22L); + vec_store_nta_partial(gt23[index],gt23L); + vec_store_nta_partial(gt33[index],gt33L); + vec_store_nta_partial(phi[index],phiL); + vec_store_nta_partial(trK[index],trKL); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_boundary); + LC_ENDLOOP3VEC(ML_BSSN_boundary); } extern "C" void ML_BSSN_boundary(CCTK_ARGUMENTS) @@ -453,7 +355,16 @@ extern "C" void ML_BSSN_boundary(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN::ML_curv","ML_BSSN::ML_dtlapse","ML_BSSN::ML_dtshift","ML_BSSN::ML_Gamma","ML_BSSN::ML_lapse","ML_BSSN::ML_log_confac","ML_BSSN::ML_metric","ML_BSSN::ML_shift","ML_BSSN::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN::ML_curv", + "ML_BSSN::ML_dtlapse", + "ML_BSSN::ML_dtshift", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_log_confac", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_boundary", 9, groups); switch(fdOrder) @@ -471,7 +382,7 @@ extern "C" void ML_BSSN_boundary(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverBoundaryWithGhosts(cctkGH, &ML_BSSN_boundary_Body); + GenericFD_LoopOverBoundaryWithGhosts(cctkGH, ML_BSSN_boundary_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_constraints1.cc b/ML_BSSN/src/ML_BSSN_constraints1.cc index 85c3acd..8f89811 100644 --- a/ML_BSSN/src/ML_BSSN_constraints1.cc +++ b/ML_BSSN/src/ML_BSSN_constraints1.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -40,8 +41,6 @@ static void ML_BSSN_constraints1_Body(cGH const * restrict const cctkGH, int con DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -78,9 +77,9 @@ static void ML_BSSN_constraints1_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -99,14 +98,14 @@ static void ML_BSSN_constraints1_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -120,9 +119,9 @@ static void ML_BSSN_constraints1_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -203,7 +202,7 @@ static void ML_BSSN_constraints1_Body(cGH const * restrict const cctkGH, int con /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_constraints1, + LC_LOOP3VEC(ML_BSSN_constraints1, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1224,7 +1223,8 @@ static void ML_BSSN_constraints1_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -1232,12 +1232,14 @@ static void ML_BSSN_constraints1_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC Gtl111 = kmul(JacPDstandardNth1gt11,ToReal(0.5)); @@ -1480,16 +1482,16 @@ static void ML_BSSN_constraints1_Body(cGH const * restrict const cctkGH, int con kmul(ToReal(-2),kadd(cdphi211,kmadd(SQR(cdphi1),kmul(kmadd(gt11L,gtu11,ToReal(-1)),ToReal(2)),kmul(gt11L,kmadd(cdphi211,gtu11,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu33,SQR(cdphi3))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmul(kmadd(cdphi1,kmadd(cdphi2,gtu12,kmul(cdphi3,gtu13)),kmul(cdphi2,kmul(cdphi3,gtu23))),ToReal(4)))))))))); CCTK_REAL_VEC Rphi12 = - kmul(ToReal(-2),kadd(cdphi212,kmadd(gt12L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(cdphi3,kmul(gt12L,kmul(gtu13,ToReal(4))),kmul(cdphi2,kmadd(gt12L,kmul(gtu12,ToReal(4)),ToReal(-2)))))))); + kmul(ToReal(-2),kadd(cdphi212,kmadd(gt12L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(gt12L,kmul(cdphi3,kmul(gtu13,ToReal(4))),kmul(cdphi2,kmadd(gt12L,kmul(gtu12,ToReal(4)),ToReal(-2)))))))); CCTK_REAL_VEC Rphi13 = - kmul(ToReal(-2),kadd(cdphi213,kmadd(gt13L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(cdphi2,kmul(gt13L,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt13L,kmul(gtu13,ToReal(4)),ToReal(-2)))))))); + kmul(ToReal(-2),kadd(cdphi213,kmadd(gt13L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(gt13L,kmul(cdphi2,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt13L,kmul(gtu13,ToReal(4)),ToReal(-2)))))))); CCTK_REAL_VEC Rphi22 = kmul(ToReal(-2),kadd(cdphi222,kmadd(SQR(cdphi2),kmul(kmadd(gt22L,gtu22,ToReal(-1)),ToReal(2)),kmul(gt22L,kmadd(cdphi222,gtu22,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu33,SQR(cdphi3))))),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmul(kmadd(cdphi1,kmul(cdphi3,gtu13),kmul(cdphi2,kmadd(cdphi1,gtu12,kmul(cdphi3,gtu23)))),ToReal(4)))))))))); CCTK_REAL_VEC Rphi23 = - kmul(ToReal(-2),kadd(cdphi223,kmadd(gt23L,kmadd(cdphi222,gtu22,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu22,SQR(cdphi2))))),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi1,kmul(cdphi3,kmul(gtu13,ToReal(4)))))))),kmul(cdphi2,kmadd(cdphi1,kmul(gt23L,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt23L,kmul(gtu23,ToReal(4)),ToReal(-2)))))))); + kmul(ToReal(-2),kadd(cdphi223,kmadd(gt23L,kmadd(cdphi222,gtu22,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu22,SQR(cdphi2))))),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi1,kmul(cdphi3,kmul(gtu13,ToReal(4)))))))),kmul(cdphi2,kmadd(gt23L,kmul(cdphi1,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt23L,kmul(gtu23,ToReal(4)),ToReal(-2)))))))); CCTK_REAL_VEC Rphi33 = kmul(ToReal(-2),kadd(cdphi233,kmadd(SQR(cdphi3),kmul(kmadd(gt33L,gtu33,ToReal(-1)),ToReal(2)),kmul(gt33L,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi213,gtu13,kmul(cdphi223,gtu23)),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(cdphi3,kmul(kmadd(cdphi1,gtu13,kmul(cdphi2,gtu23)),ToReal(4)),kmul(gtu12,kmadd(cdphi212,ToReal(2),kmul(cdphi1,kmul(cdphi2,ToReal(4)))))))))))))); @@ -1557,38 +1559,13 @@ static void ML_BSSN_constraints1_Body(cGH const * restrict const cctkGH, int con kmul(INV(SQR(alphaL)),kadd(eTttL,kmadd(eTxxL,SQR(beta1L),kmadd(eTyyL,SQR(beta2L),kmadd(eTzzL,SQR(beta3L),kmadd(kmadd(beta2L,eTtyL,kmul(beta3L,eTtzL)),ToReal(-2),kmul(kmadd(beta2L,kmul(beta3L,eTyzL),kmul(beta1L,kmadd(beta2L,eTxyL,kmsub(beta3L,eTxzL,eTtxL)))),ToReal(2)))))))); CCTK_REAL_VEC HL = - kadd(trR,kmadd(rho,ToReal(-50.26548245743669181540229413247204614715),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(-2.),kmadd(kadd(SQR(Atm11),kadd(SQR(Atm22),SQR(Atm33))),ToReal(-1.),kmul(SQR(trKL),ToReal(0.6666666666666666666666666666666666666667)))))); - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(H[index],HL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ + kadd(trR,kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(-2),ksub(ksub(ksub(kmadd(SQR(trKL),ToReal(0.666666666666666666666666666667),kmul(rho,kmul(ToReal(-16),ToReal(Pi)))),SQR(Atm33)),SQR(Atm22)),SQR(Atm11)))); - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(H[index],HL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(H[index],HL,elt_count); - break; - } - vec_store_nta(H[index],HL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(H[index],HL); } - LC_ENDLOOP3VEC (ML_BSSN_constraints1); + LC_ENDLOOP3VEC(ML_BSSN_constraints1); } extern "C" void ML_BSSN_constraints1(CCTK_ARGUMENTS) @@ -1607,7 +1584,15 @@ extern "C" void ML_BSSN_constraints1(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN::ML_curv","ML_BSSN::ML_Gamma","ML_BSSN::ML_Ham","ML_BSSN::ML_lapse","ML_BSSN::ML_log_confac","ML_BSSN::ML_metric","ML_BSSN::ML_shift","ML_BSSN::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN::ML_curv", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_Ham", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_log_confac", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_constraints1", 8, groups); switch(fdOrder) @@ -1629,7 +1614,7 @@ extern "C" void ML_BSSN_constraints1(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_constraints1_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_constraints1_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_constraints2.cc b/ML_BSSN/src/ML_BSSN_constraints2.cc index 45b63e4..ecc059b 100644 --- a/ML_BSSN/src/ML_BSSN_constraints2.cc +++ b/ML_BSSN/src/ML_BSSN_constraints2.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -49,8 +50,6 @@ static void ML_BSSN_constraints2_Body(cGH const * restrict const cctkGH, int con DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -87,9 +86,9 @@ static void ML_BSSN_constraints2_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -108,14 +107,14 @@ static void ML_BSSN_constraints2_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -129,9 +128,9 @@ static void ML_BSSN_constraints2_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -212,7 +211,7 @@ static void ML_BSSN_constraints2_Body(cGH const * restrict const cctkGH, int con /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_constraints2, + LC_LOOP3VEC(ML_BSSN_constraints2, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -775,7 +774,8 @@ static void ML_BSSN_constraints2_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -783,12 +783,14 @@ static void ML_BSSN_constraints2_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC Gtl111 = kmul(JacPDstandardNth1gt11,ToReal(0.5)); @@ -908,13 +910,13 @@ static void ML_BSSN_constraints2_Body(cGH const * restrict const cctkGH, int con kmul(INV(alphaL),kmadd(beta1L,eTxzL,kmadd(beta2L,eTyzL,kmsub(beta3L,eTzzL,eTtzL)))); CCTK_REAL_VEC M1L = - kmadd(gtu11,JacPDstandardNth1At11,kmadd(gtu22,JacPDstandardNth2At12,kmadd(gtu23,kadd(JacPDstandardNth2At13,JacPDstandardNth3At12),kmadd(gtu33,JacPDstandardNth3At13,kmadd(S1,ToReal(-25.13274122871834590770114706623602307358),kmadd(kmadd(kmadd(At22L,Gt212,kmul(At23L,Gt312)),gtu22,kmadd(kmadd(At13L,Gt112,kmadd(At22L,Gt213,kmadd(At33L,Gt312,kmul(At23L,kadd(Gt212,Gt313))))),gtu23,kmul(kmadd(At13L,Gt113,kmadd(At23L,Gt213,kmul(At33L,Gt313))),gtu33))),ToReal(-1.),kmadd(gtu12,kadd(JacPDstandardNth1At12,kadd(JacPDstandardNth2At11,kmadd(At13L,kmul(Gt312,ToReal(-3.)),kmul(At22L,kmul(Gt211,ToReal(-1.)))))),kmadd(gtu13,kadd(JacPDstandardNth1At13,kadd(JacPDstandardNth3At11,kmadd(At13L,kmul(Gt313,ToReal(-3.)),kmul(At23L,kmul(Gt211,ToReal(-1.)))))),kmadd(Gt311,kmadd(At13L,kmul(gtu11,ToReal(-2.)),kmul(kmadd(At23L,gtu12,kmul(At33L,gtu13)),ToReal(-1.))),kmadd(JacPDstandardNth1trK,ToReal(-0.6666666666666666666666666666666666666667),kmadd(At13L,kmadd(kmadd(Gt322,gtu22,kmul(Gt333,gtu33)),ToReal(-1.),kmadd(cdphi3,kmul(gtu33,ToReal(6.)),kmadd(gtu13,kmadd(Gt111,ToReal(-1.),kmul(cdphi1,ToReal(6.))),kmul(gtu23,kmadd(Gt323,ToReal(-2.),kmul(cdphi2,ToReal(6.))))))),kmadd(At11L,kmadd(Gt123,kmul(gtu23,ToReal(-2.)),kmadd(kmadd(Gt122,gtu22,kmul(Gt133,gtu33)),ToReal(-1.),kmadd(gtu11,kmadd(Gt111,ToReal(-2.),kmul(cdphi1,ToReal(6.))),kmadd(gtu12,kmadd(Gt112,ToReal(-3.),kmul(cdphi2,ToReal(6.))),kmul(gtu13,kmadd(Gt113,ToReal(-3.),kmul(cdphi3,ToReal(6.)))))))),kmul(At12L,kmadd(Gt213,kmul(gtu13,ToReal(-3.)),kmadd(kmadd(Gt211,gtu11,kmul(Gt223,gtu23)),ToReal(-2.),kmadd(Gt233,kmul(gtu33,ToReal(-1.)),kmadd(gtu12,kmadd(Gt212,ToReal(-3.),kmadd(Gt111,ToReal(-1.),kmul(cdphi1,ToReal(6.)))),kmadd(gtu22,kmadd(kadd(Gt112,Gt222),ToReal(-1.),kmul(cdphi2,ToReal(6.))),kmul(gtu23,kmadd(Gt113,ToReal(-1.),kmul(cdphi3,ToReal(6.)))))))))))))))))))))); + kmadd(gtu11,JacPDstandardNth1At11,kmadd(gtu12,JacPDstandardNth2At11,kmadd(gtu22,JacPDstandardNth2At12,kmadd(gtu23,JacPDstandardNth2At13,kmadd(gtu13,JacPDstandardNth3At11,kmadd(gtu23,JacPDstandardNth3At12,kmadd(gtu33,JacPDstandardNth3At13,kmadd(gtu12,kadd(JacPDstandardNth1At12,kmsub(At13L,kmul(Gt312,ToReal(-3)),kmul(At22L,Gt211))),kmadd(gtu13,kadd(JacPDstandardNth1At13,kmsub(At13L,kmul(Gt313,ToReal(-3)),kmul(At23L,Gt211))),knmsub(gtu22,kmadd(At23L,Gt312,kmul(At22L,Gt212)),kmadd(Gt311,kmsub(At13L,kmul(gtu11,ToReal(-2)),kmadd(At33L,gtu13,kmul(At23L,gtu12))),kmadd(At13L,kmsub(Gt323,kmul(gtu23,ToReal(-2)),kmul(Gt113,gtu33)),kmadd(JacPDstandardNth1trK,ToReal(-0.666666666666666666666666666667),knmsub(At12L,kmadd(Gt111,gtu12,kmadd(Gt112,gtu22,kmadd(Gt222,gtu22,kmadd(Gt113,gtu23,kmadd(Gt233,gtu33,kmadd(cdphi1,kmul(gtu12,ToReal(-6)),kmadd(cdphi2,kmul(gtu22,ToReal(-6)),kmadd(cdphi3,kmul(gtu23,ToReal(-6)),kmadd(Gt211,kmul(gtu11,ToReal(2)),kmadd(Gt223,kmul(gtu23,ToReal(2)),kmadd(Gt212,kmul(gtu12,ToReal(3)),kmul(Gt213,kmul(gtu13,ToReal(3)))))))))))))),kmadd(gtu23,knmsub(At22L,Gt213,knmsub(At33L,Gt312,kmsub(At13L,kmul(cdphi2,ToReal(6)),kmul(At23L,kadd(Gt313,Gt212))))),kmadd(gtu33,knmsub(At23L,Gt213,kmsub(At13L,kmul(cdphi3,ToReal(6)),kmul(At33L,Gt313))),kmadd(At11L,kmadd(Gt112,kmul(gtu12,ToReal(-3)),kmadd(Gt113,kmul(gtu13,ToReal(-3)),kmadd(Gt111,kmul(gtu11,ToReal(-2)),kmadd(Gt123,kmul(gtu23,ToReal(-2)),knmsub(Gt122,gtu22,knmsub(Gt133,gtu33,kmadd(cdphi1,kmul(gtu11,ToReal(6)),kmadd(cdphi2,kmul(gtu12,ToReal(6)),kmul(cdphi3,kmul(gtu13,ToReal(6))))))))))),kmadd(At13L,knmsub(Gt322,gtu22,knmsub(Gt112,gtu23,kmsub(gtu13,kmsub(cdphi1,ToReal(6),Gt111),kmul(Gt333,gtu33)))),kmul(S1,kmul(ToReal(-8),ToReal(Pi))))))))))))))))))))); CCTK_REAL_VEC M2L = - kmadd(gtu11,JacPDstandardNth1At12,kmadd(gtu12,kadd(JacPDstandardNth1At22,JacPDstandardNth2At12),kmadd(gtu22,JacPDstandardNth2At22,kmadd(gtu33,JacPDstandardNth3At23,kmadd(S2,ToReal(-25.13274122871834590770114706623602307358),kmadd(kmadd(kmadd(At22L,Gt211,kmadd(At23L,Gt311,kmul(At13L,Gt312))),gtu11,kmadd(kmadd(At23L,Gt212,kmul(At33L,Gt312)),gtu13,kmadd(At11L,kmadd(Gt112,gtu11,kmadd(Gt122,gtu12,kmul(Gt123,gtu13))),kmadd(kmadd(At23L,Gt223,kmul(At33L,Gt323)),gtu33,kmul(At13L,kmadd(Gt322,gtu12,kmadd(Gt112,gtu13,kmadd(Gt122,gtu23,kmul(Gt123,gtu33))))))))),ToReal(-1.),kmadd(gtu23,kadd(JacPDstandardNth2At23,kadd(JacPDstandardNth3At22,kmadd(kmadd(At22L,Gt223,kmul(At23L,Gt323)),ToReal(-3.),kmul(kmadd(At23L,Gt222,kmul(At33L,Gt322)),ToReal(-1.))))),kmadd(gtu13,kadd(JacPDstandardNth1At23,kadd(JacPDstandardNth3At12,kmadd(At23L,kmul(Gt313,ToReal(-2.)),kmul(At13L,kmul(Gt323,ToReal(-1.)))))),kmadd(JacPDstandardNth2trK,ToReal(-0.6666666666666666666666666666666666666667),kmadd(At23L,kmadd(Gt312,kmul(gtu12,ToReal(-3.)),kmadd(Gt322,kmul(gtu22,ToReal(-2.)),kmadd(Gt333,kmul(gtu33,ToReal(-1.)),kmul(kmadd(cdphi1,gtu13,kmadd(cdphi2,gtu23,kmul(cdphi3,gtu33))),ToReal(6.))))),kmadd(At22L,kmadd(kmadd(Gt213,gtu13,kmul(Gt222,gtu22)),ToReal(-2.),kmadd(Gt233,kmul(gtu33,ToReal(-1.)),kmadd(kmadd(cdphi2,gtu22,kmul(cdphi3,gtu23)),ToReal(6.),kmul(gtu12,kmadd(Gt212,ToReal(-3.),kmul(cdphi1,ToReal(6.))))))),kmul(At12L,kmadd(Gt123,kmul(gtu23,ToReal(-3.)),kmadd(Gt122,kmul(gtu22,ToReal(-2.)),kmadd(Gt133,kmul(gtu33,ToReal(-1.)),kmadd(gtu11,kmadd(kadd(Gt111,Gt212),ToReal(-1.),kmul(cdphi1,ToReal(6.))),kmadd(gtu12,kmadd(Gt112,ToReal(-3.),kmadd(Gt222,ToReal(-1.),kmul(cdphi2,ToReal(6.)))),kmul(gtu13,kmadd(Gt113,ToReal(-2.),kmadd(Gt223,ToReal(-1.),kmul(cdphi3,ToReal(6.)))))))))))))))))))))); + kmadd(gtu11,JacPDstandardNth1At12,kmadd(gtu12,JacPDstandardNth1At22,kmadd(gtu13,JacPDstandardNth1At23,kmadd(gtu12,JacPDstandardNth2At12,kmadd(gtu22,JacPDstandardNth2At22,kmadd(gtu23,JacPDstandardNth2At23,kmadd(gtu13,JacPDstandardNth3At12,kmadd(gtu23,JacPDstandardNth3At22,kmadd(gtu33,JacPDstandardNth3At23,kmadd(At22L,kmul(Gt222,kmul(gtu22,ToReal(-2))),knmsub(At11L,kmadd(Gt112,gtu11,kmadd(Gt122,gtu12,kmul(Gt123,gtu13))),knmsub(gtu11,kmadd(At22L,Gt211,kmadd(At13L,Gt312,kmul(At23L,Gt311))),kmadd(Gt212,kmsub(At22L,kmul(gtu12,ToReal(-3)),kmul(At23L,gtu13)),kmadd(Gt312,kmsub(At23L,kmul(gtu12,ToReal(-3)),kmul(At33L,gtu13)),kmadd(Gt322,kmsub(At23L,kmul(gtu22,ToReal(-2)),kmul(At33L,gtu23)),kmadd(Gt223,kmsub(At22L,kmul(gtu23,ToReal(-3)),kmul(At23L,gtu33)),kmadd(Gt323,kmsub(At23L,kmul(gtu23,ToReal(-3)),kmul(At33L,gtu33)),kmadd(At22L,kmsub(Gt213,kmul(gtu13,ToReal(-2)),kmul(Gt233,gtu33)),kmadd(At23L,kmsub(Gt313,kmul(gtu13,ToReal(-2)),kmadd(Gt333,gtu33,kmul(Gt222,gtu23))),kmadd(JacPDstandardNth2trK,ToReal(-0.666666666666666666666666666667),kmadd(At22L,kmul(cdphi2,kmul(gtu22,ToReal(6))),kmadd(At22L,kmul(cdphi3,kmul(gtu23,ToReal(6))),kmadd(gtu12,kmsub(At22L,kmul(cdphi1,ToReal(6)),kmul(At13L,Gt322)),kmadd(gtu13,kmsub(At23L,kmul(cdphi1,ToReal(6)),kmul(At13L,kadd(Gt323,Gt112))),kmadd(gtu23,kmsub(At23L,kmul(cdphi2,ToReal(6)),kmul(At13L,Gt122)),kmadd(gtu33,kmsub(At23L,kmul(cdphi3,ToReal(6)),kmul(At13L,Gt123)),kmadd(At12L,kmadd(Gt112,kmul(gtu12,ToReal(-3)),kmadd(Gt123,kmul(gtu23,ToReal(-3)),kmadd(Gt113,kmul(gtu13,ToReal(-2)),kmadd(Gt122,kmul(gtu22,ToReal(-2)),knmsub(Gt133,gtu33,kmadd(gtu11,ksub(kmsub(cdphi1,ToReal(6),Gt212),Gt111),kmadd(gtu12,kmsub(cdphi2,ToReal(6),Gt222),kmul(gtu13,kmsub(cdphi3,ToReal(6),Gt223))))))))),kmul(S2,kmul(ToReal(-8),ToReal(Pi)))))))))))))))))))))))))))))); CCTK_REAL_VEC M3L = - kmadd(gtu11,JacPDstandardNth1At13,kmadd(gtu22,JacPDstandardNth2At23,kmadd(gtu13,kadd(JacPDstandardNth1At33,JacPDstandardNth3At13),kmadd(gtu33,JacPDstandardNth3At33,kmadd(S3,ToReal(-25.13274122871834590770114706623602307358),kmadd(kmadd(kmadd(At23L,Gt211,kmadd(At12L,Gt213,kmul(At33L,Gt311))),gtu11,kmadd(kmadd(At22L,Gt213,kmul(At12L,kadd(Gt113,Gt223))),gtu12,kmadd(At11L,kmadd(Gt113,gtu11,kmadd(Gt123,gtu12,kmul(Gt133,gtu13))),kmadd(kmadd(At23L,Gt222,kmul(At22L,Gt223)),gtu22,kmul(At12L,kmadd(Gt233,gtu13,kmadd(Gt123,gtu22,kmul(Gt133,gtu23)))))))),ToReal(-1.),kmadd(gtu12,kadd(JacPDstandardNth1At23,kadd(JacPDstandardNth2At13,kmadd(At33L,kmul(Gt312,ToReal(-2.)),kmul(At23L,kmul(Gt313,ToReal(-1.)))))),kmadd(gtu23,kadd(JacPDstandardNth2At33,kadd(JacPDstandardNth3At23,kmadd(kmadd(At23L,Gt223,kmul(At33L,Gt323)),ToReal(-3.),kmul(kmadd(At22L,Gt233,kmul(At23L,Gt333)),ToReal(-1.))))),kmadd(JacPDstandardNth3trK,ToReal(-0.6666666666666666666666666666666666666667),kmadd(At33L,kmadd(Gt333,kmul(gtu33,ToReal(-2.)),kmadd(Gt322,kmul(gtu22,ToReal(-1.)),kmadd(kmadd(cdphi2,gtu23,kmul(cdphi3,gtu33)),ToReal(6.),kmul(gtu13,kmadd(Gt313,ToReal(-3.),kmul(cdphi1,ToReal(6.))))))),kmadd(At23L,kmadd(Gt213,kmul(gtu13,ToReal(-3.)),kmadd(Gt233,kmul(gtu33,ToReal(-2.)),kmadd(cdphi3,kmul(gtu23,ToReal(6.)),kmadd(gtu12,kmadd(Gt212,ToReal(-2.),kmul(cdphi1,ToReal(6.))),kmul(gtu22,kmadd(Gt323,ToReal(-1.),kmul(cdphi2,ToReal(6.)))))))),kmul(At13L,kmadd(Gt123,kmul(gtu23,ToReal(-3.)),kmadd(Gt133,kmul(gtu33,ToReal(-2.)),kmadd(Gt122,kmul(gtu22,ToReal(-1.)),kmadd(gtu11,kmadd(kadd(Gt111,Gt313),ToReal(-1.),kmul(cdphi1,ToReal(6.))),kmadd(gtu12,kmadd(Gt112,ToReal(-2.),kmadd(Gt323,ToReal(-1.),kmul(cdphi2,ToReal(6.)))),kmul(gtu13,kmadd(Gt113,ToReal(-3.),kmadd(Gt333,ToReal(-1.),kmul(cdphi3,ToReal(6.)))))))))))))))))))))); + kmadd(gtu11,JacPDstandardNth1At13,kmadd(gtu12,JacPDstandardNth1At23,kmadd(gtu13,JacPDstandardNth1At33,kmadd(gtu12,JacPDstandardNth2At13,kmadd(gtu22,JacPDstandardNth2At23,kmadd(gtu23,JacPDstandardNth2At33,kmadd(gtu13,JacPDstandardNth3At13,kmadd(gtu23,JacPDstandardNth3At23,kmadd(gtu33,JacPDstandardNth3At33,kmadd(At33L,kmul(Gt313,kmul(gtu13,ToReal(-3))),kmadd(At23L,kmul(Gt223,kmul(gtu23,ToReal(-3))),kmadd(At33L,kmul(Gt323,kmul(gtu23,ToReal(-3))),kmadd(At23L,kmul(Gt233,kmul(gtu33,ToReal(-2))),kmadd(At33L,kmul(Gt333,kmul(gtu33,ToReal(-2))),knmsub(At11L,kmadd(Gt113,gtu11,kmadd(Gt123,gtu12,kmul(Gt133,gtu13))),kmadd(gtu12,kmsub(At23L,kmul(Gt212,ToReal(-2)),kmul(At12L,Gt223)),knmsub(gtu11,kmadd(At23L,Gt211,kmadd(At33L,Gt311,kmul(At12L,Gt213))),kmadd(At33L,kmsub(Gt312,kmul(gtu12,ToReal(-2)),kmul(Gt322,gtu22)),kmadd(At23L,kmsub(Gt213,kmul(gtu13,ToReal(-3)),kmadd(Gt333,gtu23,kmul(Gt222,gtu22))),kmadd(JacPDstandardNth3trK,ToReal(-0.666666666666666666666666666667),kmadd(At33L,kmul(cdphi3,kmul(gtu33,ToReal(6))),kmadd(gtu13,kmsub(At33L,kmul(cdphi1,ToReal(6)),kmul(At12L,Gt233)),kmadd(gtu23,kmsub(At33L,kmul(cdphi2,ToReal(6)),kmul(At12L,Gt133)),kmadd(gtu23,kmsub(At23L,kmul(cdphi3,ToReal(6)),kmul(At22L,Gt233)),kmadd(gtu12,knmsub(At12L,Gt113,kmsub(At23L,kmsub(cdphi1,ToReal(6),Gt313),kmul(At22L,Gt213))),kmadd(gtu22,knmsub(At12L,Gt123,kmsub(At23L,kmsub(cdphi2,ToReal(6),Gt323),kmul(At22L,Gt223))),kmadd(At13L,kmadd(Gt113,kmul(gtu13,ToReal(-3)),kmadd(Gt123,kmul(gtu23,ToReal(-3)),kmadd(Gt112,kmul(gtu12,ToReal(-2)),kmadd(Gt133,kmul(gtu33,ToReal(-2)),knmsub(Gt122,gtu22,kmadd(gtu11,ksub(kmsub(cdphi1,ToReal(6),Gt313),Gt111),kmadd(gtu12,kmsub(cdphi2,ToReal(6),Gt323),kmul(gtu13,kmsub(cdphi3,ToReal(6),Gt333))))))))),kmul(S3,kmul(ToReal(-8),ToReal(Pi)))))))))))))))))))))))))))))); CCTK_REAL_VEC cSL = klog(detgt); @@ -930,64 +932,18 @@ static void ML_BSSN_constraints2_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC cAL = kmadd(At11L,gtu11,kmadd(At22L,gtu22,kmadd(At33L,gtu33,kmul(kmadd(At12L,gtu12,kmadd(At13L,gtu13,kmul(At23L,gtu23))),ToReal(2))))); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(cA[index],cAL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cS[index],cSL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cXt1[index],cXt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cXt2[index],cXt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cXt3[index],cXt3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(M1[index],M1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(M2[index],M2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(M3[index],M3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(cA[index],cAL,elt_count); - vec_store_nta_partial_hi(cS[index],cSL,elt_count); - vec_store_nta_partial_hi(cXt1[index],cXt1L,elt_count); - vec_store_nta_partial_hi(cXt2[index],cXt2L,elt_count); - vec_store_nta_partial_hi(cXt3[index],cXt3L,elt_count); - vec_store_nta_partial_hi(M1[index],M1L,elt_count); - vec_store_nta_partial_hi(M2[index],M2L,elt_count); - vec_store_nta_partial_hi(M3[index],M3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(cA[index],cAL,elt_count); - vec_store_nta_partial_lo(cS[index],cSL,elt_count); - vec_store_nta_partial_lo(cXt1[index],cXt1L,elt_count); - vec_store_nta_partial_lo(cXt2[index],cXt2L,elt_count); - vec_store_nta_partial_lo(cXt3[index],cXt3L,elt_count); - vec_store_nta_partial_lo(M1[index],M1L,elt_count); - vec_store_nta_partial_lo(M2[index],M2L,elt_count); - vec_store_nta_partial_lo(M3[index],M3L,elt_count); - break; - } - vec_store_nta(cA[index],cAL); - vec_store_nta(cS[index],cSL); - vec_store_nta(cXt1[index],cXt1L); - vec_store_nta(cXt2[index],cXt2L); - vec_store_nta(cXt3[index],cXt3L); - vec_store_nta(M1[index],M1L); - vec_store_nta(M2[index],M2L); - vec_store_nta(M3[index],M3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(cA[index],cAL); + vec_store_nta_partial(cS[index],cSL); + vec_store_nta_partial(cXt1[index],cXt1L); + vec_store_nta_partial(cXt2[index],cXt2L); + vec_store_nta_partial(cXt3[index],cXt3L); + vec_store_nta_partial(M1[index],M1L); + vec_store_nta_partial(M2[index],M2L); + vec_store_nta_partial(M3[index],M3L); } - LC_ENDLOOP3VEC (ML_BSSN_constraints2); + LC_ENDLOOP3VEC(ML_BSSN_constraints2); } extern "C" void ML_BSSN_constraints2(CCTK_ARGUMENTS) @@ -1006,7 +962,18 @@ extern "C" void ML_BSSN_constraints2(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN::ML_cons_detg","ML_BSSN::ML_cons_Gamma","ML_BSSN::ML_cons_traceA","ML_BSSN::ML_curv","ML_BSSN::ML_Gamma","ML_BSSN::ML_lapse","ML_BSSN::ML_log_confac","ML_BSSN::ML_metric","ML_BSSN::ML_mom","ML_BSSN::ML_shift","ML_BSSN::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN::ML_cons_detg", + "ML_BSSN::ML_cons_Gamma", + "ML_BSSN::ML_cons_traceA", + "ML_BSSN::ML_curv", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_log_confac", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_mom", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_constraints2", 11, groups); switch(fdOrder) @@ -1028,7 +995,7 @@ extern "C" void ML_BSSN_constraints2(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_constraints2_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_constraints2_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_convertFromADMBase.cc b/ML_BSSN/src/ML_BSSN_convertFromADMBase.cc index b4da07f..96db9f9 100644 --- a/ML_BSSN/src/ML_BSSN_convertFromADMBase.cc +++ b/ML_BSSN/src/ML_BSSN_convertFromADMBase.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_convertFromADMBase_Body(cGH const * restrict const cctkGH, i DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_convertFromADMBase_Body(cGH const * restrict const cctkGH, i CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_convertFromADMBase_Body(cGH const * restrict const cctkGH, i CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_convertFromADMBase_Body(cGH const * restrict const cctkGH, i CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_convertFromADMBase_Body(cGH const * restrict const cctkGH, i /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_convertFromADMBase, + LC_LOOP3VEC(ML_BSSN_convertFromADMBase, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -296,25 +295,25 @@ static void ML_BSSN_convertFromADMBase_Body(cGH const * restrict const cctkGH, i CCTK_REAL_VEC gt33L = kmul(em4phi,g33); trKL = - kmadd(gu11,kxxL,kmadd(gu22,kyyL,kmadd(gu33,kzzL,kmul(kmadd(gu12,kxyL,kmadd(gu13,kxzL,kmul(gu23,kyzL))),ToReal(2))))); + kmadd(kxxL,gu11,kmadd(kyyL,gu22,kmadd(kzzL,gu33,kmul(kmadd(kxyL,gu12,kmadd(kxzL,gu13,kmul(kyzL,gu23))),ToReal(2))))); CCTK_REAL_VEC At11L = - kmul(em4phi,kmadd(g11,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kxxL)); + kmul(em4phi,kmadd(trKL,kmul(g11,ToReal(-0.333333333333333333333333333333)),kxxL)); CCTK_REAL_VEC At12L = - kmul(em4phi,kmadd(g12,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kxyL)); + kmul(em4phi,kmadd(trKL,kmul(g12,ToReal(-0.333333333333333333333333333333)),kxyL)); CCTK_REAL_VEC At13L = - kmul(em4phi,kmadd(g13,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kxzL)); + kmul(em4phi,kmadd(trKL,kmul(g13,ToReal(-0.333333333333333333333333333333)),kxzL)); CCTK_REAL_VEC At22L = - kmul(em4phi,kmadd(g22,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kyyL)); + kmul(em4phi,kmadd(trKL,kmul(g22,ToReal(-0.333333333333333333333333333333)),kyyL)); CCTK_REAL_VEC At23L = - kmul(em4phi,kmadd(g23,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kyzL)); + kmul(em4phi,kmadd(trKL,kmul(g23,ToReal(-0.333333333333333333333333333333)),kyzL)); CCTK_REAL_VEC At33L = - kmul(em4phi,kmadd(g33,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kzzL)); + kmul(em4phi,kmadd(trKL,kmul(g33,ToReal(-0.333333333333333333333333333333)),kzzL)); CCTK_REAL_VEC alphaL = alpL; @@ -324,104 +323,28 @@ static void ML_BSSN_convertFromADMBase_Body(cGH const * restrict const cctkGH, i CCTK_REAL_VEC beta3L = betazL; - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - vec_store_nta_partial_hi(beta1[index],beta1L,elt_count); - vec_store_nta_partial_hi(beta2[index],beta2L,elt_count); - vec_store_nta_partial_hi(beta3[index],beta3L,elt_count); - vec_store_nta_partial_hi(gt11[index],gt11L,elt_count); - vec_store_nta_partial_hi(gt12[index],gt12L,elt_count); - vec_store_nta_partial_hi(gt13[index],gt13L,elt_count); - vec_store_nta_partial_hi(gt22[index],gt22L,elt_count); - vec_store_nta_partial_hi(gt23[index],gt23L,elt_count); - vec_store_nta_partial_hi(gt33[index],gt33L,elt_count); - vec_store_nta_partial_hi(phi[index],phiL,elt_count); - vec_store_nta_partial_hi(trK[index],trKL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - vec_store_nta_partial_lo(beta1[index],beta1L,elt_count); - vec_store_nta_partial_lo(beta2[index],beta2L,elt_count); - vec_store_nta_partial_lo(beta3[index],beta3L,elt_count); - vec_store_nta_partial_lo(gt11[index],gt11L,elt_count); - vec_store_nta_partial_lo(gt12[index],gt12L,elt_count); - vec_store_nta_partial_lo(gt13[index],gt13L,elt_count); - vec_store_nta_partial_lo(gt22[index],gt22L,elt_count); - vec_store_nta_partial_lo(gt23[index],gt23L,elt_count); - vec_store_nta_partial_lo(gt33[index],gt33L,elt_count); - vec_store_nta_partial_lo(phi[index],phiL,elt_count); - vec_store_nta_partial_lo(trK[index],trKL,elt_count); - break; - } - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); - vec_store_nta(beta1[index],beta1L); - vec_store_nta(beta2[index],beta2L); - vec_store_nta(beta3[index],beta3L); - vec_store_nta(gt11[index],gt11L); - vec_store_nta(gt12[index],gt12L); - vec_store_nta(gt13[index],gt13L); - vec_store_nta(gt22[index],gt22L); - vec_store_nta(gt23[index],gt23L); - vec_store_nta(gt33[index],gt33L); - vec_store_nta(phi[index],phiL); - vec_store_nta(trK[index],trKL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); + vec_store_nta_partial(beta1[index],beta1L); + vec_store_nta_partial(beta2[index],beta2L); + vec_store_nta_partial(beta3[index],beta3L); + vec_store_nta_partial(gt11[index],gt11L); + vec_store_nta_partial(gt12[index],gt12L); + vec_store_nta_partial(gt13[index],gt13L); + vec_store_nta_partial(gt22[index],gt22L); + vec_store_nta_partial(gt23[index],gt23L); + vec_store_nta_partial(gt33[index],gt33L); + vec_store_nta_partial(phi[index],phiL); + vec_store_nta_partial(trK[index],trKL); } - LC_ENDLOOP3VEC (ML_BSSN_convertFromADMBase); + LC_ENDLOOP3VEC(ML_BSSN_convertFromADMBase); } extern "C" void ML_BSSN_convertFromADMBase(CCTK_ARGUMENTS) @@ -440,7 +363,17 @@ extern "C" void ML_BSSN_convertFromADMBase(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::curv","ADMBase::lapse","ADMBase::metric","ADMBase::shift","ML_BSSN::ML_curv","ML_BSSN::ML_lapse","ML_BSSN::ML_log_confac","ML_BSSN::ML_metric","ML_BSSN::ML_shift","ML_BSSN::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::curv", + "ADMBase::lapse", + "ADMBase::metric", + "ADMBase::shift", + "ML_BSSN::ML_curv", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_log_confac", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_convertFromADMBase", 10, groups); switch(fdOrder) @@ -458,7 +391,7 @@ extern "C" void ML_BSSN_convertFromADMBase(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_convertFromADMBase_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_convertFromADMBase_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_convertFromADMBaseGamma.cc b/ML_BSSN/src/ML_BSSN_convertFromADMBaseGamma.cc index 04c86f5..37a355b 100644 --- a/ML_BSSN/src/ML_BSSN_convertFromADMBaseGamma.cc +++ b/ML_BSSN/src/ML_BSSN_convertFromADMBaseGamma.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -46,8 +47,6 @@ static void ML_BSSN_convertFromADMBaseGamma_Body(cGH const * restrict const cctk DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -84,9 +83,9 @@ static void ML_BSSN_convertFromADMBaseGamma_Body(cGH const * restrict const cctk CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -105,14 +104,14 @@ static void ML_BSSN_convertFromADMBaseGamma_Body(cGH const * restrict const cctk CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -126,9 +125,9 @@ static void ML_BSSN_convertFromADMBaseGamma_Body(cGH const * restrict const cctk CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -209,7 +208,7 @@ static void ML_BSSN_convertFromADMBaseGamma_Body(cGH const * restrict const cctk /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_convertFromADMBaseGamma, + LC_LOOP3VEC(ML_BSSN_convertFromADMBaseGamma, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -746,7 +745,8 @@ static void ML_BSSN_convertFromADMBaseGamma_Body(cGH const * restrict const cctk CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -754,12 +754,14 @@ static void ML_BSSN_convertFromADMBaseGamma_Body(cGH const * restrict const cctk CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC Gt111 = kmul(ToReal(0.5),kmadd(gtu11,JacPDstandardNth1gt11,knmsub(gtu12,JacPDstandardNth2gt11,kmsub(kmadd(gtu12,JacPDstandardNth1gt12,kmul(gtu13,JacPDstandardNth1gt13)),ToReal(2),kmul(gtu13,JacPDstandardNth3gt11))))); @@ -837,13 +839,13 @@ static void ML_BSSN_convertFromADMBaseGamma_Body(cGH const * restrict const cctk if (ShiftBCoeff*ShiftGammaCoeff != 0) { B1L = - kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta1,kmadd(beta2L,JacPDupwindNthAnti2beta1,kmadd(beta3L,JacPDupwindNthAnti3beta1,kmadd(JacPDupwindNthSymm1beta1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta1,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetaxL))); + kmul(INV(kmul(theta,ToReal(ShiftGammaCoeff))),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta1,kmadd(beta2L,JacPDupwindNthAnti2beta1,kmadd(beta3L,JacPDupwindNthAnti3beta1,kmadd(JacPDupwindNthSymm1beta1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta1,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetaxL)); B2L = - kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta2,kmadd(beta2L,JacPDupwindNthAnti2beta2,kmadd(beta3L,JacPDupwindNthAnti3beta2,kmadd(JacPDupwindNthSymm1beta2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta2,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetayL))); + kmul(INV(kmul(theta,ToReal(ShiftGammaCoeff))),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta2,kmadd(beta2L,JacPDupwindNthAnti2beta2,kmadd(beta3L,JacPDupwindNthAnti3beta2,kmadd(JacPDupwindNthSymm1beta2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta2,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetayL)); B3L = - kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetazL))); + kmul(INV(kmul(theta,ToReal(ShiftGammaCoeff))),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetazL)); } else { @@ -854,60 +856,17 @@ static void ML_BSSN_convertFromADMBaseGamma_Body(cGH const * restrict const cctk B3L = ToReal(0); } - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_convertFromADMBaseGamma); + LC_ENDLOOP3VEC(ML_BSSN_convertFromADMBaseGamma); } extern "C" void ML_BSSN_convertFromADMBaseGamma(CCTK_ARGUMENTS) @@ -926,7 +885,17 @@ extern "C" void ML_BSSN_convertFromADMBaseGamma(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN::ML_dtlapse","ML_BSSN::ML_dtshift","ML_BSSN::ML_Gamma","ML_BSSN::ML_lapse","ML_BSSN::ML_metric","ML_BSSN::ML_shift"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN::ML_dtlapse", + "ML_BSSN::ML_dtshift", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_shift"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_convertFromADMBaseGamma", 10, groups); switch(fdOrder) @@ -948,7 +917,7 @@ extern "C" void ML_BSSN_convertFromADMBaseGamma(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_convertFromADMBaseGamma_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_convertFromADMBaseGamma_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_convertToADMBase.cc b/ML_BSSN/src/ML_BSSN_convertToADMBase.cc index 726085e..de1d9df 100644 --- a/ML_BSSN/src/ML_BSSN_convertToADMBase.cc +++ b/ML_BSSN/src/ML_BSSN_convertToADMBase.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_convertToADMBase_Body(cGH const * restrict const cctkGH, int DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_convertToADMBase_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_convertToADMBase_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_convertToADMBase_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_convertToADMBase_Body(cGH const * restrict const cctkGH, int /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_convertToADMBase, + LC_LOOP3VEC(ML_BSSN_convertToADMBase, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -250,17 +249,17 @@ static void ML_BSSN_convertToADMBase_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC e4phi = IfThen(conformalMethod,INV(SQR(phiL)),kexp(kmul(phiL,ToReal(4)))); - gxxL = kmul(e4phi,gt11L); + gxxL = kmul(gt11L,e4phi); - gxyL = kmul(e4phi,gt12L); + gxyL = kmul(gt12L,e4phi); - gxzL = kmul(e4phi,gt13L); + gxzL = kmul(gt13L,e4phi); - gyyL = kmul(e4phi,gt22L); + gyyL = kmul(gt22L,e4phi); - gyzL = kmul(e4phi,gt23L); + gyzL = kmul(gt23L,e4phi); - gzzL = kmul(e4phi,gt33L); + gzzL = kmul(gt33L,e4phi); CCTK_REAL_VEC kxxL = kmadd(At11L,e4phi,kmul(gxxL,kmul(trKL,ToReal(0.333333333333333333333333333333)))); @@ -288,96 +287,26 @@ static void ML_BSSN_convertToADMBase_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC betazL = beta3L; - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alp[index],alpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(betax[index],betaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(betay[index],betayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(betaz[index],betazL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gxx[index],gxxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gxy[index],gxyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gxz[index],gxzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gyy[index],gyyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gyz[index],gyzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gzz[index],gzzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kxx[index],kxxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kxy[index],kxyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kxz[index],kxzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kyy[index],kyyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kyz[index],kyzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kzz[index],kzzL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alp[index],alpL,elt_count); - vec_store_nta_partial_hi(betax[index],betaxL,elt_count); - vec_store_nta_partial_hi(betay[index],betayL,elt_count); - vec_store_nta_partial_hi(betaz[index],betazL,elt_count); - vec_store_nta_partial_hi(gxx[index],gxxL,elt_count); - vec_store_nta_partial_hi(gxy[index],gxyL,elt_count); - vec_store_nta_partial_hi(gxz[index],gxzL,elt_count); - vec_store_nta_partial_hi(gyy[index],gyyL,elt_count); - vec_store_nta_partial_hi(gyz[index],gyzL,elt_count); - vec_store_nta_partial_hi(gzz[index],gzzL,elt_count); - vec_store_nta_partial_hi(kxx[index],kxxL,elt_count); - vec_store_nta_partial_hi(kxy[index],kxyL,elt_count); - vec_store_nta_partial_hi(kxz[index],kxzL,elt_count); - vec_store_nta_partial_hi(kyy[index],kyyL,elt_count); - vec_store_nta_partial_hi(kyz[index],kyzL,elt_count); - vec_store_nta_partial_hi(kzz[index],kzzL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alp[index],alpL,elt_count); - vec_store_nta_partial_lo(betax[index],betaxL,elt_count); - vec_store_nta_partial_lo(betay[index],betayL,elt_count); - vec_store_nta_partial_lo(betaz[index],betazL,elt_count); - vec_store_nta_partial_lo(gxx[index],gxxL,elt_count); - vec_store_nta_partial_lo(gxy[index],gxyL,elt_count); - vec_store_nta_partial_lo(gxz[index],gxzL,elt_count); - vec_store_nta_partial_lo(gyy[index],gyyL,elt_count); - vec_store_nta_partial_lo(gyz[index],gyzL,elt_count); - vec_store_nta_partial_lo(gzz[index],gzzL,elt_count); - vec_store_nta_partial_lo(kxx[index],kxxL,elt_count); - vec_store_nta_partial_lo(kxy[index],kxyL,elt_count); - vec_store_nta_partial_lo(kxz[index],kxzL,elt_count); - vec_store_nta_partial_lo(kyy[index],kyyL,elt_count); - vec_store_nta_partial_lo(kyz[index],kyzL,elt_count); - vec_store_nta_partial_lo(kzz[index],kzzL,elt_count); - break; - } - vec_store_nta(alp[index],alpL); - vec_store_nta(betax[index],betaxL); - vec_store_nta(betay[index],betayL); - vec_store_nta(betaz[index],betazL); - vec_store_nta(gxx[index],gxxL); - vec_store_nta(gxy[index],gxyL); - vec_store_nta(gxz[index],gxzL); - vec_store_nta(gyy[index],gyyL); - vec_store_nta(gyz[index],gyzL); - vec_store_nta(gzz[index],gzzL); - vec_store_nta(kxx[index],kxxL); - vec_store_nta(kxy[index],kxyL); - vec_store_nta(kxz[index],kxzL); - vec_store_nta(kyy[index],kyyL); - vec_store_nta(kyz[index],kyzL); - vec_store_nta(kzz[index],kzzL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alp[index],alpL); + vec_store_nta_partial(betax[index],betaxL); + vec_store_nta_partial(betay[index],betayL); + vec_store_nta_partial(betaz[index],betazL); + vec_store_nta_partial(gxx[index],gxxL); + vec_store_nta_partial(gxy[index],gxyL); + vec_store_nta_partial(gxz[index],gxzL); + vec_store_nta_partial(gyy[index],gyyL); + vec_store_nta_partial(gyz[index],gyzL); + vec_store_nta_partial(gzz[index],gzzL); + vec_store_nta_partial(kxx[index],kxxL); + vec_store_nta_partial(kxy[index],kxyL); + vec_store_nta_partial(kxz[index],kxzL); + vec_store_nta_partial(kyy[index],kyyL); + vec_store_nta_partial(kyz[index],kyzL); + vec_store_nta_partial(kzz[index],kzzL); } - LC_ENDLOOP3VEC (ML_BSSN_convertToADMBase); + LC_ENDLOOP3VEC(ML_BSSN_convertToADMBase); } extern "C" void ML_BSSN_convertToADMBase(CCTK_ARGUMENTS) @@ -396,7 +325,17 @@ extern "C" void ML_BSSN_convertToADMBase(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::curv","ADMBase::lapse","ADMBase::metric","ADMBase::shift","ML_BSSN::ML_curv","ML_BSSN::ML_lapse","ML_BSSN::ML_log_confac","ML_BSSN::ML_metric","ML_BSSN::ML_shift","ML_BSSN::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::curv", + "ADMBase::lapse", + "ADMBase::metric", + "ADMBase::shift", + "ML_BSSN::ML_curv", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_log_confac", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_convertToADMBase", 10, groups); switch(fdOrder) @@ -414,7 +353,7 @@ extern "C" void ML_BSSN_convertToADMBase(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_convertToADMBase_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_convertToADMBase_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_convertToADMBaseDtLapseShift.cc b/ML_BSSN/src/ML_BSSN_convertToADMBaseDtLapseShift.cc index 2abc29e..8af669c 100644 --- a/ML_BSSN/src/ML_BSSN_convertToADMBaseDtLapseShift.cc +++ b/ML_BSSN/src/ML_BSSN_convertToADMBaseDtLapseShift.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -43,8 +44,6 @@ static void ML_BSSN_convertToADMBaseDtLapseShift_Body(cGH const * restrict const DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -81,9 +80,9 @@ static void ML_BSSN_convertToADMBaseDtLapseShift_Body(cGH const * restrict const CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -102,14 +101,14 @@ static void ML_BSSN_convertToADMBaseDtLapseShift_Body(cGH const * restrict const CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -123,9 +122,9 @@ static void ML_BSSN_convertToADMBaseDtLapseShift_Body(cGH const * restrict const CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -206,7 +205,7 @@ static void ML_BSSN_convertToADMBaseDtLapseShift_Body(cGH const * restrict const /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_convertToADMBaseDtLapseShift, + LC_LOOP3VEC(ML_BSSN_convertToADMBaseDtLapseShift, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -814,7 +813,8 @@ static void ML_BSSN_convertToADMBaseDtLapseShift_Body(cGH const * restrict const CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -822,15 +822,17 @@ static void ML_BSSN_convertToADMBaseDtLapseShift_Body(cGH const * restrict const CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC eta = - kfmin(ToReal(1),kmul(INV(rL),ToReal(SpatialBetaDriverRadius))); + kmul(INV(kfmax(rL,ToReal(SpatialBetaDriverRadius))),ToReal(SpatialBetaDriverRadius)); CCTK_REAL_VEC theta = kfmin(ToReal(1),kexp(knmsub(rL,INV(ToReal(SpatialShiftGammaCoeffRadius)),ToReal(1)))); @@ -839,62 +841,22 @@ static void ML_BSSN_convertToADMBaseDtLapseShift_Body(cGH const * restrict const kmsub(kmadd(beta1L,JacPDupwindNthAnti1alpha,kmadd(beta2L,JacPDupwindNthAnti2alpha,kmadd(beta3L,JacPDupwindNthAnti3alpha,kmadd(JacPDupwindNthSymm1alpha,kfabs(beta1L),kmadd(JacPDupwindNthSymm2alpha,kfabs(beta2L),kmul(JacPDupwindNthSymm3alpha,kfabs(beta3L))))))),ToReal(LapseAdvectionCoeff),kmul(kpow(alphaL,harmonicN),kmul(ToReal(harmonicF),kmadd(ksub(AL,trKL),ToReal(LapseACoeff),trKL)))); CCTK_REAL_VEC dtbetaxL = - kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta1,kmadd(beta2L,JacPDupwindNthAnti2beta1,kmadd(beta3L,JacPDupwindNthAnti3beta1,kmadd(JacPDupwindNthSymm1beta1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta1,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(kmadd(gtu11,JacPDstandardNth1alpha,kmadd(gtu12,JacPDstandardNth2alpha,kmul(gtu13,JacPDstandardNth3alpha))),kmul(phiL,ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth1gt11,SQR(gtu11),kmul(JacPDstandardNth1gt22,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu13,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu13,JacPDstandardNth1gt33,kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu22,JacPDstandardNth3gt22))))),kmadd(gtu11,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu12,JacPDstandardNth2gt11,kmadd(gtu13,JacPDstandardNth3gt11,kmadd(gtu23,kmul(JacPDstandardNth1gt23,ToReal(-2)),knmsub(gtu22,JacPDstandardNth1gt22,kmadd(kmadd(gtu12,JacPDstandardNth1gt12,kmadd(gtu13,JacPDstandardNth1gt13,kmul(gtu22,JacPDstandardNth2gt12))),ToReal(2),kmadd(gtu23,kmul(JacPDstandardNth2gt13,ToReal(2)),kmadd(gtu23,kmul(JacPDstandardNth3gt12,ToReal(2)),kmul(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33))))))))))),kmul(gtu12,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu23,kmul(JacPDstandardNth3gt22,ToReal(2)),kmadd(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33),kmul(gtu13,kmul(JacPDstandardNth1gt23,ToReal(4))))))))))))))))),kmul(theta,kmul(kadd(Xt1L,kmadd(beta1L,kmul(eta,ToReal(BetaDriver*(-1 - + - ShiftBCoeff))),kmul(ksub(B1L,Xt1L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))))); + kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta1,kmadd(beta2L,JacPDupwindNthAnti2beta1,kmadd(beta3L,JacPDupwindNthAnti3beta1,kmadd(JacPDupwindNthSymm1beta1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta1,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu11,JacPDstandardNth1alpha,kmadd(gtu12,JacPDstandardNth2alpha,kmul(gtu13,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth1gt11,SQR(gtu11),kmul(JacPDstandardNth1gt22,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu13,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu13,JacPDstandardNth1gt33,kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu22,JacPDstandardNth3gt22))))),kmadd(gtu11,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu12,JacPDstandardNth2gt11,kmadd(gtu13,JacPDstandardNth3gt11,kmadd(gtu23,kmul(JacPDstandardNth1gt23,ToReal(-2)),knmsub(gtu22,JacPDstandardNth1gt22,kmadd(kmadd(gtu12,JacPDstandardNth1gt12,kmadd(gtu13,JacPDstandardNth1gt13,kmul(gtu22,JacPDstandardNth2gt12))),ToReal(2),kmadd(gtu23,kmul(JacPDstandardNth2gt13,ToReal(2)),kmadd(gtu23,kmul(JacPDstandardNth3gt12,ToReal(2)),kmul(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33))))))))))),kmul(gtu12,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu23,kmul(JacPDstandardNth3gt22,ToReal(2)),kmadd(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33),kmul(gtu13,kmul(JacPDstandardNth1gt23,ToReal(4))))))))))))))))),kmul(theta,kmul(kadd(Xt1L,kmadd(ksub(B1L,Xt1L),ToReal(ShiftBCoeff),kmul(beta1L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))))); CCTK_REAL_VEC dtbetayL = - kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta2,kmadd(beta2L,JacPDupwindNthAnti2beta2,kmadd(beta3L,JacPDupwindNthAnti3beta2,kmadd(JacPDupwindNthSymm1beta2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta2,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(kmadd(gtu12,JacPDstandardNth1alpha,kmadd(gtu22,JacPDstandardNth2alpha,kmul(gtu23,JacPDstandardNth3alpha))),kmul(phiL,ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth2gt22,SQR(gtu22),kmul(JacPDstandardNth2gt11,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu23,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt13,kmadd(gtu13,JacPDstandardNth1gt33,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu11,JacPDstandardNth3gt11))))),kmadd(gtu22,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu23,JacPDstandardNth3gt22,kmadd(kmadd(gtu23,JacPDstandardNth2gt23,kmul(gtu13,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth3gt12,JacPDstandardNth2gt13)))),ToReal(2),kmadd(gtu11,kmsub(JacPDstandardNth1gt12,ToReal(2),JacPDstandardNth2gt11),kmul(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33))))))),kmul(gtu12,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu13,kmul(JacPDstandardNth3gt11,ToReal(2)),kmadd(gtu22,kmadd(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth2gt13,ToReal(4)))))))))))))))))),kmul(theta,kmul(kadd(Xt2L,kmadd(beta2L,kmul(eta,ToReal(BetaDriver*(-1 - + - ShiftBCoeff))),kmul(ksub(B2L,Xt2L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))))); + kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta2,kmadd(beta2L,JacPDupwindNthAnti2beta2,kmadd(beta3L,JacPDupwindNthAnti3beta2,kmadd(JacPDupwindNthSymm1beta2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta2,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu12,JacPDstandardNth1alpha,kmadd(gtu22,JacPDstandardNth2alpha,kmul(gtu23,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth2gt22,SQR(gtu22),kmul(JacPDstandardNth2gt11,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu23,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt13,kmadd(gtu13,JacPDstandardNth1gt33,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu11,JacPDstandardNth3gt11))))),kmadd(gtu22,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu23,JacPDstandardNth3gt22,kmadd(kmadd(gtu23,JacPDstandardNth2gt23,kmul(gtu13,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth3gt12,JacPDstandardNth2gt13)))),ToReal(2),kmadd(gtu11,kmsub(JacPDstandardNth1gt12,ToReal(2),JacPDstandardNth2gt11),kmul(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33))))))),kmul(gtu12,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu13,kmul(JacPDstandardNth3gt11,ToReal(2)),kmadd(gtu22,kmadd(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth2gt13,ToReal(4)))))))))))))))))),kmul(theta,kmul(kadd(Xt2L,kmadd(ksub(B2L,Xt2L),ToReal(ShiftBCoeff),kmul(beta2L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))))); CCTK_REAL_VEC dtbetazL = - kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(kmadd(gtu13,JacPDstandardNth1alpha,kmadd(gtu23,JacPDstandardNth2alpha,kmul(gtu33,JacPDstandardNth3alpha))),kmul(phiL,ToReal(-2)),kmul(alphaL,kmadd(phiL,kmul(kmadd(JacPDstandardNth3gt11,SQR(gtu13),kmul(JacPDstandardNth3gt22,SQR(gtu23))),ToReal(2)),kmadd(gtu23,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu33,JacPDstandardNth2gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt12,kmadd(gtu12,JacPDstandardNth1gt22,kmul(gtu33,JacPDstandardNth3gt23))),ToReal(2),kmul(gtu11,JacPDstandardNth2gt11)))))),kmadd(gtu33,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,knmsub(gtu22,JacPDstandardNth3gt22,kmadd(kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu12,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth2gt13,JacPDstandardNth3gt12)))),ToReal(2),kmul(gtu11,kmsub(JacPDstandardNth1gt13,ToReal(2),JacPDstandardNth3gt11))))))),kmul(gtu13,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu12,kmul(JacPDstandardNth2gt11,ToReal(2)),kmadd(gtu22,kmsub(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmadd(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth3gt12,ToReal(4)))))))))))))))))),kmul(theta,kmul(kadd(Xt3L,kmadd(beta3L,kmul(eta,ToReal(BetaDriver*(-1 - + - ShiftBCoeff))),kmul(ksub(B3L,Xt3L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))))); - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count); - break; - } - vec_store_nta(dtalp[index],dtalpL); - vec_store_nta(dtbetax[index],dtbetaxL); - vec_store_nta(dtbetay[index],dtbetayL); - vec_store_nta(dtbetaz[index],dtbetazL); + kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu13,JacPDstandardNth1alpha,kmadd(gtu23,JacPDstandardNth2alpha,kmul(gtu33,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmul(kmadd(JacPDstandardNth3gt11,SQR(gtu13),kmul(JacPDstandardNth3gt22,SQR(gtu23))),ToReal(2)),kmadd(gtu23,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu33,JacPDstandardNth2gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt12,kmadd(gtu12,JacPDstandardNth1gt22,kmul(gtu33,JacPDstandardNth3gt23))),ToReal(2),kmul(gtu11,JacPDstandardNth2gt11)))))),kmadd(gtu33,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,knmsub(gtu22,JacPDstandardNth3gt22,kmadd(kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu12,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth2gt13,JacPDstandardNth3gt12)))),ToReal(2),kmul(gtu11,kmsub(JacPDstandardNth1gt13,ToReal(2),JacPDstandardNth3gt11))))))),kmul(gtu13,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu12,kmul(JacPDstandardNth2gt11,ToReal(2)),kmadd(gtu22,kmsub(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmadd(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth3gt12,ToReal(4)))))))))))))))))),kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))))); + + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(dtalp[index],dtalpL); + vec_store_nta_partial(dtbetax[index],dtbetaxL); + vec_store_nta_partial(dtbetay[index],dtbetayL); + vec_store_nta_partial(dtbetaz[index],dtbetazL); } - LC_ENDLOOP3VEC (ML_BSSN_convertToADMBaseDtLapseShift); + LC_ENDLOOP3VEC(ML_BSSN_convertToADMBaseDtLapseShift); } extern "C" void ML_BSSN_convertToADMBaseDtLapseShift(CCTK_ARGUMENTS) @@ -913,7 +875,19 @@ extern "C" void ML_BSSN_convertToADMBaseDtLapseShift(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN::ML_dtlapse","ML_BSSN::ML_dtshift","ML_BSSN::ML_Gamma","ML_BSSN::ML_lapse","ML_BSSN::ML_log_confac","ML_BSSN::ML_metric","ML_BSSN::ML_shift","ML_BSSN::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN::ML_dtlapse", + "ML_BSSN::ML_dtshift", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_log_confac", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_convertToADMBaseDtLapseShift", 12, groups); switch(fdOrder) @@ -935,7 +909,7 @@ extern "C" void ML_BSSN_convertToADMBaseDtLapseShift(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_convertToADMBaseDtLapseShift_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_convertToADMBaseDtLapseShift_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_convertToADMBaseDtLapseShiftBoundary.cc b/ML_BSSN/src/ML_BSSN_convertToADMBaseDtLapseShiftBoundary.cc index a957b55..67e687d 100644 --- a/ML_BSSN/src/ML_BSSN_convertToADMBaseDtLapseShiftBoundary.cc +++ b/ML_BSSN/src/ML_BSSN_convertToADMBaseDtLapseShiftBoundary.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -43,8 +44,6 @@ static void ML_BSSN_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * restri DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -81,9 +80,9 @@ static void ML_BSSN_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * restri CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -102,14 +101,14 @@ static void ML_BSSN_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * restri CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -123,9 +122,9 @@ static void ML_BSSN_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * restri CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -206,7 +205,7 @@ static void ML_BSSN_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * restri /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_convertToADMBaseDtLapseShiftBoundary, + LC_LOOP3VEC(ML_BSSN_convertToADMBaseDtLapseShiftBoundary, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -252,7 +251,7 @@ static void ML_BSSN_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * restri /* Calculate temporaries and grid functions */ CCTK_REAL_VEC eta = - kfmin(ToReal(1),kmul(INV(rL),ToReal(SpatialBetaDriverRadius))); + kmul(INV(kfmax(rL,ToReal(SpatialBetaDriverRadius))),ToReal(SpatialBetaDriverRadius)); CCTK_REAL_VEC theta = kfmin(ToReal(1),kexp(knmsub(rL,INV(ToReal(SpatialShiftGammaCoeffRadius)),ToReal(1)))); @@ -275,60 +274,23 @@ static void ML_BSSN_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * restri else { dtbetaxL = - kmul(theta,kmul(kadd(Xt1L,kmadd(beta1L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B1L,Xt1L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt1L,kmadd(ksub(B1L,Xt1L),ToReal(ShiftBCoeff),kmul(beta1L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); dtbetayL = - kmul(theta,kmul(kadd(Xt2L,kmadd(beta2L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B2L,Xt2L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt2L,kmadd(ksub(B2L,Xt2L),ToReal(ShiftBCoeff),kmul(beta2L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); dtbetazL = - kmul(theta,kmul(kadd(Xt3L,kmadd(beta3L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B3L,Xt3L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count); - continue; + kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); } - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count); - break; - } - vec_store_nta(dtalp[index],dtalpL); - vec_store_nta(dtbetax[index],dtbetaxL); - vec_store_nta(dtbetay[index],dtbetayL); - vec_store_nta(dtbetaz[index],dtbetazL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(dtalp[index],dtalpL); + vec_store_nta_partial(dtbetax[index],dtbetaxL); + vec_store_nta_partial(dtbetay[index],dtbetayL); + vec_store_nta_partial(dtbetaz[index],dtbetazL); } - LC_ENDLOOP3VEC (ML_BSSN_convertToADMBaseDtLapseShiftBoundary); + LC_ENDLOOP3VEC(ML_BSSN_convertToADMBaseDtLapseShiftBoundary); } extern "C" void ML_BSSN_convertToADMBaseDtLapseShiftBoundary(CCTK_ARGUMENTS) @@ -347,7 +309,17 @@ extern "C" void ML_BSSN_convertToADMBaseDtLapseShiftBoundary(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN::ML_dtlapse","ML_BSSN::ML_dtshift","ML_BSSN::ML_Gamma","ML_BSSN::ML_lapse","ML_BSSN::ML_shift","ML_BSSN::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN::ML_dtlapse", + "ML_BSSN::ML_dtshift", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_convertToADMBaseDtLapseShiftBoundary", 10, groups); switch(fdOrder) @@ -365,7 +337,7 @@ extern "C" void ML_BSSN_convertToADMBaseDtLapseShiftBoundary(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverBoundaryWithGhosts(cctkGH, &ML_BSSN_convertToADMBaseDtLapseShiftBoundary_Body); + GenericFD_LoopOverBoundaryWithGhosts(cctkGH, ML_BSSN_convertToADMBaseDtLapseShiftBoundary_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_convertToADMBaseFakeDtLapseShift.cc b/ML_BSSN/src/ML_BSSN_convertToADMBaseFakeDtLapseShift.cc index 0d8065d..5944c01 100644 --- a/ML_BSSN/src/ML_BSSN_convertToADMBaseFakeDtLapseShift.cc +++ b/ML_BSSN/src/ML_BSSN_convertToADMBaseFakeDtLapseShift.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_convertToADMBaseFakeDtLapseShift_Body(cGH const * restrict c DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_convertToADMBaseFakeDtLapseShift_Body(cGH const * restrict c CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_convertToADMBaseFakeDtLapseShift_Body(cGH const * restrict c CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_convertToADMBaseFakeDtLapseShift_Body(cGH const * restrict c CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_convertToADMBaseFakeDtLapseShift_Body(cGH const * restrict c /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_convertToADMBaseFakeDtLapseShift, + LC_LOOP3VEC(ML_BSSN_convertToADMBaseFakeDtLapseShift, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -237,7 +236,7 @@ static void ML_BSSN_convertToADMBaseFakeDtLapseShift_Body(cGH const * restrict c /* Calculate temporaries and grid functions */ CCTK_REAL_VEC eta = - kfmin(ToReal(1),kmul(INV(rL),ToReal(SpatialBetaDriverRadius))); + kmul(INV(kfmax(rL,ToReal(SpatialBetaDriverRadius))),ToReal(SpatialBetaDriverRadius)); CCTK_REAL_VEC theta = kfmin(ToReal(1),kexp(knmsub(rL,INV(ToReal(SpatialShiftGammaCoeffRadius)),ToReal(1)))); @@ -260,60 +259,23 @@ static void ML_BSSN_convertToADMBaseFakeDtLapseShift_Body(cGH const * restrict c else { dtbetaxL = - kmul(theta,kmul(kadd(Xt1L,kmadd(beta1L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B1L,Xt1L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt1L,kmadd(ksub(B1L,Xt1L),ToReal(ShiftBCoeff),kmul(beta1L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); dtbetayL = - kmul(theta,kmul(kadd(Xt2L,kmadd(beta2L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B2L,Xt2L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt2L,kmadd(ksub(B2L,Xt2L),ToReal(ShiftBCoeff),kmul(beta2L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); dtbetazL = - kmul(theta,kmul(kadd(Xt3L,kmadd(beta3L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B3L,Xt3L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count); - continue; + kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); } - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count); - break; - } - vec_store_nta(dtalp[index],dtalpL); - vec_store_nta(dtbetax[index],dtbetaxL); - vec_store_nta(dtbetay[index],dtbetayL); - vec_store_nta(dtbetaz[index],dtbetazL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(dtalp[index],dtalpL); + vec_store_nta_partial(dtbetax[index],dtbetaxL); + vec_store_nta_partial(dtbetay[index],dtbetayL); + vec_store_nta_partial(dtbetaz[index],dtbetazL); } - LC_ENDLOOP3VEC (ML_BSSN_convertToADMBaseFakeDtLapseShift); + LC_ENDLOOP3VEC(ML_BSSN_convertToADMBaseFakeDtLapseShift); } extern "C" void ML_BSSN_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS) @@ -332,7 +294,17 @@ extern "C" void ML_BSSN_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN::ML_dtlapse","ML_BSSN::ML_dtshift","ML_BSSN::ML_Gamma","ML_BSSN::ML_lapse","ML_BSSN::ML_shift","ML_BSSN::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN::ML_dtlapse", + "ML_BSSN::ML_dtshift", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_convertToADMBaseFakeDtLapseShift", 10, groups); switch(fdOrder) @@ -350,7 +322,7 @@ extern "C" void ML_BSSN_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_convertToADMBaseFakeDtLapseShift_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_convertToADMBaseFakeDtLapseShift_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_enforce.cc b/ML_BSSN/src/ML_BSSN_enforce.cc index c9817d9..50ef72a 100644 --- a/ML_BSSN/src/ML_BSSN_enforce.cc +++ b/ML_BSSN/src/ML_BSSN_enforce.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_enforce_Body(cGH const * restrict const cctkGH, int const di DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_enforce_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_enforce_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_enforce_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_enforce_Body(cGH const * restrict const cctkGH, int const di /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_enforce, + LC_LOOP3VEC(ML_BSSN_enforce, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -238,7 +237,8 @@ static void ML_BSSN_enforce_Body(cGH const * restrict const cctkGH, int const di /* Calculate temporaries and grid functions */ CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -246,12 +246,14 @@ static void ML_BSSN_enforce_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC trAt = kmadd(At11L,gtu11,kmadd(At22L,gtu22,kmadd(At33L,gtu33,kmul(kmadd(At12L,gtu12,kmadd(At13L,gtu13,kmul(At23L,gtu23))),ToReal(2))))); @@ -276,60 +278,17 @@ static void ML_BSSN_enforce_Body(cGH const * restrict const cctkGH, int const di alphaL = kfmax(alphaL,ToReal(MinimumLapse)); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - break; - } - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); } - LC_ENDLOOP3VEC (ML_BSSN_enforce); + LC_ENDLOOP3VEC(ML_BSSN_enforce); } extern "C" void ML_BSSN_enforce(CCTK_ARGUMENTS) @@ -348,7 +307,10 @@ extern "C" void ML_BSSN_enforce(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN::ML_curv","ML_BSSN::ML_lapse","ML_BSSN::ML_metric"}; + const char *const groups[] = { + "ML_BSSN::ML_curv", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_metric"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_enforce", 3, groups); switch(fdOrder) @@ -366,7 +328,7 @@ extern "C" void ML_BSSN_enforce(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_enforce_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_enforce_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/make.code.defn b/ML_BSSN/src/make.code.defn index 5fdc3cf..39c368f 100644 --- a/ML_BSSN/src/make.code.defn +++ b/ML_BSSN/src/make.code.defn @@ -1,3 +1,3 @@ # File produced by Kranc -SRCS = Startup.cc RegisterMoL.cc RegisterSymmetries.cc ML_BSSN_Minkowski.cc ML_BSSN_convertFromADMBase.cc ML_BSSN_InitGamma.cc ML_BSSN_convertFromADMBaseGamma.cc ML_BSSN_RHS1.cc ML_BSSN_RHS2.cc ML_BSSN_Dissipation.cc ML_BSSN_Advect.cc ML_BSSN_InitRHS.cc ML_BSSN_RHSStaticBoundary.cc ML_BSSN_enforce.cc ML_BSSN_boundary.cc ML_BSSN_convertToADMBase.cc ML_BSSN_convertToADMBaseDtLapseShift.cc ML_BSSN_convertToADMBaseDtLapseShiftBoundary.cc ML_BSSN_convertToADMBaseFakeDtLapseShift.cc ML_BSSN_constraints1.cc ML_BSSN_constraints2.cc Boundaries.cc +SRCS = Startup.cc RegisterSymmetries.cc RegisterMoL.cc ML_BSSN_Minkowski.cc ML_BSSN_convertFromADMBase.cc ML_BSSN_InitGamma.cc ML_BSSN_convertFromADMBaseGamma.cc ML_BSSN_RHS1.cc ML_BSSN_RHS2.cc ML_BSSN_Dissipation.cc ML_BSSN_Advect.cc ML_BSSN_InitRHS.cc ML_BSSN_RHSStaticBoundary.cc ML_BSSN_enforce.cc ML_BSSN_boundary.cc ML_BSSN_convertToADMBase.cc ML_BSSN_convertToADMBaseDtLapseShift.cc ML_BSSN_convertToADMBaseDtLapseShiftBoundary.cc ML_BSSN_convertToADMBaseFakeDtLapseShift.cc ML_BSSN_constraints1.cc ML_BSSN_constraints2.cc Boundaries.cc diff --git a/ML_BSSN_MP/param.ccl b/ML_BSSN_MP/param.ccl index 17c2d9d..7b3a3d0 100644 --- a/ML_BSSN_MP/param.ccl +++ b/ML_BSSN_MP/param.ccl @@ -90,7 +90,7 @@ CCTK_REAL BetaDriver "BetaDriver" } 0 restricted: -CCTK_REAL LapseAdvectionCoeff "Factor in front of the shift advection terms in 1+log" +CCTK_REAL LapseAdvectionCoeff "Factor in front of the lapse advection terms in 1+log" { "*:*" :: "" } 1 @@ -237,6 +237,12 @@ CCTK_INT rhs_timelevels "Number of active RHS timelevels" STEERABLE=RECOVER } 1 restricted: +CCTK_INT other_timelevels "Number of active timelevels for non-evolved grid functions" STEERABLE=RECOVER +{ + 0:3 :: "" +} 1 + +restricted: CCTK_INT ML_BSSN_MP_Minkowski_calc_every "ML_BSSN_MP_Minkowski_calc_every" STEERABLE=ALWAYS { *:* :: "" diff --git a/ML_BSSN_MP/schedule.ccl b/ML_BSSN_MP/schedule.ccl index bc9b072..76a405a 100644 --- a/ML_BSSN_MP/schedule.ccl +++ b/ML_BSSN_MP/schedule.ccl @@ -1,15 +1,30 @@ # File produced by Kranc -STORAGE: ML_cons_detg[1] +if (other_timelevels == 1) +{ + STORAGE: ML_cons_detg[1] +} -STORAGE: ML_cons_Gamma[1] +if (other_timelevels == 1) +{ + STORAGE: ML_cons_Gamma[1] +} -STORAGE: ML_cons_traceA[1] +if (other_timelevels == 1) +{ + STORAGE: ML_cons_traceA[1] +} -STORAGE: ML_Ham[1] +if (other_timelevels == 1) +{ + STORAGE: ML_Ham[1] +} -STORAGE: ML_mom[1] +if (other_timelevels == 1) +{ + STORAGE: ML_mom[1] +} if (timelevels == 1) { @@ -251,12 +266,6 @@ schedule ML_BSSN_MP_Startup at STARTUP OPTIONS: meta } "create banner" -schedule ML_BSSN_MP_RegisterVars in MoL_Register -{ - LANG: C - OPTIONS: meta -} "Register Variables for MoL" - schedule ML_BSSN_MP_RegisterSymmetries in SymmetryRegister { LANG: C @@ -269,6 +278,15 @@ if (CCTK_EQUALS(my_initial_data, "Minkowski")) schedule ML_BSSN_MP_Minkowski IN ADMBase_InitialData { LANG: C + WRITES: ML_BSSN_MP::ML_curv + WRITES: ML_BSSN_MP::ML_dtlapse + WRITES: ML_BSSN_MP::ML_dtshift + WRITES: ML_BSSN_MP::ML_Gamma + WRITES: ML_BSSN_MP::ML_lapse + WRITES: ML_BSSN_MP::ML_log_confac + WRITES: ML_BSSN_MP::ML_metric + WRITES: ML_BSSN_MP::ML_shift + WRITES: ML_BSSN_MP::ML_trace_curv } "ML_BSSN_MP_Minkowski" } @@ -278,6 +296,18 @@ if (CCTK_EQUALS(my_initial_data, "ADMBase")) schedule ML_BSSN_MP_convertFromADMBase AT initial AFTER ADMBase_PostInitial { LANG: C + READS: ADMBase::curv + READS: ADMBase::lapse + READS: ADMBase::metric + READS: ADMBase::shift + READS: ML_BSSN_MP::ML_log_confac + READS: ML_BSSN_MP::ML_trace_curv + WRITES: ML_BSSN_MP::ML_curv + WRITES: ML_BSSN_MP::ML_lapse + WRITES: ML_BSSN_MP::ML_log_confac + WRITES: ML_BSSN_MP::ML_metric + WRITES: ML_BSSN_MP::ML_shift + WRITES: ML_BSSN_MP::ML_trace_curv } "ML_BSSN_MP_convertFromADMBase" } @@ -287,6 +317,9 @@ if (CCTK_EQUALS(my_initial_data, "ADMBase")) schedule ML_BSSN_MP_InitGamma AT initial BEFORE ML_BSSN_MP_convertFromADMBaseGamma { LANG: C + WRITES: ML_BSSN_MP::ML_dtlapse + WRITES: ML_BSSN_MP::ML_dtshift + WRITES: ML_BSSN_MP::ML_Gamma } "ML_BSSN_MP_InitGamma" } @@ -299,17 +332,62 @@ if (CCTK_EQUALS(my_initial_data, "ADMBase")) SYNC: ML_dtlapse SYNC: ML_dtshift SYNC: ML_Gamma + READS: ADMBase::dtlapse + READS: ADMBase::dtshift + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_MP::ML_lapse + READS: ML_BSSN_MP::ML_metric + READS: ML_BSSN_MP::ML_shift + WRITES: ML_BSSN_MP::ML_dtlapse + WRITES: ML_BSSN_MP::ML_dtshift + WRITES: ML_BSSN_MP::ML_Gamma } "ML_BSSN_MP_convertFromADMBaseGamma" } schedule ML_BSSN_MP_RHS1 IN ML_BSSN_MP_evolCalcGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_MP::ML_curv + READS: ML_BSSN_MP::ML_dtlapse + READS: ML_BSSN_MP::ML_dtshift + READS: ML_BSSN_MP::ML_Gamma + READS: ML_BSSN_MP::ML_lapse + READS: ML_BSSN_MP::ML_log_confac + READS: ML_BSSN_MP::ML_metric + READS: ML_BSSN_MP::ML_shift + READS: ML_BSSN_MP::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN_MP::ML_dtlapserhs + WRITES: ML_BSSN_MP::ML_dtshiftrhs + WRITES: ML_BSSN_MP::ML_Gammarhs + WRITES: ML_BSSN_MP::ML_lapserhs + WRITES: ML_BSSN_MP::ML_log_confacrhs + WRITES: ML_BSSN_MP::ML_metricrhs + WRITES: ML_BSSN_MP::ML_shiftrhs + WRITES: ML_BSSN_MP::ML_trace_curvrhs } "ML_BSSN_MP_RHS1" schedule ML_BSSN_MP_RHS2 IN ML_BSSN_MP_evolCalcGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_MP::ML_curv + READS: ML_BSSN_MP::ML_Gamma + READS: ML_BSSN_MP::ML_lapse + READS: ML_BSSN_MP::ML_log_confac + READS: ML_BSSN_MP::ML_metric + READS: ML_BSSN_MP::ML_shift + READS: ML_BSSN_MP::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN_MP::ML_curvrhs } "ML_BSSN_MP_RHS2" @@ -318,17 +396,80 @@ if (CCTK_EQUALS(apply_dissipation, "always")) schedule ML_BSSN_MP_Dissipation IN ML_BSSN_MP_evolCalcGroup AFTER (ML_BSSN_MP_RHS1 ML_BSSN_MP_RHS2) { LANG: C + READS: ML_BSSN_MP::ML_curv + READS: ML_BSSN_MP::ML_curvrhs + READS: ML_BSSN_MP::ML_dtlapse + READS: ML_BSSN_MP::ML_dtlapserhs + READS: ML_BSSN_MP::ML_dtshift + READS: ML_BSSN_MP::ML_dtshiftrhs + READS: ML_BSSN_MP::ML_Gamma + READS: ML_BSSN_MP::ML_Gammarhs + READS: ML_BSSN_MP::ML_lapse + READS: ML_BSSN_MP::ML_lapserhs + READS: ML_BSSN_MP::ML_log_confac + READS: ML_BSSN_MP::ML_log_confacrhs + READS: ML_BSSN_MP::ML_metric + READS: ML_BSSN_MP::ML_metricrhs + READS: ML_BSSN_MP::ML_shift + READS: ML_BSSN_MP::ML_shiftrhs + READS: ML_BSSN_MP::ML_trace_curv + READS: ML_BSSN_MP::ML_trace_curvrhs + WRITES: ML_BSSN_MP::ML_curvrhs + WRITES: ML_BSSN_MP::ML_dtlapserhs + WRITES: ML_BSSN_MP::ML_dtshiftrhs + WRITES: ML_BSSN_MP::ML_Gammarhs + WRITES: ML_BSSN_MP::ML_lapserhs + WRITES: ML_BSSN_MP::ML_log_confacrhs + WRITES: ML_BSSN_MP::ML_metricrhs + WRITES: ML_BSSN_MP::ML_shiftrhs + WRITES: ML_BSSN_MP::ML_trace_curvrhs } "ML_BSSN_MP_Dissipation" } schedule ML_BSSN_MP_Advect IN ML_BSSN_MP_evolCalcGroup AFTER (ML_BSSN_MP_RHS1 ML_BSSN_MP_RHS2) { LANG: C + READS: ML_BSSN_MP::ML_curv + READS: ML_BSSN_MP::ML_curvrhs + READS: ML_BSSN_MP::ML_dtlapse + READS: ML_BSSN_MP::ML_dtlapserhs + READS: ML_BSSN_MP::ML_dtshift + READS: ML_BSSN_MP::ML_dtshiftrhs + READS: ML_BSSN_MP::ML_Gamma + READS: ML_BSSN_MP::ML_Gammarhs + READS: ML_BSSN_MP::ML_lapse + READS: ML_BSSN_MP::ML_lapserhs + READS: ML_BSSN_MP::ML_log_confac + READS: ML_BSSN_MP::ML_log_confacrhs + READS: ML_BSSN_MP::ML_metric + READS: ML_BSSN_MP::ML_metricrhs + READS: ML_BSSN_MP::ML_shift + READS: ML_BSSN_MP::ML_shiftrhs + READS: ML_BSSN_MP::ML_trace_curv + READS: ML_BSSN_MP::ML_trace_curvrhs + WRITES: ML_BSSN_MP::ML_curvrhs + WRITES: ML_BSSN_MP::ML_dtlapserhs + WRITES: ML_BSSN_MP::ML_dtshiftrhs + WRITES: ML_BSSN_MP::ML_Gammarhs + WRITES: ML_BSSN_MP::ML_lapserhs + WRITES: ML_BSSN_MP::ML_log_confacrhs + WRITES: ML_BSSN_MP::ML_metricrhs + WRITES: ML_BSSN_MP::ML_shiftrhs + WRITES: ML_BSSN_MP::ML_trace_curvrhs } "ML_BSSN_MP_Advect" schedule ML_BSSN_MP_InitRHS AT analysis BEFORE ML_BSSN_MP_evolCalcGroup { LANG: C + WRITES: ML_BSSN_MP::ML_curvrhs + WRITES: ML_BSSN_MP::ML_dtlapserhs + WRITES: ML_BSSN_MP::ML_dtshiftrhs + WRITES: ML_BSSN_MP::ML_Gammarhs + WRITES: ML_BSSN_MP::ML_lapserhs + WRITES: ML_BSSN_MP::ML_log_confacrhs + WRITES: ML_BSSN_MP::ML_metricrhs + WRITES: ML_BSSN_MP::ML_shiftrhs + WRITES: ML_BSSN_MP::ML_trace_curvrhs } "ML_BSSN_MP_InitRHS" @@ -337,12 +478,26 @@ if (CCTK_EQUALS(my_rhs_boundary_condition, "static")) schedule ML_BSSN_MP_RHSStaticBoundary IN MoL_CalcRHS { LANG: C + WRITES: ML_BSSN_MP::ML_curvrhs + WRITES: ML_BSSN_MP::ML_dtlapserhs + WRITES: ML_BSSN_MP::ML_dtshiftrhs + WRITES: ML_BSSN_MP::ML_Gammarhs + WRITES: ML_BSSN_MP::ML_lapserhs + WRITES: ML_BSSN_MP::ML_log_confacrhs + WRITES: ML_BSSN_MP::ML_metricrhs + WRITES: ML_BSSN_MP::ML_shiftrhs + WRITES: ML_BSSN_MP::ML_trace_curvrhs } "ML_BSSN_MP_RHSStaticBoundary" } schedule ML_BSSN_MP_enforce IN MoL_PostStepModify { LANG: C + READS: ML_BSSN_MP::ML_curv + READS: ML_BSSN_MP::ML_lapse + READS: ML_BSSN_MP::ML_metric + WRITES: ML_BSSN_MP::ML_curv + WRITES: ML_BSSN_MP::ML_lapse } "ML_BSSN_MP_enforce" @@ -351,12 +506,32 @@ if (CCTK_EQUALS(my_boundary_condition, "Minkowski")) schedule ML_BSSN_MP_boundary IN MoL_PostStep { LANG: C + WRITES: ML_BSSN_MP::ML_curv + WRITES: ML_BSSN_MP::ML_dtlapse + WRITES: ML_BSSN_MP::ML_dtshift + WRITES: ML_BSSN_MP::ML_Gamma + WRITES: ML_BSSN_MP::ML_lapse + WRITES: ML_BSSN_MP::ML_log_confac + WRITES: ML_BSSN_MP::ML_metric + WRITES: ML_BSSN_MP::ML_shift + WRITES: ML_BSSN_MP::ML_trace_curv } "ML_BSSN_MP_boundary" } schedule ML_BSSN_MP_convertToADMBase IN ML_BSSN_MP_convertToADMBaseGroup { LANG: C + READS: ADMBase::metric + READS: ML_BSSN_MP::ML_curv + READS: ML_BSSN_MP::ML_lapse + READS: ML_BSSN_MP::ML_log_confac + READS: ML_BSSN_MP::ML_metric + READS: ML_BSSN_MP::ML_shift + READS: ML_BSSN_MP::ML_trace_curv + WRITES: ADMBase::curv + WRITES: ADMBase::lapse + WRITES: ADMBase::metric + WRITES: ADMBase::shift } "ML_BSSN_MP_convertToADMBase" @@ -367,6 +542,18 @@ if (CCTK_EQUALS(dt_lapse_shift_method, "correct")) LANG: C SYNC: ADMBase::dtlapse SYNC: ADMBase::dtshift + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_MP::ML_dtlapse + READS: ML_BSSN_MP::ML_dtshift + READS: ML_BSSN_MP::ML_Gamma + READS: ML_BSSN_MP::ML_lapse + READS: ML_BSSN_MP::ML_log_confac + READS: ML_BSSN_MP::ML_metric + READS: ML_BSSN_MP::ML_shift + READS: ML_BSSN_MP::ML_trace_curv + WRITES: ADMBase::dtlapse + WRITES: ADMBase::dtshift } "ML_BSSN_MP_convertToADMBaseDtLapseShift" } @@ -376,6 +563,17 @@ if (CCTK_EQUALS(dt_lapse_shift_method, "correct")) schedule ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary IN ML_BSSN_MP_convertToADMBaseGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_MP::ML_dtlapse + READS: ML_BSSN_MP::ML_dtshift + READS: ML_BSSN_MP::ML_Gamma + READS: ML_BSSN_MP::ML_lapse + READS: ML_BSSN_MP::ML_metric + READS: ML_BSSN_MP::ML_shift + READS: ML_BSSN_MP::ML_trace_curv + WRITES: ADMBase::dtlapse + WRITES: ADMBase::dtshift } "ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary" } @@ -385,6 +583,17 @@ if (CCTK_EQUALS(dt_lapse_shift_method, "noLapseShiftAdvection")) schedule ML_BSSN_MP_convertToADMBaseFakeDtLapseShift IN ML_BSSN_MP_convertToADMBaseGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_MP::ML_dtlapse + READS: ML_BSSN_MP::ML_dtshift + READS: ML_BSSN_MP::ML_Gamma + READS: ML_BSSN_MP::ML_lapse + READS: ML_BSSN_MP::ML_metric + READS: ML_BSSN_MP::ML_shift + READS: ML_BSSN_MP::ML_trace_curv + WRITES: ADMBase::dtlapse + WRITES: ADMBase::dtshift } "ML_BSSN_MP_convertToADMBaseFakeDtLapseShift" } @@ -396,6 +605,17 @@ schedule group ML_BSSN_MP_constraints1_group in MoL_PseudoEvolution after MoL_Po schedule ML_BSSN_MP_constraints1 in ML_BSSN_MP_constraints1_group { LANG: C + READS: ML_BSSN_MP::ML_curv + READS: ML_BSSN_MP::ML_Gamma + READS: ML_BSSN_MP::ML_lapse + READS: ML_BSSN_MP::ML_log_confac + READS: ML_BSSN_MP::ML_metric + READS: ML_BSSN_MP::ML_shift + READS: ML_BSSN_MP::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN_MP::ML_Ham } "ML_BSSN_MP_constraints1" schedule ML_BSSN_MP_constraints1_SelectBCs in ML_BSSN_MP_constraints1_bc_group @@ -428,6 +648,20 @@ schedule group ML_BSSN_MP_constraints2_group in MoL_PseudoEvolution after MoL_Po schedule ML_BSSN_MP_constraints2 in ML_BSSN_MP_constraints2_group { LANG: C + READS: ML_BSSN_MP::ML_curv + READS: ML_BSSN_MP::ML_Gamma + READS: ML_BSSN_MP::ML_lapse + READS: ML_BSSN_MP::ML_log_confac + READS: ML_BSSN_MP::ML_metric + READS: ML_BSSN_MP::ML_shift + READS: ML_BSSN_MP::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN_MP::ML_cons_detg + WRITES: ML_BSSN_MP::ML_cons_Gamma + WRITES: ML_BSSN_MP::ML_cons_traceA + WRITES: ML_BSSN_MP::ML_mom } "ML_BSSN_MP_constraints2" schedule ML_BSSN_MP_constraints2_SelectBCs in ML_BSSN_MP_constraints2_bc_group @@ -476,6 +710,12 @@ schedule ML_BSSN_MP_CheckBoundaries at BASEGRID OPTIONS: meta } "check boundaries treatment" +schedule ML_BSSN_MP_RegisterVars in MoL_Register +{ + LANG: C + OPTIONS: meta +} "Register Variables for MoL" + schedule group ApplyBCs as ML_BSSN_MP_ApplyBCs in MoL_PostStep after ML_BSSN_MP_SelectBoundConds { # no language specified diff --git a/ML_BSSN_MP/src/ML_BSSN_MP_Advect.cc b/ML_BSSN_MP/src/ML_BSSN_MP_Advect.cc index 129b1be..bb81808 100644 --- a/ML_BSSN_MP/src/ML_BSSN_MP_Advect.cc +++ b/ML_BSSN_MP/src/ML_BSSN_MP_Advect.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -64,8 +65,6 @@ static void ML_BSSN_MP_Advect_Body(cGH const * restrict const cctkGH, int const DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -102,9 +101,9 @@ static void ML_BSSN_MP_Advect_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -123,14 +122,14 @@ static void ML_BSSN_MP_Advect_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -144,9 +143,9 @@ static void ML_BSSN_MP_Advect_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -227,7 +226,7 @@ static void ML_BSSN_MP_Advect_Body(cGH const * restrict const cctkGH, int const /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_Advect, + LC_LOOP3VEC(ML_BSSN_MP_Advect, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1986,7 +1985,7 @@ static void ML_BSSN_MP_Advect_Body(cGH const * restrict const cctkGH, int const } phirhsL = - kmadd(beta1L,JacPDupwindNthAnti1phi,kmadd(beta2L,JacPDupwindNthAnti2phi,kmadd(beta3L,JacPDupwindNthAnti3phi,kadd(phirhsL,kmadd(JacPDupwindNthSymm1phi,kfabs(beta1L),kmadd(JacPDupwindNthSymm2phi,kfabs(beta2L),kmul(JacPDupwindNthSymm3phi,kfabs(beta3L)))))))); + kadd(phirhsL,kmadd(beta1L,JacPDupwindNthAnti1phi,kmadd(beta2L,JacPDupwindNthAnti2phi,kmadd(beta3L,JacPDupwindNthAnti3phi,kmadd(JacPDupwindNthSymm1phi,kfabs(beta1L),kmadd(JacPDupwindNthSymm2phi,kfabs(beta2L),kmul(JacPDupwindNthSymm3phi,kfabs(beta3L)))))))); gt11rhsL = kadd(gt11rhsL,kmadd(beta1L,JacPDupwindNthAnti1gt11,kmadd(beta2L,JacPDupwindNthAnti2gt11,kmadd(beta3L,JacPDupwindNthAnti3gt11,kmadd(JacPDupwindNthSymm1gt11,kfabs(beta1L),kmadd(JacPDupwindNthSymm2gt11,kfabs(beta2L),kmul(JacPDupwindNthSymm3gt11,kfabs(beta3L)))))))); @@ -2007,16 +2006,16 @@ static void ML_BSSN_MP_Advect_Body(cGH const * restrict const cctkGH, int const kadd(gt33rhsL,kmadd(beta1L,JacPDupwindNthAnti1gt33,kmadd(beta2L,JacPDupwindNthAnti2gt33,kmadd(beta3L,JacPDupwindNthAnti3gt33,kmadd(JacPDupwindNthSymm1gt33,kfabs(beta1L),kmadd(JacPDupwindNthSymm2gt33,kfabs(beta2L),kmul(JacPDupwindNthSymm3gt33,kfabs(beta3L)))))))); Xt1rhsL = - kmadd(beta1L,JacPDupwindNthAnti1Xt1,kmadd(beta2L,JacPDupwindNthAnti2Xt1,kmadd(beta3L,JacPDupwindNthAnti3Xt1,kadd(Xt1rhsL,kmadd(JacPDupwindNthSymm1Xt1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt1,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt1,kfabs(beta3L)))))))); + kadd(Xt1rhsL,kmadd(beta1L,JacPDupwindNthAnti1Xt1,kmadd(beta2L,JacPDupwindNthAnti2Xt1,kmadd(beta3L,JacPDupwindNthAnti3Xt1,kmadd(JacPDupwindNthSymm1Xt1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt1,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt1,kfabs(beta3L)))))))); Xt2rhsL = - kmadd(beta1L,JacPDupwindNthAnti1Xt2,kmadd(beta2L,JacPDupwindNthAnti2Xt2,kmadd(beta3L,JacPDupwindNthAnti3Xt2,kadd(Xt2rhsL,kmadd(JacPDupwindNthSymm1Xt2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt2,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt2,kfabs(beta3L)))))))); + kadd(Xt2rhsL,kmadd(beta1L,JacPDupwindNthAnti1Xt2,kmadd(beta2L,JacPDupwindNthAnti2Xt2,kmadd(beta3L,JacPDupwindNthAnti3Xt2,kmadd(JacPDupwindNthSymm1Xt2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt2,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt2,kfabs(beta3L)))))))); Xt3rhsL = - kmadd(beta1L,JacPDupwindNthAnti1Xt3,kmadd(beta2L,JacPDupwindNthAnti2Xt3,kmadd(beta3L,JacPDupwindNthAnti3Xt3,kadd(Xt3rhsL,kmadd(JacPDupwindNthSymm1Xt3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt3,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt3,kfabs(beta3L)))))))); + kadd(Xt3rhsL,kmadd(beta1L,JacPDupwindNthAnti1Xt3,kmadd(beta2L,JacPDupwindNthAnti2Xt3,kmadd(beta3L,JacPDupwindNthAnti3Xt3,kmadd(JacPDupwindNthSymm1Xt3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt3,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt3,kfabs(beta3L)))))))); trKrhsL = - kmadd(beta1L,JacPDupwindNthAnti1trK,kmadd(beta2L,JacPDupwindNthAnti2trK,kmadd(beta3L,JacPDupwindNthAnti3trK,kadd(trKrhsL,kmadd(JacPDupwindNthSymm1trK,kfabs(beta1L),kmadd(JacPDupwindNthSymm2trK,kfabs(beta2L),kmul(JacPDupwindNthSymm3trK,kfabs(beta3L)))))))); + kadd(trKrhsL,kmadd(beta1L,JacPDupwindNthAnti1trK,kmadd(beta2L,JacPDupwindNthAnti2trK,kmadd(beta3L,JacPDupwindNthAnti3trK,kmadd(JacPDupwindNthSymm1trK,kfabs(beta1L),kmadd(JacPDupwindNthSymm2trK,kfabs(beta2L),kmul(JacPDupwindNthSymm3trK,kfabs(beta3L)))))))); At11rhsL = kadd(At11rhsL,kmadd(beta1L,JacPDupwindNthAnti1At11,kmadd(beta2L,JacPDupwindNthAnti2At11,kmadd(beta3L,JacPDupwindNthAnti3At11,kmadd(JacPDupwindNthSymm1At11,kfabs(beta1L),kmadd(JacPDupwindNthSymm2At11,kfabs(beta2L),kmul(JacPDupwindNthSymm3At11,kfabs(beta3L)))))))); @@ -2040,7 +2039,7 @@ static void ML_BSSN_MP_Advect_Body(cGH const * restrict const cctkGH, int const kmadd(kmadd(beta1L,JacPDupwindNthAnti1alpha,kmadd(beta2L,JacPDupwindNthAnti2alpha,kmadd(beta3L,JacPDupwindNthAnti3alpha,kmadd(JacPDupwindNthSymm1alpha,kfabs(beta1L),kmadd(JacPDupwindNthSymm2alpha,kfabs(beta2L),kmul(JacPDupwindNthSymm3alpha,kfabs(beta3L))))))),ToReal(LapseAdvectionCoeff),alpharhsL); ArhsL = - kmadd(kmadd(beta1L,JacPDupwindNthAnti1A,kmadd(beta2L,JacPDupwindNthAnti2A,kmadd(beta3L,JacPDupwindNthAnti3A,kmadd(JacPDupwindNthSymm1A,kfabs(beta1L),kmadd(JacPDupwindNthSymm2A,kfabs(beta2L),kmul(JacPDupwindNthSymm3A,kfabs(beta3L))))))),ToReal(LapseAdvectionCoeff),ArhsL); + kmadd(ToReal(LapseACoeff),kmsub(kmadd(beta1L,JacPDupwindNthAnti1A,kmadd(beta2L,JacPDupwindNthAnti2A,kmadd(beta3L,JacPDupwindNthAnti3A,kmadd(JacPDupwindNthSymm1A,kfabs(beta1L),kmadd(JacPDupwindNthSymm2A,kfabs(beta2L),kmul(JacPDupwindNthSymm3A,kfabs(beta3L))))))),ToReal(LapseAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1trK,kmadd(beta2L,JacPDupwindNthAnti2trK,kmadd(beta3L,JacPDupwindNthAnti3trK,kmadd(JacPDupwindNthSymm1trK,kfabs(beta1L),kmadd(JacPDupwindNthSymm2trK,kfabs(beta2L),kmul(JacPDupwindNthSymm3trK,kfabs(beta3L))))))),kadd(ToReal(-1),ToReal(LapseAdvectionCoeff)))),ArhsL); beta1rhsL = kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta1,kmadd(beta2L,JacPDupwindNthAnti2beta1,kmadd(beta3L,JacPDupwindNthAnti3beta1,kmadd(JacPDupwindNthSymm1beta1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta1,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),beta1rhsL); @@ -2052,140 +2051,43 @@ static void ML_BSSN_MP_Advect_Body(cGH const * restrict const cctkGH, int const kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),beta3rhsL); B1rhsL = - kadd(B1rhsL,kmadd(kmadd(beta1L,ksub(JacPDupwindNthAnti1B1,JacPDupwindNthAnti1Xt1),kmadd(beta2L,ksub(JacPDupwindNthAnti2B1,JacPDupwindNthAnti2Xt1),kmadd(beta3L,ksub(JacPDupwindNthAnti3B1,JacPDupwindNthAnti3Xt1),kmadd(kfabs(beta1L),ksub(JacPDupwindNthSymm1B1,JacPDupwindNthSymm1Xt1),kmadd(kfabs(beta2L),ksub(JacPDupwindNthSymm2B1,JacPDupwindNthSymm2Xt1),kmul(kfabs(beta3L),ksub(JacPDupwindNthSymm3B1,JacPDupwindNthSymm3Xt1))))))),ToReal(ShiftAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1Xt1,kmadd(beta2L,JacPDupwindNthAnti2Xt1,kmadd(beta3L,JacPDupwindNthAnti3Xt1,kmadd(JacPDupwindNthSymm1Xt1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt1,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt1,kfabs(beta3L))))))),ToReal(ShiftBCoeff)))); + kmadd(kmsub(kmadd(beta1L,JacPDupwindNthAnti1B1,kmadd(beta2L,JacPDupwindNthAnti2B1,kmadd(beta3L,JacPDupwindNthAnti3B1,kmadd(JacPDupwindNthSymm1B1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2B1,kfabs(beta2L),kmul(JacPDupwindNthSymm3B1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1Xt1,kmadd(beta2L,JacPDupwindNthAnti2Xt1,kmadd(beta3L,JacPDupwindNthAnti3Xt1,kmadd(JacPDupwindNthSymm1Xt1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt1,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt1,kfabs(beta3L))))))),kadd(ToReal(-1),ToReal(ShiftAdvectionCoeff)))),ToReal(ShiftBCoeff),B1rhsL); B2rhsL = - kadd(B2rhsL,kmadd(kmadd(beta1L,ksub(JacPDupwindNthAnti1B2,JacPDupwindNthAnti1Xt2),kmadd(beta2L,ksub(JacPDupwindNthAnti2B2,JacPDupwindNthAnti2Xt2),kmadd(beta3L,ksub(JacPDupwindNthAnti3B2,JacPDupwindNthAnti3Xt2),kmadd(kfabs(beta1L),ksub(JacPDupwindNthSymm1B2,JacPDupwindNthSymm1Xt2),kmadd(kfabs(beta2L),ksub(JacPDupwindNthSymm2B2,JacPDupwindNthSymm2Xt2),kmul(kfabs(beta3L),ksub(JacPDupwindNthSymm3B2,JacPDupwindNthSymm3Xt2))))))),ToReal(ShiftAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1Xt2,kmadd(beta2L,JacPDupwindNthAnti2Xt2,kmadd(beta3L,JacPDupwindNthAnti3Xt2,kmadd(JacPDupwindNthSymm1Xt2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt2,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt2,kfabs(beta3L))))))),ToReal(ShiftBCoeff)))); + kmadd(kmsub(kmadd(beta1L,JacPDupwindNthAnti1B2,kmadd(beta2L,JacPDupwindNthAnti2B2,kmadd(beta3L,JacPDupwindNthAnti3B2,kmadd(JacPDupwindNthSymm1B2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2B2,kfabs(beta2L),kmul(JacPDupwindNthSymm3B2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1Xt2,kmadd(beta2L,JacPDupwindNthAnti2Xt2,kmadd(beta3L,JacPDupwindNthAnti3Xt2,kmadd(JacPDupwindNthSymm1Xt2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt2,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt2,kfabs(beta3L))))))),kadd(ToReal(-1),ToReal(ShiftAdvectionCoeff)))),ToReal(ShiftBCoeff),B2rhsL); B3rhsL = - kadd(B3rhsL,kmadd(kmadd(beta1L,ksub(JacPDupwindNthAnti1B3,JacPDupwindNthAnti1Xt3),kmadd(beta2L,ksub(JacPDupwindNthAnti2B3,JacPDupwindNthAnti2Xt3),kmadd(beta3L,ksub(JacPDupwindNthAnti3B3,JacPDupwindNthAnti3Xt3),kmadd(kfabs(beta1L),ksub(JacPDupwindNthSymm1B3,JacPDupwindNthSymm1Xt3),kmadd(kfabs(beta2L),ksub(JacPDupwindNthSymm2B3,JacPDupwindNthSymm2Xt3),kmul(kfabs(beta3L),ksub(JacPDupwindNthSymm3B3,JacPDupwindNthSymm3Xt3))))))),ToReal(ShiftAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1Xt3,kmadd(beta2L,JacPDupwindNthAnti2Xt3,kmadd(beta3L,JacPDupwindNthAnti3Xt3,kmadd(JacPDupwindNthSymm1Xt3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt3,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt3,kfabs(beta3L))))))),ToReal(ShiftBCoeff)))); - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ + kmadd(kmsub(kmadd(beta1L,JacPDupwindNthAnti1B3,kmadd(beta2L,JacPDupwindNthAnti2B3,kmadd(beta3L,JacPDupwindNthAnti3B3,kmadd(JacPDupwindNthSymm1B3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2B3,kfabs(beta2L),kmul(JacPDupwindNthSymm3B3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1Xt3,kmadd(beta2L,JacPDupwindNthAnti2Xt3,kmadd(beta3L,JacPDupwindNthAnti3Xt3,kmadd(JacPDupwindNthSymm1Xt3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt3,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt3,kfabs(beta3L))))))),kadd(ToReal(-1),ToReal(ShiftAdvectionCoeff)))),ToReal(ShiftBCoeff),B3rhsL); - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_Advect); + LC_ENDLOOP3VEC(ML_BSSN_MP_Advect); } extern "C" void ML_BSSN_MP_Advect(CCTK_ARGUMENTS) @@ -2204,7 +2106,25 @@ extern "C" void ML_BSSN_MP_Advect(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP::ML_curv","ML_BSSN_MP::ML_curvrhs","ML_BSSN_MP::ML_dtlapse","ML_BSSN_MP::ML_dtlapserhs","ML_BSSN_MP::ML_dtshift","ML_BSSN_MP::ML_dtshiftrhs","ML_BSSN_MP::ML_Gamma","ML_BSSN_MP::ML_Gammarhs","ML_BSSN_MP::ML_lapse","ML_BSSN_MP::ML_lapserhs","ML_BSSN_MP::ML_log_confac","ML_BSSN_MP::ML_log_confacrhs","ML_BSSN_MP::ML_metric","ML_BSSN_MP::ML_metricrhs","ML_BSSN_MP::ML_shift","ML_BSSN_MP::ML_shiftrhs","ML_BSSN_MP::ML_trace_curv","ML_BSSN_MP::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN_MP::ML_curv", + "ML_BSSN_MP::ML_curvrhs", + "ML_BSSN_MP::ML_dtlapse", + "ML_BSSN_MP::ML_dtlapserhs", + "ML_BSSN_MP::ML_dtshift", + "ML_BSSN_MP::ML_dtshiftrhs", + "ML_BSSN_MP::ML_Gamma", + "ML_BSSN_MP::ML_Gammarhs", + "ML_BSSN_MP::ML_lapse", + "ML_BSSN_MP::ML_lapserhs", + "ML_BSSN_MP::ML_log_confac", + "ML_BSSN_MP::ML_log_confacrhs", + "ML_BSSN_MP::ML_metric", + "ML_BSSN_MP::ML_metricrhs", + "ML_BSSN_MP::ML_shift", + "ML_BSSN_MP::ML_shiftrhs", + "ML_BSSN_MP::ML_trace_curv", + "ML_BSSN_MP::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_Advect", 18, groups); switch(fdOrder) @@ -2226,7 +2146,7 @@ extern "C" void ML_BSSN_MP_Advect(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_MP_Advect_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_MP_Advect_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP/src/ML_BSSN_MP_Dissipation.cc b/ML_BSSN_MP/src/ML_BSSN_MP_Dissipation.cc index 43a4907..9e85cf5 100644 --- a/ML_BSSN_MP/src/ML_BSSN_MP_Dissipation.cc +++ b/ML_BSSN_MP/src/ML_BSSN_MP_Dissipation.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -64,8 +65,6 @@ static void ML_BSSN_MP_Dissipation_Body(cGH const * restrict const cctkGH, int c DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -102,9 +101,9 @@ static void ML_BSSN_MP_Dissipation_Body(cGH const * restrict const cctkGH, int c CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -123,14 +122,14 @@ static void ML_BSSN_MP_Dissipation_Body(cGH const * restrict const cctkGH, int c CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -144,9 +143,9 @@ static void ML_BSSN_MP_Dissipation_Body(cGH const * restrict const cctkGH, int c CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -227,7 +226,7 @@ static void ML_BSSN_MP_Dissipation_Body(cGH const * restrict const cctkGH, int c /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_Dissipation, + LC_LOOP3VEC(ML_BSSN_MP_Dissipation, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1161,7 +1160,7 @@ static void ML_BSSN_MP_Dissipation_Body(cGH const * restrict const cctkGH, int c CCTK_REAL_VEC epsdiss3 = ToReal(EpsDiss); phirhsL = - kmadd(epsdiss1,JacPDdissipationNth1phi,kmadd(epsdiss2,JacPDdissipationNth2phi,kmadd(epsdiss3,JacPDdissipationNth3phi,phirhsL))); + kadd(phirhsL,kmadd(epsdiss1,JacPDdissipationNth1phi,kmadd(epsdiss2,JacPDdissipationNth2phi,kmul(epsdiss3,JacPDdissipationNth3phi)))); gt11rhsL = kadd(gt11rhsL,kmadd(epsdiss1,JacPDdissipationNth1gt11,kmadd(epsdiss2,JacPDdissipationNth2gt11,kmul(epsdiss3,JacPDdissipationNth3gt11)))); @@ -1182,16 +1181,16 @@ static void ML_BSSN_MP_Dissipation_Body(cGH const * restrict const cctkGH, int c kadd(gt33rhsL,kmadd(epsdiss1,JacPDdissipationNth1gt33,kmadd(epsdiss2,JacPDdissipationNth2gt33,kmul(epsdiss3,JacPDdissipationNth3gt33)))); Xt1rhsL = - kmadd(epsdiss1,JacPDdissipationNth1Xt1,kmadd(epsdiss2,JacPDdissipationNth2Xt1,kmadd(epsdiss3,JacPDdissipationNth3Xt1,Xt1rhsL))); + kadd(Xt1rhsL,kmadd(epsdiss1,JacPDdissipationNth1Xt1,kmadd(epsdiss2,JacPDdissipationNth2Xt1,kmul(epsdiss3,JacPDdissipationNth3Xt1)))); Xt2rhsL = - kmadd(epsdiss1,JacPDdissipationNth1Xt2,kmadd(epsdiss2,JacPDdissipationNth2Xt2,kmadd(epsdiss3,JacPDdissipationNth3Xt2,Xt2rhsL))); + kadd(Xt2rhsL,kmadd(epsdiss1,JacPDdissipationNth1Xt2,kmadd(epsdiss2,JacPDdissipationNth2Xt2,kmul(epsdiss3,JacPDdissipationNth3Xt2)))); Xt3rhsL = - kmadd(epsdiss1,JacPDdissipationNth1Xt3,kmadd(epsdiss2,JacPDdissipationNth2Xt3,kmadd(epsdiss3,JacPDdissipationNth3Xt3,Xt3rhsL))); + kadd(Xt3rhsL,kmadd(epsdiss1,JacPDdissipationNth1Xt3,kmadd(epsdiss2,JacPDdissipationNth2Xt3,kmul(epsdiss3,JacPDdissipationNth3Xt3)))); trKrhsL = - kmadd(epsdiss1,JacPDdissipationNth1trK,kmadd(epsdiss2,JacPDdissipationNth2trK,kmadd(epsdiss3,JacPDdissipationNth3trK,trKrhsL))); + kadd(trKrhsL,kmadd(epsdiss1,JacPDdissipationNth1trK,kmadd(epsdiss2,JacPDdissipationNth2trK,kmul(epsdiss3,JacPDdissipationNth3trK)))); At11rhsL = kadd(At11rhsL,kmadd(epsdiss1,JacPDdissipationNth1At11,kmadd(epsdiss2,JacPDdissipationNth2At11,kmul(epsdiss3,JacPDdissipationNth3At11)))); @@ -1235,132 +1234,35 @@ static void ML_BSSN_MP_Dissipation_Body(cGH const * restrict const cctkGH, int c B3rhsL = kadd(B3rhsL,kmadd(epsdiss1,JacPDdissipationNth1B3,kmadd(epsdiss2,JacPDdissipationNth2B3,kmul(epsdiss3,JacPDdissipationNth3B3)))); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_Dissipation); + LC_ENDLOOP3VEC(ML_BSSN_MP_Dissipation); } extern "C" void ML_BSSN_MP_Dissipation(CCTK_ARGUMENTS) @@ -1379,7 +1281,25 @@ extern "C" void ML_BSSN_MP_Dissipation(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP::ML_curv","ML_BSSN_MP::ML_curvrhs","ML_BSSN_MP::ML_dtlapse","ML_BSSN_MP::ML_dtlapserhs","ML_BSSN_MP::ML_dtshift","ML_BSSN_MP::ML_dtshiftrhs","ML_BSSN_MP::ML_Gamma","ML_BSSN_MP::ML_Gammarhs","ML_BSSN_MP::ML_lapse","ML_BSSN_MP::ML_lapserhs","ML_BSSN_MP::ML_log_confac","ML_BSSN_MP::ML_log_confacrhs","ML_BSSN_MP::ML_metric","ML_BSSN_MP::ML_metricrhs","ML_BSSN_MP::ML_shift","ML_BSSN_MP::ML_shiftrhs","ML_BSSN_MP::ML_trace_curv","ML_BSSN_MP::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN_MP::ML_curv", + "ML_BSSN_MP::ML_curvrhs", + "ML_BSSN_MP::ML_dtlapse", + "ML_BSSN_MP::ML_dtlapserhs", + "ML_BSSN_MP::ML_dtshift", + "ML_BSSN_MP::ML_dtshiftrhs", + "ML_BSSN_MP::ML_Gamma", + "ML_BSSN_MP::ML_Gammarhs", + "ML_BSSN_MP::ML_lapse", + "ML_BSSN_MP::ML_lapserhs", + "ML_BSSN_MP::ML_log_confac", + "ML_BSSN_MP::ML_log_confacrhs", + "ML_BSSN_MP::ML_metric", + "ML_BSSN_MP::ML_metricrhs", + "ML_BSSN_MP::ML_shift", + "ML_BSSN_MP::ML_shiftrhs", + "ML_BSSN_MP::ML_trace_curv", + "ML_BSSN_MP::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_Dissipation", 18, groups); switch(fdOrder) @@ -1401,7 +1321,7 @@ extern "C" void ML_BSSN_MP_Dissipation(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_MP_Dissipation_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_MP_Dissipation_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP/src/ML_BSSN_MP_InitGamma.cc b/ML_BSSN_MP/src/ML_BSSN_MP_InitGamma.cc index 3682656..f2ac982 100644 --- a/ML_BSSN_MP/src/ML_BSSN_MP_InitGamma.cc +++ b/ML_BSSN_MP/src/ML_BSSN_MP_InitGamma.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_MP_InitGamma_Body(cGH const * restrict const cctkGH, int con DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_MP_InitGamma_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_MP_InitGamma_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_MP_InitGamma_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_MP_InitGamma_Body(cGH const * restrict const cctkGH, int con /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_InitGamma, + LC_LOOP3VEC(ML_BSSN_MP_InitGamma, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -237,60 +236,17 @@ static void ML_BSSN_MP_InitGamma_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC B3L = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_MP_InitGamma); + LC_ENDLOOP3VEC(ML_BSSN_MP_InitGamma); } extern "C" void ML_BSSN_MP_InitGamma(CCTK_ARGUMENTS) @@ -309,7 +265,10 @@ extern "C" void ML_BSSN_MP_InitGamma(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP::ML_dtlapse","ML_BSSN_MP::ML_dtshift","ML_BSSN_MP::ML_Gamma"}; + const char *const groups[] = { + "ML_BSSN_MP::ML_dtlapse", + "ML_BSSN_MP::ML_dtshift", + "ML_BSSN_MP::ML_Gamma"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_InitGamma", 3, groups); switch(fdOrder) @@ -327,7 +286,7 @@ extern "C" void ML_BSSN_MP_InitGamma(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_MP_InitGamma_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_MP_InitGamma_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP/src/ML_BSSN_MP_InitRHS.cc b/ML_BSSN_MP/src/ML_BSSN_MP_InitRHS.cc index 6d682b2..8f86b3a 100644 --- a/ML_BSSN_MP/src/ML_BSSN_MP_InitRHS.cc +++ b/ML_BSSN_MP/src/ML_BSSN_MP_InitRHS.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_MP_InitRHS_Body(cGH const * restrict const cctkGH, int const DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_MP_InitRHS_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_MP_InitRHS_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_MP_InitRHS_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_MP_InitRHS_Body(cGH const * restrict const cctkGH, int const /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_InitRHS, + LC_LOOP3VEC(ML_BSSN_MP_InitRHS, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -273,132 +272,35 @@ static void ML_BSSN_MP_InitRHS_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC B3rhsL = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_InitRHS); + LC_ENDLOOP3VEC(ML_BSSN_MP_InitRHS); } extern "C" void ML_BSSN_MP_InitRHS(CCTK_ARGUMENTS) @@ -417,7 +319,16 @@ extern "C" void ML_BSSN_MP_InitRHS(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP::ML_curvrhs","ML_BSSN_MP::ML_dtlapserhs","ML_BSSN_MP::ML_dtshiftrhs","ML_BSSN_MP::ML_Gammarhs","ML_BSSN_MP::ML_lapserhs","ML_BSSN_MP::ML_log_confacrhs","ML_BSSN_MP::ML_metricrhs","ML_BSSN_MP::ML_shiftrhs","ML_BSSN_MP::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN_MP::ML_curvrhs", + "ML_BSSN_MP::ML_dtlapserhs", + "ML_BSSN_MP::ML_dtshiftrhs", + "ML_BSSN_MP::ML_Gammarhs", + "ML_BSSN_MP::ML_lapserhs", + "ML_BSSN_MP::ML_log_confacrhs", + "ML_BSSN_MP::ML_metricrhs", + "ML_BSSN_MP::ML_shiftrhs", + "ML_BSSN_MP::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_InitRHS", 9, groups); switch(fdOrder) @@ -435,7 +346,7 @@ extern "C" void ML_BSSN_MP_InitRHS(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_MP_InitRHS_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_MP_InitRHS_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP/src/ML_BSSN_MP_Minkowski.cc b/ML_BSSN_MP/src/ML_BSSN_MP_Minkowski.cc index 95d3c05..ad586eb 100644 --- a/ML_BSSN_MP/src/ML_BSSN_MP_Minkowski.cc +++ b/ML_BSSN_MP/src/ML_BSSN_MP_Minkowski.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_MP_Minkowski_Body(cGH const * restrict const cctkGH, int con DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_MP_Minkowski_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_MP_Minkowski_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_MP_Minkowski_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_MP_Minkowski_Body(cGH const * restrict const cctkGH, int con /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_Minkowski, + LC_LOOP3VEC(ML_BSSN_MP_Minkowski, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -273,132 +272,35 @@ static void ML_BSSN_MP_Minkowski_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC B3L = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(beta1[index],beta1L,elt_count); - vec_store_nta_partial_hi(beta2[index],beta2L,elt_count); - vec_store_nta_partial_hi(beta3[index],beta3L,elt_count); - vec_store_nta_partial_hi(gt11[index],gt11L,elt_count); - vec_store_nta_partial_hi(gt12[index],gt12L,elt_count); - vec_store_nta_partial_hi(gt13[index],gt13L,elt_count); - vec_store_nta_partial_hi(gt22[index],gt22L,elt_count); - vec_store_nta_partial_hi(gt23[index],gt23L,elt_count); - vec_store_nta_partial_hi(gt33[index],gt33L,elt_count); - vec_store_nta_partial_hi(phi[index],phiL,elt_count); - vec_store_nta_partial_hi(trK[index],trKL,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(beta1[index],beta1L,elt_count); - vec_store_nta_partial_lo(beta2[index],beta2L,elt_count); - vec_store_nta_partial_lo(beta3[index],beta3L,elt_count); - vec_store_nta_partial_lo(gt11[index],gt11L,elt_count); - vec_store_nta_partial_lo(gt12[index],gt12L,elt_count); - vec_store_nta_partial_lo(gt13[index],gt13L,elt_count); - vec_store_nta_partial_lo(gt22[index],gt22L,elt_count); - vec_store_nta_partial_lo(gt23[index],gt23L,elt_count); - vec_store_nta_partial_lo(gt33[index],gt33L,elt_count); - vec_store_nta_partial_lo(phi[index],phiL,elt_count); - vec_store_nta_partial_lo(trK[index],trKL,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(beta1[index],beta1L); - vec_store_nta(beta2[index],beta2L); - vec_store_nta(beta3[index],beta3L); - vec_store_nta(gt11[index],gt11L); - vec_store_nta(gt12[index],gt12L); - vec_store_nta(gt13[index],gt13L); - vec_store_nta(gt22[index],gt22L); - vec_store_nta(gt23[index],gt23L); - vec_store_nta(gt33[index],gt33L); - vec_store_nta(phi[index],phiL); - vec_store_nta(trK[index],trKL); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(beta1[index],beta1L); + vec_store_nta_partial(beta2[index],beta2L); + vec_store_nta_partial(beta3[index],beta3L); + vec_store_nta_partial(gt11[index],gt11L); + vec_store_nta_partial(gt12[index],gt12L); + vec_store_nta_partial(gt13[index],gt13L); + vec_store_nta_partial(gt22[index],gt22L); + vec_store_nta_partial(gt23[index],gt23L); + vec_store_nta_partial(gt33[index],gt33L); + vec_store_nta_partial(phi[index],phiL); + vec_store_nta_partial(trK[index],trKL); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_MP_Minkowski); + LC_ENDLOOP3VEC(ML_BSSN_MP_Minkowski); } extern "C" void ML_BSSN_MP_Minkowski(CCTK_ARGUMENTS) @@ -417,7 +319,16 @@ extern "C" void ML_BSSN_MP_Minkowski(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP::ML_curv","ML_BSSN_MP::ML_dtlapse","ML_BSSN_MP::ML_dtshift","ML_BSSN_MP::ML_Gamma","ML_BSSN_MP::ML_lapse","ML_BSSN_MP::ML_log_confac","ML_BSSN_MP::ML_metric","ML_BSSN_MP::ML_shift","ML_BSSN_MP::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_MP::ML_curv", + "ML_BSSN_MP::ML_dtlapse", + "ML_BSSN_MP::ML_dtshift", + "ML_BSSN_MP::ML_Gamma", + "ML_BSSN_MP::ML_lapse", + "ML_BSSN_MP::ML_log_confac", + "ML_BSSN_MP::ML_metric", + "ML_BSSN_MP::ML_shift", + "ML_BSSN_MP::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_Minkowski", 9, groups); switch(fdOrder) @@ -435,7 +346,7 @@ extern "C" void ML_BSSN_MP_Minkowski(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_MP_Minkowski_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_MP_Minkowski_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP/src/ML_BSSN_MP_RHS1.cc b/ML_BSSN_MP/src/ML_BSSN_MP_RHS1.cc index 4254668..36e1728 100644 --- a/ML_BSSN_MP/src/ML_BSSN_MP_RHS1.cc +++ b/ML_BSSN_MP/src/ML_BSSN_MP_RHS1.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -61,8 +62,6 @@ static void ML_BSSN_MP_RHS1_Body(cGH const * restrict const cctkGH, int const di DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -99,9 +98,9 @@ static void ML_BSSN_MP_RHS1_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -120,14 +119,14 @@ static void ML_BSSN_MP_RHS1_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -141,9 +140,9 @@ static void ML_BSSN_MP_RHS1_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -224,7 +223,7 @@ static void ML_BSSN_MP_RHS1_Body(cGH const * restrict const cctkGH, int const di /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_RHS1, + LC_LOOP3VEC(ML_BSSN_MP_RHS1, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1088,7 +1087,8 @@ static void ML_BSSN_MP_RHS1_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -1096,12 +1096,14 @@ static void ML_BSSN_MP_RHS1_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC Gtl111 = kmul(JacPDstandardNth1gt11,ToReal(0.5)); @@ -1307,13 +1309,13 @@ static void ML_BSSN_MP_RHS1_Body(cGH const * restrict const cctkGH, int const di kmul(ToReal(-0.666666666666666666666666666667),kmadd(kmadd(gt13L,JacPDstandardNth3beta1,kmul(gt23L,JacPDstandardNth3beta2)),ToReal(-3),kmadd(gt33L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth3beta3,ToReal(-2),JacPDstandardNth2beta2)),kmul(alphaL,kmul(At33L,ToReal(3)))))); CCTK_REAL_VEC dotXt1 = - kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu11,JacPDstandardNth1alpha,kmadd(Atu12,JacPDstandardNth2alpha,kmul(Atu13,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth2beta1,Xtn2,kmul(JacPDstandardNth3beta1,Xtn3)),ToReal(-3),kmadd(Xtn1,kmsub(kadd(JacPDstandardNth2beta2,JacPDstandardNth3beta3),ToReal(2),JacPDstandardNth1beta1),kmadd(kmadd(gtu12,JacPDstandardNth12beta1,kmadd(gtu13,JacPDstandardNth13beta1,kmadd(gtu22,JacPDstandardNth22beta1,kmadd(gtu23,kadd(JacPDstandardNth23beta1,JacPDstandardNth32beta1),kmul(gtu33,JacPDstandardNth33beta1))))),ToReal(3),kmadd(gtu11,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth11beta1,ToReal(4),JacPDstandardNth13beta3)),kmadd(gtu12,kadd(JacPDstandardNth22beta2,kmadd(JacPDstandardNth21beta1,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu13,kadd(JacPDstandardNth32beta2,kmadd(JacPDstandardNth31beta1,ToReal(4),JacPDstandardNth33beta3)),kmul(alphaL,kmadd(kmadd(gtu11,S1,kmadd(gtu12,S2,kmul(gtu13,S3))),ToReal(-150.7964473723100754462068823974161384415),kmadd(kmadd(gtu11,JacPDstandardNth1trK,kmadd(gtu12,JacPDstandardNth2trK,kmul(gtu13,JacPDstandardNth3trK))),ToReal(-4),kmadd(ToReal(6),kmadd(Atu22,Gt122,kmadd(Atu33,Gt133,kmul(Atu11,kmadd(cdphi1,ToReal(6),Gt111)))),kmadd(kmadd(Atu23,Gt123,kmul(Atu12,kmadd(cdphi2,ToReal(3),Gt112))),ToReal(12),kmul(Atu13,kmadd(Gt113,ToReal(12),kmul(cdphi3,ToReal(36))))))))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu11,JacPDstandardNth1alpha,kmadd(Atu12,JacPDstandardNth2alpha,kmul(Atu13,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth2beta1,Xtn2,kmul(JacPDstandardNth3beta1,Xtn3)),ToReal(-3),kmadd(Xtn1,kmsub(JacPDstandardNth3beta3,ToReal(2),JacPDstandardNth1beta1),kmadd(kmadd(gtu12,JacPDstandardNth12beta1,kmadd(gtu13,JacPDstandardNth13beta1,kmadd(gtu22,JacPDstandardNth22beta1,kmadd(gtu23,kadd(JacPDstandardNth23beta1,JacPDstandardNth32beta1),kmul(gtu33,JacPDstandardNth33beta1))))),ToReal(3),kmadd(gtu11,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth11beta1,ToReal(4),JacPDstandardNth13beta3)),kmadd(gtu12,kadd(JacPDstandardNth22beta2,kmadd(JacPDstandardNth21beta1,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu13,kadd(JacPDstandardNth32beta2,kmadd(JacPDstandardNth31beta1,ToReal(4),JacPDstandardNth33beta3)),kmul(ToReal(2),kmadd(JacPDstandardNth2beta2,Xtn1,kmul(alphaL,kmadd(kmadd(gtu11,JacPDstandardNth1trK,kmadd(gtu12,JacPDstandardNth2trK,kmul(gtu13,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu23,Gt123,kmul(Atu12,kmadd(cdphi2,ToReal(3),Gt112))),ToReal(6),kmadd(ToReal(3),kmadd(Atu22,Gt122,kmadd(Atu33,Gt133,kmul(Atu11,kmadd(cdphi1,ToReal(6),Gt111)))),kmadd(Atu13,kmadd(Gt113,ToReal(6),kmul(cdphi3,ToReal(18))),kmul(kmadd(gtu11,S1,kmadd(gtu12,S2,kmul(gtu13,S3))),kmul(ToReal(-24),ToReal(Pi)))))))))))))))))); CCTK_REAL_VEC dotXt2 = - kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu12,JacPDstandardNth1alpha,kmadd(Atu22,JacPDstandardNth2alpha,kmul(Atu23,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta2,Xtn1,kmul(JacPDstandardNth3beta2,Xtn3)),ToReal(-3),kmadd(Xtn2,kmsub(kadd(JacPDstandardNth1beta1,JacPDstandardNth3beta3),ToReal(2),JacPDstandardNth2beta2),kmadd(kmadd(gtu11,JacPDstandardNth11beta2,kmadd(gtu23,JacPDstandardNth23beta2,kmadd(gtu13,kadd(JacPDstandardNth13beta2,JacPDstandardNth31beta2),kmul(gtu33,JacPDstandardNth33beta2)))),ToReal(3),kmadd(gtu12,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth13beta3,kmadd(JacPDstandardNth21beta2,ToReal(3),kmul(JacPDstandardNth12beta2,ToReal(4))))),kmadd(gtu22,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth22beta2,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu23,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth32beta2,ToReal(4),JacPDstandardNth33beta3)),kmul(alphaL,kmadd(kmadd(gtu12,S1,kmadd(gtu22,S2,kmul(gtu23,S3))),ToReal(-150.7964473723100754462068823974161384415),kmadd(kmadd(gtu12,JacPDstandardNth1trK,kmadd(gtu22,JacPDstandardNth2trK,kmul(gtu23,JacPDstandardNth3trK))),ToReal(-4),kmadd(ToReal(6),kmadd(Atu11,Gt211,kmadd(Atu33,Gt233,kmul(Atu22,kmadd(cdphi2,ToReal(6),Gt222)))),kmadd(kmadd(Atu13,Gt213,kmul(Atu12,kmadd(cdphi1,ToReal(3),Gt212))),ToReal(12),kmul(Atu23,kmadd(Gt223,ToReal(12),kmul(cdphi3,ToReal(36))))))))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu12,JacPDstandardNth1alpha,kmadd(Atu22,JacPDstandardNth2alpha,kmul(Atu23,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta2,Xtn1,kmul(JacPDstandardNth3beta2,Xtn3)),ToReal(-3),kmadd(Xtn2,kmsub(JacPDstandardNth3beta3,ToReal(2),JacPDstandardNth2beta2),kmadd(kmadd(gtu11,JacPDstandardNth11beta2,kmadd(gtu23,JacPDstandardNth23beta2,kmadd(gtu13,kadd(JacPDstandardNth13beta2,JacPDstandardNth31beta2),kmul(gtu33,JacPDstandardNth33beta2)))),ToReal(3),kmadd(gtu12,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth13beta3,kmadd(JacPDstandardNth21beta2,ToReal(3),kmul(JacPDstandardNth12beta2,ToReal(4))))),kmadd(gtu22,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth22beta2,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu23,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth32beta2,ToReal(4),JacPDstandardNth33beta3)),kmul(ToReal(2),kmadd(JacPDstandardNth1beta1,Xtn2,kmul(alphaL,kmadd(kmadd(gtu12,JacPDstandardNth1trK,kmadd(gtu22,JacPDstandardNth2trK,kmul(gtu23,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu13,Gt213,kmul(Atu12,kmadd(cdphi1,ToReal(3),Gt212))),ToReal(6),kmadd(ToReal(3),kmadd(Atu11,Gt211,kmadd(Atu33,Gt233,kmul(Atu22,kmadd(cdphi2,ToReal(6),Gt222)))),kmadd(Atu23,kmadd(Gt223,ToReal(6),kmul(cdphi3,ToReal(18))),kmul(kmadd(gtu12,S1,kmadd(gtu22,S2,kmul(gtu23,S3))),kmul(ToReal(-24),ToReal(Pi)))))))))))))))))); CCTK_REAL_VEC dotXt3 = - kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu13,JacPDstandardNth1alpha,kmadd(Atu23,JacPDstandardNth2alpha,kmul(Atu33,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta3,Xtn1,kmul(JacPDstandardNth2beta3,Xtn2)),ToReal(-3),kmadd(Xtn3,kmsub(kadd(JacPDstandardNth1beta1,JacPDstandardNth2beta2),ToReal(2),JacPDstandardNth3beta3),kmadd(kmadd(gtu11,JacPDstandardNth11beta3,kmadd(gtu12,kadd(JacPDstandardNth12beta3,JacPDstandardNth21beta3),kmadd(gtu22,JacPDstandardNth22beta3,kmul(gtu23,JacPDstandardNth32beta3)))),ToReal(3),kmadd(gtu13,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth31beta3,ToReal(3),kmul(JacPDstandardNth13beta3,ToReal(4))))),kmadd(gtu23,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth23beta3,ToReal(4),JacPDstandardNth22beta2)),kmadd(gtu33,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth33beta3,ToReal(4),JacPDstandardNth32beta2)),kmul(alphaL,kmadd(kmadd(gtu13,S1,kmadd(gtu23,S2,kmul(gtu33,S3))),ToReal(-150.7964473723100754462068823974161384415),kmadd(kmadd(gtu13,JacPDstandardNth1trK,kmadd(gtu23,JacPDstandardNth2trK,kmul(gtu33,JacPDstandardNth3trK))),ToReal(-4),kmadd(kmadd(Atu11,Gt311,kmul(Atu22,Gt322)),ToReal(6),kmadd(kmadd(Atu12,Gt312,kmadd(Atu13,kmadd(cdphi1,ToReal(3),Gt313),kmul(Atu23,kmadd(cdphi2,ToReal(3),Gt323)))),ToReal(12),kmul(Atu33,kmadd(Gt333,ToReal(6),kmul(cdphi3,ToReal(36))))))))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu13,JacPDstandardNth1alpha,kmadd(Atu23,JacPDstandardNth2alpha,kmul(Atu33,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta3,Xtn1,kmul(JacPDstandardNth2beta3,Xtn2)),ToReal(-3),kmadd(Xtn3,kmsub(JacPDstandardNth2beta2,ToReal(2),JacPDstandardNth3beta3),kmadd(kmadd(gtu11,JacPDstandardNth11beta3,kmadd(gtu12,kadd(JacPDstandardNth12beta3,JacPDstandardNth21beta3),kmadd(gtu22,JacPDstandardNth22beta3,kmul(gtu23,JacPDstandardNth32beta3)))),ToReal(3),kmadd(gtu13,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth31beta3,ToReal(3),kmul(JacPDstandardNth13beta3,ToReal(4))))),kmadd(gtu23,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth23beta3,ToReal(4),JacPDstandardNth22beta2)),kmadd(gtu33,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth33beta3,ToReal(4),JacPDstandardNth32beta2)),kmul(ToReal(2),kmadd(JacPDstandardNth1beta1,Xtn3,kmul(alphaL,kmadd(kmadd(gtu13,JacPDstandardNth1trK,kmadd(gtu23,JacPDstandardNth2trK,kmul(gtu33,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu11,Gt311,kmul(Atu22,Gt322)),ToReal(3),kmadd(kmadd(Atu12,Gt312,kmadd(Atu13,kmadd(cdphi1,ToReal(3),Gt313),kmul(Atu23,kmadd(cdphi2,ToReal(3),Gt323)))),ToReal(6),kmadd(Atu33,kmadd(Gt333,ToReal(3),kmul(cdphi3,ToReal(18))),kmul(kmadd(gtu13,S1,kmadd(gtu23,S2,kmul(gtu33,S3))),kmul(ToReal(-24),ToReal(Pi)))))))))))))))))); CCTK_REAL_VEC Xt1rhsL = dotXt1; @@ -1322,18 +1324,18 @@ static void ML_BSSN_MP_RHS1_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC Xt3rhsL = dotXt3; CCTK_REAL_VEC dottrK = - kmsub(alphaL,kadd(SQR(Atm11),kadd(SQR(Atm22),kadd(SQR(Atm33),kmadd(SQR(trKL),ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(2),kmul(kadd(rho,trS),ToReal(12.56637061435917295385057353311801153679))))))),kmul(em4phi,knmsub(JacPDstandardNth1alpha,Xtn1,knmsub(JacPDstandardNth2alpha,Xtn2,knmsub(JacPDstandardNth3alpha,Xtn3,kmadd(gtu11,kmadd(cdphi1,kmul(JacPDstandardNth1alpha,ToReal(2)),JacPDstandardNth11alpha),kmadd(gtu12,kadd(JacPDstandardNth12alpha,kadd(JacPDstandardNth21alpha,kmadd(cdphi2,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth2alpha,ToReal(2)))))),kmadd(gtu22,kmadd(cdphi2,kmul(JacPDstandardNth2alpha,ToReal(2)),JacPDstandardNth22alpha),kmadd(gtu13,kadd(JacPDstandardNth13alpha,kadd(JacPDstandardNth31alpha,kmadd(cdphi3,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmadd(gtu23,kadd(JacPDstandardNth23alpha,kadd(JacPDstandardNth32alpha,kmadd(cdphi3,kmul(JacPDstandardNth2alpha,ToReal(2)),kmul(cdphi2,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmul(gtu33,kmadd(cdphi3,kmul(JacPDstandardNth3alpha,ToReal(2)),JacPDstandardNth33alpha)))))))))))); + kmsub(alphaL,kadd(SQR(Atm11),kadd(SQR(Atm22),kadd(SQR(Atm33),kmadd(SQR(trKL),ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(2),kmul(kadd(rho,trS),kmul(ToReal(4),ToReal(Pi)))))))),kmul(em4phi,knmsub(JacPDstandardNth1alpha,Xtn1,knmsub(JacPDstandardNth2alpha,Xtn2,knmsub(JacPDstandardNth3alpha,Xtn3,kmadd(gtu11,kmadd(cdphi1,kmul(JacPDstandardNth1alpha,ToReal(2)),JacPDstandardNth11alpha),kmadd(gtu12,kadd(JacPDstandardNth12alpha,kadd(JacPDstandardNth21alpha,kmadd(cdphi2,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth2alpha,ToReal(2)))))),kmadd(gtu22,kmadd(cdphi2,kmul(JacPDstandardNth2alpha,ToReal(2)),JacPDstandardNth22alpha),kmadd(gtu13,kadd(JacPDstandardNth13alpha,kadd(JacPDstandardNth31alpha,kmadd(cdphi3,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmadd(gtu23,kadd(JacPDstandardNth23alpha,kadd(JacPDstandardNth32alpha,kmadd(cdphi3,kmul(JacPDstandardNth2alpha,ToReal(2)),kmul(cdphi2,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmul(gtu33,kmadd(cdphi3,kmul(JacPDstandardNth3alpha,ToReal(2)),JacPDstandardNth33alpha)))))))))))); CCTK_REAL_VEC trKrhsL = dottrK; CCTK_REAL_VEC alpharhsL = - kneg(kmul(kpow(alphaL,harmonicN),kmul(ToReal(harmonicF),kmadd(ksub(AL,trKL),ToReal(LapseACoeff),trKL)))); + kneg(kmul(kpow(alphaL,harmonicN),kmul(ToReal(harmonicF),kmadd(AL,ToReal(LapseACoeff),kmul(kmadd(kadd(alphaL,ToReal(-1)),ToReal(AlphaDriver),trKL),ksub(ToReal(1),ToReal(LapseACoeff))))))); CCTK_REAL_VEC ArhsL = kmul(knmsub(AL,ToReal(AlphaDriver),dottrK),ToReal(LapseACoeff)); CCTK_REAL_VEC eta = - kfmin(ToReal(1),kmul(INV(rL),ToReal(SpatialBetaDriverRadius))); + kmul(INV(kfmax(rL,ToReal(SpatialBetaDriverRadius))),ToReal(SpatialBetaDriverRadius)); CCTK_REAL_VEC theta = kfmin(ToReal(1),kexp(knmsub(rL,INV(ToReal(SpatialShiftGammaCoeffRadius)),ToReal(1)))); @@ -1345,27 +1347,24 @@ static void ML_BSSN_MP_RHS1_Body(cGH const * restrict const cctkGH, int const di if (harmonicShift) { beta1rhsL = - kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(kmadd(gtu11,JacPDstandardNth1alpha,kmadd(gtu12,JacPDstandardNth2alpha,kmul(gtu13,JacPDstandardNth3alpha))),kmul(phiL,ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth1gt11,SQR(gtu11),kmul(JacPDstandardNth1gt22,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu13,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu13,JacPDstandardNth1gt33,kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu22,JacPDstandardNth3gt22))))),kmadd(gtu11,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu12,JacPDstandardNth2gt11,kmadd(gtu13,JacPDstandardNth3gt11,kmadd(gtu23,kmul(JacPDstandardNth1gt23,ToReal(-2)),knmsub(gtu22,JacPDstandardNth1gt22,kmadd(kmadd(gtu12,JacPDstandardNth1gt12,kmadd(gtu13,JacPDstandardNth1gt13,kmul(gtu22,JacPDstandardNth2gt12))),ToReal(2),kmadd(gtu23,kmul(JacPDstandardNth2gt13,ToReal(2)),kmadd(gtu23,kmul(JacPDstandardNth3gt12,ToReal(2)),kmul(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33))))))))))),kmul(gtu12,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu23,kmul(JacPDstandardNth3gt22,ToReal(2)),kmadd(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33),kmul(gtu13,kmul(JacPDstandardNth1gt23,ToReal(4))))))))))))))))); + kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu11,JacPDstandardNth1alpha,kmadd(gtu12,JacPDstandardNth2alpha,kmul(gtu13,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth1gt11,SQR(gtu11),kmul(JacPDstandardNth1gt22,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu13,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu13,JacPDstandardNth1gt33,kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu22,JacPDstandardNth3gt22))))),kmadd(gtu11,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu12,JacPDstandardNth2gt11,kmadd(gtu13,JacPDstandardNth3gt11,kmadd(gtu23,kmul(JacPDstandardNth1gt23,ToReal(-2)),knmsub(gtu22,JacPDstandardNth1gt22,kmadd(kmadd(gtu12,JacPDstandardNth1gt12,kmadd(gtu13,JacPDstandardNth1gt13,kmul(gtu22,JacPDstandardNth2gt12))),ToReal(2),kmadd(gtu23,kmul(JacPDstandardNth2gt13,ToReal(2)),kmadd(gtu23,kmul(JacPDstandardNth3gt12,ToReal(2)),kmul(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33))))))))))),kmul(gtu12,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu23,kmul(JacPDstandardNth3gt22,ToReal(2)),kmadd(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33),kmul(gtu13,kmul(JacPDstandardNth1gt23,ToReal(4))))))))))))))))); beta2rhsL = - kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(kmadd(gtu12,JacPDstandardNth1alpha,kmadd(gtu22,JacPDstandardNth2alpha,kmul(gtu23,JacPDstandardNth3alpha))),kmul(phiL,ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth2gt22,SQR(gtu22),kmul(JacPDstandardNth2gt11,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu23,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt13,kmadd(gtu13,JacPDstandardNth1gt33,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu11,JacPDstandardNth3gt11))))),kmadd(gtu22,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu23,JacPDstandardNth3gt22,kmadd(kmadd(gtu23,JacPDstandardNth2gt23,kmul(gtu13,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth3gt12,JacPDstandardNth2gt13)))),ToReal(2),kmadd(gtu11,kmsub(JacPDstandardNth1gt12,ToReal(2),JacPDstandardNth2gt11),kmul(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33))))))),kmul(gtu12,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu13,kmul(JacPDstandardNth3gt11,ToReal(2)),kmadd(gtu22,kmadd(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth2gt13,ToReal(4)))))))))))))))))); + kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu12,JacPDstandardNth1alpha,kmadd(gtu22,JacPDstandardNth2alpha,kmul(gtu23,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth2gt22,SQR(gtu22),kmul(JacPDstandardNth2gt11,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu23,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt13,kmadd(gtu13,JacPDstandardNth1gt33,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu11,JacPDstandardNth3gt11))))),kmadd(gtu22,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu23,JacPDstandardNth3gt22,kmadd(kmadd(gtu23,JacPDstandardNth2gt23,kmul(gtu13,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth3gt12,JacPDstandardNth2gt13)))),ToReal(2),kmadd(gtu11,kmsub(JacPDstandardNth1gt12,ToReal(2),JacPDstandardNth2gt11),kmul(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33))))))),kmul(gtu12,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu13,kmul(JacPDstandardNth3gt11,ToReal(2)),kmadd(gtu22,kmadd(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth2gt13,ToReal(4)))))))))))))))))); beta3rhsL = - kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(kmadd(gtu13,JacPDstandardNth1alpha,kmadd(gtu23,JacPDstandardNth2alpha,kmul(gtu33,JacPDstandardNth3alpha))),kmul(phiL,ToReal(-2)),kmul(alphaL,kmadd(phiL,kmul(kmadd(JacPDstandardNth3gt11,SQR(gtu13),kmul(JacPDstandardNth3gt22,SQR(gtu23))),ToReal(2)),kmadd(gtu23,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu33,JacPDstandardNth2gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt12,kmadd(gtu12,JacPDstandardNth1gt22,kmul(gtu33,JacPDstandardNth3gt23))),ToReal(2),kmul(gtu11,JacPDstandardNth2gt11)))))),kmadd(gtu33,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,knmsub(gtu22,JacPDstandardNth3gt22,kmadd(kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu12,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth2gt13,JacPDstandardNth3gt12)))),ToReal(2),kmul(gtu11,kmsub(JacPDstandardNth1gt13,ToReal(2),JacPDstandardNth3gt11))))))),kmul(gtu13,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu12,kmul(JacPDstandardNth2gt11,ToReal(2)),kmadd(gtu22,kmsub(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmadd(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth3gt12,ToReal(4)))))))))))))))))); + kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu13,JacPDstandardNth1alpha,kmadd(gtu23,JacPDstandardNth2alpha,kmul(gtu33,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmul(kmadd(JacPDstandardNth3gt11,SQR(gtu13),kmul(JacPDstandardNth3gt22,SQR(gtu23))),ToReal(2)),kmadd(gtu23,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu33,JacPDstandardNth2gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt12,kmadd(gtu12,JacPDstandardNth1gt22,kmul(gtu33,JacPDstandardNth3gt23))),ToReal(2),kmul(gtu11,JacPDstandardNth2gt11)))))),kmadd(gtu33,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,knmsub(gtu22,JacPDstandardNth3gt22,kmadd(kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu12,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth2gt13,JacPDstandardNth3gt12)))),ToReal(2),kmul(gtu11,kmsub(JacPDstandardNth1gt13,ToReal(2),JacPDstandardNth3gt11))))))),kmul(gtu13,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu12,kmul(JacPDstandardNth2gt11,ToReal(2)),kmadd(gtu22,kmsub(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmadd(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth3gt12,ToReal(4)))))))))))))))))); } else { beta1rhsL = - kmul(theta,kmul(kadd(Xt1L,kmadd(beta1L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B1L,Xt1L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt1L,kmadd(ksub(B1L,Xt1L),ToReal(ShiftBCoeff),kmul(beta1L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); beta2rhsL = - kmul(theta,kmul(kadd(Xt2L,kmadd(beta2L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B2L,Xt2L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt2L,kmadd(ksub(B2L,Xt2L),ToReal(ShiftBCoeff),kmul(beta2L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); beta3rhsL = - kmul(theta,kmul(kadd(Xt3L,kmadd(beta3L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B3L,Xt3L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); } CCTK_REAL_VEC B1rhsL = @@ -1377,108 +1376,29 @@ static void ML_BSSN_MP_RHS1_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC B3rhsL = kmul(knmsub(B3L,kmul(eta,ToReal(BetaDriver)),dotXt3),ToReal(ShiftBCoeff)); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_RHS1); + LC_ENDLOOP3VEC(ML_BSSN_MP_RHS1); } extern "C" void ML_BSSN_MP_RHS1(CCTK_ARGUMENTS) @@ -1497,7 +1417,26 @@ extern "C" void ML_BSSN_MP_RHS1(CCTK_ARGUMENTS) return; } - const char *groups[] = {"grid::coordinates","Grid::coordinates","ML_BSSN_MP::ML_curv","ML_BSSN_MP::ML_dtlapse","ML_BSSN_MP::ML_dtlapserhs","ML_BSSN_MP::ML_dtshift","ML_BSSN_MP::ML_dtshiftrhs","ML_BSSN_MP::ML_Gamma","ML_BSSN_MP::ML_Gammarhs","ML_BSSN_MP::ML_lapse","ML_BSSN_MP::ML_lapserhs","ML_BSSN_MP::ML_log_confac","ML_BSSN_MP::ML_log_confacrhs","ML_BSSN_MP::ML_metric","ML_BSSN_MP::ML_metricrhs","ML_BSSN_MP::ML_shift","ML_BSSN_MP::ML_shiftrhs","ML_BSSN_MP::ML_trace_curv","ML_BSSN_MP::ML_trace_curvrhs"}; + const char *const groups[] = { + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_MP::ML_curv", + "ML_BSSN_MP::ML_dtlapse", + "ML_BSSN_MP::ML_dtlapserhs", + "ML_BSSN_MP::ML_dtshift", + "ML_BSSN_MP::ML_dtshiftrhs", + "ML_BSSN_MP::ML_Gamma", + "ML_BSSN_MP::ML_Gammarhs", + "ML_BSSN_MP::ML_lapse", + "ML_BSSN_MP::ML_lapserhs", + "ML_BSSN_MP::ML_log_confac", + "ML_BSSN_MP::ML_log_confacrhs", + "ML_BSSN_MP::ML_metric", + "ML_BSSN_MP::ML_metricrhs", + "ML_BSSN_MP::ML_shift", + "ML_BSSN_MP::ML_shiftrhs", + "ML_BSSN_MP::ML_trace_curv", + "ML_BSSN_MP::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_RHS1", 19, groups); switch(fdOrder) @@ -1519,7 +1458,7 @@ extern "C" void ML_BSSN_MP_RHS1(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_MP_RHS1_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_MP_RHS1_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP/src/ML_BSSN_MP_RHS2.cc b/ML_BSSN_MP/src/ML_BSSN_MP_RHS2.cc index b0e6910..ab1f104 100644 --- a/ML_BSSN_MP/src/ML_BSSN_MP_RHS2.cc +++ b/ML_BSSN_MP/src/ML_BSSN_MP_RHS2.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -40,8 +41,6 @@ static void ML_BSSN_MP_RHS2_Body(cGH const * restrict const cctkGH, int const di DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -78,9 +77,9 @@ static void ML_BSSN_MP_RHS2_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -99,14 +98,14 @@ static void ML_BSSN_MP_RHS2_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -120,9 +119,9 @@ static void ML_BSSN_MP_RHS2_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -203,7 +202,7 @@ static void ML_BSSN_MP_RHS2_Body(cGH const * restrict const cctkGH, int const di /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_RHS2, + LC_LOOP3VEC(ML_BSSN_MP_RHS2, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1420,7 +1419,8 @@ static void ML_BSSN_MP_RHS2_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -1428,12 +1428,14 @@ static void ML_BSSN_MP_RHS2_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC Gtl111 = kmul(JacPDstandardNth1gt11,ToReal(0.5)); @@ -1676,16 +1678,16 @@ static void ML_BSSN_MP_RHS2_Body(cGH const * restrict const cctkGH, int const di kmul(ToReal(-2),kadd(cdphi211,kmadd(SQR(cdphi1),kmul(kmadd(gt11L,gtu11,ToReal(-1)),ToReal(2)),kmul(gt11L,kmadd(cdphi211,gtu11,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu33,SQR(cdphi3))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmul(kmadd(cdphi1,kmadd(cdphi2,gtu12,kmul(cdphi3,gtu13)),kmul(cdphi2,kmul(cdphi3,gtu23))),ToReal(4)))))))))); CCTK_REAL_VEC Rphi12 = - kmul(ToReal(-2),kadd(cdphi212,kmadd(gt12L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(cdphi3,kmul(gt12L,kmul(gtu13,ToReal(4))),kmul(cdphi2,kmadd(gt12L,kmul(gtu12,ToReal(4)),ToReal(-2)))))))); + kmul(ToReal(-2),kadd(cdphi212,kmadd(gt12L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(gt12L,kmul(cdphi3,kmul(gtu13,ToReal(4))),kmul(cdphi2,kmadd(gt12L,kmul(gtu12,ToReal(4)),ToReal(-2)))))))); CCTK_REAL_VEC Rphi13 = - kmul(ToReal(-2),kadd(cdphi213,kmadd(gt13L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(cdphi2,kmul(gt13L,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt13L,kmul(gtu13,ToReal(4)),ToReal(-2)))))))); + kmul(ToReal(-2),kadd(cdphi213,kmadd(gt13L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(gt13L,kmul(cdphi2,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt13L,kmul(gtu13,ToReal(4)),ToReal(-2)))))))); CCTK_REAL_VEC Rphi22 = kmul(ToReal(-2),kadd(cdphi222,kmadd(SQR(cdphi2),kmul(kmadd(gt22L,gtu22,ToReal(-1)),ToReal(2)),kmul(gt22L,kmadd(cdphi222,gtu22,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu33,SQR(cdphi3))))),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmul(kmadd(cdphi1,kmul(cdphi3,gtu13),kmul(cdphi2,kmadd(cdphi1,gtu12,kmul(cdphi3,gtu23)))),ToReal(4)))))))))); CCTK_REAL_VEC Rphi23 = - kmul(ToReal(-2),kadd(cdphi223,kmadd(gt23L,kmadd(cdphi222,gtu22,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu22,SQR(cdphi2))))),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi1,kmul(cdphi3,kmul(gtu13,ToReal(4)))))))),kmul(cdphi2,kmadd(cdphi1,kmul(gt23L,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt23L,kmul(gtu23,ToReal(4)),ToReal(-2)))))))); + kmul(ToReal(-2),kadd(cdphi223,kmadd(gt23L,kmadd(cdphi222,gtu22,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu22,SQR(cdphi2))))),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi1,kmul(cdphi3,kmul(gtu13,ToReal(4)))))))),kmul(cdphi2,kmadd(gt23L,kmul(cdphi1,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt23L,kmul(gtu23,ToReal(4)),ToReal(-2)))))))); CCTK_REAL_VEC Rphi33 = kmul(ToReal(-2),kadd(cdphi233,kmadd(SQR(cdphi3),kmul(kmadd(gt33L,gtu33,ToReal(-1)),ToReal(2)),kmul(gt33L,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi213,gtu13,kmul(cdphi223,gtu23)),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(cdphi3,kmul(kmadd(cdphi1,gtu13,kmul(cdphi2,gtu23)),ToReal(4)),kmul(gtu12,kmadd(cdphi212,ToReal(2),kmul(cdphi1,kmul(cdphi2,ToReal(4)))))))))))))); @@ -1722,17 +1724,17 @@ static void ML_BSSN_MP_RHS2_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC em4phi = INV(e4phi); - CCTK_REAL_VEC g11 = kmul(e4phi,gt11L); + CCTK_REAL_VEC g11 = kmul(gt11L,e4phi); - CCTK_REAL_VEC g12 = kmul(e4phi,gt12L); + CCTK_REAL_VEC g12 = kmul(gt12L,e4phi); - CCTK_REAL_VEC g13 = kmul(e4phi,gt13L); + CCTK_REAL_VEC g13 = kmul(gt13L,e4phi); - CCTK_REAL_VEC g22 = kmul(e4phi,gt22L); + CCTK_REAL_VEC g22 = kmul(gt22L,e4phi); - CCTK_REAL_VEC g23 = kmul(e4phi,gt23L); + CCTK_REAL_VEC g23 = kmul(gt23L,e4phi); - CCTK_REAL_VEC g33 = kmul(e4phi,gt33L); + CCTK_REAL_VEC g33 = kmul(gt33L,e4phi); CCTK_REAL_VEC gu11 = kmul(em4phi,gtu11); @@ -1783,73 +1785,33 @@ static void ML_BSSN_MP_RHS2_Body(cGH const * restrict const cctkGH, int const di kmadd(Ats11,gu11,kmadd(Ats22,gu22,kmadd(Ats33,gu33,kmul(kmadd(Ats12,gu12,kmadd(Ats13,gu13,kmul(Ats23,gu23))),ToReal(2))))); CCTK_REAL_VEC At11rhsL = - kmadd(em4phi,kmadd(g11,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),Ats11),kmadd(At11L,kmadd(kadd(JacPDstandardNth2beta2,JacPDstandardNth3beta3),ToReal(-0.6666666666666666666666666666666666666667),kmul(JacPDstandardNth1beta1,ToReal(1.333333333333333333333333333333333333333))),kmadd(kmadd(At12L,JacPDstandardNth1beta2,kmul(At13L,JacPDstandardNth1beta3)),ToReal(2.),kmul(alphaL,kmadd(kmadd(At12L,Atm21,kmul(At13L,Atm31)),ToReal(-2.),kmadd(At11L,kmadd(Atm11,ToReal(-2.),trKL),kmul(em4phi,kmadd(eTxxL,ToReal(-25.13274122871834590770114706623602307358),kmul(g11,kmul(trS,ToReal(8.377580409572781969233715688745341024526))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats11,ToReal(3),kmul(g11,trAts)),kmadd(At11L,kmadd(kadd(JacPDstandardNth2beta2,JacPDstandardNth3beta3),ToReal(-2),kmul(JacPDstandardNth1beta1,ToReal(4))),kmsub(kmadd(At12L,JacPDstandardNth1beta2,kmul(At13L,JacPDstandardNth1beta3)),ToReal(6),kmul(alphaL,kmadd(kmadd(At12L,Atm21,kmul(At13L,Atm31)),ToReal(6),kmadd(At11L,kmadd(trKL,ToReal(-3),kmul(Atm11,ToReal(6))),kmul(em4phi,kmul(kmadd(g11,kmul(trS,ToReal(-8)),kmul(eTxxL,ToReal(24))),ToReal(Pi)))))))))); CCTK_REAL_VEC At12rhsL = - kmadd(At22L,JacPDstandardNth1beta2,kmadd(At23L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth2beta1,kmadd(At13L,JacPDstandardNth2beta3,kmadd(em4phi,kmadd(g12,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),Ats12),kmadd(At12L,kmadd(JacPDstandardNth3beta3,ToReal(-0.6666666666666666666666666666666666666667),kmul(kadd(JacPDstandardNth1beta1,JacPDstandardNth2beta2),ToReal(0.3333333333333333333333333333333333333333))),kmul(alphaL,kmadd(At12L,trKL,kmadd(kmadd(At11L,Atm12,kmadd(At12L,Atm22,kmul(At13L,Atm32))),ToReal(-2.),kmul(em4phi,kmadd(eTxyL,ToReal(-25.13274122871834590770114706623602307358),kmul(g12,kmul(trS,ToReal(8.377580409572781969233715688745341024526)))))))))))))); + kmadd(ToReal(0.333333333333333333333333333333),kmadd(At12L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth3beta3,ToReal(-2),JacPDstandardNth2beta2)),kmsub(kmadd(Ats12,em4phi,kmadd(At22L,JacPDstandardNth1beta2,kmadd(At23L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth2beta1,kmul(At13L,JacPDstandardNth2beta3))))),ToReal(3),kmul(em4phi,kmul(g12,trAts)))),kmul(alphaL,kmadd(kmadd(At11L,Atm12,kmul(At13L,Atm32)),ToReal(-2),kmadd(At12L,kmadd(Atm22,ToReal(-2),trKL),kmul(em4phi,kmadd(g12,kmul(trS,ToReal(8.37758040957278196923371568875)),kmul(eTxyL,kmul(ToReal(-8),ToReal(Pi))))))))); CCTK_REAL_VEC At13rhsL = - kmadd(At23L,JacPDstandardNth1beta2,kmadd(At33L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth3beta1,kmadd(At12L,JacPDstandardNth3beta2,kmadd(em4phi,kmadd(g13,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),Ats13),kmadd(At13L,kmadd(JacPDstandardNth2beta2,ToReal(-0.6666666666666666666666666666666666666667),kmul(kadd(JacPDstandardNth1beta1,JacPDstandardNth3beta3),ToReal(0.3333333333333333333333333333333333333333))),kmul(alphaL,kmadd(At13L,trKL,kmadd(kmadd(At11L,Atm13,kmadd(At12L,Atm23,kmul(At13L,Atm33))),ToReal(-2.),kmul(em4phi,kmadd(eTxzL,ToReal(-25.13274122871834590770114706623602307358),kmul(g13,kmul(trS,ToReal(8.377580409572781969233715688745341024526)))))))))))))); + kmadd(ToReal(0.333333333333333333333333333333),kmadd(At13L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth2beta2,ToReal(-2),JacPDstandardNth3beta3)),kmsub(kmadd(Ats13,em4phi,kmadd(At23L,JacPDstandardNth1beta2,kmadd(At33L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth3beta1,kmul(At12L,JacPDstandardNth3beta2))))),ToReal(3),kmul(em4phi,kmul(g13,trAts)))),kmul(alphaL,kmadd(kmadd(At11L,Atm13,kmul(At12L,Atm23)),ToReal(-2),kmadd(At13L,kmadd(Atm33,ToReal(-2),trKL),kmul(em4phi,kmadd(g13,kmul(trS,ToReal(8.37758040957278196923371568875)),kmul(eTxzL,kmul(ToReal(-8),ToReal(Pi))))))))); CCTK_REAL_VEC At22rhsL = - kmadd(em4phi,kmadd(g22,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),Ats22),kmadd(At22L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth3beta3),ToReal(-0.6666666666666666666666666666666666666667),kmul(JacPDstandardNth2beta2,ToReal(1.333333333333333333333333333333333333333))),kmadd(kmadd(At12L,JacPDstandardNth2beta1,kmul(At23L,JacPDstandardNth2beta3)),ToReal(2.),kmul(alphaL,kmadd(At22L,trKL,kmadd(kmadd(At12L,Atm12,kmadd(At22L,Atm22,kmul(At23L,Atm32))),ToReal(-2.),kmul(em4phi,kmadd(eTyyL,ToReal(-25.13274122871834590770114706623602307358),kmul(g22,kmul(trS,ToReal(8.377580409572781969233715688745341024526))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats22,ToReal(3),kmul(g22,trAts)),kmadd(At22L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth3beta3),ToReal(-2),kmul(JacPDstandardNth2beta2,ToReal(4))),kmsub(kmadd(At12L,JacPDstandardNth2beta1,kmul(At23L,JacPDstandardNth2beta3)),ToReal(6),kmul(alphaL,kmadd(kmadd(At12L,Atm12,kmul(At23L,Atm32)),ToReal(6),kmadd(At22L,kmadd(trKL,ToReal(-3),kmul(Atm22,ToReal(6))),kmul(em4phi,kmul(kmadd(g22,kmul(trS,ToReal(-8)),kmul(eTyyL,ToReal(24))),ToReal(Pi)))))))))); CCTK_REAL_VEC At23rhsL = - kmadd(At13L,JacPDstandardNth2beta1,kmadd(At33L,JacPDstandardNth2beta3,kmadd(At12L,JacPDstandardNth3beta1,kmadd(At22L,JacPDstandardNth3beta2,kmadd(em4phi,kmadd(g23,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),Ats23),kmadd(At23L,kmadd(JacPDstandardNth1beta1,ToReal(-0.6666666666666666666666666666666666666667),kmul(kadd(JacPDstandardNth2beta2,JacPDstandardNth3beta3),ToReal(0.3333333333333333333333333333333333333333))),kmul(alphaL,kmadd(At23L,trKL,kmadd(kmadd(At12L,Atm13,kmadd(At22L,Atm23,kmul(At23L,Atm33))),ToReal(-2.),kmul(em4phi,kmadd(eTyzL,ToReal(-25.13274122871834590770114706623602307358),kmul(g23,kmul(trS,ToReal(8.377580409572781969233715688745341024526)))))))))))))); + kmadd(ToReal(0.333333333333333333333333333333),kmadd(At23L,kadd(JacPDstandardNth2beta2,kmadd(JacPDstandardNth1beta1,ToReal(-2),JacPDstandardNth3beta3)),kmsub(kmadd(Ats23,em4phi,kmadd(At13L,JacPDstandardNth2beta1,kmadd(At33L,JacPDstandardNth2beta3,kmadd(At12L,JacPDstandardNth3beta1,kmul(At22L,JacPDstandardNth3beta2))))),ToReal(3),kmul(em4phi,kmul(g23,trAts)))),kmul(alphaL,kmadd(kmadd(At12L,Atm13,kmul(At22L,Atm23)),ToReal(-2),kmadd(At23L,kmadd(Atm33,ToReal(-2),trKL),kmul(em4phi,kmadd(g23,kmul(trS,ToReal(8.37758040957278196923371568875)),kmul(eTyzL,kmul(ToReal(-8),ToReal(Pi))))))))); CCTK_REAL_VEC At33rhsL = - kmadd(em4phi,kmadd(g33,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),Ats33),kmadd(At33L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth2beta2),ToReal(-0.6666666666666666666666666666666666666667),kmul(JacPDstandardNth3beta3,ToReal(1.333333333333333333333333333333333333333))),kmadd(kmadd(At13L,JacPDstandardNth3beta1,kmul(At23L,JacPDstandardNth3beta2)),ToReal(2.),kmul(alphaL,kmadd(At33L,trKL,kmadd(kmadd(At13L,Atm13,kmadd(At23L,Atm23,kmul(At33L,Atm33))),ToReal(-2.),kmul(em4phi,kmadd(eTzzL,ToReal(-25.13274122871834590770114706623602307358),kmul(g33,kmul(trS,ToReal(8.377580409572781969233715688745341024526))))))))))); - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - break; - } - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); + kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats33,ToReal(3),kmul(g33,trAts)),kmadd(At33L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth2beta2),ToReal(-2),kmul(JacPDstandardNth3beta3,ToReal(4))),kmsub(kmadd(At13L,JacPDstandardNth3beta1,kmul(At23L,JacPDstandardNth3beta2)),ToReal(6),kmul(alphaL,kmadd(kmadd(At13L,Atm13,kmul(At23L,Atm23)),ToReal(6),kmadd(At33L,kmadd(trKL,ToReal(-3),kmul(Atm33,ToReal(6))),kmul(em4phi,kmul(kmadd(g33,kmul(trS,ToReal(-8)),kmul(eTzzL,ToReal(24))),ToReal(Pi)))))))))); + + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_RHS2); + LC_ENDLOOP3VEC(ML_BSSN_MP_RHS2); } extern "C" void ML_BSSN_MP_RHS2(CCTK_ARGUMENTS) @@ -1868,7 +1830,15 @@ extern "C" void ML_BSSN_MP_RHS2(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP::ML_curv","ML_BSSN_MP::ML_curvrhs","ML_BSSN_MP::ML_Gamma","ML_BSSN_MP::ML_lapse","ML_BSSN_MP::ML_log_confac","ML_BSSN_MP::ML_metric","ML_BSSN_MP::ML_shift","ML_BSSN_MP::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_MP::ML_curv", + "ML_BSSN_MP::ML_curvrhs", + "ML_BSSN_MP::ML_Gamma", + "ML_BSSN_MP::ML_lapse", + "ML_BSSN_MP::ML_log_confac", + "ML_BSSN_MP::ML_metric", + "ML_BSSN_MP::ML_shift", + "ML_BSSN_MP::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_RHS2", 8, groups); switch(fdOrder) @@ -1890,7 +1860,7 @@ extern "C" void ML_BSSN_MP_RHS2(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_MP_RHS2_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_MP_RHS2_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP/src/ML_BSSN_MP_RHSStaticBoundary.cc b/ML_BSSN_MP/src/ML_BSSN_MP_RHSStaticBoundary.cc index 88986ad..2d72cf2 100644 --- a/ML_BSSN_MP/src/ML_BSSN_MP_RHSStaticBoundary.cc +++ b/ML_BSSN_MP/src/ML_BSSN_MP_RHSStaticBoundary.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -64,8 +65,6 @@ static void ML_BSSN_MP_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -102,9 +101,9 @@ static void ML_BSSN_MP_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -123,14 +122,14 @@ static void ML_BSSN_MP_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -144,9 +143,9 @@ static void ML_BSSN_MP_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -227,7 +226,7 @@ static void ML_BSSN_MP_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_RHSStaticBoundary, + LC_LOOP3VEC(ML_BSSN_MP_RHSStaticBoundary, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -309,132 +308,35 @@ static void ML_BSSN_MP_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC B3rhsL = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_RHSStaticBoundary); + LC_ENDLOOP3VEC(ML_BSSN_MP_RHSStaticBoundary); } extern "C" void ML_BSSN_MP_RHSStaticBoundary(CCTK_ARGUMENTS) @@ -453,7 +355,16 @@ extern "C" void ML_BSSN_MP_RHSStaticBoundary(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP::ML_curvrhs","ML_BSSN_MP::ML_dtlapserhs","ML_BSSN_MP::ML_dtshiftrhs","ML_BSSN_MP::ML_Gammarhs","ML_BSSN_MP::ML_lapserhs","ML_BSSN_MP::ML_log_confacrhs","ML_BSSN_MP::ML_metricrhs","ML_BSSN_MP::ML_shiftrhs","ML_BSSN_MP::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN_MP::ML_curvrhs", + "ML_BSSN_MP::ML_dtlapserhs", + "ML_BSSN_MP::ML_dtshiftrhs", + "ML_BSSN_MP::ML_Gammarhs", + "ML_BSSN_MP::ML_lapserhs", + "ML_BSSN_MP::ML_log_confacrhs", + "ML_BSSN_MP::ML_metricrhs", + "ML_BSSN_MP::ML_shiftrhs", + "ML_BSSN_MP::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_RHSStaticBoundary", 9, groups); switch(fdOrder) @@ -471,7 +382,7 @@ extern "C" void ML_BSSN_MP_RHSStaticBoundary(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverBoundary(cctkGH, &ML_BSSN_MP_RHSStaticBoundary_Body); + GenericFD_LoopOverBoundary(cctkGH, ML_BSSN_MP_RHSStaticBoundary_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP/src/ML_BSSN_MP_boundary.cc b/ML_BSSN_MP/src/ML_BSSN_MP_boundary.cc index 884c4a4..f7985cc 100644 --- a/ML_BSSN_MP/src/ML_BSSN_MP_boundary.cc +++ b/ML_BSSN_MP/src/ML_BSSN_MP_boundary.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -64,8 +65,6 @@ static void ML_BSSN_MP_boundary_Body(cGH const * restrict const cctkGH, int cons DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -102,9 +101,9 @@ static void ML_BSSN_MP_boundary_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -123,14 +122,14 @@ static void ML_BSSN_MP_boundary_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -144,9 +143,9 @@ static void ML_BSSN_MP_boundary_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -227,7 +226,7 @@ static void ML_BSSN_MP_boundary_Body(cGH const * restrict const cctkGH, int cons /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_boundary, + LC_LOOP3VEC(ML_BSSN_MP_boundary, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -309,132 +308,35 @@ static void ML_BSSN_MP_boundary_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC B3L = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(beta1[index],beta1L,elt_count); - vec_store_nta_partial_hi(beta2[index],beta2L,elt_count); - vec_store_nta_partial_hi(beta3[index],beta3L,elt_count); - vec_store_nta_partial_hi(gt11[index],gt11L,elt_count); - vec_store_nta_partial_hi(gt12[index],gt12L,elt_count); - vec_store_nta_partial_hi(gt13[index],gt13L,elt_count); - vec_store_nta_partial_hi(gt22[index],gt22L,elt_count); - vec_store_nta_partial_hi(gt23[index],gt23L,elt_count); - vec_store_nta_partial_hi(gt33[index],gt33L,elt_count); - vec_store_nta_partial_hi(phi[index],phiL,elt_count); - vec_store_nta_partial_hi(trK[index],trKL,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(beta1[index],beta1L,elt_count); - vec_store_nta_partial_lo(beta2[index],beta2L,elt_count); - vec_store_nta_partial_lo(beta3[index],beta3L,elt_count); - vec_store_nta_partial_lo(gt11[index],gt11L,elt_count); - vec_store_nta_partial_lo(gt12[index],gt12L,elt_count); - vec_store_nta_partial_lo(gt13[index],gt13L,elt_count); - vec_store_nta_partial_lo(gt22[index],gt22L,elt_count); - vec_store_nta_partial_lo(gt23[index],gt23L,elt_count); - vec_store_nta_partial_lo(gt33[index],gt33L,elt_count); - vec_store_nta_partial_lo(phi[index],phiL,elt_count); - vec_store_nta_partial_lo(trK[index],trKL,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(beta1[index],beta1L); - vec_store_nta(beta2[index],beta2L); - vec_store_nta(beta3[index],beta3L); - vec_store_nta(gt11[index],gt11L); - vec_store_nta(gt12[index],gt12L); - vec_store_nta(gt13[index],gt13L); - vec_store_nta(gt22[index],gt22L); - vec_store_nta(gt23[index],gt23L); - vec_store_nta(gt33[index],gt33L); - vec_store_nta(phi[index],phiL); - vec_store_nta(trK[index],trKL); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(beta1[index],beta1L); + vec_store_nta_partial(beta2[index],beta2L); + vec_store_nta_partial(beta3[index],beta3L); + vec_store_nta_partial(gt11[index],gt11L); + vec_store_nta_partial(gt12[index],gt12L); + vec_store_nta_partial(gt13[index],gt13L); + vec_store_nta_partial(gt22[index],gt22L); + vec_store_nta_partial(gt23[index],gt23L); + vec_store_nta_partial(gt33[index],gt33L); + vec_store_nta_partial(phi[index],phiL); + vec_store_nta_partial(trK[index],trKL); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_MP_boundary); + LC_ENDLOOP3VEC(ML_BSSN_MP_boundary); } extern "C" void ML_BSSN_MP_boundary(CCTK_ARGUMENTS) @@ -453,7 +355,16 @@ extern "C" void ML_BSSN_MP_boundary(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP::ML_curv","ML_BSSN_MP::ML_dtlapse","ML_BSSN_MP::ML_dtshift","ML_BSSN_MP::ML_Gamma","ML_BSSN_MP::ML_lapse","ML_BSSN_MP::ML_log_confac","ML_BSSN_MP::ML_metric","ML_BSSN_MP::ML_shift","ML_BSSN_MP::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_MP::ML_curv", + "ML_BSSN_MP::ML_dtlapse", + "ML_BSSN_MP::ML_dtshift", + "ML_BSSN_MP::ML_Gamma", + "ML_BSSN_MP::ML_lapse", + "ML_BSSN_MP::ML_log_confac", + "ML_BSSN_MP::ML_metric", + "ML_BSSN_MP::ML_shift", + "ML_BSSN_MP::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_boundary", 9, groups); switch(fdOrder) @@ -471,7 +382,7 @@ extern "C" void ML_BSSN_MP_boundary(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverBoundaryWithGhosts(cctkGH, &ML_BSSN_MP_boundary_Body); + GenericFD_LoopOverBoundaryWithGhosts(cctkGH, ML_BSSN_MP_boundary_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP/src/ML_BSSN_MP_constraints1.cc b/ML_BSSN_MP/src/ML_BSSN_MP_constraints1.cc index abfb4bb..a01cac9 100644 --- a/ML_BSSN_MP/src/ML_BSSN_MP_constraints1.cc +++ b/ML_BSSN_MP/src/ML_BSSN_MP_constraints1.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -40,8 +41,6 @@ static void ML_BSSN_MP_constraints1_Body(cGH const * restrict const cctkGH, int DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -78,9 +77,9 @@ static void ML_BSSN_MP_constraints1_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -99,14 +98,14 @@ static void ML_BSSN_MP_constraints1_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -120,9 +119,9 @@ static void ML_BSSN_MP_constraints1_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -203,7 +202,7 @@ static void ML_BSSN_MP_constraints1_Body(cGH const * restrict const cctkGH, int /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_constraints1, + LC_LOOP3VEC(ML_BSSN_MP_constraints1, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1224,7 +1223,8 @@ static void ML_BSSN_MP_constraints1_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -1232,12 +1232,14 @@ static void ML_BSSN_MP_constraints1_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC Gtl111 = kmul(JacPDstandardNth1gt11,ToReal(0.5)); @@ -1480,16 +1482,16 @@ static void ML_BSSN_MP_constraints1_Body(cGH const * restrict const cctkGH, int kmul(ToReal(-2),kadd(cdphi211,kmadd(SQR(cdphi1),kmul(kmadd(gt11L,gtu11,ToReal(-1)),ToReal(2)),kmul(gt11L,kmadd(cdphi211,gtu11,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu33,SQR(cdphi3))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmul(kmadd(cdphi1,kmadd(cdphi2,gtu12,kmul(cdphi3,gtu13)),kmul(cdphi2,kmul(cdphi3,gtu23))),ToReal(4)))))))))); CCTK_REAL_VEC Rphi12 = - kmul(ToReal(-2),kadd(cdphi212,kmadd(gt12L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(cdphi3,kmul(gt12L,kmul(gtu13,ToReal(4))),kmul(cdphi2,kmadd(gt12L,kmul(gtu12,ToReal(4)),ToReal(-2)))))))); + kmul(ToReal(-2),kadd(cdphi212,kmadd(gt12L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(gt12L,kmul(cdphi3,kmul(gtu13,ToReal(4))),kmul(cdphi2,kmadd(gt12L,kmul(gtu12,ToReal(4)),ToReal(-2)))))))); CCTK_REAL_VEC Rphi13 = - kmul(ToReal(-2),kadd(cdphi213,kmadd(gt13L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(cdphi2,kmul(gt13L,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt13L,kmul(gtu13,ToReal(4)),ToReal(-2)))))))); + kmul(ToReal(-2),kadd(cdphi213,kmadd(gt13L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(gt13L,kmul(cdphi2,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt13L,kmul(gtu13,ToReal(4)),ToReal(-2)))))))); CCTK_REAL_VEC Rphi22 = kmul(ToReal(-2),kadd(cdphi222,kmadd(SQR(cdphi2),kmul(kmadd(gt22L,gtu22,ToReal(-1)),ToReal(2)),kmul(gt22L,kmadd(cdphi222,gtu22,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu33,SQR(cdphi3))))),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmul(kmadd(cdphi1,kmul(cdphi3,gtu13),kmul(cdphi2,kmadd(cdphi1,gtu12,kmul(cdphi3,gtu23)))),ToReal(4)))))))))); CCTK_REAL_VEC Rphi23 = - kmul(ToReal(-2),kadd(cdphi223,kmadd(gt23L,kmadd(cdphi222,gtu22,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu22,SQR(cdphi2))))),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi1,kmul(cdphi3,kmul(gtu13,ToReal(4)))))))),kmul(cdphi2,kmadd(cdphi1,kmul(gt23L,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt23L,kmul(gtu23,ToReal(4)),ToReal(-2)))))))); + kmul(ToReal(-2),kadd(cdphi223,kmadd(gt23L,kmadd(cdphi222,gtu22,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu22,SQR(cdphi2))))),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi1,kmul(cdphi3,kmul(gtu13,ToReal(4)))))))),kmul(cdphi2,kmadd(gt23L,kmul(cdphi1,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt23L,kmul(gtu23,ToReal(4)),ToReal(-2)))))))); CCTK_REAL_VEC Rphi33 = kmul(ToReal(-2),kadd(cdphi233,kmadd(SQR(cdphi3),kmul(kmadd(gt33L,gtu33,ToReal(-1)),ToReal(2)),kmul(gt33L,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi213,gtu13,kmul(cdphi223,gtu23)),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(cdphi3,kmul(kmadd(cdphi1,gtu13,kmul(cdphi2,gtu23)),ToReal(4)),kmul(gtu12,kmadd(cdphi212,ToReal(2),kmul(cdphi1,kmul(cdphi2,ToReal(4)))))))))))))); @@ -1557,38 +1559,13 @@ static void ML_BSSN_MP_constraints1_Body(cGH const * restrict const cctkGH, int kmul(INV(SQR(alphaL)),kadd(eTttL,kmadd(eTxxL,SQR(beta1L),kmadd(eTyyL,SQR(beta2L),kmadd(eTzzL,SQR(beta3L),kmadd(kmadd(beta2L,eTtyL,kmul(beta3L,eTtzL)),ToReal(-2),kmul(kmadd(beta2L,kmul(beta3L,eTyzL),kmul(beta1L,kmadd(beta2L,eTxyL,kmsub(beta3L,eTxzL,eTtxL)))),ToReal(2)))))))); CCTK_REAL_VEC HL = - kadd(trR,kmadd(rho,ToReal(-50.26548245743669181540229413247204614715),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(-2.),kmadd(kadd(SQR(Atm11),kadd(SQR(Atm22),SQR(Atm33))),ToReal(-1.),kmul(SQR(trKL),ToReal(0.6666666666666666666666666666666666666667)))))); - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(H[index],HL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ + kadd(trR,kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(-2),ksub(ksub(ksub(kmadd(SQR(trKL),ToReal(0.666666666666666666666666666667),kmul(rho,kmul(ToReal(-16),ToReal(Pi)))),SQR(Atm33)),SQR(Atm22)),SQR(Atm11)))); - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(H[index],HL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(H[index],HL,elt_count); - break; - } - vec_store_nta(H[index],HL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(H[index],HL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_constraints1); + LC_ENDLOOP3VEC(ML_BSSN_MP_constraints1); } extern "C" void ML_BSSN_MP_constraints1(CCTK_ARGUMENTS) @@ -1607,7 +1584,15 @@ extern "C" void ML_BSSN_MP_constraints1(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP::ML_curv","ML_BSSN_MP::ML_Gamma","ML_BSSN_MP::ML_Ham","ML_BSSN_MP::ML_lapse","ML_BSSN_MP::ML_log_confac","ML_BSSN_MP::ML_metric","ML_BSSN_MP::ML_shift","ML_BSSN_MP::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_MP::ML_curv", + "ML_BSSN_MP::ML_Gamma", + "ML_BSSN_MP::ML_Ham", + "ML_BSSN_MP::ML_lapse", + "ML_BSSN_MP::ML_log_confac", + "ML_BSSN_MP::ML_metric", + "ML_BSSN_MP::ML_shift", + "ML_BSSN_MP::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_constraints1", 8, groups); switch(fdOrder) @@ -1629,7 +1614,7 @@ extern "C" void ML_BSSN_MP_constraints1(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_MP_constraints1_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_MP_constraints1_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP/src/ML_BSSN_MP_constraints2.cc b/ML_BSSN_MP/src/ML_BSSN_MP_constraints2.cc index 083ec42..8288b08 100644 --- a/ML_BSSN_MP/src/ML_BSSN_MP_constraints2.cc +++ b/ML_BSSN_MP/src/ML_BSSN_MP_constraints2.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -49,8 +50,6 @@ static void ML_BSSN_MP_constraints2_Body(cGH const * restrict const cctkGH, int DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -87,9 +86,9 @@ static void ML_BSSN_MP_constraints2_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -108,14 +107,14 @@ static void ML_BSSN_MP_constraints2_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -129,9 +128,9 @@ static void ML_BSSN_MP_constraints2_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -212,7 +211,7 @@ static void ML_BSSN_MP_constraints2_Body(cGH const * restrict const cctkGH, int /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_constraints2, + LC_LOOP3VEC(ML_BSSN_MP_constraints2, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -775,7 +774,8 @@ static void ML_BSSN_MP_constraints2_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -783,12 +783,14 @@ static void ML_BSSN_MP_constraints2_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC Gtl111 = kmul(JacPDstandardNth1gt11,ToReal(0.5)); @@ -908,13 +910,13 @@ static void ML_BSSN_MP_constraints2_Body(cGH const * restrict const cctkGH, int kmul(INV(alphaL),kmadd(beta1L,eTxzL,kmadd(beta2L,eTyzL,kmsub(beta3L,eTzzL,eTtzL)))); CCTK_REAL_VEC M1L = - kmadd(gtu11,JacPDstandardNth1At11,kmadd(gtu22,JacPDstandardNth2At12,kmadd(gtu23,kadd(JacPDstandardNth2At13,JacPDstandardNth3At12),kmadd(gtu33,JacPDstandardNth3At13,kmadd(S1,ToReal(-25.13274122871834590770114706623602307358),kmadd(kmadd(kmadd(At22L,Gt212,kmul(At23L,Gt312)),gtu22,kmadd(kmadd(At13L,Gt112,kmadd(At22L,Gt213,kmadd(At33L,Gt312,kmul(At23L,kadd(Gt212,Gt313))))),gtu23,kmul(kmadd(At13L,Gt113,kmadd(At23L,Gt213,kmul(At33L,Gt313))),gtu33))),ToReal(-1.),kmadd(gtu12,kadd(JacPDstandardNth1At12,kadd(JacPDstandardNth2At11,kmadd(At13L,kmul(Gt312,ToReal(-3.)),kmul(At22L,kmul(Gt211,ToReal(-1.)))))),kmadd(gtu13,kadd(JacPDstandardNth1At13,kadd(JacPDstandardNth3At11,kmadd(At13L,kmul(Gt313,ToReal(-3.)),kmul(At23L,kmul(Gt211,ToReal(-1.)))))),kmadd(Gt311,kmadd(At13L,kmul(gtu11,ToReal(-2.)),kmul(kmadd(At23L,gtu12,kmul(At33L,gtu13)),ToReal(-1.))),kmadd(JacPDstandardNth1trK,ToReal(-0.6666666666666666666666666666666666666667),kmadd(At13L,kmadd(kmadd(Gt322,gtu22,kmul(Gt333,gtu33)),ToReal(-1.),kmadd(cdphi3,kmul(gtu33,ToReal(6.)),kmadd(gtu13,kmadd(Gt111,ToReal(-1.),kmul(cdphi1,ToReal(6.))),kmul(gtu23,kmadd(Gt323,ToReal(-2.),kmul(cdphi2,ToReal(6.))))))),kmadd(At11L,kmadd(Gt123,kmul(gtu23,ToReal(-2.)),kmadd(kmadd(Gt122,gtu22,kmul(Gt133,gtu33)),ToReal(-1.),kmadd(gtu11,kmadd(Gt111,ToReal(-2.),kmul(cdphi1,ToReal(6.))),kmadd(gtu12,kmadd(Gt112,ToReal(-3.),kmul(cdphi2,ToReal(6.))),kmul(gtu13,kmadd(Gt113,ToReal(-3.),kmul(cdphi3,ToReal(6.)))))))),kmul(At12L,kmadd(Gt213,kmul(gtu13,ToReal(-3.)),kmadd(kmadd(Gt211,gtu11,kmul(Gt223,gtu23)),ToReal(-2.),kmadd(Gt233,kmul(gtu33,ToReal(-1.)),kmadd(gtu12,kmadd(Gt212,ToReal(-3.),kmadd(Gt111,ToReal(-1.),kmul(cdphi1,ToReal(6.)))),kmadd(gtu22,kmadd(kadd(Gt112,Gt222),ToReal(-1.),kmul(cdphi2,ToReal(6.))),kmul(gtu23,kmadd(Gt113,ToReal(-1.),kmul(cdphi3,ToReal(6.)))))))))))))))))))))); + kmadd(gtu11,JacPDstandardNth1At11,kmadd(gtu12,JacPDstandardNth2At11,kmadd(gtu22,JacPDstandardNth2At12,kmadd(gtu23,JacPDstandardNth2At13,kmadd(gtu13,JacPDstandardNth3At11,kmadd(gtu23,JacPDstandardNth3At12,kmadd(gtu33,JacPDstandardNth3At13,kmadd(gtu12,kadd(JacPDstandardNth1At12,kmsub(At13L,kmul(Gt312,ToReal(-3)),kmul(At22L,Gt211))),kmadd(gtu13,kadd(JacPDstandardNth1At13,kmsub(At13L,kmul(Gt313,ToReal(-3)),kmul(At23L,Gt211))),knmsub(gtu22,kmadd(At23L,Gt312,kmul(At22L,Gt212)),kmadd(Gt311,kmsub(At13L,kmul(gtu11,ToReal(-2)),kmadd(At33L,gtu13,kmul(At23L,gtu12))),kmadd(At13L,kmsub(Gt323,kmul(gtu23,ToReal(-2)),kmul(Gt113,gtu33)),kmadd(JacPDstandardNth1trK,ToReal(-0.666666666666666666666666666667),knmsub(At12L,kmadd(Gt111,gtu12,kmadd(Gt112,gtu22,kmadd(Gt222,gtu22,kmadd(Gt113,gtu23,kmadd(Gt233,gtu33,kmadd(cdphi1,kmul(gtu12,ToReal(-6)),kmadd(cdphi2,kmul(gtu22,ToReal(-6)),kmadd(cdphi3,kmul(gtu23,ToReal(-6)),kmadd(Gt211,kmul(gtu11,ToReal(2)),kmadd(Gt223,kmul(gtu23,ToReal(2)),kmadd(Gt212,kmul(gtu12,ToReal(3)),kmul(Gt213,kmul(gtu13,ToReal(3)))))))))))))),kmadd(gtu23,knmsub(At22L,Gt213,knmsub(At33L,Gt312,kmsub(At13L,kmul(cdphi2,ToReal(6)),kmul(At23L,kadd(Gt313,Gt212))))),kmadd(gtu33,knmsub(At23L,Gt213,kmsub(At13L,kmul(cdphi3,ToReal(6)),kmul(At33L,Gt313))),kmadd(At11L,kmadd(Gt112,kmul(gtu12,ToReal(-3)),kmadd(Gt113,kmul(gtu13,ToReal(-3)),kmadd(Gt111,kmul(gtu11,ToReal(-2)),kmadd(Gt123,kmul(gtu23,ToReal(-2)),knmsub(Gt122,gtu22,knmsub(Gt133,gtu33,kmadd(cdphi1,kmul(gtu11,ToReal(6)),kmadd(cdphi2,kmul(gtu12,ToReal(6)),kmul(cdphi3,kmul(gtu13,ToReal(6))))))))))),kmadd(At13L,knmsub(Gt322,gtu22,knmsub(Gt112,gtu23,kmsub(gtu13,kmsub(cdphi1,ToReal(6),Gt111),kmul(Gt333,gtu33)))),kmul(S1,kmul(ToReal(-8),ToReal(Pi))))))))))))))))))))); CCTK_REAL_VEC M2L = - kmadd(gtu11,JacPDstandardNth1At12,kmadd(gtu12,kadd(JacPDstandardNth1At22,JacPDstandardNth2At12),kmadd(gtu22,JacPDstandardNth2At22,kmadd(gtu33,JacPDstandardNth3At23,kmadd(S2,ToReal(-25.13274122871834590770114706623602307358),kmadd(kmadd(kmadd(At22L,Gt211,kmadd(At23L,Gt311,kmul(At13L,Gt312))),gtu11,kmadd(kmadd(At23L,Gt212,kmul(At33L,Gt312)),gtu13,kmadd(At11L,kmadd(Gt112,gtu11,kmadd(Gt122,gtu12,kmul(Gt123,gtu13))),kmadd(kmadd(At23L,Gt223,kmul(At33L,Gt323)),gtu33,kmul(At13L,kmadd(Gt322,gtu12,kmadd(Gt112,gtu13,kmadd(Gt122,gtu23,kmul(Gt123,gtu33))))))))),ToReal(-1.),kmadd(gtu23,kadd(JacPDstandardNth2At23,kadd(JacPDstandardNth3At22,kmadd(kmadd(At22L,Gt223,kmul(At23L,Gt323)),ToReal(-3.),kmul(kmadd(At23L,Gt222,kmul(At33L,Gt322)),ToReal(-1.))))),kmadd(gtu13,kadd(JacPDstandardNth1At23,kadd(JacPDstandardNth3At12,kmadd(At23L,kmul(Gt313,ToReal(-2.)),kmul(At13L,kmul(Gt323,ToReal(-1.)))))),kmadd(JacPDstandardNth2trK,ToReal(-0.6666666666666666666666666666666666666667),kmadd(At23L,kmadd(Gt312,kmul(gtu12,ToReal(-3.)),kmadd(Gt322,kmul(gtu22,ToReal(-2.)),kmadd(Gt333,kmul(gtu33,ToReal(-1.)),kmul(kmadd(cdphi1,gtu13,kmadd(cdphi2,gtu23,kmul(cdphi3,gtu33))),ToReal(6.))))),kmadd(At22L,kmadd(kmadd(Gt213,gtu13,kmul(Gt222,gtu22)),ToReal(-2.),kmadd(Gt233,kmul(gtu33,ToReal(-1.)),kmadd(kmadd(cdphi2,gtu22,kmul(cdphi3,gtu23)),ToReal(6.),kmul(gtu12,kmadd(Gt212,ToReal(-3.),kmul(cdphi1,ToReal(6.))))))),kmul(At12L,kmadd(Gt123,kmul(gtu23,ToReal(-3.)),kmadd(Gt122,kmul(gtu22,ToReal(-2.)),kmadd(Gt133,kmul(gtu33,ToReal(-1.)),kmadd(gtu11,kmadd(kadd(Gt111,Gt212),ToReal(-1.),kmul(cdphi1,ToReal(6.))),kmadd(gtu12,kmadd(Gt112,ToReal(-3.),kmadd(Gt222,ToReal(-1.),kmul(cdphi2,ToReal(6.)))),kmul(gtu13,kmadd(Gt113,ToReal(-2.),kmadd(Gt223,ToReal(-1.),kmul(cdphi3,ToReal(6.)))))))))))))))))))))); + kmadd(gtu11,JacPDstandardNth1At12,kmadd(gtu12,JacPDstandardNth1At22,kmadd(gtu13,JacPDstandardNth1At23,kmadd(gtu12,JacPDstandardNth2At12,kmadd(gtu22,JacPDstandardNth2At22,kmadd(gtu23,JacPDstandardNth2At23,kmadd(gtu13,JacPDstandardNth3At12,kmadd(gtu23,JacPDstandardNth3At22,kmadd(gtu33,JacPDstandardNth3At23,kmadd(At22L,kmul(Gt222,kmul(gtu22,ToReal(-2))),knmsub(At11L,kmadd(Gt112,gtu11,kmadd(Gt122,gtu12,kmul(Gt123,gtu13))),knmsub(gtu11,kmadd(At22L,Gt211,kmadd(At13L,Gt312,kmul(At23L,Gt311))),kmadd(Gt212,kmsub(At22L,kmul(gtu12,ToReal(-3)),kmul(At23L,gtu13)),kmadd(Gt312,kmsub(At23L,kmul(gtu12,ToReal(-3)),kmul(At33L,gtu13)),kmadd(Gt322,kmsub(At23L,kmul(gtu22,ToReal(-2)),kmul(At33L,gtu23)),kmadd(Gt223,kmsub(At22L,kmul(gtu23,ToReal(-3)),kmul(At23L,gtu33)),kmadd(Gt323,kmsub(At23L,kmul(gtu23,ToReal(-3)),kmul(At33L,gtu33)),kmadd(At22L,kmsub(Gt213,kmul(gtu13,ToReal(-2)),kmul(Gt233,gtu33)),kmadd(At23L,kmsub(Gt313,kmul(gtu13,ToReal(-2)),kmadd(Gt333,gtu33,kmul(Gt222,gtu23))),kmadd(JacPDstandardNth2trK,ToReal(-0.666666666666666666666666666667),kmadd(At22L,kmul(cdphi2,kmul(gtu22,ToReal(6))),kmadd(At22L,kmul(cdphi3,kmul(gtu23,ToReal(6))),kmadd(gtu12,kmsub(At22L,kmul(cdphi1,ToReal(6)),kmul(At13L,Gt322)),kmadd(gtu13,kmsub(At23L,kmul(cdphi1,ToReal(6)),kmul(At13L,kadd(Gt323,Gt112))),kmadd(gtu23,kmsub(At23L,kmul(cdphi2,ToReal(6)),kmul(At13L,Gt122)),kmadd(gtu33,kmsub(At23L,kmul(cdphi3,ToReal(6)),kmul(At13L,Gt123)),kmadd(At12L,kmadd(Gt112,kmul(gtu12,ToReal(-3)),kmadd(Gt123,kmul(gtu23,ToReal(-3)),kmadd(Gt113,kmul(gtu13,ToReal(-2)),kmadd(Gt122,kmul(gtu22,ToReal(-2)),knmsub(Gt133,gtu33,kmadd(gtu11,ksub(kmsub(cdphi1,ToReal(6),Gt212),Gt111),kmadd(gtu12,kmsub(cdphi2,ToReal(6),Gt222),kmul(gtu13,kmsub(cdphi3,ToReal(6),Gt223))))))))),kmul(S2,kmul(ToReal(-8),ToReal(Pi)))))))))))))))))))))))))))))); CCTK_REAL_VEC M3L = - kmadd(gtu11,JacPDstandardNth1At13,kmadd(gtu22,JacPDstandardNth2At23,kmadd(gtu13,kadd(JacPDstandardNth1At33,JacPDstandardNth3At13),kmadd(gtu33,JacPDstandardNth3At33,kmadd(S3,ToReal(-25.13274122871834590770114706623602307358),kmadd(kmadd(kmadd(At23L,Gt211,kmadd(At12L,Gt213,kmul(At33L,Gt311))),gtu11,kmadd(kmadd(At22L,Gt213,kmul(At12L,kadd(Gt113,Gt223))),gtu12,kmadd(At11L,kmadd(Gt113,gtu11,kmadd(Gt123,gtu12,kmul(Gt133,gtu13))),kmadd(kmadd(At23L,Gt222,kmul(At22L,Gt223)),gtu22,kmul(At12L,kmadd(Gt233,gtu13,kmadd(Gt123,gtu22,kmul(Gt133,gtu23)))))))),ToReal(-1.),kmadd(gtu12,kadd(JacPDstandardNth1At23,kadd(JacPDstandardNth2At13,kmadd(At33L,kmul(Gt312,ToReal(-2.)),kmul(At23L,kmul(Gt313,ToReal(-1.)))))),kmadd(gtu23,kadd(JacPDstandardNth2At33,kadd(JacPDstandardNth3At23,kmadd(kmadd(At23L,Gt223,kmul(At33L,Gt323)),ToReal(-3.),kmul(kmadd(At22L,Gt233,kmul(At23L,Gt333)),ToReal(-1.))))),kmadd(JacPDstandardNth3trK,ToReal(-0.6666666666666666666666666666666666666667),kmadd(At33L,kmadd(Gt333,kmul(gtu33,ToReal(-2.)),kmadd(Gt322,kmul(gtu22,ToReal(-1.)),kmadd(kmadd(cdphi2,gtu23,kmul(cdphi3,gtu33)),ToReal(6.),kmul(gtu13,kmadd(Gt313,ToReal(-3.),kmul(cdphi1,ToReal(6.))))))),kmadd(At23L,kmadd(Gt213,kmul(gtu13,ToReal(-3.)),kmadd(Gt233,kmul(gtu33,ToReal(-2.)),kmadd(cdphi3,kmul(gtu23,ToReal(6.)),kmadd(gtu12,kmadd(Gt212,ToReal(-2.),kmul(cdphi1,ToReal(6.))),kmul(gtu22,kmadd(Gt323,ToReal(-1.),kmul(cdphi2,ToReal(6.)))))))),kmul(At13L,kmadd(Gt123,kmul(gtu23,ToReal(-3.)),kmadd(Gt133,kmul(gtu33,ToReal(-2.)),kmadd(Gt122,kmul(gtu22,ToReal(-1.)),kmadd(gtu11,kmadd(kadd(Gt111,Gt313),ToReal(-1.),kmul(cdphi1,ToReal(6.))),kmadd(gtu12,kmadd(Gt112,ToReal(-2.),kmadd(Gt323,ToReal(-1.),kmul(cdphi2,ToReal(6.)))),kmul(gtu13,kmadd(Gt113,ToReal(-3.),kmadd(Gt333,ToReal(-1.),kmul(cdphi3,ToReal(6.)))))))))))))))))))))); + kmadd(gtu11,JacPDstandardNth1At13,kmadd(gtu12,JacPDstandardNth1At23,kmadd(gtu13,JacPDstandardNth1At33,kmadd(gtu12,JacPDstandardNth2At13,kmadd(gtu22,JacPDstandardNth2At23,kmadd(gtu23,JacPDstandardNth2At33,kmadd(gtu13,JacPDstandardNth3At13,kmadd(gtu23,JacPDstandardNth3At23,kmadd(gtu33,JacPDstandardNth3At33,kmadd(At33L,kmul(Gt313,kmul(gtu13,ToReal(-3))),kmadd(At23L,kmul(Gt223,kmul(gtu23,ToReal(-3))),kmadd(At33L,kmul(Gt323,kmul(gtu23,ToReal(-3))),kmadd(At23L,kmul(Gt233,kmul(gtu33,ToReal(-2))),kmadd(At33L,kmul(Gt333,kmul(gtu33,ToReal(-2))),knmsub(At11L,kmadd(Gt113,gtu11,kmadd(Gt123,gtu12,kmul(Gt133,gtu13))),kmadd(gtu12,kmsub(At23L,kmul(Gt212,ToReal(-2)),kmul(At12L,Gt223)),knmsub(gtu11,kmadd(At23L,Gt211,kmadd(At33L,Gt311,kmul(At12L,Gt213))),kmadd(At33L,kmsub(Gt312,kmul(gtu12,ToReal(-2)),kmul(Gt322,gtu22)),kmadd(At23L,kmsub(Gt213,kmul(gtu13,ToReal(-3)),kmadd(Gt333,gtu23,kmul(Gt222,gtu22))),kmadd(JacPDstandardNth3trK,ToReal(-0.666666666666666666666666666667),kmadd(At33L,kmul(cdphi3,kmul(gtu33,ToReal(6))),kmadd(gtu13,kmsub(At33L,kmul(cdphi1,ToReal(6)),kmul(At12L,Gt233)),kmadd(gtu23,kmsub(At33L,kmul(cdphi2,ToReal(6)),kmul(At12L,Gt133)),kmadd(gtu23,kmsub(At23L,kmul(cdphi3,ToReal(6)),kmul(At22L,Gt233)),kmadd(gtu12,knmsub(At12L,Gt113,kmsub(At23L,kmsub(cdphi1,ToReal(6),Gt313),kmul(At22L,Gt213))),kmadd(gtu22,knmsub(At12L,Gt123,kmsub(At23L,kmsub(cdphi2,ToReal(6),Gt323),kmul(At22L,Gt223))),kmadd(At13L,kmadd(Gt113,kmul(gtu13,ToReal(-3)),kmadd(Gt123,kmul(gtu23,ToReal(-3)),kmadd(Gt112,kmul(gtu12,ToReal(-2)),kmadd(Gt133,kmul(gtu33,ToReal(-2)),knmsub(Gt122,gtu22,kmadd(gtu11,ksub(kmsub(cdphi1,ToReal(6),Gt313),Gt111),kmadd(gtu12,kmsub(cdphi2,ToReal(6),Gt323),kmul(gtu13,kmsub(cdphi3,ToReal(6),Gt333))))))))),kmul(S3,kmul(ToReal(-8),ToReal(Pi)))))))))))))))))))))))))))))); CCTK_REAL_VEC cSL = klog(detgt); @@ -930,64 +932,18 @@ static void ML_BSSN_MP_constraints2_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC cAL = kmadd(At11L,gtu11,kmadd(At22L,gtu22,kmadd(At33L,gtu33,kmul(kmadd(At12L,gtu12,kmadd(At13L,gtu13,kmul(At23L,gtu23))),ToReal(2))))); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(cA[index],cAL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cS[index],cSL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cXt1[index],cXt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cXt2[index],cXt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cXt3[index],cXt3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(M1[index],M1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(M2[index],M2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(M3[index],M3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(cA[index],cAL,elt_count); - vec_store_nta_partial_hi(cS[index],cSL,elt_count); - vec_store_nta_partial_hi(cXt1[index],cXt1L,elt_count); - vec_store_nta_partial_hi(cXt2[index],cXt2L,elt_count); - vec_store_nta_partial_hi(cXt3[index],cXt3L,elt_count); - vec_store_nta_partial_hi(M1[index],M1L,elt_count); - vec_store_nta_partial_hi(M2[index],M2L,elt_count); - vec_store_nta_partial_hi(M3[index],M3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(cA[index],cAL,elt_count); - vec_store_nta_partial_lo(cS[index],cSL,elt_count); - vec_store_nta_partial_lo(cXt1[index],cXt1L,elt_count); - vec_store_nta_partial_lo(cXt2[index],cXt2L,elt_count); - vec_store_nta_partial_lo(cXt3[index],cXt3L,elt_count); - vec_store_nta_partial_lo(M1[index],M1L,elt_count); - vec_store_nta_partial_lo(M2[index],M2L,elt_count); - vec_store_nta_partial_lo(M3[index],M3L,elt_count); - break; - } - vec_store_nta(cA[index],cAL); - vec_store_nta(cS[index],cSL); - vec_store_nta(cXt1[index],cXt1L); - vec_store_nta(cXt2[index],cXt2L); - vec_store_nta(cXt3[index],cXt3L); - vec_store_nta(M1[index],M1L); - vec_store_nta(M2[index],M2L); - vec_store_nta(M3[index],M3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(cA[index],cAL); + vec_store_nta_partial(cS[index],cSL); + vec_store_nta_partial(cXt1[index],cXt1L); + vec_store_nta_partial(cXt2[index],cXt2L); + vec_store_nta_partial(cXt3[index],cXt3L); + vec_store_nta_partial(M1[index],M1L); + vec_store_nta_partial(M2[index],M2L); + vec_store_nta_partial(M3[index],M3L); } - LC_ENDLOOP3VEC (ML_BSSN_MP_constraints2); + LC_ENDLOOP3VEC(ML_BSSN_MP_constraints2); } extern "C" void ML_BSSN_MP_constraints2(CCTK_ARGUMENTS) @@ -1006,7 +962,18 @@ extern "C" void ML_BSSN_MP_constraints2(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP::ML_cons_detg","ML_BSSN_MP::ML_cons_Gamma","ML_BSSN_MP::ML_cons_traceA","ML_BSSN_MP::ML_curv","ML_BSSN_MP::ML_Gamma","ML_BSSN_MP::ML_lapse","ML_BSSN_MP::ML_log_confac","ML_BSSN_MP::ML_metric","ML_BSSN_MP::ML_mom","ML_BSSN_MP::ML_shift","ML_BSSN_MP::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_MP::ML_cons_detg", + "ML_BSSN_MP::ML_cons_Gamma", + "ML_BSSN_MP::ML_cons_traceA", + "ML_BSSN_MP::ML_curv", + "ML_BSSN_MP::ML_Gamma", + "ML_BSSN_MP::ML_lapse", + "ML_BSSN_MP::ML_log_confac", + "ML_BSSN_MP::ML_metric", + "ML_BSSN_MP::ML_mom", + "ML_BSSN_MP::ML_shift", + "ML_BSSN_MP::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_constraints2", 11, groups); switch(fdOrder) @@ -1028,7 +995,7 @@ extern "C" void ML_BSSN_MP_constraints2(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_MP_constraints2_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_MP_constraints2_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP/src/ML_BSSN_MP_convertFromADMBase.cc b/ML_BSSN_MP/src/ML_BSSN_MP_convertFromADMBase.cc index 10920d8..c158da7 100644 --- a/ML_BSSN_MP/src/ML_BSSN_MP_convertFromADMBase.cc +++ b/ML_BSSN_MP/src/ML_BSSN_MP_convertFromADMBase.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_MP_convertFromADMBase_Body(cGH const * restrict const cctkGH DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_MP_convertFromADMBase_Body(cGH const * restrict const cctkGH CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_MP_convertFromADMBase_Body(cGH const * restrict const cctkGH CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_MP_convertFromADMBase_Body(cGH const * restrict const cctkGH CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_MP_convertFromADMBase_Body(cGH const * restrict const cctkGH /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_convertFromADMBase, + LC_LOOP3VEC(ML_BSSN_MP_convertFromADMBase, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -296,25 +295,25 @@ static void ML_BSSN_MP_convertFromADMBase_Body(cGH const * restrict const cctkGH CCTK_REAL_VEC gt33L = kmul(em4phi,g33); trKL = - kmadd(gu11,kxxL,kmadd(gu22,kyyL,kmadd(gu33,kzzL,kmul(kmadd(gu12,kxyL,kmadd(gu13,kxzL,kmul(gu23,kyzL))),ToReal(2))))); + kmadd(kxxL,gu11,kmadd(kyyL,gu22,kmadd(kzzL,gu33,kmul(kmadd(kxyL,gu12,kmadd(kxzL,gu13,kmul(kyzL,gu23))),ToReal(2))))); CCTK_REAL_VEC At11L = - kmul(em4phi,kmadd(g11,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kxxL)); + kmul(em4phi,kmadd(trKL,kmul(g11,ToReal(-0.333333333333333333333333333333)),kxxL)); CCTK_REAL_VEC At12L = - kmul(em4phi,kmadd(g12,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kxyL)); + kmul(em4phi,kmadd(trKL,kmul(g12,ToReal(-0.333333333333333333333333333333)),kxyL)); CCTK_REAL_VEC At13L = - kmul(em4phi,kmadd(g13,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kxzL)); + kmul(em4phi,kmadd(trKL,kmul(g13,ToReal(-0.333333333333333333333333333333)),kxzL)); CCTK_REAL_VEC At22L = - kmul(em4phi,kmadd(g22,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kyyL)); + kmul(em4phi,kmadd(trKL,kmul(g22,ToReal(-0.333333333333333333333333333333)),kyyL)); CCTK_REAL_VEC At23L = - kmul(em4phi,kmadd(g23,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kyzL)); + kmul(em4phi,kmadd(trKL,kmul(g23,ToReal(-0.333333333333333333333333333333)),kyzL)); CCTK_REAL_VEC At33L = - kmul(em4phi,kmadd(g33,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kzzL)); + kmul(em4phi,kmadd(trKL,kmul(g33,ToReal(-0.333333333333333333333333333333)),kzzL)); CCTK_REAL_VEC alphaL = alpL; @@ -324,104 +323,28 @@ static void ML_BSSN_MP_convertFromADMBase_Body(cGH const * restrict const cctkGH CCTK_REAL_VEC beta3L = betazL; - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - vec_store_nta_partial_hi(beta1[index],beta1L,elt_count); - vec_store_nta_partial_hi(beta2[index],beta2L,elt_count); - vec_store_nta_partial_hi(beta3[index],beta3L,elt_count); - vec_store_nta_partial_hi(gt11[index],gt11L,elt_count); - vec_store_nta_partial_hi(gt12[index],gt12L,elt_count); - vec_store_nta_partial_hi(gt13[index],gt13L,elt_count); - vec_store_nta_partial_hi(gt22[index],gt22L,elt_count); - vec_store_nta_partial_hi(gt23[index],gt23L,elt_count); - vec_store_nta_partial_hi(gt33[index],gt33L,elt_count); - vec_store_nta_partial_hi(phi[index],phiL,elt_count); - vec_store_nta_partial_hi(trK[index],trKL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - vec_store_nta_partial_lo(beta1[index],beta1L,elt_count); - vec_store_nta_partial_lo(beta2[index],beta2L,elt_count); - vec_store_nta_partial_lo(beta3[index],beta3L,elt_count); - vec_store_nta_partial_lo(gt11[index],gt11L,elt_count); - vec_store_nta_partial_lo(gt12[index],gt12L,elt_count); - vec_store_nta_partial_lo(gt13[index],gt13L,elt_count); - vec_store_nta_partial_lo(gt22[index],gt22L,elt_count); - vec_store_nta_partial_lo(gt23[index],gt23L,elt_count); - vec_store_nta_partial_lo(gt33[index],gt33L,elt_count); - vec_store_nta_partial_lo(phi[index],phiL,elt_count); - vec_store_nta_partial_lo(trK[index],trKL,elt_count); - break; - } - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); - vec_store_nta(beta1[index],beta1L); - vec_store_nta(beta2[index],beta2L); - vec_store_nta(beta3[index],beta3L); - vec_store_nta(gt11[index],gt11L); - vec_store_nta(gt12[index],gt12L); - vec_store_nta(gt13[index],gt13L); - vec_store_nta(gt22[index],gt22L); - vec_store_nta(gt23[index],gt23L); - vec_store_nta(gt33[index],gt33L); - vec_store_nta(phi[index],phiL); - vec_store_nta(trK[index],trKL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); + vec_store_nta_partial(beta1[index],beta1L); + vec_store_nta_partial(beta2[index],beta2L); + vec_store_nta_partial(beta3[index],beta3L); + vec_store_nta_partial(gt11[index],gt11L); + vec_store_nta_partial(gt12[index],gt12L); + vec_store_nta_partial(gt13[index],gt13L); + vec_store_nta_partial(gt22[index],gt22L); + vec_store_nta_partial(gt23[index],gt23L); + vec_store_nta_partial(gt33[index],gt33L); + vec_store_nta_partial(phi[index],phiL); + vec_store_nta_partial(trK[index],trKL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_convertFromADMBase); + LC_ENDLOOP3VEC(ML_BSSN_MP_convertFromADMBase); } extern "C" void ML_BSSN_MP_convertFromADMBase(CCTK_ARGUMENTS) @@ -440,7 +363,17 @@ extern "C" void ML_BSSN_MP_convertFromADMBase(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::curv","ADMBase::lapse","ADMBase::metric","ADMBase::shift","ML_BSSN_MP::ML_curv","ML_BSSN_MP::ML_lapse","ML_BSSN_MP::ML_log_confac","ML_BSSN_MP::ML_metric","ML_BSSN_MP::ML_shift","ML_BSSN_MP::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::curv", + "ADMBase::lapse", + "ADMBase::metric", + "ADMBase::shift", + "ML_BSSN_MP::ML_curv", + "ML_BSSN_MP::ML_lapse", + "ML_BSSN_MP::ML_log_confac", + "ML_BSSN_MP::ML_metric", + "ML_BSSN_MP::ML_shift", + "ML_BSSN_MP::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_convertFromADMBase", 10, groups); switch(fdOrder) @@ -458,7 +391,7 @@ extern "C" void ML_BSSN_MP_convertFromADMBase(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_MP_convertFromADMBase_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_MP_convertFromADMBase_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP/src/ML_BSSN_MP_convertFromADMBaseGamma.cc b/ML_BSSN_MP/src/ML_BSSN_MP_convertFromADMBaseGamma.cc index c6d5dc4..30273ee 100644 --- a/ML_BSSN_MP/src/ML_BSSN_MP_convertFromADMBaseGamma.cc +++ b/ML_BSSN_MP/src/ML_BSSN_MP_convertFromADMBaseGamma.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -46,8 +47,6 @@ static void ML_BSSN_MP_convertFromADMBaseGamma_Body(cGH const * restrict const c DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -84,9 +83,9 @@ static void ML_BSSN_MP_convertFromADMBaseGamma_Body(cGH const * restrict const c CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -105,14 +104,14 @@ static void ML_BSSN_MP_convertFromADMBaseGamma_Body(cGH const * restrict const c CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -126,9 +125,9 @@ static void ML_BSSN_MP_convertFromADMBaseGamma_Body(cGH const * restrict const c CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -209,7 +208,7 @@ static void ML_BSSN_MP_convertFromADMBaseGamma_Body(cGH const * restrict const c /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_convertFromADMBaseGamma, + LC_LOOP3VEC(ML_BSSN_MP_convertFromADMBaseGamma, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -746,7 +745,8 @@ static void ML_BSSN_MP_convertFromADMBaseGamma_Body(cGH const * restrict const c CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -754,12 +754,14 @@ static void ML_BSSN_MP_convertFromADMBaseGamma_Body(cGH const * restrict const c CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC Gt111 = kmul(ToReal(0.5),kmadd(gtu11,JacPDstandardNth1gt11,knmsub(gtu12,JacPDstandardNth2gt11,kmsub(kmadd(gtu12,JacPDstandardNth1gt12,kmul(gtu13,JacPDstandardNth1gt13)),ToReal(2),kmul(gtu13,JacPDstandardNth3gt11))))); @@ -837,13 +839,13 @@ static void ML_BSSN_MP_convertFromADMBaseGamma_Body(cGH const * restrict const c if (ShiftBCoeff*ShiftGammaCoeff != 0) { B1L = - kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta1,kmadd(beta2L,JacPDupwindNthAnti2beta1,kmadd(beta3L,JacPDupwindNthAnti3beta1,kmadd(JacPDupwindNthSymm1beta1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta1,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetaxL))); + kmul(INV(kmul(theta,ToReal(ShiftGammaCoeff))),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta1,kmadd(beta2L,JacPDupwindNthAnti2beta1,kmadd(beta3L,JacPDupwindNthAnti3beta1,kmadd(JacPDupwindNthSymm1beta1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta1,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetaxL)); B2L = - kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta2,kmadd(beta2L,JacPDupwindNthAnti2beta2,kmadd(beta3L,JacPDupwindNthAnti3beta2,kmadd(JacPDupwindNthSymm1beta2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta2,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetayL))); + kmul(INV(kmul(theta,ToReal(ShiftGammaCoeff))),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta2,kmadd(beta2L,JacPDupwindNthAnti2beta2,kmadd(beta3L,JacPDupwindNthAnti3beta2,kmadd(JacPDupwindNthSymm1beta2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta2,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetayL)); B3L = - kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetazL))); + kmul(INV(kmul(theta,ToReal(ShiftGammaCoeff))),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetazL)); } else { @@ -854,60 +856,17 @@ static void ML_BSSN_MP_convertFromADMBaseGamma_Body(cGH const * restrict const c B3L = ToReal(0); } - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_MP_convertFromADMBaseGamma); + LC_ENDLOOP3VEC(ML_BSSN_MP_convertFromADMBaseGamma); } extern "C" void ML_BSSN_MP_convertFromADMBaseGamma(CCTK_ARGUMENTS) @@ -926,7 +885,17 @@ extern "C" void ML_BSSN_MP_convertFromADMBaseGamma(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN_MP::ML_dtlapse","ML_BSSN_MP::ML_dtshift","ML_BSSN_MP::ML_Gamma","ML_BSSN_MP::ML_lapse","ML_BSSN_MP::ML_metric","ML_BSSN_MP::ML_shift"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_MP::ML_dtlapse", + "ML_BSSN_MP::ML_dtshift", + "ML_BSSN_MP::ML_Gamma", + "ML_BSSN_MP::ML_lapse", + "ML_BSSN_MP::ML_metric", + "ML_BSSN_MP::ML_shift"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_convertFromADMBaseGamma", 10, groups); switch(fdOrder) @@ -948,7 +917,7 @@ extern "C" void ML_BSSN_MP_convertFromADMBaseGamma(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_MP_convertFromADMBaseGamma_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_MP_convertFromADMBaseGamma_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP/src/ML_BSSN_MP_convertToADMBase.cc b/ML_BSSN_MP/src/ML_BSSN_MP_convertToADMBase.cc index eda4758..a229cd8 100644 --- a/ML_BSSN_MP/src/ML_BSSN_MP_convertToADMBase.cc +++ b/ML_BSSN_MP/src/ML_BSSN_MP_convertToADMBase.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_MP_convertToADMBase_Body(cGH const * restrict const cctkGH, DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_MP_convertToADMBase_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_MP_convertToADMBase_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_MP_convertToADMBase_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_MP_convertToADMBase_Body(cGH const * restrict const cctkGH, /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_convertToADMBase, + LC_LOOP3VEC(ML_BSSN_MP_convertToADMBase, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -250,17 +249,17 @@ static void ML_BSSN_MP_convertToADMBase_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC e4phi = IfThen(conformalMethod,INV(SQR(phiL)),kexp(kmul(phiL,ToReal(4)))); - gxxL = kmul(e4phi,gt11L); + gxxL = kmul(gt11L,e4phi); - gxyL = kmul(e4phi,gt12L); + gxyL = kmul(gt12L,e4phi); - gxzL = kmul(e4phi,gt13L); + gxzL = kmul(gt13L,e4phi); - gyyL = kmul(e4phi,gt22L); + gyyL = kmul(gt22L,e4phi); - gyzL = kmul(e4phi,gt23L); + gyzL = kmul(gt23L,e4phi); - gzzL = kmul(e4phi,gt33L); + gzzL = kmul(gt33L,e4phi); CCTK_REAL_VEC kxxL = kmadd(At11L,e4phi,kmul(gxxL,kmul(trKL,ToReal(0.333333333333333333333333333333)))); @@ -288,96 +287,26 @@ static void ML_BSSN_MP_convertToADMBase_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC betazL = beta3L; - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alp[index],alpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(betax[index],betaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(betay[index],betayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(betaz[index],betazL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gxx[index],gxxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gxy[index],gxyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gxz[index],gxzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gyy[index],gyyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gyz[index],gyzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gzz[index],gzzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kxx[index],kxxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kxy[index],kxyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kxz[index],kxzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kyy[index],kyyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kyz[index],kyzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kzz[index],kzzL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alp[index],alpL,elt_count); - vec_store_nta_partial_hi(betax[index],betaxL,elt_count); - vec_store_nta_partial_hi(betay[index],betayL,elt_count); - vec_store_nta_partial_hi(betaz[index],betazL,elt_count); - vec_store_nta_partial_hi(gxx[index],gxxL,elt_count); - vec_store_nta_partial_hi(gxy[index],gxyL,elt_count); - vec_store_nta_partial_hi(gxz[index],gxzL,elt_count); - vec_store_nta_partial_hi(gyy[index],gyyL,elt_count); - vec_store_nta_partial_hi(gyz[index],gyzL,elt_count); - vec_store_nta_partial_hi(gzz[index],gzzL,elt_count); - vec_store_nta_partial_hi(kxx[index],kxxL,elt_count); - vec_store_nta_partial_hi(kxy[index],kxyL,elt_count); - vec_store_nta_partial_hi(kxz[index],kxzL,elt_count); - vec_store_nta_partial_hi(kyy[index],kyyL,elt_count); - vec_store_nta_partial_hi(kyz[index],kyzL,elt_count); - vec_store_nta_partial_hi(kzz[index],kzzL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alp[index],alpL,elt_count); - vec_store_nta_partial_lo(betax[index],betaxL,elt_count); - vec_store_nta_partial_lo(betay[index],betayL,elt_count); - vec_store_nta_partial_lo(betaz[index],betazL,elt_count); - vec_store_nta_partial_lo(gxx[index],gxxL,elt_count); - vec_store_nta_partial_lo(gxy[index],gxyL,elt_count); - vec_store_nta_partial_lo(gxz[index],gxzL,elt_count); - vec_store_nta_partial_lo(gyy[index],gyyL,elt_count); - vec_store_nta_partial_lo(gyz[index],gyzL,elt_count); - vec_store_nta_partial_lo(gzz[index],gzzL,elt_count); - vec_store_nta_partial_lo(kxx[index],kxxL,elt_count); - vec_store_nta_partial_lo(kxy[index],kxyL,elt_count); - vec_store_nta_partial_lo(kxz[index],kxzL,elt_count); - vec_store_nta_partial_lo(kyy[index],kyyL,elt_count); - vec_store_nta_partial_lo(kyz[index],kyzL,elt_count); - vec_store_nta_partial_lo(kzz[index],kzzL,elt_count); - break; - } - vec_store_nta(alp[index],alpL); - vec_store_nta(betax[index],betaxL); - vec_store_nta(betay[index],betayL); - vec_store_nta(betaz[index],betazL); - vec_store_nta(gxx[index],gxxL); - vec_store_nta(gxy[index],gxyL); - vec_store_nta(gxz[index],gxzL); - vec_store_nta(gyy[index],gyyL); - vec_store_nta(gyz[index],gyzL); - vec_store_nta(gzz[index],gzzL); - vec_store_nta(kxx[index],kxxL); - vec_store_nta(kxy[index],kxyL); - vec_store_nta(kxz[index],kxzL); - vec_store_nta(kyy[index],kyyL); - vec_store_nta(kyz[index],kyzL); - vec_store_nta(kzz[index],kzzL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alp[index],alpL); + vec_store_nta_partial(betax[index],betaxL); + vec_store_nta_partial(betay[index],betayL); + vec_store_nta_partial(betaz[index],betazL); + vec_store_nta_partial(gxx[index],gxxL); + vec_store_nta_partial(gxy[index],gxyL); + vec_store_nta_partial(gxz[index],gxzL); + vec_store_nta_partial(gyy[index],gyyL); + vec_store_nta_partial(gyz[index],gyzL); + vec_store_nta_partial(gzz[index],gzzL); + vec_store_nta_partial(kxx[index],kxxL); + vec_store_nta_partial(kxy[index],kxyL); + vec_store_nta_partial(kxz[index],kxzL); + vec_store_nta_partial(kyy[index],kyyL); + vec_store_nta_partial(kyz[index],kyzL); + vec_store_nta_partial(kzz[index],kzzL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_convertToADMBase); + LC_ENDLOOP3VEC(ML_BSSN_MP_convertToADMBase); } extern "C" void ML_BSSN_MP_convertToADMBase(CCTK_ARGUMENTS) @@ -396,7 +325,17 @@ extern "C" void ML_BSSN_MP_convertToADMBase(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::curv","ADMBase::lapse","ADMBase::metric","ADMBase::shift","ML_BSSN_MP::ML_curv","ML_BSSN_MP::ML_lapse","ML_BSSN_MP::ML_log_confac","ML_BSSN_MP::ML_metric","ML_BSSN_MP::ML_shift","ML_BSSN_MP::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::curv", + "ADMBase::lapse", + "ADMBase::metric", + "ADMBase::shift", + "ML_BSSN_MP::ML_curv", + "ML_BSSN_MP::ML_lapse", + "ML_BSSN_MP::ML_log_confac", + "ML_BSSN_MP::ML_metric", + "ML_BSSN_MP::ML_shift", + "ML_BSSN_MP::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_convertToADMBase", 10, groups); switch(fdOrder) @@ -414,7 +353,7 @@ extern "C" void ML_BSSN_MP_convertToADMBase(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_MP_convertToADMBase_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_MP_convertToADMBase_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP/src/ML_BSSN_MP_convertToADMBaseDtLapseShift.cc b/ML_BSSN_MP/src/ML_BSSN_MP_convertToADMBaseDtLapseShift.cc index cef9f17..74d185d 100644 --- a/ML_BSSN_MP/src/ML_BSSN_MP_convertToADMBaseDtLapseShift.cc +++ b/ML_BSSN_MP/src/ML_BSSN_MP_convertToADMBaseDtLapseShift.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -43,8 +44,6 @@ static void ML_BSSN_MP_convertToADMBaseDtLapseShift_Body(cGH const * restrict co DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -81,9 +80,9 @@ static void ML_BSSN_MP_convertToADMBaseDtLapseShift_Body(cGH const * restrict co CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -102,14 +101,14 @@ static void ML_BSSN_MP_convertToADMBaseDtLapseShift_Body(cGH const * restrict co CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -123,9 +122,9 @@ static void ML_BSSN_MP_convertToADMBaseDtLapseShift_Body(cGH const * restrict co CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -206,7 +205,7 @@ static void ML_BSSN_MP_convertToADMBaseDtLapseShift_Body(cGH const * restrict co /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_convertToADMBaseDtLapseShift, + LC_LOOP3VEC(ML_BSSN_MP_convertToADMBaseDtLapseShift, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -814,7 +813,8 @@ static void ML_BSSN_MP_convertToADMBaseDtLapseShift_Body(cGH const * restrict co CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -822,15 +822,17 @@ static void ML_BSSN_MP_convertToADMBaseDtLapseShift_Body(cGH const * restrict co CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC eta = - kfmin(ToReal(1),kmul(INV(rL),ToReal(SpatialBetaDriverRadius))); + kmul(INV(kfmax(rL,ToReal(SpatialBetaDriverRadius))),ToReal(SpatialBetaDriverRadius)); CCTK_REAL_VEC theta = kfmin(ToReal(1),kexp(knmsub(rL,INV(ToReal(SpatialShiftGammaCoeffRadius)),ToReal(1)))); @@ -839,62 +841,22 @@ static void ML_BSSN_MP_convertToADMBaseDtLapseShift_Body(cGH const * restrict co kmsub(kmadd(beta1L,JacPDupwindNthAnti1alpha,kmadd(beta2L,JacPDupwindNthAnti2alpha,kmadd(beta3L,JacPDupwindNthAnti3alpha,kmadd(JacPDupwindNthSymm1alpha,kfabs(beta1L),kmadd(JacPDupwindNthSymm2alpha,kfabs(beta2L),kmul(JacPDupwindNthSymm3alpha,kfabs(beta3L))))))),ToReal(LapseAdvectionCoeff),kmul(kpow(alphaL,harmonicN),kmul(ToReal(harmonicF),kmadd(ksub(AL,trKL),ToReal(LapseACoeff),trKL)))); CCTK_REAL_VEC dtbetaxL = - kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta1,kmadd(beta2L,JacPDupwindNthAnti2beta1,kmadd(beta3L,JacPDupwindNthAnti3beta1,kmadd(JacPDupwindNthSymm1beta1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta1,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(kmadd(gtu11,JacPDstandardNth1alpha,kmadd(gtu12,JacPDstandardNth2alpha,kmul(gtu13,JacPDstandardNth3alpha))),kmul(phiL,ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth1gt11,SQR(gtu11),kmul(JacPDstandardNth1gt22,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu13,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu13,JacPDstandardNth1gt33,kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu22,JacPDstandardNth3gt22))))),kmadd(gtu11,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu12,JacPDstandardNth2gt11,kmadd(gtu13,JacPDstandardNth3gt11,kmadd(gtu23,kmul(JacPDstandardNth1gt23,ToReal(-2)),knmsub(gtu22,JacPDstandardNth1gt22,kmadd(kmadd(gtu12,JacPDstandardNth1gt12,kmadd(gtu13,JacPDstandardNth1gt13,kmul(gtu22,JacPDstandardNth2gt12))),ToReal(2),kmadd(gtu23,kmul(JacPDstandardNth2gt13,ToReal(2)),kmadd(gtu23,kmul(JacPDstandardNth3gt12,ToReal(2)),kmul(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33))))))))))),kmul(gtu12,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu23,kmul(JacPDstandardNth3gt22,ToReal(2)),kmadd(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33),kmul(gtu13,kmul(JacPDstandardNth1gt23,ToReal(4))))))))))))))))),kmul(theta,kmul(kadd(Xt1L,kmadd(beta1L,kmul(eta,ToReal(BetaDriver*(-1 - + - ShiftBCoeff))),kmul(ksub(B1L,Xt1L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))))); + kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta1,kmadd(beta2L,JacPDupwindNthAnti2beta1,kmadd(beta3L,JacPDupwindNthAnti3beta1,kmadd(JacPDupwindNthSymm1beta1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta1,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu11,JacPDstandardNth1alpha,kmadd(gtu12,JacPDstandardNth2alpha,kmul(gtu13,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth1gt11,SQR(gtu11),kmul(JacPDstandardNth1gt22,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu13,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu13,JacPDstandardNth1gt33,kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu22,JacPDstandardNth3gt22))))),kmadd(gtu11,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu12,JacPDstandardNth2gt11,kmadd(gtu13,JacPDstandardNth3gt11,kmadd(gtu23,kmul(JacPDstandardNth1gt23,ToReal(-2)),knmsub(gtu22,JacPDstandardNth1gt22,kmadd(kmadd(gtu12,JacPDstandardNth1gt12,kmadd(gtu13,JacPDstandardNth1gt13,kmul(gtu22,JacPDstandardNth2gt12))),ToReal(2),kmadd(gtu23,kmul(JacPDstandardNth2gt13,ToReal(2)),kmadd(gtu23,kmul(JacPDstandardNth3gt12,ToReal(2)),kmul(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33))))))))))),kmul(gtu12,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu23,kmul(JacPDstandardNth3gt22,ToReal(2)),kmadd(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33),kmul(gtu13,kmul(JacPDstandardNth1gt23,ToReal(4))))))))))))))))),kmul(theta,kmul(kadd(Xt1L,kmadd(ksub(B1L,Xt1L),ToReal(ShiftBCoeff),kmul(beta1L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))))); CCTK_REAL_VEC dtbetayL = - kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta2,kmadd(beta2L,JacPDupwindNthAnti2beta2,kmadd(beta3L,JacPDupwindNthAnti3beta2,kmadd(JacPDupwindNthSymm1beta2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta2,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(kmadd(gtu12,JacPDstandardNth1alpha,kmadd(gtu22,JacPDstandardNth2alpha,kmul(gtu23,JacPDstandardNth3alpha))),kmul(phiL,ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth2gt22,SQR(gtu22),kmul(JacPDstandardNth2gt11,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu23,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt13,kmadd(gtu13,JacPDstandardNth1gt33,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu11,JacPDstandardNth3gt11))))),kmadd(gtu22,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu23,JacPDstandardNth3gt22,kmadd(kmadd(gtu23,JacPDstandardNth2gt23,kmul(gtu13,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth3gt12,JacPDstandardNth2gt13)))),ToReal(2),kmadd(gtu11,kmsub(JacPDstandardNth1gt12,ToReal(2),JacPDstandardNth2gt11),kmul(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33))))))),kmul(gtu12,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu13,kmul(JacPDstandardNth3gt11,ToReal(2)),kmadd(gtu22,kmadd(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth2gt13,ToReal(4)))))))))))))))))),kmul(theta,kmul(kadd(Xt2L,kmadd(beta2L,kmul(eta,ToReal(BetaDriver*(-1 - + - ShiftBCoeff))),kmul(ksub(B2L,Xt2L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))))); + kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta2,kmadd(beta2L,JacPDupwindNthAnti2beta2,kmadd(beta3L,JacPDupwindNthAnti3beta2,kmadd(JacPDupwindNthSymm1beta2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta2,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu12,JacPDstandardNth1alpha,kmadd(gtu22,JacPDstandardNth2alpha,kmul(gtu23,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth2gt22,SQR(gtu22),kmul(JacPDstandardNth2gt11,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu23,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt13,kmadd(gtu13,JacPDstandardNth1gt33,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu11,JacPDstandardNth3gt11))))),kmadd(gtu22,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu23,JacPDstandardNth3gt22,kmadd(kmadd(gtu23,JacPDstandardNth2gt23,kmul(gtu13,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth3gt12,JacPDstandardNth2gt13)))),ToReal(2),kmadd(gtu11,kmsub(JacPDstandardNth1gt12,ToReal(2),JacPDstandardNth2gt11),kmul(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33))))))),kmul(gtu12,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu13,kmul(JacPDstandardNth3gt11,ToReal(2)),kmadd(gtu22,kmadd(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth2gt13,ToReal(4)))))))))))))))))),kmul(theta,kmul(kadd(Xt2L,kmadd(ksub(B2L,Xt2L),ToReal(ShiftBCoeff),kmul(beta2L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))))); CCTK_REAL_VEC dtbetazL = - kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(kmadd(gtu13,JacPDstandardNth1alpha,kmadd(gtu23,JacPDstandardNth2alpha,kmul(gtu33,JacPDstandardNth3alpha))),kmul(phiL,ToReal(-2)),kmul(alphaL,kmadd(phiL,kmul(kmadd(JacPDstandardNth3gt11,SQR(gtu13),kmul(JacPDstandardNth3gt22,SQR(gtu23))),ToReal(2)),kmadd(gtu23,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu33,JacPDstandardNth2gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt12,kmadd(gtu12,JacPDstandardNth1gt22,kmul(gtu33,JacPDstandardNth3gt23))),ToReal(2),kmul(gtu11,JacPDstandardNth2gt11)))))),kmadd(gtu33,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,knmsub(gtu22,JacPDstandardNth3gt22,kmadd(kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu12,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth2gt13,JacPDstandardNth3gt12)))),ToReal(2),kmul(gtu11,kmsub(JacPDstandardNth1gt13,ToReal(2),JacPDstandardNth3gt11))))))),kmul(gtu13,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu12,kmul(JacPDstandardNth2gt11,ToReal(2)),kmadd(gtu22,kmsub(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmadd(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth3gt12,ToReal(4)))))))))))))))))),kmul(theta,kmul(kadd(Xt3L,kmadd(beta3L,kmul(eta,ToReal(BetaDriver*(-1 - + - ShiftBCoeff))),kmul(ksub(B3L,Xt3L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))))); - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count); - break; - } - vec_store_nta(dtalp[index],dtalpL); - vec_store_nta(dtbetax[index],dtbetaxL); - vec_store_nta(dtbetay[index],dtbetayL); - vec_store_nta(dtbetaz[index],dtbetazL); + kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu13,JacPDstandardNth1alpha,kmadd(gtu23,JacPDstandardNth2alpha,kmul(gtu33,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmul(kmadd(JacPDstandardNth3gt11,SQR(gtu13),kmul(JacPDstandardNth3gt22,SQR(gtu23))),ToReal(2)),kmadd(gtu23,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu33,JacPDstandardNth2gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt12,kmadd(gtu12,JacPDstandardNth1gt22,kmul(gtu33,JacPDstandardNth3gt23))),ToReal(2),kmul(gtu11,JacPDstandardNth2gt11)))))),kmadd(gtu33,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,knmsub(gtu22,JacPDstandardNth3gt22,kmadd(kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu12,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth2gt13,JacPDstandardNth3gt12)))),ToReal(2),kmul(gtu11,kmsub(JacPDstandardNth1gt13,ToReal(2),JacPDstandardNth3gt11))))))),kmul(gtu13,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu12,kmul(JacPDstandardNth2gt11,ToReal(2)),kmadd(gtu22,kmsub(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmadd(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth3gt12,ToReal(4)))))))))))))))))),kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))))); + + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(dtalp[index],dtalpL); + vec_store_nta_partial(dtbetax[index],dtbetaxL); + vec_store_nta_partial(dtbetay[index],dtbetayL); + vec_store_nta_partial(dtbetaz[index],dtbetazL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_convertToADMBaseDtLapseShift); + LC_ENDLOOP3VEC(ML_BSSN_MP_convertToADMBaseDtLapseShift); } extern "C" void ML_BSSN_MP_convertToADMBaseDtLapseShift(CCTK_ARGUMENTS) @@ -913,7 +875,19 @@ extern "C" void ML_BSSN_MP_convertToADMBaseDtLapseShift(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN_MP::ML_dtlapse","ML_BSSN_MP::ML_dtshift","ML_BSSN_MP::ML_Gamma","ML_BSSN_MP::ML_lapse","ML_BSSN_MP::ML_log_confac","ML_BSSN_MP::ML_metric","ML_BSSN_MP::ML_shift","ML_BSSN_MP::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_MP::ML_dtlapse", + "ML_BSSN_MP::ML_dtshift", + "ML_BSSN_MP::ML_Gamma", + "ML_BSSN_MP::ML_lapse", + "ML_BSSN_MP::ML_log_confac", + "ML_BSSN_MP::ML_metric", + "ML_BSSN_MP::ML_shift", + "ML_BSSN_MP::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_convertToADMBaseDtLapseShift", 12, groups); switch(fdOrder) @@ -935,7 +909,7 @@ extern "C" void ML_BSSN_MP_convertToADMBaseDtLapseShift(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_MP_convertToADMBaseDtLapseShift_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_MP_convertToADMBaseDtLapseShift_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP/src/ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary.cc b/ML_BSSN_MP/src/ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary.cc index a29cfcd..5f6ec46 100644 --- a/ML_BSSN_MP/src/ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary.cc +++ b/ML_BSSN_MP/src/ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -43,8 +44,6 @@ static void ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * res DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -81,9 +80,9 @@ static void ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * res CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -102,14 +101,14 @@ static void ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * res CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -123,9 +122,9 @@ static void ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * res CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -206,7 +205,7 @@ static void ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * res /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary, + LC_LOOP3VEC(ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -252,7 +251,7 @@ static void ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * res /* Calculate temporaries and grid functions */ CCTK_REAL_VEC eta = - kfmin(ToReal(1),kmul(INV(rL),ToReal(SpatialBetaDriverRadius))); + kmul(INV(kfmax(rL,ToReal(SpatialBetaDriverRadius))),ToReal(SpatialBetaDriverRadius)); CCTK_REAL_VEC theta = kfmin(ToReal(1),kexp(knmsub(rL,INV(ToReal(SpatialShiftGammaCoeffRadius)),ToReal(1)))); @@ -275,60 +274,23 @@ static void ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * res else { dtbetaxL = - kmul(theta,kmul(kadd(Xt1L,kmadd(beta1L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B1L,Xt1L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt1L,kmadd(ksub(B1L,Xt1L),ToReal(ShiftBCoeff),kmul(beta1L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); dtbetayL = - kmul(theta,kmul(kadd(Xt2L,kmadd(beta2L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B2L,Xt2L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt2L,kmadd(ksub(B2L,Xt2L),ToReal(ShiftBCoeff),kmul(beta2L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); dtbetazL = - kmul(theta,kmul(kadd(Xt3L,kmadd(beta3L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B3L,Xt3L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count); - continue; + kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); } - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count); - break; - } - vec_store_nta(dtalp[index],dtalpL); - vec_store_nta(dtbetax[index],dtbetaxL); - vec_store_nta(dtbetay[index],dtbetayL); - vec_store_nta(dtbetaz[index],dtbetazL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(dtalp[index],dtalpL); + vec_store_nta_partial(dtbetax[index],dtbetaxL); + vec_store_nta_partial(dtbetay[index],dtbetayL); + vec_store_nta_partial(dtbetaz[index],dtbetazL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary); + LC_ENDLOOP3VEC(ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary); } extern "C" void ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary(CCTK_ARGUMENTS) @@ -347,7 +309,17 @@ extern "C" void ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN_MP::ML_dtlapse","ML_BSSN_MP::ML_dtshift","ML_BSSN_MP::ML_Gamma","ML_BSSN_MP::ML_lapse","ML_BSSN_MP::ML_shift","ML_BSSN_MP::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_MP::ML_dtlapse", + "ML_BSSN_MP::ML_dtshift", + "ML_BSSN_MP::ML_Gamma", + "ML_BSSN_MP::ML_lapse", + "ML_BSSN_MP::ML_shift", + "ML_BSSN_MP::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary", 10, groups); switch(fdOrder) @@ -365,7 +337,7 @@ extern "C" void ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverBoundaryWithGhosts(cctkGH, &ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary_Body); + GenericFD_LoopOverBoundaryWithGhosts(cctkGH, ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP/src/ML_BSSN_MP_convertToADMBaseFakeDtLapseShift.cc b/ML_BSSN_MP/src/ML_BSSN_MP_convertToADMBaseFakeDtLapseShift.cc index 3b21313..ff7cd82 100644 --- a/ML_BSSN_MP/src/ML_BSSN_MP_convertToADMBaseFakeDtLapseShift.cc +++ b/ML_BSSN_MP/src/ML_BSSN_MP_convertToADMBaseFakeDtLapseShift.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_MP_convertToADMBaseFakeDtLapseShift_Body(cGH const * restric DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_MP_convertToADMBaseFakeDtLapseShift_Body(cGH const * restric CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_MP_convertToADMBaseFakeDtLapseShift_Body(cGH const * restric CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_MP_convertToADMBaseFakeDtLapseShift_Body(cGH const * restric CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_MP_convertToADMBaseFakeDtLapseShift_Body(cGH const * restric /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_convertToADMBaseFakeDtLapseShift, + LC_LOOP3VEC(ML_BSSN_MP_convertToADMBaseFakeDtLapseShift, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -237,7 +236,7 @@ static void ML_BSSN_MP_convertToADMBaseFakeDtLapseShift_Body(cGH const * restric /* Calculate temporaries and grid functions */ CCTK_REAL_VEC eta = - kfmin(ToReal(1),kmul(INV(rL),ToReal(SpatialBetaDriverRadius))); + kmul(INV(kfmax(rL,ToReal(SpatialBetaDriverRadius))),ToReal(SpatialBetaDriverRadius)); CCTK_REAL_VEC theta = kfmin(ToReal(1),kexp(knmsub(rL,INV(ToReal(SpatialShiftGammaCoeffRadius)),ToReal(1)))); @@ -260,60 +259,23 @@ static void ML_BSSN_MP_convertToADMBaseFakeDtLapseShift_Body(cGH const * restric else { dtbetaxL = - kmul(theta,kmul(kadd(Xt1L,kmadd(beta1L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B1L,Xt1L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt1L,kmadd(ksub(B1L,Xt1L),ToReal(ShiftBCoeff),kmul(beta1L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); dtbetayL = - kmul(theta,kmul(kadd(Xt2L,kmadd(beta2L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B2L,Xt2L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt2L,kmadd(ksub(B2L,Xt2L),ToReal(ShiftBCoeff),kmul(beta2L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); dtbetazL = - kmul(theta,kmul(kadd(Xt3L,kmadd(beta3L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B3L,Xt3L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count); - continue; + kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); } - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count); - break; - } - vec_store_nta(dtalp[index],dtalpL); - vec_store_nta(dtbetax[index],dtbetaxL); - vec_store_nta(dtbetay[index],dtbetayL); - vec_store_nta(dtbetaz[index],dtbetazL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(dtalp[index],dtalpL); + vec_store_nta_partial(dtbetax[index],dtbetaxL); + vec_store_nta_partial(dtbetay[index],dtbetayL); + vec_store_nta_partial(dtbetaz[index],dtbetazL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_convertToADMBaseFakeDtLapseShift); + LC_ENDLOOP3VEC(ML_BSSN_MP_convertToADMBaseFakeDtLapseShift); } extern "C" void ML_BSSN_MP_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS) @@ -332,7 +294,17 @@ extern "C" void ML_BSSN_MP_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN_MP::ML_dtlapse","ML_BSSN_MP::ML_dtshift","ML_BSSN_MP::ML_Gamma","ML_BSSN_MP::ML_lapse","ML_BSSN_MP::ML_shift","ML_BSSN_MP::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_MP::ML_dtlapse", + "ML_BSSN_MP::ML_dtshift", + "ML_BSSN_MP::ML_Gamma", + "ML_BSSN_MP::ML_lapse", + "ML_BSSN_MP::ML_shift", + "ML_BSSN_MP::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_convertToADMBaseFakeDtLapseShift", 10, groups); switch(fdOrder) @@ -350,7 +322,7 @@ extern "C" void ML_BSSN_MP_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_MP_convertToADMBaseFakeDtLapseShift_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_MP_convertToADMBaseFakeDtLapseShift_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP/src/ML_BSSN_MP_enforce.cc b/ML_BSSN_MP/src/ML_BSSN_MP_enforce.cc index 9e5ab53..5c06425 100644 --- a/ML_BSSN_MP/src/ML_BSSN_MP_enforce.cc +++ b/ML_BSSN_MP/src/ML_BSSN_MP_enforce.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_MP_enforce_Body(cGH const * restrict const cctkGH, int const DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_MP_enforce_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_MP_enforce_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_MP_enforce_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_MP_enforce_Body(cGH const * restrict const cctkGH, int const /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_enforce, + LC_LOOP3VEC(ML_BSSN_MP_enforce, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -238,7 +237,8 @@ static void ML_BSSN_MP_enforce_Body(cGH const * restrict const cctkGH, int const /* Calculate temporaries and grid functions */ CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -246,12 +246,14 @@ static void ML_BSSN_MP_enforce_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC trAt = kmadd(At11L,gtu11,kmadd(At22L,gtu22,kmadd(At33L,gtu33,kmul(kmadd(At12L,gtu12,kmadd(At13L,gtu13,kmul(At23L,gtu23))),ToReal(2))))); @@ -276,60 +278,17 @@ static void ML_BSSN_MP_enforce_Body(cGH const * restrict const cctkGH, int const alphaL = kfmax(alphaL,ToReal(MinimumLapse)); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - break; - } - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); } - LC_ENDLOOP3VEC (ML_BSSN_MP_enforce); + LC_ENDLOOP3VEC(ML_BSSN_MP_enforce); } extern "C" void ML_BSSN_MP_enforce(CCTK_ARGUMENTS) @@ -348,7 +307,10 @@ extern "C" void ML_BSSN_MP_enforce(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP::ML_curv","ML_BSSN_MP::ML_lapse","ML_BSSN_MP::ML_metric"}; + const char *const groups[] = { + "ML_BSSN_MP::ML_curv", + "ML_BSSN_MP::ML_lapse", + "ML_BSSN_MP::ML_metric"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_enforce", 3, groups); switch(fdOrder) @@ -366,7 +328,7 @@ extern "C" void ML_BSSN_MP_enforce(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_MP_enforce_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_MP_enforce_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP/src/make.code.defn b/ML_BSSN_MP/src/make.code.defn index a63207b..4cde6e2 100644 --- a/ML_BSSN_MP/src/make.code.defn +++ b/ML_BSSN_MP/src/make.code.defn @@ -1,3 +1,3 @@ # File produced by Kranc -SRCS = Startup.cc RegisterMoL.cc RegisterSymmetries.cc ML_BSSN_MP_Minkowski.cc ML_BSSN_MP_convertFromADMBase.cc ML_BSSN_MP_InitGamma.cc ML_BSSN_MP_convertFromADMBaseGamma.cc ML_BSSN_MP_RHS1.cc ML_BSSN_MP_RHS2.cc ML_BSSN_MP_Dissipation.cc ML_BSSN_MP_Advect.cc ML_BSSN_MP_InitRHS.cc ML_BSSN_MP_RHSStaticBoundary.cc ML_BSSN_MP_enforce.cc ML_BSSN_MP_boundary.cc ML_BSSN_MP_convertToADMBase.cc ML_BSSN_MP_convertToADMBaseDtLapseShift.cc ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary.cc ML_BSSN_MP_convertToADMBaseFakeDtLapseShift.cc ML_BSSN_MP_constraints1.cc ML_BSSN_MP_constraints2.cc Boundaries.cc +SRCS = Startup.cc RegisterSymmetries.cc RegisterMoL.cc ML_BSSN_MP_Minkowski.cc ML_BSSN_MP_convertFromADMBase.cc ML_BSSN_MP_InitGamma.cc ML_BSSN_MP_convertFromADMBaseGamma.cc ML_BSSN_MP_RHS1.cc ML_BSSN_MP_RHS2.cc ML_BSSN_MP_Dissipation.cc ML_BSSN_MP_Advect.cc ML_BSSN_MP_InitRHS.cc ML_BSSN_MP_RHSStaticBoundary.cc ML_BSSN_MP_enforce.cc ML_BSSN_MP_boundary.cc ML_BSSN_MP_convertToADMBase.cc ML_BSSN_MP_convertToADMBaseDtLapseShift.cc ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary.cc ML_BSSN_MP_convertToADMBaseFakeDtLapseShift.cc ML_BSSN_MP_constraints1.cc ML_BSSN_MP_constraints2.cc Boundaries.cc diff --git a/ML_BSSN_MP_O8/param.ccl b/ML_BSSN_MP_O8/param.ccl index ff12765..6fc3795 100644 --- a/ML_BSSN_MP_O8/param.ccl +++ b/ML_BSSN_MP_O8/param.ccl @@ -90,7 +90,7 @@ CCTK_REAL BetaDriver "BetaDriver" } 0 restricted: -CCTK_REAL LapseAdvectionCoeff "Factor in front of the shift advection terms in 1+log" +CCTK_REAL LapseAdvectionCoeff "Factor in front of the lapse advection terms in 1+log" { "*:*" :: "" } 1 @@ -237,6 +237,12 @@ CCTK_INT rhs_timelevels "Number of active RHS timelevels" STEERABLE=RECOVER } 1 restricted: +CCTK_INT other_timelevels "Number of active timelevels for non-evolved grid functions" STEERABLE=RECOVER +{ + 0:3 :: "" +} 1 + +restricted: CCTK_INT ML_BSSN_MP_O8_Minkowski_calc_every "ML_BSSN_MP_O8_Minkowski_calc_every" STEERABLE=ALWAYS { *:* :: "" diff --git a/ML_BSSN_MP_O8/schedule.ccl b/ML_BSSN_MP_O8/schedule.ccl index dcf13f9..61acdbe 100644 --- a/ML_BSSN_MP_O8/schedule.ccl +++ b/ML_BSSN_MP_O8/schedule.ccl @@ -1,15 +1,30 @@ # File produced by Kranc -STORAGE: ML_cons_detg[1] +if (other_timelevels == 1) +{ + STORAGE: ML_cons_detg[1] +} -STORAGE: ML_cons_Gamma[1] +if (other_timelevels == 1) +{ + STORAGE: ML_cons_Gamma[1] +} -STORAGE: ML_cons_traceA[1] +if (other_timelevels == 1) +{ + STORAGE: ML_cons_traceA[1] +} -STORAGE: ML_Ham[1] +if (other_timelevels == 1) +{ + STORAGE: ML_Ham[1] +} -STORAGE: ML_mom[1] +if (other_timelevels == 1) +{ + STORAGE: ML_mom[1] +} if (timelevels == 1) { @@ -251,12 +266,6 @@ schedule ML_BSSN_MP_O8_Startup at STARTUP OPTIONS: meta } "create banner" -schedule ML_BSSN_MP_O8_RegisterVars in MoL_Register -{ - LANG: C - OPTIONS: meta -} "Register Variables for MoL" - schedule ML_BSSN_MP_O8_RegisterSymmetries in SymmetryRegister { LANG: C @@ -269,6 +278,15 @@ if (CCTK_EQUALS(my_initial_data, "Minkowski")) schedule ML_BSSN_MP_O8_Minkowski IN ADMBase_InitialData { LANG: C + WRITES: ML_BSSN_MP_O8::ML_curv + WRITES: ML_BSSN_MP_O8::ML_dtlapse + WRITES: ML_BSSN_MP_O8::ML_dtshift + WRITES: ML_BSSN_MP_O8::ML_Gamma + WRITES: ML_BSSN_MP_O8::ML_lapse + WRITES: ML_BSSN_MP_O8::ML_log_confac + WRITES: ML_BSSN_MP_O8::ML_metric + WRITES: ML_BSSN_MP_O8::ML_shift + WRITES: ML_BSSN_MP_O8::ML_trace_curv } "ML_BSSN_MP_O8_Minkowski" } @@ -278,6 +296,18 @@ if (CCTK_EQUALS(my_initial_data, "ADMBase")) schedule ML_BSSN_MP_O8_convertFromADMBase AT initial AFTER ADMBase_PostInitial { LANG: C + READS: ADMBase::curv + READS: ADMBase::lapse + READS: ADMBase::metric + READS: ADMBase::shift + READS: ML_BSSN_MP_O8::ML_log_confac + READS: ML_BSSN_MP_O8::ML_trace_curv + WRITES: ML_BSSN_MP_O8::ML_curv + WRITES: ML_BSSN_MP_O8::ML_lapse + WRITES: ML_BSSN_MP_O8::ML_log_confac + WRITES: ML_BSSN_MP_O8::ML_metric + WRITES: ML_BSSN_MP_O8::ML_shift + WRITES: ML_BSSN_MP_O8::ML_trace_curv } "ML_BSSN_MP_O8_convertFromADMBase" } @@ -287,6 +317,9 @@ if (CCTK_EQUALS(my_initial_data, "ADMBase")) schedule ML_BSSN_MP_O8_InitGamma AT initial BEFORE ML_BSSN_MP_O8_convertFromADMBaseGamma { LANG: C + WRITES: ML_BSSN_MP_O8::ML_dtlapse + WRITES: ML_BSSN_MP_O8::ML_dtshift + WRITES: ML_BSSN_MP_O8::ML_Gamma } "ML_BSSN_MP_O8_InitGamma" } @@ -299,17 +332,62 @@ if (CCTK_EQUALS(my_initial_data, "ADMBase")) SYNC: ML_dtlapse SYNC: ML_dtshift SYNC: ML_Gamma + READS: ADMBase::dtlapse + READS: ADMBase::dtshift + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_MP_O8::ML_lapse + READS: ML_BSSN_MP_O8::ML_metric + READS: ML_BSSN_MP_O8::ML_shift + WRITES: ML_BSSN_MP_O8::ML_dtlapse + WRITES: ML_BSSN_MP_O8::ML_dtshift + WRITES: ML_BSSN_MP_O8::ML_Gamma } "ML_BSSN_MP_O8_convertFromADMBaseGamma" } schedule ML_BSSN_MP_O8_RHS1 IN ML_BSSN_MP_O8_evolCalcGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_MP_O8::ML_curv + READS: ML_BSSN_MP_O8::ML_dtlapse + READS: ML_BSSN_MP_O8::ML_dtshift + READS: ML_BSSN_MP_O8::ML_Gamma + READS: ML_BSSN_MP_O8::ML_lapse + READS: ML_BSSN_MP_O8::ML_log_confac + READS: ML_BSSN_MP_O8::ML_metric + READS: ML_BSSN_MP_O8::ML_shift + READS: ML_BSSN_MP_O8::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN_MP_O8::ML_dtlapserhs + WRITES: ML_BSSN_MP_O8::ML_dtshiftrhs + WRITES: ML_BSSN_MP_O8::ML_Gammarhs + WRITES: ML_BSSN_MP_O8::ML_lapserhs + WRITES: ML_BSSN_MP_O8::ML_log_confacrhs + WRITES: ML_BSSN_MP_O8::ML_metricrhs + WRITES: ML_BSSN_MP_O8::ML_shiftrhs + WRITES: ML_BSSN_MP_O8::ML_trace_curvrhs } "ML_BSSN_MP_O8_RHS1" schedule ML_BSSN_MP_O8_RHS2 IN ML_BSSN_MP_O8_evolCalcGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_MP_O8::ML_curv + READS: ML_BSSN_MP_O8::ML_Gamma + READS: ML_BSSN_MP_O8::ML_lapse + READS: ML_BSSN_MP_O8::ML_log_confac + READS: ML_BSSN_MP_O8::ML_metric + READS: ML_BSSN_MP_O8::ML_shift + READS: ML_BSSN_MP_O8::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN_MP_O8::ML_curvrhs } "ML_BSSN_MP_O8_RHS2" @@ -318,17 +396,80 @@ if (CCTK_EQUALS(apply_dissipation, "always")) schedule ML_BSSN_MP_O8_Dissipation IN ML_BSSN_MP_O8_evolCalcGroup AFTER (ML_BSSN_MP_O8_RHS1 ML_BSSN_MP_O8_RHS2) { LANG: C + READS: ML_BSSN_MP_O8::ML_curv + READS: ML_BSSN_MP_O8::ML_curvrhs + READS: ML_BSSN_MP_O8::ML_dtlapse + READS: ML_BSSN_MP_O8::ML_dtlapserhs + READS: ML_BSSN_MP_O8::ML_dtshift + READS: ML_BSSN_MP_O8::ML_dtshiftrhs + READS: ML_BSSN_MP_O8::ML_Gamma + READS: ML_BSSN_MP_O8::ML_Gammarhs + READS: ML_BSSN_MP_O8::ML_lapse + READS: ML_BSSN_MP_O8::ML_lapserhs + READS: ML_BSSN_MP_O8::ML_log_confac + READS: ML_BSSN_MP_O8::ML_log_confacrhs + READS: ML_BSSN_MP_O8::ML_metric + READS: ML_BSSN_MP_O8::ML_metricrhs + READS: ML_BSSN_MP_O8::ML_shift + READS: ML_BSSN_MP_O8::ML_shiftrhs + READS: ML_BSSN_MP_O8::ML_trace_curv + READS: ML_BSSN_MP_O8::ML_trace_curvrhs + WRITES: ML_BSSN_MP_O8::ML_curvrhs + WRITES: ML_BSSN_MP_O8::ML_dtlapserhs + WRITES: ML_BSSN_MP_O8::ML_dtshiftrhs + WRITES: ML_BSSN_MP_O8::ML_Gammarhs + WRITES: ML_BSSN_MP_O8::ML_lapserhs + WRITES: ML_BSSN_MP_O8::ML_log_confacrhs + WRITES: ML_BSSN_MP_O8::ML_metricrhs + WRITES: ML_BSSN_MP_O8::ML_shiftrhs + WRITES: ML_BSSN_MP_O8::ML_trace_curvrhs } "ML_BSSN_MP_O8_Dissipation" } schedule ML_BSSN_MP_O8_Advect IN ML_BSSN_MP_O8_evolCalcGroup AFTER (ML_BSSN_MP_O8_RHS1 ML_BSSN_MP_O8_RHS2) { LANG: C + READS: ML_BSSN_MP_O8::ML_curv + READS: ML_BSSN_MP_O8::ML_curvrhs + READS: ML_BSSN_MP_O8::ML_dtlapse + READS: ML_BSSN_MP_O8::ML_dtlapserhs + READS: ML_BSSN_MP_O8::ML_dtshift + READS: ML_BSSN_MP_O8::ML_dtshiftrhs + READS: ML_BSSN_MP_O8::ML_Gamma + READS: ML_BSSN_MP_O8::ML_Gammarhs + READS: ML_BSSN_MP_O8::ML_lapse + READS: ML_BSSN_MP_O8::ML_lapserhs + READS: ML_BSSN_MP_O8::ML_log_confac + READS: ML_BSSN_MP_O8::ML_log_confacrhs + READS: ML_BSSN_MP_O8::ML_metric + READS: ML_BSSN_MP_O8::ML_metricrhs + READS: ML_BSSN_MP_O8::ML_shift + READS: ML_BSSN_MP_O8::ML_shiftrhs + READS: ML_BSSN_MP_O8::ML_trace_curv + READS: ML_BSSN_MP_O8::ML_trace_curvrhs + WRITES: ML_BSSN_MP_O8::ML_curvrhs + WRITES: ML_BSSN_MP_O8::ML_dtlapserhs + WRITES: ML_BSSN_MP_O8::ML_dtshiftrhs + WRITES: ML_BSSN_MP_O8::ML_Gammarhs + WRITES: ML_BSSN_MP_O8::ML_lapserhs + WRITES: ML_BSSN_MP_O8::ML_log_confacrhs + WRITES: ML_BSSN_MP_O8::ML_metricrhs + WRITES: ML_BSSN_MP_O8::ML_shiftrhs + WRITES: ML_BSSN_MP_O8::ML_trace_curvrhs } "ML_BSSN_MP_O8_Advect" schedule ML_BSSN_MP_O8_InitRHS AT analysis BEFORE ML_BSSN_MP_O8_evolCalcGroup { LANG: C + WRITES: ML_BSSN_MP_O8::ML_curvrhs + WRITES: ML_BSSN_MP_O8::ML_dtlapserhs + WRITES: ML_BSSN_MP_O8::ML_dtshiftrhs + WRITES: ML_BSSN_MP_O8::ML_Gammarhs + WRITES: ML_BSSN_MP_O8::ML_lapserhs + WRITES: ML_BSSN_MP_O8::ML_log_confacrhs + WRITES: ML_BSSN_MP_O8::ML_metricrhs + WRITES: ML_BSSN_MP_O8::ML_shiftrhs + WRITES: ML_BSSN_MP_O8::ML_trace_curvrhs } "ML_BSSN_MP_O8_InitRHS" @@ -337,12 +478,26 @@ if (CCTK_EQUALS(my_rhs_boundary_condition, "static")) schedule ML_BSSN_MP_O8_RHSStaticBoundary IN MoL_CalcRHS { LANG: C + WRITES: ML_BSSN_MP_O8::ML_curvrhs + WRITES: ML_BSSN_MP_O8::ML_dtlapserhs + WRITES: ML_BSSN_MP_O8::ML_dtshiftrhs + WRITES: ML_BSSN_MP_O8::ML_Gammarhs + WRITES: ML_BSSN_MP_O8::ML_lapserhs + WRITES: ML_BSSN_MP_O8::ML_log_confacrhs + WRITES: ML_BSSN_MP_O8::ML_metricrhs + WRITES: ML_BSSN_MP_O8::ML_shiftrhs + WRITES: ML_BSSN_MP_O8::ML_trace_curvrhs } "ML_BSSN_MP_O8_RHSStaticBoundary" } schedule ML_BSSN_MP_O8_enforce IN MoL_PostStepModify { LANG: C + READS: ML_BSSN_MP_O8::ML_curv + READS: ML_BSSN_MP_O8::ML_lapse + READS: ML_BSSN_MP_O8::ML_metric + WRITES: ML_BSSN_MP_O8::ML_curv + WRITES: ML_BSSN_MP_O8::ML_lapse } "ML_BSSN_MP_O8_enforce" @@ -351,12 +506,32 @@ if (CCTK_EQUALS(my_boundary_condition, "Minkowski")) schedule ML_BSSN_MP_O8_boundary IN MoL_PostStep { LANG: C + WRITES: ML_BSSN_MP_O8::ML_curv + WRITES: ML_BSSN_MP_O8::ML_dtlapse + WRITES: ML_BSSN_MP_O8::ML_dtshift + WRITES: ML_BSSN_MP_O8::ML_Gamma + WRITES: ML_BSSN_MP_O8::ML_lapse + WRITES: ML_BSSN_MP_O8::ML_log_confac + WRITES: ML_BSSN_MP_O8::ML_metric + WRITES: ML_BSSN_MP_O8::ML_shift + WRITES: ML_BSSN_MP_O8::ML_trace_curv } "ML_BSSN_MP_O8_boundary" } schedule ML_BSSN_MP_O8_convertToADMBase IN ML_BSSN_MP_O8_convertToADMBaseGroup { LANG: C + READS: ADMBase::metric + READS: ML_BSSN_MP_O8::ML_curv + READS: ML_BSSN_MP_O8::ML_lapse + READS: ML_BSSN_MP_O8::ML_log_confac + READS: ML_BSSN_MP_O8::ML_metric + READS: ML_BSSN_MP_O8::ML_shift + READS: ML_BSSN_MP_O8::ML_trace_curv + WRITES: ADMBase::curv + WRITES: ADMBase::lapse + WRITES: ADMBase::metric + WRITES: ADMBase::shift } "ML_BSSN_MP_O8_convertToADMBase" @@ -367,6 +542,18 @@ if (CCTK_EQUALS(dt_lapse_shift_method, "correct")) LANG: C SYNC: ADMBase::dtlapse SYNC: ADMBase::dtshift + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_MP_O8::ML_dtlapse + READS: ML_BSSN_MP_O8::ML_dtshift + READS: ML_BSSN_MP_O8::ML_Gamma + READS: ML_BSSN_MP_O8::ML_lapse + READS: ML_BSSN_MP_O8::ML_log_confac + READS: ML_BSSN_MP_O8::ML_metric + READS: ML_BSSN_MP_O8::ML_shift + READS: ML_BSSN_MP_O8::ML_trace_curv + WRITES: ADMBase::dtlapse + WRITES: ADMBase::dtshift } "ML_BSSN_MP_O8_convertToADMBaseDtLapseShift" } @@ -376,6 +563,17 @@ if (CCTK_EQUALS(dt_lapse_shift_method, "correct")) schedule ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary IN ML_BSSN_MP_O8_convertToADMBaseGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_MP_O8::ML_dtlapse + READS: ML_BSSN_MP_O8::ML_dtshift + READS: ML_BSSN_MP_O8::ML_Gamma + READS: ML_BSSN_MP_O8::ML_lapse + READS: ML_BSSN_MP_O8::ML_metric + READS: ML_BSSN_MP_O8::ML_shift + READS: ML_BSSN_MP_O8::ML_trace_curv + WRITES: ADMBase::dtlapse + WRITES: ADMBase::dtshift } "ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary" } @@ -385,6 +583,17 @@ if (CCTK_EQUALS(dt_lapse_shift_method, "noLapseShiftAdvection")) schedule ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift IN ML_BSSN_MP_O8_convertToADMBaseGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_MP_O8::ML_dtlapse + READS: ML_BSSN_MP_O8::ML_dtshift + READS: ML_BSSN_MP_O8::ML_Gamma + READS: ML_BSSN_MP_O8::ML_lapse + READS: ML_BSSN_MP_O8::ML_metric + READS: ML_BSSN_MP_O8::ML_shift + READS: ML_BSSN_MP_O8::ML_trace_curv + WRITES: ADMBase::dtlapse + WRITES: ADMBase::dtshift } "ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift" } @@ -396,6 +605,17 @@ schedule group ML_BSSN_MP_O8_constraints1_group in MoL_PseudoEvolution after MoL schedule ML_BSSN_MP_O8_constraints1 in ML_BSSN_MP_O8_constraints1_group { LANG: C + READS: ML_BSSN_MP_O8::ML_curv + READS: ML_BSSN_MP_O8::ML_Gamma + READS: ML_BSSN_MP_O8::ML_lapse + READS: ML_BSSN_MP_O8::ML_log_confac + READS: ML_BSSN_MP_O8::ML_metric + READS: ML_BSSN_MP_O8::ML_shift + READS: ML_BSSN_MP_O8::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN_MP_O8::ML_Ham } "ML_BSSN_MP_O8_constraints1" schedule ML_BSSN_MP_O8_constraints1_SelectBCs in ML_BSSN_MP_O8_constraints1_bc_group @@ -428,6 +648,20 @@ schedule group ML_BSSN_MP_O8_constraints2_group in MoL_PseudoEvolution after MoL schedule ML_BSSN_MP_O8_constraints2 in ML_BSSN_MP_O8_constraints2_group { LANG: C + READS: ML_BSSN_MP_O8::ML_curv + READS: ML_BSSN_MP_O8::ML_Gamma + READS: ML_BSSN_MP_O8::ML_lapse + READS: ML_BSSN_MP_O8::ML_log_confac + READS: ML_BSSN_MP_O8::ML_metric + READS: ML_BSSN_MP_O8::ML_shift + READS: ML_BSSN_MP_O8::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN_MP_O8::ML_cons_detg + WRITES: ML_BSSN_MP_O8::ML_cons_Gamma + WRITES: ML_BSSN_MP_O8::ML_cons_traceA + WRITES: ML_BSSN_MP_O8::ML_mom } "ML_BSSN_MP_O8_constraints2" schedule ML_BSSN_MP_O8_constraints2_SelectBCs in ML_BSSN_MP_O8_constraints2_bc_group @@ -476,6 +710,12 @@ schedule ML_BSSN_MP_O8_CheckBoundaries at BASEGRID OPTIONS: meta } "check boundaries treatment" +schedule ML_BSSN_MP_O8_RegisterVars in MoL_Register +{ + LANG: C + OPTIONS: meta +} "Register Variables for MoL" + schedule group ApplyBCs as ML_BSSN_MP_O8_ApplyBCs in MoL_PostStep after ML_BSSN_MP_O8_SelectBoundConds { # no language specified diff --git a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_Advect.cc b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_Advect.cc index f6ff7b5..68b8762 100644 --- a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_Advect.cc +++ b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_Advect.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -64,8 +65,6 @@ static void ML_BSSN_MP_O8_Advect_Body(cGH const * restrict const cctkGH, int con DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -102,9 +101,9 @@ static void ML_BSSN_MP_O8_Advect_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -123,14 +122,14 @@ static void ML_BSSN_MP_O8_Advect_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -144,9 +143,9 @@ static void ML_BSSN_MP_O8_Advect_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -227,7 +226,7 @@ static void ML_BSSN_MP_O8_Advect_Body(cGH const * restrict const cctkGH, int con /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_O8_Advect, + LC_LOOP3VEC(ML_BSSN_MP_O8_Advect, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1986,7 +1985,7 @@ static void ML_BSSN_MP_O8_Advect_Body(cGH const * restrict const cctkGH, int con } phirhsL = - kmadd(beta1L,JacPDupwindNthAnti1phi,kmadd(beta2L,JacPDupwindNthAnti2phi,kmadd(beta3L,JacPDupwindNthAnti3phi,kadd(phirhsL,kmadd(JacPDupwindNthSymm1phi,kfabs(beta1L),kmadd(JacPDupwindNthSymm2phi,kfabs(beta2L),kmul(JacPDupwindNthSymm3phi,kfabs(beta3L)))))))); + kadd(phirhsL,kmadd(beta1L,JacPDupwindNthAnti1phi,kmadd(beta2L,JacPDupwindNthAnti2phi,kmadd(beta3L,JacPDupwindNthAnti3phi,kmadd(JacPDupwindNthSymm1phi,kfabs(beta1L),kmadd(JacPDupwindNthSymm2phi,kfabs(beta2L),kmul(JacPDupwindNthSymm3phi,kfabs(beta3L)))))))); gt11rhsL = kadd(gt11rhsL,kmadd(beta1L,JacPDupwindNthAnti1gt11,kmadd(beta2L,JacPDupwindNthAnti2gt11,kmadd(beta3L,JacPDupwindNthAnti3gt11,kmadd(JacPDupwindNthSymm1gt11,kfabs(beta1L),kmadd(JacPDupwindNthSymm2gt11,kfabs(beta2L),kmul(JacPDupwindNthSymm3gt11,kfabs(beta3L)))))))); @@ -2007,16 +2006,16 @@ static void ML_BSSN_MP_O8_Advect_Body(cGH const * restrict const cctkGH, int con kadd(gt33rhsL,kmadd(beta1L,JacPDupwindNthAnti1gt33,kmadd(beta2L,JacPDupwindNthAnti2gt33,kmadd(beta3L,JacPDupwindNthAnti3gt33,kmadd(JacPDupwindNthSymm1gt33,kfabs(beta1L),kmadd(JacPDupwindNthSymm2gt33,kfabs(beta2L),kmul(JacPDupwindNthSymm3gt33,kfabs(beta3L)))))))); Xt1rhsL = - kmadd(beta1L,JacPDupwindNthAnti1Xt1,kmadd(beta2L,JacPDupwindNthAnti2Xt1,kmadd(beta3L,JacPDupwindNthAnti3Xt1,kadd(Xt1rhsL,kmadd(JacPDupwindNthSymm1Xt1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt1,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt1,kfabs(beta3L)))))))); + kadd(Xt1rhsL,kmadd(beta1L,JacPDupwindNthAnti1Xt1,kmadd(beta2L,JacPDupwindNthAnti2Xt1,kmadd(beta3L,JacPDupwindNthAnti3Xt1,kmadd(JacPDupwindNthSymm1Xt1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt1,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt1,kfabs(beta3L)))))))); Xt2rhsL = - kmadd(beta1L,JacPDupwindNthAnti1Xt2,kmadd(beta2L,JacPDupwindNthAnti2Xt2,kmadd(beta3L,JacPDupwindNthAnti3Xt2,kadd(Xt2rhsL,kmadd(JacPDupwindNthSymm1Xt2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt2,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt2,kfabs(beta3L)))))))); + kadd(Xt2rhsL,kmadd(beta1L,JacPDupwindNthAnti1Xt2,kmadd(beta2L,JacPDupwindNthAnti2Xt2,kmadd(beta3L,JacPDupwindNthAnti3Xt2,kmadd(JacPDupwindNthSymm1Xt2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt2,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt2,kfabs(beta3L)))))))); Xt3rhsL = - kmadd(beta1L,JacPDupwindNthAnti1Xt3,kmadd(beta2L,JacPDupwindNthAnti2Xt3,kmadd(beta3L,JacPDupwindNthAnti3Xt3,kadd(Xt3rhsL,kmadd(JacPDupwindNthSymm1Xt3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt3,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt3,kfabs(beta3L)))))))); + kadd(Xt3rhsL,kmadd(beta1L,JacPDupwindNthAnti1Xt3,kmadd(beta2L,JacPDupwindNthAnti2Xt3,kmadd(beta3L,JacPDupwindNthAnti3Xt3,kmadd(JacPDupwindNthSymm1Xt3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt3,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt3,kfabs(beta3L)))))))); trKrhsL = - kmadd(beta1L,JacPDupwindNthAnti1trK,kmadd(beta2L,JacPDupwindNthAnti2trK,kmadd(beta3L,JacPDupwindNthAnti3trK,kadd(trKrhsL,kmadd(JacPDupwindNthSymm1trK,kfabs(beta1L),kmadd(JacPDupwindNthSymm2trK,kfabs(beta2L),kmul(JacPDupwindNthSymm3trK,kfabs(beta3L)))))))); + kadd(trKrhsL,kmadd(beta1L,JacPDupwindNthAnti1trK,kmadd(beta2L,JacPDupwindNthAnti2trK,kmadd(beta3L,JacPDupwindNthAnti3trK,kmadd(JacPDupwindNthSymm1trK,kfabs(beta1L),kmadd(JacPDupwindNthSymm2trK,kfabs(beta2L),kmul(JacPDupwindNthSymm3trK,kfabs(beta3L)))))))); At11rhsL = kadd(At11rhsL,kmadd(beta1L,JacPDupwindNthAnti1At11,kmadd(beta2L,JacPDupwindNthAnti2At11,kmadd(beta3L,JacPDupwindNthAnti3At11,kmadd(JacPDupwindNthSymm1At11,kfabs(beta1L),kmadd(JacPDupwindNthSymm2At11,kfabs(beta2L),kmul(JacPDupwindNthSymm3At11,kfabs(beta3L)))))))); @@ -2040,7 +2039,7 @@ static void ML_BSSN_MP_O8_Advect_Body(cGH const * restrict const cctkGH, int con kmadd(kmadd(beta1L,JacPDupwindNthAnti1alpha,kmadd(beta2L,JacPDupwindNthAnti2alpha,kmadd(beta3L,JacPDupwindNthAnti3alpha,kmadd(JacPDupwindNthSymm1alpha,kfabs(beta1L),kmadd(JacPDupwindNthSymm2alpha,kfabs(beta2L),kmul(JacPDupwindNthSymm3alpha,kfabs(beta3L))))))),ToReal(LapseAdvectionCoeff),alpharhsL); ArhsL = - kmadd(kmadd(beta1L,JacPDupwindNthAnti1A,kmadd(beta2L,JacPDupwindNthAnti2A,kmadd(beta3L,JacPDupwindNthAnti3A,kmadd(JacPDupwindNthSymm1A,kfabs(beta1L),kmadd(JacPDupwindNthSymm2A,kfabs(beta2L),kmul(JacPDupwindNthSymm3A,kfabs(beta3L))))))),ToReal(LapseAdvectionCoeff),ArhsL); + kmadd(ToReal(LapseACoeff),kmsub(kmadd(beta1L,JacPDupwindNthAnti1A,kmadd(beta2L,JacPDupwindNthAnti2A,kmadd(beta3L,JacPDupwindNthAnti3A,kmadd(JacPDupwindNthSymm1A,kfabs(beta1L),kmadd(JacPDupwindNthSymm2A,kfabs(beta2L),kmul(JacPDupwindNthSymm3A,kfabs(beta3L))))))),ToReal(LapseAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1trK,kmadd(beta2L,JacPDupwindNthAnti2trK,kmadd(beta3L,JacPDupwindNthAnti3trK,kmadd(JacPDupwindNthSymm1trK,kfabs(beta1L),kmadd(JacPDupwindNthSymm2trK,kfabs(beta2L),kmul(JacPDupwindNthSymm3trK,kfabs(beta3L))))))),kadd(ToReal(-1),ToReal(LapseAdvectionCoeff)))),ArhsL); beta1rhsL = kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta1,kmadd(beta2L,JacPDupwindNthAnti2beta1,kmadd(beta3L,JacPDupwindNthAnti3beta1,kmadd(JacPDupwindNthSymm1beta1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta1,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),beta1rhsL); @@ -2052,140 +2051,43 @@ static void ML_BSSN_MP_O8_Advect_Body(cGH const * restrict const cctkGH, int con kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),beta3rhsL); B1rhsL = - kadd(B1rhsL,kmadd(kmadd(beta1L,ksub(JacPDupwindNthAnti1B1,JacPDupwindNthAnti1Xt1),kmadd(beta2L,ksub(JacPDupwindNthAnti2B1,JacPDupwindNthAnti2Xt1),kmadd(beta3L,ksub(JacPDupwindNthAnti3B1,JacPDupwindNthAnti3Xt1),kmadd(kfabs(beta1L),ksub(JacPDupwindNthSymm1B1,JacPDupwindNthSymm1Xt1),kmadd(kfabs(beta2L),ksub(JacPDupwindNthSymm2B1,JacPDupwindNthSymm2Xt1),kmul(kfabs(beta3L),ksub(JacPDupwindNthSymm3B1,JacPDupwindNthSymm3Xt1))))))),ToReal(ShiftAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1Xt1,kmadd(beta2L,JacPDupwindNthAnti2Xt1,kmadd(beta3L,JacPDupwindNthAnti3Xt1,kmadd(JacPDupwindNthSymm1Xt1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt1,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt1,kfabs(beta3L))))))),ToReal(ShiftBCoeff)))); + kmadd(kmsub(kmadd(beta1L,JacPDupwindNthAnti1B1,kmadd(beta2L,JacPDupwindNthAnti2B1,kmadd(beta3L,JacPDupwindNthAnti3B1,kmadd(JacPDupwindNthSymm1B1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2B1,kfabs(beta2L),kmul(JacPDupwindNthSymm3B1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1Xt1,kmadd(beta2L,JacPDupwindNthAnti2Xt1,kmadd(beta3L,JacPDupwindNthAnti3Xt1,kmadd(JacPDupwindNthSymm1Xt1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt1,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt1,kfabs(beta3L))))))),kadd(ToReal(-1),ToReal(ShiftAdvectionCoeff)))),ToReal(ShiftBCoeff),B1rhsL); B2rhsL = - kadd(B2rhsL,kmadd(kmadd(beta1L,ksub(JacPDupwindNthAnti1B2,JacPDupwindNthAnti1Xt2),kmadd(beta2L,ksub(JacPDupwindNthAnti2B2,JacPDupwindNthAnti2Xt2),kmadd(beta3L,ksub(JacPDupwindNthAnti3B2,JacPDupwindNthAnti3Xt2),kmadd(kfabs(beta1L),ksub(JacPDupwindNthSymm1B2,JacPDupwindNthSymm1Xt2),kmadd(kfabs(beta2L),ksub(JacPDupwindNthSymm2B2,JacPDupwindNthSymm2Xt2),kmul(kfabs(beta3L),ksub(JacPDupwindNthSymm3B2,JacPDupwindNthSymm3Xt2))))))),ToReal(ShiftAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1Xt2,kmadd(beta2L,JacPDupwindNthAnti2Xt2,kmadd(beta3L,JacPDupwindNthAnti3Xt2,kmadd(JacPDupwindNthSymm1Xt2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt2,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt2,kfabs(beta3L))))))),ToReal(ShiftBCoeff)))); + kmadd(kmsub(kmadd(beta1L,JacPDupwindNthAnti1B2,kmadd(beta2L,JacPDupwindNthAnti2B2,kmadd(beta3L,JacPDupwindNthAnti3B2,kmadd(JacPDupwindNthSymm1B2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2B2,kfabs(beta2L),kmul(JacPDupwindNthSymm3B2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1Xt2,kmadd(beta2L,JacPDupwindNthAnti2Xt2,kmadd(beta3L,JacPDupwindNthAnti3Xt2,kmadd(JacPDupwindNthSymm1Xt2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt2,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt2,kfabs(beta3L))))))),kadd(ToReal(-1),ToReal(ShiftAdvectionCoeff)))),ToReal(ShiftBCoeff),B2rhsL); B3rhsL = - kadd(B3rhsL,kmadd(kmadd(beta1L,ksub(JacPDupwindNthAnti1B3,JacPDupwindNthAnti1Xt3),kmadd(beta2L,ksub(JacPDupwindNthAnti2B3,JacPDupwindNthAnti2Xt3),kmadd(beta3L,ksub(JacPDupwindNthAnti3B3,JacPDupwindNthAnti3Xt3),kmadd(kfabs(beta1L),ksub(JacPDupwindNthSymm1B3,JacPDupwindNthSymm1Xt3),kmadd(kfabs(beta2L),ksub(JacPDupwindNthSymm2B3,JacPDupwindNthSymm2Xt3),kmul(kfabs(beta3L),ksub(JacPDupwindNthSymm3B3,JacPDupwindNthSymm3Xt3))))))),ToReal(ShiftAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1Xt3,kmadd(beta2L,JacPDupwindNthAnti2Xt3,kmadd(beta3L,JacPDupwindNthAnti3Xt3,kmadd(JacPDupwindNthSymm1Xt3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt3,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt3,kfabs(beta3L))))))),ToReal(ShiftBCoeff)))); - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ + kmadd(kmsub(kmadd(beta1L,JacPDupwindNthAnti1B3,kmadd(beta2L,JacPDupwindNthAnti2B3,kmadd(beta3L,JacPDupwindNthAnti3B3,kmadd(JacPDupwindNthSymm1B3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2B3,kfabs(beta2L),kmul(JacPDupwindNthSymm3B3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1Xt3,kmadd(beta2L,JacPDupwindNthAnti2Xt3,kmadd(beta3L,JacPDupwindNthAnti3Xt3,kmadd(JacPDupwindNthSymm1Xt3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt3,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt3,kfabs(beta3L))))))),kadd(ToReal(-1),ToReal(ShiftAdvectionCoeff)))),ToReal(ShiftBCoeff),B3rhsL); - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_O8_Advect); + LC_ENDLOOP3VEC(ML_BSSN_MP_O8_Advect); } extern "C" void ML_BSSN_MP_O8_Advect(CCTK_ARGUMENTS) @@ -2204,7 +2106,25 @@ extern "C" void ML_BSSN_MP_O8_Advect(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP_O8::ML_curv","ML_BSSN_MP_O8::ML_curvrhs","ML_BSSN_MP_O8::ML_dtlapse","ML_BSSN_MP_O8::ML_dtlapserhs","ML_BSSN_MP_O8::ML_dtshift","ML_BSSN_MP_O8::ML_dtshiftrhs","ML_BSSN_MP_O8::ML_Gamma","ML_BSSN_MP_O8::ML_Gammarhs","ML_BSSN_MP_O8::ML_lapse","ML_BSSN_MP_O8::ML_lapserhs","ML_BSSN_MP_O8::ML_log_confac","ML_BSSN_MP_O8::ML_log_confacrhs","ML_BSSN_MP_O8::ML_metric","ML_BSSN_MP_O8::ML_metricrhs","ML_BSSN_MP_O8::ML_shift","ML_BSSN_MP_O8::ML_shiftrhs","ML_BSSN_MP_O8::ML_trace_curv","ML_BSSN_MP_O8::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN_MP_O8::ML_curv", + "ML_BSSN_MP_O8::ML_curvrhs", + "ML_BSSN_MP_O8::ML_dtlapse", + "ML_BSSN_MP_O8::ML_dtlapserhs", + "ML_BSSN_MP_O8::ML_dtshift", + "ML_BSSN_MP_O8::ML_dtshiftrhs", + "ML_BSSN_MP_O8::ML_Gamma", + "ML_BSSN_MP_O8::ML_Gammarhs", + "ML_BSSN_MP_O8::ML_lapse", + "ML_BSSN_MP_O8::ML_lapserhs", + "ML_BSSN_MP_O8::ML_log_confac", + "ML_BSSN_MP_O8::ML_log_confacrhs", + "ML_BSSN_MP_O8::ML_metric", + "ML_BSSN_MP_O8::ML_metricrhs", + "ML_BSSN_MP_O8::ML_shift", + "ML_BSSN_MP_O8::ML_shiftrhs", + "ML_BSSN_MP_O8::ML_trace_curv", + "ML_BSSN_MP_O8::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_O8_Advect", 18, groups); switch(fdOrder) @@ -2226,7 +2146,7 @@ extern "C" void ML_BSSN_MP_O8_Advect(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_MP_O8_Advect_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_MP_O8_Advect_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_Dissipation.cc b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_Dissipation.cc index 48ac519..e881266 100644 --- a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_Dissipation.cc +++ b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_Dissipation.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -64,8 +65,6 @@ static void ML_BSSN_MP_O8_Dissipation_Body(cGH const * restrict const cctkGH, in DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -102,9 +101,9 @@ static void ML_BSSN_MP_O8_Dissipation_Body(cGH const * restrict const cctkGH, in CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -123,14 +122,14 @@ static void ML_BSSN_MP_O8_Dissipation_Body(cGH const * restrict const cctkGH, in CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -144,9 +143,9 @@ static void ML_BSSN_MP_O8_Dissipation_Body(cGH const * restrict const cctkGH, in CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -227,7 +226,7 @@ static void ML_BSSN_MP_O8_Dissipation_Body(cGH const * restrict const cctkGH, in /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_O8_Dissipation, + LC_LOOP3VEC(ML_BSSN_MP_O8_Dissipation, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1161,7 +1160,7 @@ static void ML_BSSN_MP_O8_Dissipation_Body(cGH const * restrict const cctkGH, in CCTK_REAL_VEC epsdiss3 = ToReal(EpsDiss); phirhsL = - kmadd(epsdiss1,JacPDdissipationNth1phi,kmadd(epsdiss2,JacPDdissipationNth2phi,kmadd(epsdiss3,JacPDdissipationNth3phi,phirhsL))); + kadd(phirhsL,kmadd(epsdiss1,JacPDdissipationNth1phi,kmadd(epsdiss2,JacPDdissipationNth2phi,kmul(epsdiss3,JacPDdissipationNth3phi)))); gt11rhsL = kadd(gt11rhsL,kmadd(epsdiss1,JacPDdissipationNth1gt11,kmadd(epsdiss2,JacPDdissipationNth2gt11,kmul(epsdiss3,JacPDdissipationNth3gt11)))); @@ -1182,16 +1181,16 @@ static void ML_BSSN_MP_O8_Dissipation_Body(cGH const * restrict const cctkGH, in kadd(gt33rhsL,kmadd(epsdiss1,JacPDdissipationNth1gt33,kmadd(epsdiss2,JacPDdissipationNth2gt33,kmul(epsdiss3,JacPDdissipationNth3gt33)))); Xt1rhsL = - kmadd(epsdiss1,JacPDdissipationNth1Xt1,kmadd(epsdiss2,JacPDdissipationNth2Xt1,kmadd(epsdiss3,JacPDdissipationNth3Xt1,Xt1rhsL))); + kadd(Xt1rhsL,kmadd(epsdiss1,JacPDdissipationNth1Xt1,kmadd(epsdiss2,JacPDdissipationNth2Xt1,kmul(epsdiss3,JacPDdissipationNth3Xt1)))); Xt2rhsL = - kmadd(epsdiss1,JacPDdissipationNth1Xt2,kmadd(epsdiss2,JacPDdissipationNth2Xt2,kmadd(epsdiss3,JacPDdissipationNth3Xt2,Xt2rhsL))); + kadd(Xt2rhsL,kmadd(epsdiss1,JacPDdissipationNth1Xt2,kmadd(epsdiss2,JacPDdissipationNth2Xt2,kmul(epsdiss3,JacPDdissipationNth3Xt2)))); Xt3rhsL = - kmadd(epsdiss1,JacPDdissipationNth1Xt3,kmadd(epsdiss2,JacPDdissipationNth2Xt3,kmadd(epsdiss3,JacPDdissipationNth3Xt3,Xt3rhsL))); + kadd(Xt3rhsL,kmadd(epsdiss1,JacPDdissipationNth1Xt3,kmadd(epsdiss2,JacPDdissipationNth2Xt3,kmul(epsdiss3,JacPDdissipationNth3Xt3)))); trKrhsL = - kmadd(epsdiss1,JacPDdissipationNth1trK,kmadd(epsdiss2,JacPDdissipationNth2trK,kmadd(epsdiss3,JacPDdissipationNth3trK,trKrhsL))); + kadd(trKrhsL,kmadd(epsdiss1,JacPDdissipationNth1trK,kmadd(epsdiss2,JacPDdissipationNth2trK,kmul(epsdiss3,JacPDdissipationNth3trK)))); At11rhsL = kadd(At11rhsL,kmadd(epsdiss1,JacPDdissipationNth1At11,kmadd(epsdiss2,JacPDdissipationNth2At11,kmul(epsdiss3,JacPDdissipationNth3At11)))); @@ -1235,132 +1234,35 @@ static void ML_BSSN_MP_O8_Dissipation_Body(cGH const * restrict const cctkGH, in B3rhsL = kadd(B3rhsL,kmadd(epsdiss1,JacPDdissipationNth1B3,kmadd(epsdiss2,JacPDdissipationNth2B3,kmul(epsdiss3,JacPDdissipationNth3B3)))); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_O8_Dissipation); + LC_ENDLOOP3VEC(ML_BSSN_MP_O8_Dissipation); } extern "C" void ML_BSSN_MP_O8_Dissipation(CCTK_ARGUMENTS) @@ -1379,7 +1281,25 @@ extern "C" void ML_BSSN_MP_O8_Dissipation(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP_O8::ML_curv","ML_BSSN_MP_O8::ML_curvrhs","ML_BSSN_MP_O8::ML_dtlapse","ML_BSSN_MP_O8::ML_dtlapserhs","ML_BSSN_MP_O8::ML_dtshift","ML_BSSN_MP_O8::ML_dtshiftrhs","ML_BSSN_MP_O8::ML_Gamma","ML_BSSN_MP_O8::ML_Gammarhs","ML_BSSN_MP_O8::ML_lapse","ML_BSSN_MP_O8::ML_lapserhs","ML_BSSN_MP_O8::ML_log_confac","ML_BSSN_MP_O8::ML_log_confacrhs","ML_BSSN_MP_O8::ML_metric","ML_BSSN_MP_O8::ML_metricrhs","ML_BSSN_MP_O8::ML_shift","ML_BSSN_MP_O8::ML_shiftrhs","ML_BSSN_MP_O8::ML_trace_curv","ML_BSSN_MP_O8::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN_MP_O8::ML_curv", + "ML_BSSN_MP_O8::ML_curvrhs", + "ML_BSSN_MP_O8::ML_dtlapse", + "ML_BSSN_MP_O8::ML_dtlapserhs", + "ML_BSSN_MP_O8::ML_dtshift", + "ML_BSSN_MP_O8::ML_dtshiftrhs", + "ML_BSSN_MP_O8::ML_Gamma", + "ML_BSSN_MP_O8::ML_Gammarhs", + "ML_BSSN_MP_O8::ML_lapse", + "ML_BSSN_MP_O8::ML_lapserhs", + "ML_BSSN_MP_O8::ML_log_confac", + "ML_BSSN_MP_O8::ML_log_confacrhs", + "ML_BSSN_MP_O8::ML_metric", + "ML_BSSN_MP_O8::ML_metricrhs", + "ML_BSSN_MP_O8::ML_shift", + "ML_BSSN_MP_O8::ML_shiftrhs", + "ML_BSSN_MP_O8::ML_trace_curv", + "ML_BSSN_MP_O8::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_O8_Dissipation", 18, groups); switch(fdOrder) @@ -1401,7 +1321,7 @@ extern "C" void ML_BSSN_MP_O8_Dissipation(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_MP_O8_Dissipation_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_MP_O8_Dissipation_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_InitGamma.cc b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_InitGamma.cc index 3ae1dc5..541b40c 100644 --- a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_InitGamma.cc +++ b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_InitGamma.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_MP_O8_InitGamma_Body(cGH const * restrict const cctkGH, int DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_MP_O8_InitGamma_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_MP_O8_InitGamma_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_MP_O8_InitGamma_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_MP_O8_InitGamma_Body(cGH const * restrict const cctkGH, int /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_O8_InitGamma, + LC_LOOP3VEC(ML_BSSN_MP_O8_InitGamma, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -237,60 +236,17 @@ static void ML_BSSN_MP_O8_InitGamma_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC B3L = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_MP_O8_InitGamma); + LC_ENDLOOP3VEC(ML_BSSN_MP_O8_InitGamma); } extern "C" void ML_BSSN_MP_O8_InitGamma(CCTK_ARGUMENTS) @@ -309,7 +265,10 @@ extern "C" void ML_BSSN_MP_O8_InitGamma(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP_O8::ML_dtlapse","ML_BSSN_MP_O8::ML_dtshift","ML_BSSN_MP_O8::ML_Gamma"}; + const char *const groups[] = { + "ML_BSSN_MP_O8::ML_dtlapse", + "ML_BSSN_MP_O8::ML_dtshift", + "ML_BSSN_MP_O8::ML_Gamma"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_O8_InitGamma", 3, groups); switch(fdOrder) @@ -327,7 +286,7 @@ extern "C" void ML_BSSN_MP_O8_InitGamma(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_MP_O8_InitGamma_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_MP_O8_InitGamma_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_InitRHS.cc b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_InitRHS.cc index c313d30..078f49b 100644 --- a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_InitRHS.cc +++ b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_InitRHS.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_MP_O8_InitRHS_Body(cGH const * restrict const cctkGH, int co DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_MP_O8_InitRHS_Body(cGH const * restrict const cctkGH, int co CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_MP_O8_InitRHS_Body(cGH const * restrict const cctkGH, int co CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_MP_O8_InitRHS_Body(cGH const * restrict const cctkGH, int co CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_MP_O8_InitRHS_Body(cGH const * restrict const cctkGH, int co /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_O8_InitRHS, + LC_LOOP3VEC(ML_BSSN_MP_O8_InitRHS, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -273,132 +272,35 @@ static void ML_BSSN_MP_O8_InitRHS_Body(cGH const * restrict const cctkGH, int co CCTK_REAL_VEC B3rhsL = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_O8_InitRHS); + LC_ENDLOOP3VEC(ML_BSSN_MP_O8_InitRHS); } extern "C" void ML_BSSN_MP_O8_InitRHS(CCTK_ARGUMENTS) @@ -417,7 +319,16 @@ extern "C" void ML_BSSN_MP_O8_InitRHS(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP_O8::ML_curvrhs","ML_BSSN_MP_O8::ML_dtlapserhs","ML_BSSN_MP_O8::ML_dtshiftrhs","ML_BSSN_MP_O8::ML_Gammarhs","ML_BSSN_MP_O8::ML_lapserhs","ML_BSSN_MP_O8::ML_log_confacrhs","ML_BSSN_MP_O8::ML_metricrhs","ML_BSSN_MP_O8::ML_shiftrhs","ML_BSSN_MP_O8::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN_MP_O8::ML_curvrhs", + "ML_BSSN_MP_O8::ML_dtlapserhs", + "ML_BSSN_MP_O8::ML_dtshiftrhs", + "ML_BSSN_MP_O8::ML_Gammarhs", + "ML_BSSN_MP_O8::ML_lapserhs", + "ML_BSSN_MP_O8::ML_log_confacrhs", + "ML_BSSN_MP_O8::ML_metricrhs", + "ML_BSSN_MP_O8::ML_shiftrhs", + "ML_BSSN_MP_O8::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_O8_InitRHS", 9, groups); switch(fdOrder) @@ -435,7 +346,7 @@ extern "C" void ML_BSSN_MP_O8_InitRHS(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_MP_O8_InitRHS_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_MP_O8_InitRHS_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_Minkowski.cc b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_Minkowski.cc index 5a3d29d..5d55170 100644 --- a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_Minkowski.cc +++ b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_Minkowski.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_MP_O8_Minkowski_Body(cGH const * restrict const cctkGH, int DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_MP_O8_Minkowski_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_MP_O8_Minkowski_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_MP_O8_Minkowski_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_MP_O8_Minkowski_Body(cGH const * restrict const cctkGH, int /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_O8_Minkowski, + LC_LOOP3VEC(ML_BSSN_MP_O8_Minkowski, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -273,132 +272,35 @@ static void ML_BSSN_MP_O8_Minkowski_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC B3L = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(beta1[index],beta1L,elt_count); - vec_store_nta_partial_hi(beta2[index],beta2L,elt_count); - vec_store_nta_partial_hi(beta3[index],beta3L,elt_count); - vec_store_nta_partial_hi(gt11[index],gt11L,elt_count); - vec_store_nta_partial_hi(gt12[index],gt12L,elt_count); - vec_store_nta_partial_hi(gt13[index],gt13L,elt_count); - vec_store_nta_partial_hi(gt22[index],gt22L,elt_count); - vec_store_nta_partial_hi(gt23[index],gt23L,elt_count); - vec_store_nta_partial_hi(gt33[index],gt33L,elt_count); - vec_store_nta_partial_hi(phi[index],phiL,elt_count); - vec_store_nta_partial_hi(trK[index],trKL,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(beta1[index],beta1L,elt_count); - vec_store_nta_partial_lo(beta2[index],beta2L,elt_count); - vec_store_nta_partial_lo(beta3[index],beta3L,elt_count); - vec_store_nta_partial_lo(gt11[index],gt11L,elt_count); - vec_store_nta_partial_lo(gt12[index],gt12L,elt_count); - vec_store_nta_partial_lo(gt13[index],gt13L,elt_count); - vec_store_nta_partial_lo(gt22[index],gt22L,elt_count); - vec_store_nta_partial_lo(gt23[index],gt23L,elt_count); - vec_store_nta_partial_lo(gt33[index],gt33L,elt_count); - vec_store_nta_partial_lo(phi[index],phiL,elt_count); - vec_store_nta_partial_lo(trK[index],trKL,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(beta1[index],beta1L); - vec_store_nta(beta2[index],beta2L); - vec_store_nta(beta3[index],beta3L); - vec_store_nta(gt11[index],gt11L); - vec_store_nta(gt12[index],gt12L); - vec_store_nta(gt13[index],gt13L); - vec_store_nta(gt22[index],gt22L); - vec_store_nta(gt23[index],gt23L); - vec_store_nta(gt33[index],gt33L); - vec_store_nta(phi[index],phiL); - vec_store_nta(trK[index],trKL); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(beta1[index],beta1L); + vec_store_nta_partial(beta2[index],beta2L); + vec_store_nta_partial(beta3[index],beta3L); + vec_store_nta_partial(gt11[index],gt11L); + vec_store_nta_partial(gt12[index],gt12L); + vec_store_nta_partial(gt13[index],gt13L); + vec_store_nta_partial(gt22[index],gt22L); + vec_store_nta_partial(gt23[index],gt23L); + vec_store_nta_partial(gt33[index],gt33L); + vec_store_nta_partial(phi[index],phiL); + vec_store_nta_partial(trK[index],trKL); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_MP_O8_Minkowski); + LC_ENDLOOP3VEC(ML_BSSN_MP_O8_Minkowski); } extern "C" void ML_BSSN_MP_O8_Minkowski(CCTK_ARGUMENTS) @@ -417,7 +319,16 @@ extern "C" void ML_BSSN_MP_O8_Minkowski(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP_O8::ML_curv","ML_BSSN_MP_O8::ML_dtlapse","ML_BSSN_MP_O8::ML_dtshift","ML_BSSN_MP_O8::ML_Gamma","ML_BSSN_MP_O8::ML_lapse","ML_BSSN_MP_O8::ML_log_confac","ML_BSSN_MP_O8::ML_metric","ML_BSSN_MP_O8::ML_shift","ML_BSSN_MP_O8::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_MP_O8::ML_curv", + "ML_BSSN_MP_O8::ML_dtlapse", + "ML_BSSN_MP_O8::ML_dtshift", + "ML_BSSN_MP_O8::ML_Gamma", + "ML_BSSN_MP_O8::ML_lapse", + "ML_BSSN_MP_O8::ML_log_confac", + "ML_BSSN_MP_O8::ML_metric", + "ML_BSSN_MP_O8::ML_shift", + "ML_BSSN_MP_O8::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_O8_Minkowski", 9, groups); switch(fdOrder) @@ -435,7 +346,7 @@ extern "C" void ML_BSSN_MP_O8_Minkowski(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_MP_O8_Minkowski_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_MP_O8_Minkowski_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_RHS1.cc b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_RHS1.cc index 9d1443d..f235204 100644 --- a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_RHS1.cc +++ b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_RHS1.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -61,8 +62,6 @@ static void ML_BSSN_MP_O8_RHS1_Body(cGH const * restrict const cctkGH, int const DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -99,9 +98,9 @@ static void ML_BSSN_MP_O8_RHS1_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -120,14 +119,14 @@ static void ML_BSSN_MP_O8_RHS1_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -141,9 +140,9 @@ static void ML_BSSN_MP_O8_RHS1_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -224,7 +223,7 @@ static void ML_BSSN_MP_O8_RHS1_Body(cGH const * restrict const cctkGH, int const /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_O8_RHS1, + LC_LOOP3VEC(ML_BSSN_MP_O8_RHS1, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1088,7 +1087,8 @@ static void ML_BSSN_MP_O8_RHS1_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -1096,12 +1096,14 @@ static void ML_BSSN_MP_O8_RHS1_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC Gtl111 = kmul(JacPDstandardNth1gt11,ToReal(0.5)); @@ -1307,13 +1309,13 @@ static void ML_BSSN_MP_O8_RHS1_Body(cGH const * restrict const cctkGH, int const kmul(ToReal(-0.666666666666666666666666666667),kmadd(kmadd(gt13L,JacPDstandardNth3beta1,kmul(gt23L,JacPDstandardNth3beta2)),ToReal(-3),kmadd(gt33L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth3beta3,ToReal(-2),JacPDstandardNth2beta2)),kmul(alphaL,kmul(At33L,ToReal(3)))))); CCTK_REAL_VEC dotXt1 = - kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu11,JacPDstandardNth1alpha,kmadd(Atu12,JacPDstandardNth2alpha,kmul(Atu13,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth2beta1,Xtn2,kmul(JacPDstandardNth3beta1,Xtn3)),ToReal(-3),kmadd(Xtn1,kmsub(kadd(JacPDstandardNth2beta2,JacPDstandardNth3beta3),ToReal(2),JacPDstandardNth1beta1),kmadd(kmadd(gtu12,JacPDstandardNth12beta1,kmadd(gtu13,JacPDstandardNth13beta1,kmadd(gtu22,JacPDstandardNth22beta1,kmadd(gtu23,kadd(JacPDstandardNth23beta1,JacPDstandardNth32beta1),kmul(gtu33,JacPDstandardNth33beta1))))),ToReal(3),kmadd(gtu11,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth11beta1,ToReal(4),JacPDstandardNth13beta3)),kmadd(gtu12,kadd(JacPDstandardNth22beta2,kmadd(JacPDstandardNth21beta1,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu13,kadd(JacPDstandardNth32beta2,kmadd(JacPDstandardNth31beta1,ToReal(4),JacPDstandardNth33beta3)),kmul(alphaL,kmadd(kmadd(gtu11,S1,kmadd(gtu12,S2,kmul(gtu13,S3))),ToReal(-150.7964473723100754462068823974161384415),kmadd(kmadd(gtu11,JacPDstandardNth1trK,kmadd(gtu12,JacPDstandardNth2trK,kmul(gtu13,JacPDstandardNth3trK))),ToReal(-4),kmadd(ToReal(6),kmadd(Atu22,Gt122,kmadd(Atu33,Gt133,kmul(Atu11,kmadd(cdphi1,ToReal(6),Gt111)))),kmadd(kmadd(Atu23,Gt123,kmul(Atu12,kmadd(cdphi2,ToReal(3),Gt112))),ToReal(12),kmul(Atu13,kmadd(Gt113,ToReal(12),kmul(cdphi3,ToReal(36))))))))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu11,JacPDstandardNth1alpha,kmadd(Atu12,JacPDstandardNth2alpha,kmul(Atu13,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth2beta1,Xtn2,kmul(JacPDstandardNth3beta1,Xtn3)),ToReal(-3),kmadd(Xtn1,kmsub(JacPDstandardNth3beta3,ToReal(2),JacPDstandardNth1beta1),kmadd(kmadd(gtu12,JacPDstandardNth12beta1,kmadd(gtu13,JacPDstandardNth13beta1,kmadd(gtu22,JacPDstandardNth22beta1,kmadd(gtu23,kadd(JacPDstandardNth23beta1,JacPDstandardNth32beta1),kmul(gtu33,JacPDstandardNth33beta1))))),ToReal(3),kmadd(gtu11,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth11beta1,ToReal(4),JacPDstandardNth13beta3)),kmadd(gtu12,kadd(JacPDstandardNth22beta2,kmadd(JacPDstandardNth21beta1,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu13,kadd(JacPDstandardNth32beta2,kmadd(JacPDstandardNth31beta1,ToReal(4),JacPDstandardNth33beta3)),kmul(ToReal(2),kmadd(JacPDstandardNth2beta2,Xtn1,kmul(alphaL,kmadd(kmadd(gtu11,JacPDstandardNth1trK,kmadd(gtu12,JacPDstandardNth2trK,kmul(gtu13,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu23,Gt123,kmul(Atu12,kmadd(cdphi2,ToReal(3),Gt112))),ToReal(6),kmadd(ToReal(3),kmadd(Atu22,Gt122,kmadd(Atu33,Gt133,kmul(Atu11,kmadd(cdphi1,ToReal(6),Gt111)))),kmadd(Atu13,kmadd(Gt113,ToReal(6),kmul(cdphi3,ToReal(18))),kmul(kmadd(gtu11,S1,kmadd(gtu12,S2,kmul(gtu13,S3))),kmul(ToReal(-24),ToReal(Pi)))))))))))))))))); CCTK_REAL_VEC dotXt2 = - kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu12,JacPDstandardNth1alpha,kmadd(Atu22,JacPDstandardNth2alpha,kmul(Atu23,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta2,Xtn1,kmul(JacPDstandardNth3beta2,Xtn3)),ToReal(-3),kmadd(Xtn2,kmsub(kadd(JacPDstandardNth1beta1,JacPDstandardNth3beta3),ToReal(2),JacPDstandardNth2beta2),kmadd(kmadd(gtu11,JacPDstandardNth11beta2,kmadd(gtu23,JacPDstandardNth23beta2,kmadd(gtu13,kadd(JacPDstandardNth13beta2,JacPDstandardNth31beta2),kmul(gtu33,JacPDstandardNth33beta2)))),ToReal(3),kmadd(gtu12,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth13beta3,kmadd(JacPDstandardNth21beta2,ToReal(3),kmul(JacPDstandardNth12beta2,ToReal(4))))),kmadd(gtu22,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth22beta2,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu23,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth32beta2,ToReal(4),JacPDstandardNth33beta3)),kmul(alphaL,kmadd(kmadd(gtu12,S1,kmadd(gtu22,S2,kmul(gtu23,S3))),ToReal(-150.7964473723100754462068823974161384415),kmadd(kmadd(gtu12,JacPDstandardNth1trK,kmadd(gtu22,JacPDstandardNth2trK,kmul(gtu23,JacPDstandardNth3trK))),ToReal(-4),kmadd(ToReal(6),kmadd(Atu11,Gt211,kmadd(Atu33,Gt233,kmul(Atu22,kmadd(cdphi2,ToReal(6),Gt222)))),kmadd(kmadd(Atu13,Gt213,kmul(Atu12,kmadd(cdphi1,ToReal(3),Gt212))),ToReal(12),kmul(Atu23,kmadd(Gt223,ToReal(12),kmul(cdphi3,ToReal(36))))))))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu12,JacPDstandardNth1alpha,kmadd(Atu22,JacPDstandardNth2alpha,kmul(Atu23,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta2,Xtn1,kmul(JacPDstandardNth3beta2,Xtn3)),ToReal(-3),kmadd(Xtn2,kmsub(JacPDstandardNth3beta3,ToReal(2),JacPDstandardNth2beta2),kmadd(kmadd(gtu11,JacPDstandardNth11beta2,kmadd(gtu23,JacPDstandardNth23beta2,kmadd(gtu13,kadd(JacPDstandardNth13beta2,JacPDstandardNth31beta2),kmul(gtu33,JacPDstandardNth33beta2)))),ToReal(3),kmadd(gtu12,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth13beta3,kmadd(JacPDstandardNth21beta2,ToReal(3),kmul(JacPDstandardNth12beta2,ToReal(4))))),kmadd(gtu22,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth22beta2,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu23,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth32beta2,ToReal(4),JacPDstandardNth33beta3)),kmul(ToReal(2),kmadd(JacPDstandardNth1beta1,Xtn2,kmul(alphaL,kmadd(kmadd(gtu12,JacPDstandardNth1trK,kmadd(gtu22,JacPDstandardNth2trK,kmul(gtu23,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu13,Gt213,kmul(Atu12,kmadd(cdphi1,ToReal(3),Gt212))),ToReal(6),kmadd(ToReal(3),kmadd(Atu11,Gt211,kmadd(Atu33,Gt233,kmul(Atu22,kmadd(cdphi2,ToReal(6),Gt222)))),kmadd(Atu23,kmadd(Gt223,ToReal(6),kmul(cdphi3,ToReal(18))),kmul(kmadd(gtu12,S1,kmadd(gtu22,S2,kmul(gtu23,S3))),kmul(ToReal(-24),ToReal(Pi)))))))))))))))))); CCTK_REAL_VEC dotXt3 = - kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu13,JacPDstandardNth1alpha,kmadd(Atu23,JacPDstandardNth2alpha,kmul(Atu33,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta3,Xtn1,kmul(JacPDstandardNth2beta3,Xtn2)),ToReal(-3),kmadd(Xtn3,kmsub(kadd(JacPDstandardNth1beta1,JacPDstandardNth2beta2),ToReal(2),JacPDstandardNth3beta3),kmadd(kmadd(gtu11,JacPDstandardNth11beta3,kmadd(gtu12,kadd(JacPDstandardNth12beta3,JacPDstandardNth21beta3),kmadd(gtu22,JacPDstandardNth22beta3,kmul(gtu23,JacPDstandardNth32beta3)))),ToReal(3),kmadd(gtu13,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth31beta3,ToReal(3),kmul(JacPDstandardNth13beta3,ToReal(4))))),kmadd(gtu23,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth23beta3,ToReal(4),JacPDstandardNth22beta2)),kmadd(gtu33,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth33beta3,ToReal(4),JacPDstandardNth32beta2)),kmul(alphaL,kmadd(kmadd(gtu13,S1,kmadd(gtu23,S2,kmul(gtu33,S3))),ToReal(-150.7964473723100754462068823974161384415),kmadd(kmadd(gtu13,JacPDstandardNth1trK,kmadd(gtu23,JacPDstandardNth2trK,kmul(gtu33,JacPDstandardNth3trK))),ToReal(-4),kmadd(kmadd(Atu11,Gt311,kmul(Atu22,Gt322)),ToReal(6),kmadd(kmadd(Atu12,Gt312,kmadd(Atu13,kmadd(cdphi1,ToReal(3),Gt313),kmul(Atu23,kmadd(cdphi2,ToReal(3),Gt323)))),ToReal(12),kmul(Atu33,kmadd(Gt333,ToReal(6),kmul(cdphi3,ToReal(36))))))))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu13,JacPDstandardNth1alpha,kmadd(Atu23,JacPDstandardNth2alpha,kmul(Atu33,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta3,Xtn1,kmul(JacPDstandardNth2beta3,Xtn2)),ToReal(-3),kmadd(Xtn3,kmsub(JacPDstandardNth2beta2,ToReal(2),JacPDstandardNth3beta3),kmadd(kmadd(gtu11,JacPDstandardNth11beta3,kmadd(gtu12,kadd(JacPDstandardNth12beta3,JacPDstandardNth21beta3),kmadd(gtu22,JacPDstandardNth22beta3,kmul(gtu23,JacPDstandardNth32beta3)))),ToReal(3),kmadd(gtu13,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth31beta3,ToReal(3),kmul(JacPDstandardNth13beta3,ToReal(4))))),kmadd(gtu23,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth23beta3,ToReal(4),JacPDstandardNth22beta2)),kmadd(gtu33,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth33beta3,ToReal(4),JacPDstandardNth32beta2)),kmul(ToReal(2),kmadd(JacPDstandardNth1beta1,Xtn3,kmul(alphaL,kmadd(kmadd(gtu13,JacPDstandardNth1trK,kmadd(gtu23,JacPDstandardNth2trK,kmul(gtu33,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu11,Gt311,kmul(Atu22,Gt322)),ToReal(3),kmadd(kmadd(Atu12,Gt312,kmadd(Atu13,kmadd(cdphi1,ToReal(3),Gt313),kmul(Atu23,kmadd(cdphi2,ToReal(3),Gt323)))),ToReal(6),kmadd(Atu33,kmadd(Gt333,ToReal(3),kmul(cdphi3,ToReal(18))),kmul(kmadd(gtu13,S1,kmadd(gtu23,S2,kmul(gtu33,S3))),kmul(ToReal(-24),ToReal(Pi)))))))))))))))))); CCTK_REAL_VEC Xt1rhsL = dotXt1; @@ -1322,18 +1324,18 @@ static void ML_BSSN_MP_O8_RHS1_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC Xt3rhsL = dotXt3; CCTK_REAL_VEC dottrK = - kmsub(alphaL,kadd(SQR(Atm11),kadd(SQR(Atm22),kadd(SQR(Atm33),kmadd(SQR(trKL),ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(2),kmul(kadd(rho,trS),ToReal(12.56637061435917295385057353311801153679))))))),kmul(em4phi,knmsub(JacPDstandardNth1alpha,Xtn1,knmsub(JacPDstandardNth2alpha,Xtn2,knmsub(JacPDstandardNth3alpha,Xtn3,kmadd(gtu11,kmadd(cdphi1,kmul(JacPDstandardNth1alpha,ToReal(2)),JacPDstandardNth11alpha),kmadd(gtu12,kadd(JacPDstandardNth12alpha,kadd(JacPDstandardNth21alpha,kmadd(cdphi2,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth2alpha,ToReal(2)))))),kmadd(gtu22,kmadd(cdphi2,kmul(JacPDstandardNth2alpha,ToReal(2)),JacPDstandardNth22alpha),kmadd(gtu13,kadd(JacPDstandardNth13alpha,kadd(JacPDstandardNth31alpha,kmadd(cdphi3,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmadd(gtu23,kadd(JacPDstandardNth23alpha,kadd(JacPDstandardNth32alpha,kmadd(cdphi3,kmul(JacPDstandardNth2alpha,ToReal(2)),kmul(cdphi2,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmul(gtu33,kmadd(cdphi3,kmul(JacPDstandardNth3alpha,ToReal(2)),JacPDstandardNth33alpha)))))))))))); + kmsub(alphaL,kadd(SQR(Atm11),kadd(SQR(Atm22),kadd(SQR(Atm33),kmadd(SQR(trKL),ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(2),kmul(kadd(rho,trS),kmul(ToReal(4),ToReal(Pi)))))))),kmul(em4phi,knmsub(JacPDstandardNth1alpha,Xtn1,knmsub(JacPDstandardNth2alpha,Xtn2,knmsub(JacPDstandardNth3alpha,Xtn3,kmadd(gtu11,kmadd(cdphi1,kmul(JacPDstandardNth1alpha,ToReal(2)),JacPDstandardNth11alpha),kmadd(gtu12,kadd(JacPDstandardNth12alpha,kadd(JacPDstandardNth21alpha,kmadd(cdphi2,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth2alpha,ToReal(2)))))),kmadd(gtu22,kmadd(cdphi2,kmul(JacPDstandardNth2alpha,ToReal(2)),JacPDstandardNth22alpha),kmadd(gtu13,kadd(JacPDstandardNth13alpha,kadd(JacPDstandardNth31alpha,kmadd(cdphi3,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmadd(gtu23,kadd(JacPDstandardNth23alpha,kadd(JacPDstandardNth32alpha,kmadd(cdphi3,kmul(JacPDstandardNth2alpha,ToReal(2)),kmul(cdphi2,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmul(gtu33,kmadd(cdphi3,kmul(JacPDstandardNth3alpha,ToReal(2)),JacPDstandardNth33alpha)))))))))))); CCTK_REAL_VEC trKrhsL = dottrK; CCTK_REAL_VEC alpharhsL = - kneg(kmul(kpow(alphaL,harmonicN),kmul(ToReal(harmonicF),kmadd(ksub(AL,trKL),ToReal(LapseACoeff),trKL)))); + kneg(kmul(kpow(alphaL,harmonicN),kmul(ToReal(harmonicF),kmadd(AL,ToReal(LapseACoeff),kmul(kmadd(kadd(alphaL,ToReal(-1)),ToReal(AlphaDriver),trKL),ksub(ToReal(1),ToReal(LapseACoeff))))))); CCTK_REAL_VEC ArhsL = kmul(knmsub(AL,ToReal(AlphaDriver),dottrK),ToReal(LapseACoeff)); CCTK_REAL_VEC eta = - kfmin(ToReal(1),kmul(INV(rL),ToReal(SpatialBetaDriverRadius))); + kmul(INV(kfmax(rL,ToReal(SpatialBetaDriverRadius))),ToReal(SpatialBetaDriverRadius)); CCTK_REAL_VEC theta = kfmin(ToReal(1),kexp(knmsub(rL,INV(ToReal(SpatialShiftGammaCoeffRadius)),ToReal(1)))); @@ -1345,27 +1347,24 @@ static void ML_BSSN_MP_O8_RHS1_Body(cGH const * restrict const cctkGH, int const if (harmonicShift) { beta1rhsL = - kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(kmadd(gtu11,JacPDstandardNth1alpha,kmadd(gtu12,JacPDstandardNth2alpha,kmul(gtu13,JacPDstandardNth3alpha))),kmul(phiL,ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth1gt11,SQR(gtu11),kmul(JacPDstandardNth1gt22,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu13,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu13,JacPDstandardNth1gt33,kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu22,JacPDstandardNth3gt22))))),kmadd(gtu11,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu12,JacPDstandardNth2gt11,kmadd(gtu13,JacPDstandardNth3gt11,kmadd(gtu23,kmul(JacPDstandardNth1gt23,ToReal(-2)),knmsub(gtu22,JacPDstandardNth1gt22,kmadd(kmadd(gtu12,JacPDstandardNth1gt12,kmadd(gtu13,JacPDstandardNth1gt13,kmul(gtu22,JacPDstandardNth2gt12))),ToReal(2),kmadd(gtu23,kmul(JacPDstandardNth2gt13,ToReal(2)),kmadd(gtu23,kmul(JacPDstandardNth3gt12,ToReal(2)),kmul(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33))))))))))),kmul(gtu12,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu23,kmul(JacPDstandardNth3gt22,ToReal(2)),kmadd(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33),kmul(gtu13,kmul(JacPDstandardNth1gt23,ToReal(4))))))))))))))))); + kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu11,JacPDstandardNth1alpha,kmadd(gtu12,JacPDstandardNth2alpha,kmul(gtu13,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth1gt11,SQR(gtu11),kmul(JacPDstandardNth1gt22,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu13,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu13,JacPDstandardNth1gt33,kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu22,JacPDstandardNth3gt22))))),kmadd(gtu11,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu12,JacPDstandardNth2gt11,kmadd(gtu13,JacPDstandardNth3gt11,kmadd(gtu23,kmul(JacPDstandardNth1gt23,ToReal(-2)),knmsub(gtu22,JacPDstandardNth1gt22,kmadd(kmadd(gtu12,JacPDstandardNth1gt12,kmadd(gtu13,JacPDstandardNth1gt13,kmul(gtu22,JacPDstandardNth2gt12))),ToReal(2),kmadd(gtu23,kmul(JacPDstandardNth2gt13,ToReal(2)),kmadd(gtu23,kmul(JacPDstandardNth3gt12,ToReal(2)),kmul(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33))))))))))),kmul(gtu12,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu23,kmul(JacPDstandardNth3gt22,ToReal(2)),kmadd(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33),kmul(gtu13,kmul(JacPDstandardNth1gt23,ToReal(4))))))))))))))))); beta2rhsL = - kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(kmadd(gtu12,JacPDstandardNth1alpha,kmadd(gtu22,JacPDstandardNth2alpha,kmul(gtu23,JacPDstandardNth3alpha))),kmul(phiL,ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth2gt22,SQR(gtu22),kmul(JacPDstandardNth2gt11,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu23,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt13,kmadd(gtu13,JacPDstandardNth1gt33,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu11,JacPDstandardNth3gt11))))),kmadd(gtu22,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu23,JacPDstandardNth3gt22,kmadd(kmadd(gtu23,JacPDstandardNth2gt23,kmul(gtu13,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth3gt12,JacPDstandardNth2gt13)))),ToReal(2),kmadd(gtu11,kmsub(JacPDstandardNth1gt12,ToReal(2),JacPDstandardNth2gt11),kmul(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33))))))),kmul(gtu12,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu13,kmul(JacPDstandardNth3gt11,ToReal(2)),kmadd(gtu22,kmadd(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth2gt13,ToReal(4)))))))))))))))))); + kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu12,JacPDstandardNth1alpha,kmadd(gtu22,JacPDstandardNth2alpha,kmul(gtu23,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth2gt22,SQR(gtu22),kmul(JacPDstandardNth2gt11,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu23,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt13,kmadd(gtu13,JacPDstandardNth1gt33,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu11,JacPDstandardNth3gt11))))),kmadd(gtu22,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu23,JacPDstandardNth3gt22,kmadd(kmadd(gtu23,JacPDstandardNth2gt23,kmul(gtu13,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth3gt12,JacPDstandardNth2gt13)))),ToReal(2),kmadd(gtu11,kmsub(JacPDstandardNth1gt12,ToReal(2),JacPDstandardNth2gt11),kmul(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33))))))),kmul(gtu12,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu13,kmul(JacPDstandardNth3gt11,ToReal(2)),kmadd(gtu22,kmadd(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth2gt13,ToReal(4)))))))))))))))))); beta3rhsL = - kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(kmadd(gtu13,JacPDstandardNth1alpha,kmadd(gtu23,JacPDstandardNth2alpha,kmul(gtu33,JacPDstandardNth3alpha))),kmul(phiL,ToReal(-2)),kmul(alphaL,kmadd(phiL,kmul(kmadd(JacPDstandardNth3gt11,SQR(gtu13),kmul(JacPDstandardNth3gt22,SQR(gtu23))),ToReal(2)),kmadd(gtu23,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu33,JacPDstandardNth2gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt12,kmadd(gtu12,JacPDstandardNth1gt22,kmul(gtu33,JacPDstandardNth3gt23))),ToReal(2),kmul(gtu11,JacPDstandardNth2gt11)))))),kmadd(gtu33,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,knmsub(gtu22,JacPDstandardNth3gt22,kmadd(kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu12,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth2gt13,JacPDstandardNth3gt12)))),ToReal(2),kmul(gtu11,kmsub(JacPDstandardNth1gt13,ToReal(2),JacPDstandardNth3gt11))))))),kmul(gtu13,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu12,kmul(JacPDstandardNth2gt11,ToReal(2)),kmadd(gtu22,kmsub(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmadd(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth3gt12,ToReal(4)))))))))))))))))); + kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu13,JacPDstandardNth1alpha,kmadd(gtu23,JacPDstandardNth2alpha,kmul(gtu33,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmul(kmadd(JacPDstandardNth3gt11,SQR(gtu13),kmul(JacPDstandardNth3gt22,SQR(gtu23))),ToReal(2)),kmadd(gtu23,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu33,JacPDstandardNth2gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt12,kmadd(gtu12,JacPDstandardNth1gt22,kmul(gtu33,JacPDstandardNth3gt23))),ToReal(2),kmul(gtu11,JacPDstandardNth2gt11)))))),kmadd(gtu33,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,knmsub(gtu22,JacPDstandardNth3gt22,kmadd(kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu12,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth2gt13,JacPDstandardNth3gt12)))),ToReal(2),kmul(gtu11,kmsub(JacPDstandardNth1gt13,ToReal(2),JacPDstandardNth3gt11))))))),kmul(gtu13,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu12,kmul(JacPDstandardNth2gt11,ToReal(2)),kmadd(gtu22,kmsub(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmadd(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth3gt12,ToReal(4)))))))))))))))))); } else { beta1rhsL = - kmul(theta,kmul(kadd(Xt1L,kmadd(beta1L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B1L,Xt1L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt1L,kmadd(ksub(B1L,Xt1L),ToReal(ShiftBCoeff),kmul(beta1L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); beta2rhsL = - kmul(theta,kmul(kadd(Xt2L,kmadd(beta2L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B2L,Xt2L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt2L,kmadd(ksub(B2L,Xt2L),ToReal(ShiftBCoeff),kmul(beta2L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); beta3rhsL = - kmul(theta,kmul(kadd(Xt3L,kmadd(beta3L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B3L,Xt3L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); } CCTK_REAL_VEC B1rhsL = @@ -1377,108 +1376,29 @@ static void ML_BSSN_MP_O8_RHS1_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC B3rhsL = kmul(knmsub(B3L,kmul(eta,ToReal(BetaDriver)),dotXt3),ToReal(ShiftBCoeff)); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_O8_RHS1); + LC_ENDLOOP3VEC(ML_BSSN_MP_O8_RHS1); } extern "C" void ML_BSSN_MP_O8_RHS1(CCTK_ARGUMENTS) @@ -1497,7 +1417,26 @@ extern "C" void ML_BSSN_MP_O8_RHS1(CCTK_ARGUMENTS) return; } - const char *groups[] = {"grid::coordinates","Grid::coordinates","ML_BSSN_MP_O8::ML_curv","ML_BSSN_MP_O8::ML_dtlapse","ML_BSSN_MP_O8::ML_dtlapserhs","ML_BSSN_MP_O8::ML_dtshift","ML_BSSN_MP_O8::ML_dtshiftrhs","ML_BSSN_MP_O8::ML_Gamma","ML_BSSN_MP_O8::ML_Gammarhs","ML_BSSN_MP_O8::ML_lapse","ML_BSSN_MP_O8::ML_lapserhs","ML_BSSN_MP_O8::ML_log_confac","ML_BSSN_MP_O8::ML_log_confacrhs","ML_BSSN_MP_O8::ML_metric","ML_BSSN_MP_O8::ML_metricrhs","ML_BSSN_MP_O8::ML_shift","ML_BSSN_MP_O8::ML_shiftrhs","ML_BSSN_MP_O8::ML_trace_curv","ML_BSSN_MP_O8::ML_trace_curvrhs"}; + const char *const groups[] = { + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_MP_O8::ML_curv", + "ML_BSSN_MP_O8::ML_dtlapse", + "ML_BSSN_MP_O8::ML_dtlapserhs", + "ML_BSSN_MP_O8::ML_dtshift", + "ML_BSSN_MP_O8::ML_dtshiftrhs", + "ML_BSSN_MP_O8::ML_Gamma", + "ML_BSSN_MP_O8::ML_Gammarhs", + "ML_BSSN_MP_O8::ML_lapse", + "ML_BSSN_MP_O8::ML_lapserhs", + "ML_BSSN_MP_O8::ML_log_confac", + "ML_BSSN_MP_O8::ML_log_confacrhs", + "ML_BSSN_MP_O8::ML_metric", + "ML_BSSN_MP_O8::ML_metricrhs", + "ML_BSSN_MP_O8::ML_shift", + "ML_BSSN_MP_O8::ML_shiftrhs", + "ML_BSSN_MP_O8::ML_trace_curv", + "ML_BSSN_MP_O8::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_O8_RHS1", 19, groups); switch(fdOrder) @@ -1519,7 +1458,7 @@ extern "C" void ML_BSSN_MP_O8_RHS1(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_MP_O8_RHS1_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_MP_O8_RHS1_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_RHS2.cc b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_RHS2.cc index fcf925e..cdc4bad 100644 --- a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_RHS2.cc +++ b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_RHS2.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -40,8 +41,6 @@ static void ML_BSSN_MP_O8_RHS2_Body(cGH const * restrict const cctkGH, int const DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -78,9 +77,9 @@ static void ML_BSSN_MP_O8_RHS2_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -99,14 +98,14 @@ static void ML_BSSN_MP_O8_RHS2_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -120,9 +119,9 @@ static void ML_BSSN_MP_O8_RHS2_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -203,7 +202,7 @@ static void ML_BSSN_MP_O8_RHS2_Body(cGH const * restrict const cctkGH, int const /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_O8_RHS2, + LC_LOOP3VEC(ML_BSSN_MP_O8_RHS2, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1420,7 +1419,8 @@ static void ML_BSSN_MP_O8_RHS2_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -1428,12 +1428,14 @@ static void ML_BSSN_MP_O8_RHS2_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC Gtl111 = kmul(JacPDstandardNth1gt11,ToReal(0.5)); @@ -1676,16 +1678,16 @@ static void ML_BSSN_MP_O8_RHS2_Body(cGH const * restrict const cctkGH, int const kmul(ToReal(-2),kadd(cdphi211,kmadd(SQR(cdphi1),kmul(kmadd(gt11L,gtu11,ToReal(-1)),ToReal(2)),kmul(gt11L,kmadd(cdphi211,gtu11,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu33,SQR(cdphi3))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmul(kmadd(cdphi1,kmadd(cdphi2,gtu12,kmul(cdphi3,gtu13)),kmul(cdphi2,kmul(cdphi3,gtu23))),ToReal(4)))))))))); CCTK_REAL_VEC Rphi12 = - kmul(ToReal(-2),kadd(cdphi212,kmadd(gt12L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(cdphi3,kmul(gt12L,kmul(gtu13,ToReal(4))),kmul(cdphi2,kmadd(gt12L,kmul(gtu12,ToReal(4)),ToReal(-2)))))))); + kmul(ToReal(-2),kadd(cdphi212,kmadd(gt12L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(gt12L,kmul(cdphi3,kmul(gtu13,ToReal(4))),kmul(cdphi2,kmadd(gt12L,kmul(gtu12,ToReal(4)),ToReal(-2)))))))); CCTK_REAL_VEC Rphi13 = - kmul(ToReal(-2),kadd(cdphi213,kmadd(gt13L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(cdphi2,kmul(gt13L,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt13L,kmul(gtu13,ToReal(4)),ToReal(-2)))))))); + kmul(ToReal(-2),kadd(cdphi213,kmadd(gt13L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(gt13L,kmul(cdphi2,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt13L,kmul(gtu13,ToReal(4)),ToReal(-2)))))))); CCTK_REAL_VEC Rphi22 = kmul(ToReal(-2),kadd(cdphi222,kmadd(SQR(cdphi2),kmul(kmadd(gt22L,gtu22,ToReal(-1)),ToReal(2)),kmul(gt22L,kmadd(cdphi222,gtu22,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu33,SQR(cdphi3))))),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmul(kmadd(cdphi1,kmul(cdphi3,gtu13),kmul(cdphi2,kmadd(cdphi1,gtu12,kmul(cdphi3,gtu23)))),ToReal(4)))))))))); CCTK_REAL_VEC Rphi23 = - kmul(ToReal(-2),kadd(cdphi223,kmadd(gt23L,kmadd(cdphi222,gtu22,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu22,SQR(cdphi2))))),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi1,kmul(cdphi3,kmul(gtu13,ToReal(4)))))))),kmul(cdphi2,kmadd(cdphi1,kmul(gt23L,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt23L,kmul(gtu23,ToReal(4)),ToReal(-2)))))))); + kmul(ToReal(-2),kadd(cdphi223,kmadd(gt23L,kmadd(cdphi222,gtu22,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu22,SQR(cdphi2))))),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi1,kmul(cdphi3,kmul(gtu13,ToReal(4)))))))),kmul(cdphi2,kmadd(gt23L,kmul(cdphi1,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt23L,kmul(gtu23,ToReal(4)),ToReal(-2)))))))); CCTK_REAL_VEC Rphi33 = kmul(ToReal(-2),kadd(cdphi233,kmadd(SQR(cdphi3),kmul(kmadd(gt33L,gtu33,ToReal(-1)),ToReal(2)),kmul(gt33L,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi213,gtu13,kmul(cdphi223,gtu23)),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(cdphi3,kmul(kmadd(cdphi1,gtu13,kmul(cdphi2,gtu23)),ToReal(4)),kmul(gtu12,kmadd(cdphi212,ToReal(2),kmul(cdphi1,kmul(cdphi2,ToReal(4)))))))))))))); @@ -1722,17 +1724,17 @@ static void ML_BSSN_MP_O8_RHS2_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC em4phi = INV(e4phi); - CCTK_REAL_VEC g11 = kmul(e4phi,gt11L); + CCTK_REAL_VEC g11 = kmul(gt11L,e4phi); - CCTK_REAL_VEC g12 = kmul(e4phi,gt12L); + CCTK_REAL_VEC g12 = kmul(gt12L,e4phi); - CCTK_REAL_VEC g13 = kmul(e4phi,gt13L); + CCTK_REAL_VEC g13 = kmul(gt13L,e4phi); - CCTK_REAL_VEC g22 = kmul(e4phi,gt22L); + CCTK_REAL_VEC g22 = kmul(gt22L,e4phi); - CCTK_REAL_VEC g23 = kmul(e4phi,gt23L); + CCTK_REAL_VEC g23 = kmul(gt23L,e4phi); - CCTK_REAL_VEC g33 = kmul(e4phi,gt33L); + CCTK_REAL_VEC g33 = kmul(gt33L,e4phi); CCTK_REAL_VEC gu11 = kmul(em4phi,gtu11); @@ -1783,73 +1785,33 @@ static void ML_BSSN_MP_O8_RHS2_Body(cGH const * restrict const cctkGH, int const kmadd(Ats11,gu11,kmadd(Ats22,gu22,kmadd(Ats33,gu33,kmul(kmadd(Ats12,gu12,kmadd(Ats13,gu13,kmul(Ats23,gu23))),ToReal(2))))); CCTK_REAL_VEC At11rhsL = - kmadd(em4phi,kmadd(g11,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),Ats11),kmadd(At11L,kmadd(kadd(JacPDstandardNth2beta2,JacPDstandardNth3beta3),ToReal(-0.6666666666666666666666666666666666666667),kmul(JacPDstandardNth1beta1,ToReal(1.333333333333333333333333333333333333333))),kmadd(kmadd(At12L,JacPDstandardNth1beta2,kmul(At13L,JacPDstandardNth1beta3)),ToReal(2.),kmul(alphaL,kmadd(kmadd(At12L,Atm21,kmul(At13L,Atm31)),ToReal(-2.),kmadd(At11L,kmadd(Atm11,ToReal(-2.),trKL),kmul(em4phi,kmadd(eTxxL,ToReal(-25.13274122871834590770114706623602307358),kmul(g11,kmul(trS,ToReal(8.377580409572781969233715688745341024526))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats11,ToReal(3),kmul(g11,trAts)),kmadd(At11L,kmadd(kadd(JacPDstandardNth2beta2,JacPDstandardNth3beta3),ToReal(-2),kmul(JacPDstandardNth1beta1,ToReal(4))),kmsub(kmadd(At12L,JacPDstandardNth1beta2,kmul(At13L,JacPDstandardNth1beta3)),ToReal(6),kmul(alphaL,kmadd(kmadd(At12L,Atm21,kmul(At13L,Atm31)),ToReal(6),kmadd(At11L,kmadd(trKL,ToReal(-3),kmul(Atm11,ToReal(6))),kmul(em4phi,kmul(kmadd(g11,kmul(trS,ToReal(-8)),kmul(eTxxL,ToReal(24))),ToReal(Pi)))))))))); CCTK_REAL_VEC At12rhsL = - kmadd(At22L,JacPDstandardNth1beta2,kmadd(At23L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth2beta1,kmadd(At13L,JacPDstandardNth2beta3,kmadd(em4phi,kmadd(g12,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),Ats12),kmadd(At12L,kmadd(JacPDstandardNth3beta3,ToReal(-0.6666666666666666666666666666666666666667),kmul(kadd(JacPDstandardNth1beta1,JacPDstandardNth2beta2),ToReal(0.3333333333333333333333333333333333333333))),kmul(alphaL,kmadd(At12L,trKL,kmadd(kmadd(At11L,Atm12,kmadd(At12L,Atm22,kmul(At13L,Atm32))),ToReal(-2.),kmul(em4phi,kmadd(eTxyL,ToReal(-25.13274122871834590770114706623602307358),kmul(g12,kmul(trS,ToReal(8.377580409572781969233715688745341024526)))))))))))))); + kmadd(ToReal(0.333333333333333333333333333333),kmadd(At12L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth3beta3,ToReal(-2),JacPDstandardNth2beta2)),kmsub(kmadd(Ats12,em4phi,kmadd(At22L,JacPDstandardNth1beta2,kmadd(At23L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth2beta1,kmul(At13L,JacPDstandardNth2beta3))))),ToReal(3),kmul(em4phi,kmul(g12,trAts)))),kmul(alphaL,kmadd(kmadd(At11L,Atm12,kmul(At13L,Atm32)),ToReal(-2),kmadd(At12L,kmadd(Atm22,ToReal(-2),trKL),kmul(em4phi,kmadd(g12,kmul(trS,ToReal(8.37758040957278196923371568875)),kmul(eTxyL,kmul(ToReal(-8),ToReal(Pi))))))))); CCTK_REAL_VEC At13rhsL = - kmadd(At23L,JacPDstandardNth1beta2,kmadd(At33L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth3beta1,kmadd(At12L,JacPDstandardNth3beta2,kmadd(em4phi,kmadd(g13,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),Ats13),kmadd(At13L,kmadd(JacPDstandardNth2beta2,ToReal(-0.6666666666666666666666666666666666666667),kmul(kadd(JacPDstandardNth1beta1,JacPDstandardNth3beta3),ToReal(0.3333333333333333333333333333333333333333))),kmul(alphaL,kmadd(At13L,trKL,kmadd(kmadd(At11L,Atm13,kmadd(At12L,Atm23,kmul(At13L,Atm33))),ToReal(-2.),kmul(em4phi,kmadd(eTxzL,ToReal(-25.13274122871834590770114706623602307358),kmul(g13,kmul(trS,ToReal(8.377580409572781969233715688745341024526)))))))))))))); + kmadd(ToReal(0.333333333333333333333333333333),kmadd(At13L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth2beta2,ToReal(-2),JacPDstandardNth3beta3)),kmsub(kmadd(Ats13,em4phi,kmadd(At23L,JacPDstandardNth1beta2,kmadd(At33L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth3beta1,kmul(At12L,JacPDstandardNth3beta2))))),ToReal(3),kmul(em4phi,kmul(g13,trAts)))),kmul(alphaL,kmadd(kmadd(At11L,Atm13,kmul(At12L,Atm23)),ToReal(-2),kmadd(At13L,kmadd(Atm33,ToReal(-2),trKL),kmul(em4phi,kmadd(g13,kmul(trS,ToReal(8.37758040957278196923371568875)),kmul(eTxzL,kmul(ToReal(-8),ToReal(Pi))))))))); CCTK_REAL_VEC At22rhsL = - kmadd(em4phi,kmadd(g22,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),Ats22),kmadd(At22L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth3beta3),ToReal(-0.6666666666666666666666666666666666666667),kmul(JacPDstandardNth2beta2,ToReal(1.333333333333333333333333333333333333333))),kmadd(kmadd(At12L,JacPDstandardNth2beta1,kmul(At23L,JacPDstandardNth2beta3)),ToReal(2.),kmul(alphaL,kmadd(At22L,trKL,kmadd(kmadd(At12L,Atm12,kmadd(At22L,Atm22,kmul(At23L,Atm32))),ToReal(-2.),kmul(em4phi,kmadd(eTyyL,ToReal(-25.13274122871834590770114706623602307358),kmul(g22,kmul(trS,ToReal(8.377580409572781969233715688745341024526))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats22,ToReal(3),kmul(g22,trAts)),kmadd(At22L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth3beta3),ToReal(-2),kmul(JacPDstandardNth2beta2,ToReal(4))),kmsub(kmadd(At12L,JacPDstandardNth2beta1,kmul(At23L,JacPDstandardNth2beta3)),ToReal(6),kmul(alphaL,kmadd(kmadd(At12L,Atm12,kmul(At23L,Atm32)),ToReal(6),kmadd(At22L,kmadd(trKL,ToReal(-3),kmul(Atm22,ToReal(6))),kmul(em4phi,kmul(kmadd(g22,kmul(trS,ToReal(-8)),kmul(eTyyL,ToReal(24))),ToReal(Pi)))))))))); CCTK_REAL_VEC At23rhsL = - kmadd(At13L,JacPDstandardNth2beta1,kmadd(At33L,JacPDstandardNth2beta3,kmadd(At12L,JacPDstandardNth3beta1,kmadd(At22L,JacPDstandardNth3beta2,kmadd(em4phi,kmadd(g23,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),Ats23),kmadd(At23L,kmadd(JacPDstandardNth1beta1,ToReal(-0.6666666666666666666666666666666666666667),kmul(kadd(JacPDstandardNth2beta2,JacPDstandardNth3beta3),ToReal(0.3333333333333333333333333333333333333333))),kmul(alphaL,kmadd(At23L,trKL,kmadd(kmadd(At12L,Atm13,kmadd(At22L,Atm23,kmul(At23L,Atm33))),ToReal(-2.),kmul(em4phi,kmadd(eTyzL,ToReal(-25.13274122871834590770114706623602307358),kmul(g23,kmul(trS,ToReal(8.377580409572781969233715688745341024526)))))))))))))); + kmadd(ToReal(0.333333333333333333333333333333),kmadd(At23L,kadd(JacPDstandardNth2beta2,kmadd(JacPDstandardNth1beta1,ToReal(-2),JacPDstandardNth3beta3)),kmsub(kmadd(Ats23,em4phi,kmadd(At13L,JacPDstandardNth2beta1,kmadd(At33L,JacPDstandardNth2beta3,kmadd(At12L,JacPDstandardNth3beta1,kmul(At22L,JacPDstandardNth3beta2))))),ToReal(3),kmul(em4phi,kmul(g23,trAts)))),kmul(alphaL,kmadd(kmadd(At12L,Atm13,kmul(At22L,Atm23)),ToReal(-2),kmadd(At23L,kmadd(Atm33,ToReal(-2),trKL),kmul(em4phi,kmadd(g23,kmul(trS,ToReal(8.37758040957278196923371568875)),kmul(eTyzL,kmul(ToReal(-8),ToReal(Pi))))))))); CCTK_REAL_VEC At33rhsL = - kmadd(em4phi,kmadd(g33,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),Ats33),kmadd(At33L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth2beta2),ToReal(-0.6666666666666666666666666666666666666667),kmul(JacPDstandardNth3beta3,ToReal(1.333333333333333333333333333333333333333))),kmadd(kmadd(At13L,JacPDstandardNth3beta1,kmul(At23L,JacPDstandardNth3beta2)),ToReal(2.),kmul(alphaL,kmadd(At33L,trKL,kmadd(kmadd(At13L,Atm13,kmadd(At23L,Atm23,kmul(At33L,Atm33))),ToReal(-2.),kmul(em4phi,kmadd(eTzzL,ToReal(-25.13274122871834590770114706623602307358),kmul(g33,kmul(trS,ToReal(8.377580409572781969233715688745341024526))))))))))); - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - break; - } - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); + kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats33,ToReal(3),kmul(g33,trAts)),kmadd(At33L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth2beta2),ToReal(-2),kmul(JacPDstandardNth3beta3,ToReal(4))),kmsub(kmadd(At13L,JacPDstandardNth3beta1,kmul(At23L,JacPDstandardNth3beta2)),ToReal(6),kmul(alphaL,kmadd(kmadd(At13L,Atm13,kmul(At23L,Atm23)),ToReal(6),kmadd(At33L,kmadd(trKL,ToReal(-3),kmul(Atm33,ToReal(6))),kmul(em4phi,kmul(kmadd(g33,kmul(trS,ToReal(-8)),kmul(eTzzL,ToReal(24))),ToReal(Pi)))))))))); + + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_O8_RHS2); + LC_ENDLOOP3VEC(ML_BSSN_MP_O8_RHS2); } extern "C" void ML_BSSN_MP_O8_RHS2(CCTK_ARGUMENTS) @@ -1868,7 +1830,15 @@ extern "C" void ML_BSSN_MP_O8_RHS2(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP_O8::ML_curv","ML_BSSN_MP_O8::ML_curvrhs","ML_BSSN_MP_O8::ML_Gamma","ML_BSSN_MP_O8::ML_lapse","ML_BSSN_MP_O8::ML_log_confac","ML_BSSN_MP_O8::ML_metric","ML_BSSN_MP_O8::ML_shift","ML_BSSN_MP_O8::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_MP_O8::ML_curv", + "ML_BSSN_MP_O8::ML_curvrhs", + "ML_BSSN_MP_O8::ML_Gamma", + "ML_BSSN_MP_O8::ML_lapse", + "ML_BSSN_MP_O8::ML_log_confac", + "ML_BSSN_MP_O8::ML_metric", + "ML_BSSN_MP_O8::ML_shift", + "ML_BSSN_MP_O8::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_O8_RHS2", 8, groups); switch(fdOrder) @@ -1890,7 +1860,7 @@ extern "C" void ML_BSSN_MP_O8_RHS2(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_MP_O8_RHS2_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_MP_O8_RHS2_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_RHSStaticBoundary.cc b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_RHSStaticBoundary.cc index 58bf0a7..56504f3 100644 --- a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_RHSStaticBoundary.cc +++ b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_RHSStaticBoundary.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -64,8 +65,6 @@ static void ML_BSSN_MP_O8_RHSStaticBoundary_Body(cGH const * restrict const cctk DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -102,9 +101,9 @@ static void ML_BSSN_MP_O8_RHSStaticBoundary_Body(cGH const * restrict const cctk CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -123,14 +122,14 @@ static void ML_BSSN_MP_O8_RHSStaticBoundary_Body(cGH const * restrict const cctk CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -144,9 +143,9 @@ static void ML_BSSN_MP_O8_RHSStaticBoundary_Body(cGH const * restrict const cctk CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -227,7 +226,7 @@ static void ML_BSSN_MP_O8_RHSStaticBoundary_Body(cGH const * restrict const cctk /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_O8_RHSStaticBoundary, + LC_LOOP3VEC(ML_BSSN_MP_O8_RHSStaticBoundary, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -309,132 +308,35 @@ static void ML_BSSN_MP_O8_RHSStaticBoundary_Body(cGH const * restrict const cctk CCTK_REAL_VEC B3rhsL = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_O8_RHSStaticBoundary); + LC_ENDLOOP3VEC(ML_BSSN_MP_O8_RHSStaticBoundary); } extern "C" void ML_BSSN_MP_O8_RHSStaticBoundary(CCTK_ARGUMENTS) @@ -453,7 +355,16 @@ extern "C" void ML_BSSN_MP_O8_RHSStaticBoundary(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP_O8::ML_curvrhs","ML_BSSN_MP_O8::ML_dtlapserhs","ML_BSSN_MP_O8::ML_dtshiftrhs","ML_BSSN_MP_O8::ML_Gammarhs","ML_BSSN_MP_O8::ML_lapserhs","ML_BSSN_MP_O8::ML_log_confacrhs","ML_BSSN_MP_O8::ML_metricrhs","ML_BSSN_MP_O8::ML_shiftrhs","ML_BSSN_MP_O8::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN_MP_O8::ML_curvrhs", + "ML_BSSN_MP_O8::ML_dtlapserhs", + "ML_BSSN_MP_O8::ML_dtshiftrhs", + "ML_BSSN_MP_O8::ML_Gammarhs", + "ML_BSSN_MP_O8::ML_lapserhs", + "ML_BSSN_MP_O8::ML_log_confacrhs", + "ML_BSSN_MP_O8::ML_metricrhs", + "ML_BSSN_MP_O8::ML_shiftrhs", + "ML_BSSN_MP_O8::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_O8_RHSStaticBoundary", 9, groups); switch(fdOrder) @@ -471,7 +382,7 @@ extern "C" void ML_BSSN_MP_O8_RHSStaticBoundary(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverBoundary(cctkGH, &ML_BSSN_MP_O8_RHSStaticBoundary_Body); + GenericFD_LoopOverBoundary(cctkGH, ML_BSSN_MP_O8_RHSStaticBoundary_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_boundary.cc b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_boundary.cc index 8806e19..f0f3b15 100644 --- a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_boundary.cc +++ b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_boundary.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -64,8 +65,6 @@ static void ML_BSSN_MP_O8_boundary_Body(cGH const * restrict const cctkGH, int c DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -102,9 +101,9 @@ static void ML_BSSN_MP_O8_boundary_Body(cGH const * restrict const cctkGH, int c CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -123,14 +122,14 @@ static void ML_BSSN_MP_O8_boundary_Body(cGH const * restrict const cctkGH, int c CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -144,9 +143,9 @@ static void ML_BSSN_MP_O8_boundary_Body(cGH const * restrict const cctkGH, int c CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -227,7 +226,7 @@ static void ML_BSSN_MP_O8_boundary_Body(cGH const * restrict const cctkGH, int c /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_O8_boundary, + LC_LOOP3VEC(ML_BSSN_MP_O8_boundary, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -309,132 +308,35 @@ static void ML_BSSN_MP_O8_boundary_Body(cGH const * restrict const cctkGH, int c CCTK_REAL_VEC B3L = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(beta1[index],beta1L,elt_count); - vec_store_nta_partial_hi(beta2[index],beta2L,elt_count); - vec_store_nta_partial_hi(beta3[index],beta3L,elt_count); - vec_store_nta_partial_hi(gt11[index],gt11L,elt_count); - vec_store_nta_partial_hi(gt12[index],gt12L,elt_count); - vec_store_nta_partial_hi(gt13[index],gt13L,elt_count); - vec_store_nta_partial_hi(gt22[index],gt22L,elt_count); - vec_store_nta_partial_hi(gt23[index],gt23L,elt_count); - vec_store_nta_partial_hi(gt33[index],gt33L,elt_count); - vec_store_nta_partial_hi(phi[index],phiL,elt_count); - vec_store_nta_partial_hi(trK[index],trKL,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(beta1[index],beta1L,elt_count); - vec_store_nta_partial_lo(beta2[index],beta2L,elt_count); - vec_store_nta_partial_lo(beta3[index],beta3L,elt_count); - vec_store_nta_partial_lo(gt11[index],gt11L,elt_count); - vec_store_nta_partial_lo(gt12[index],gt12L,elt_count); - vec_store_nta_partial_lo(gt13[index],gt13L,elt_count); - vec_store_nta_partial_lo(gt22[index],gt22L,elt_count); - vec_store_nta_partial_lo(gt23[index],gt23L,elt_count); - vec_store_nta_partial_lo(gt33[index],gt33L,elt_count); - vec_store_nta_partial_lo(phi[index],phiL,elt_count); - vec_store_nta_partial_lo(trK[index],trKL,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(beta1[index],beta1L); - vec_store_nta(beta2[index],beta2L); - vec_store_nta(beta3[index],beta3L); - vec_store_nta(gt11[index],gt11L); - vec_store_nta(gt12[index],gt12L); - vec_store_nta(gt13[index],gt13L); - vec_store_nta(gt22[index],gt22L); - vec_store_nta(gt23[index],gt23L); - vec_store_nta(gt33[index],gt33L); - vec_store_nta(phi[index],phiL); - vec_store_nta(trK[index],trKL); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(beta1[index],beta1L); + vec_store_nta_partial(beta2[index],beta2L); + vec_store_nta_partial(beta3[index],beta3L); + vec_store_nta_partial(gt11[index],gt11L); + vec_store_nta_partial(gt12[index],gt12L); + vec_store_nta_partial(gt13[index],gt13L); + vec_store_nta_partial(gt22[index],gt22L); + vec_store_nta_partial(gt23[index],gt23L); + vec_store_nta_partial(gt33[index],gt33L); + vec_store_nta_partial(phi[index],phiL); + vec_store_nta_partial(trK[index],trKL); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_MP_O8_boundary); + LC_ENDLOOP3VEC(ML_BSSN_MP_O8_boundary); } extern "C" void ML_BSSN_MP_O8_boundary(CCTK_ARGUMENTS) @@ -453,7 +355,16 @@ extern "C" void ML_BSSN_MP_O8_boundary(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP_O8::ML_curv","ML_BSSN_MP_O8::ML_dtlapse","ML_BSSN_MP_O8::ML_dtshift","ML_BSSN_MP_O8::ML_Gamma","ML_BSSN_MP_O8::ML_lapse","ML_BSSN_MP_O8::ML_log_confac","ML_BSSN_MP_O8::ML_metric","ML_BSSN_MP_O8::ML_shift","ML_BSSN_MP_O8::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_MP_O8::ML_curv", + "ML_BSSN_MP_O8::ML_dtlapse", + "ML_BSSN_MP_O8::ML_dtshift", + "ML_BSSN_MP_O8::ML_Gamma", + "ML_BSSN_MP_O8::ML_lapse", + "ML_BSSN_MP_O8::ML_log_confac", + "ML_BSSN_MP_O8::ML_metric", + "ML_BSSN_MP_O8::ML_shift", + "ML_BSSN_MP_O8::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_O8_boundary", 9, groups); switch(fdOrder) @@ -471,7 +382,7 @@ extern "C" void ML_BSSN_MP_O8_boundary(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverBoundaryWithGhosts(cctkGH, &ML_BSSN_MP_O8_boundary_Body); + GenericFD_LoopOverBoundaryWithGhosts(cctkGH, ML_BSSN_MP_O8_boundary_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_constraints1.cc b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_constraints1.cc index dbef30b..6130f29 100644 --- a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_constraints1.cc +++ b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_constraints1.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -40,8 +41,6 @@ static void ML_BSSN_MP_O8_constraints1_Body(cGH const * restrict const cctkGH, i DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -78,9 +77,9 @@ static void ML_BSSN_MP_O8_constraints1_Body(cGH const * restrict const cctkGH, i CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -99,14 +98,14 @@ static void ML_BSSN_MP_O8_constraints1_Body(cGH const * restrict const cctkGH, i CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -120,9 +119,9 @@ static void ML_BSSN_MP_O8_constraints1_Body(cGH const * restrict const cctkGH, i CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -203,7 +202,7 @@ static void ML_BSSN_MP_O8_constraints1_Body(cGH const * restrict const cctkGH, i /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_O8_constraints1, + LC_LOOP3VEC(ML_BSSN_MP_O8_constraints1, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1224,7 +1223,8 @@ static void ML_BSSN_MP_O8_constraints1_Body(cGH const * restrict const cctkGH, i CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -1232,12 +1232,14 @@ static void ML_BSSN_MP_O8_constraints1_Body(cGH const * restrict const cctkGH, i CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC Gtl111 = kmul(JacPDstandardNth1gt11,ToReal(0.5)); @@ -1480,16 +1482,16 @@ static void ML_BSSN_MP_O8_constraints1_Body(cGH const * restrict const cctkGH, i kmul(ToReal(-2),kadd(cdphi211,kmadd(SQR(cdphi1),kmul(kmadd(gt11L,gtu11,ToReal(-1)),ToReal(2)),kmul(gt11L,kmadd(cdphi211,gtu11,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu33,SQR(cdphi3))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmul(kmadd(cdphi1,kmadd(cdphi2,gtu12,kmul(cdphi3,gtu13)),kmul(cdphi2,kmul(cdphi3,gtu23))),ToReal(4)))))))))); CCTK_REAL_VEC Rphi12 = - kmul(ToReal(-2),kadd(cdphi212,kmadd(gt12L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(cdphi3,kmul(gt12L,kmul(gtu13,ToReal(4))),kmul(cdphi2,kmadd(gt12L,kmul(gtu12,ToReal(4)),ToReal(-2)))))))); + kmul(ToReal(-2),kadd(cdphi212,kmadd(gt12L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(gt12L,kmul(cdphi3,kmul(gtu13,ToReal(4))),kmul(cdphi2,kmadd(gt12L,kmul(gtu12,ToReal(4)),ToReal(-2)))))))); CCTK_REAL_VEC Rphi13 = - kmul(ToReal(-2),kadd(cdphi213,kmadd(gt13L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(cdphi2,kmul(gt13L,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt13L,kmul(gtu13,ToReal(4)),ToReal(-2)))))))); + kmul(ToReal(-2),kadd(cdphi213,kmadd(gt13L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(gt13L,kmul(cdphi2,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt13L,kmul(gtu13,ToReal(4)),ToReal(-2)))))))); CCTK_REAL_VEC Rphi22 = kmul(ToReal(-2),kadd(cdphi222,kmadd(SQR(cdphi2),kmul(kmadd(gt22L,gtu22,ToReal(-1)),ToReal(2)),kmul(gt22L,kmadd(cdphi222,gtu22,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu33,SQR(cdphi3))))),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmul(kmadd(cdphi1,kmul(cdphi3,gtu13),kmul(cdphi2,kmadd(cdphi1,gtu12,kmul(cdphi3,gtu23)))),ToReal(4)))))))))); CCTK_REAL_VEC Rphi23 = - kmul(ToReal(-2),kadd(cdphi223,kmadd(gt23L,kmadd(cdphi222,gtu22,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu22,SQR(cdphi2))))),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi1,kmul(cdphi3,kmul(gtu13,ToReal(4)))))))),kmul(cdphi2,kmadd(cdphi1,kmul(gt23L,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt23L,kmul(gtu23,ToReal(4)),ToReal(-2)))))))); + kmul(ToReal(-2),kadd(cdphi223,kmadd(gt23L,kmadd(cdphi222,gtu22,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu22,SQR(cdphi2))))),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi1,kmul(cdphi3,kmul(gtu13,ToReal(4)))))))),kmul(cdphi2,kmadd(gt23L,kmul(cdphi1,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt23L,kmul(gtu23,ToReal(4)),ToReal(-2)))))))); CCTK_REAL_VEC Rphi33 = kmul(ToReal(-2),kadd(cdphi233,kmadd(SQR(cdphi3),kmul(kmadd(gt33L,gtu33,ToReal(-1)),ToReal(2)),kmul(gt33L,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi213,gtu13,kmul(cdphi223,gtu23)),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(cdphi3,kmul(kmadd(cdphi1,gtu13,kmul(cdphi2,gtu23)),ToReal(4)),kmul(gtu12,kmadd(cdphi212,ToReal(2),kmul(cdphi1,kmul(cdphi2,ToReal(4)))))))))))))); @@ -1557,38 +1559,13 @@ static void ML_BSSN_MP_O8_constraints1_Body(cGH const * restrict const cctkGH, i kmul(INV(SQR(alphaL)),kadd(eTttL,kmadd(eTxxL,SQR(beta1L),kmadd(eTyyL,SQR(beta2L),kmadd(eTzzL,SQR(beta3L),kmadd(kmadd(beta2L,eTtyL,kmul(beta3L,eTtzL)),ToReal(-2),kmul(kmadd(beta2L,kmul(beta3L,eTyzL),kmul(beta1L,kmadd(beta2L,eTxyL,kmsub(beta3L,eTxzL,eTtxL)))),ToReal(2)))))))); CCTK_REAL_VEC HL = - kadd(trR,kmadd(rho,ToReal(-50.26548245743669181540229413247204614715),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(-2.),kmadd(kadd(SQR(Atm11),kadd(SQR(Atm22),SQR(Atm33))),ToReal(-1.),kmul(SQR(trKL),ToReal(0.6666666666666666666666666666666666666667)))))); - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(H[index],HL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ + kadd(trR,kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(-2),ksub(ksub(ksub(kmadd(SQR(trKL),ToReal(0.666666666666666666666666666667),kmul(rho,kmul(ToReal(-16),ToReal(Pi)))),SQR(Atm33)),SQR(Atm22)),SQR(Atm11)))); - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(H[index],HL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(H[index],HL,elt_count); - break; - } - vec_store_nta(H[index],HL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(H[index],HL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_O8_constraints1); + LC_ENDLOOP3VEC(ML_BSSN_MP_O8_constraints1); } extern "C" void ML_BSSN_MP_O8_constraints1(CCTK_ARGUMENTS) @@ -1607,7 +1584,15 @@ extern "C" void ML_BSSN_MP_O8_constraints1(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP_O8::ML_curv","ML_BSSN_MP_O8::ML_Gamma","ML_BSSN_MP_O8::ML_Ham","ML_BSSN_MP_O8::ML_lapse","ML_BSSN_MP_O8::ML_log_confac","ML_BSSN_MP_O8::ML_metric","ML_BSSN_MP_O8::ML_shift","ML_BSSN_MP_O8::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_MP_O8::ML_curv", + "ML_BSSN_MP_O8::ML_Gamma", + "ML_BSSN_MP_O8::ML_Ham", + "ML_BSSN_MP_O8::ML_lapse", + "ML_BSSN_MP_O8::ML_log_confac", + "ML_BSSN_MP_O8::ML_metric", + "ML_BSSN_MP_O8::ML_shift", + "ML_BSSN_MP_O8::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_O8_constraints1", 8, groups); switch(fdOrder) @@ -1629,7 +1614,7 @@ extern "C" void ML_BSSN_MP_O8_constraints1(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_MP_O8_constraints1_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_MP_O8_constraints1_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_constraints2.cc b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_constraints2.cc index d5ce43b..216a96a 100644 --- a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_constraints2.cc +++ b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_constraints2.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -49,8 +50,6 @@ static void ML_BSSN_MP_O8_constraints2_Body(cGH const * restrict const cctkGH, i DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -87,9 +86,9 @@ static void ML_BSSN_MP_O8_constraints2_Body(cGH const * restrict const cctkGH, i CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -108,14 +107,14 @@ static void ML_BSSN_MP_O8_constraints2_Body(cGH const * restrict const cctkGH, i CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -129,9 +128,9 @@ static void ML_BSSN_MP_O8_constraints2_Body(cGH const * restrict const cctkGH, i CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -212,7 +211,7 @@ static void ML_BSSN_MP_O8_constraints2_Body(cGH const * restrict const cctkGH, i /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_O8_constraints2, + LC_LOOP3VEC(ML_BSSN_MP_O8_constraints2, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -775,7 +774,8 @@ static void ML_BSSN_MP_O8_constraints2_Body(cGH const * restrict const cctkGH, i CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -783,12 +783,14 @@ static void ML_BSSN_MP_O8_constraints2_Body(cGH const * restrict const cctkGH, i CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC Gtl111 = kmul(JacPDstandardNth1gt11,ToReal(0.5)); @@ -908,13 +910,13 @@ static void ML_BSSN_MP_O8_constraints2_Body(cGH const * restrict const cctkGH, i kmul(INV(alphaL),kmadd(beta1L,eTxzL,kmadd(beta2L,eTyzL,kmsub(beta3L,eTzzL,eTtzL)))); CCTK_REAL_VEC M1L = - kmadd(gtu11,JacPDstandardNth1At11,kmadd(gtu22,JacPDstandardNth2At12,kmadd(gtu23,kadd(JacPDstandardNth2At13,JacPDstandardNth3At12),kmadd(gtu33,JacPDstandardNth3At13,kmadd(S1,ToReal(-25.13274122871834590770114706623602307358),kmadd(kmadd(kmadd(At22L,Gt212,kmul(At23L,Gt312)),gtu22,kmadd(kmadd(At13L,Gt112,kmadd(At22L,Gt213,kmadd(At33L,Gt312,kmul(At23L,kadd(Gt212,Gt313))))),gtu23,kmul(kmadd(At13L,Gt113,kmadd(At23L,Gt213,kmul(At33L,Gt313))),gtu33))),ToReal(-1.),kmadd(gtu12,kadd(JacPDstandardNth1At12,kadd(JacPDstandardNth2At11,kmadd(At13L,kmul(Gt312,ToReal(-3.)),kmul(At22L,kmul(Gt211,ToReal(-1.)))))),kmadd(gtu13,kadd(JacPDstandardNth1At13,kadd(JacPDstandardNth3At11,kmadd(At13L,kmul(Gt313,ToReal(-3.)),kmul(At23L,kmul(Gt211,ToReal(-1.)))))),kmadd(Gt311,kmadd(At13L,kmul(gtu11,ToReal(-2.)),kmul(kmadd(At23L,gtu12,kmul(At33L,gtu13)),ToReal(-1.))),kmadd(JacPDstandardNth1trK,ToReal(-0.6666666666666666666666666666666666666667),kmadd(At13L,kmadd(kmadd(Gt322,gtu22,kmul(Gt333,gtu33)),ToReal(-1.),kmadd(cdphi3,kmul(gtu33,ToReal(6.)),kmadd(gtu13,kmadd(Gt111,ToReal(-1.),kmul(cdphi1,ToReal(6.))),kmul(gtu23,kmadd(Gt323,ToReal(-2.),kmul(cdphi2,ToReal(6.))))))),kmadd(At11L,kmadd(Gt123,kmul(gtu23,ToReal(-2.)),kmadd(kmadd(Gt122,gtu22,kmul(Gt133,gtu33)),ToReal(-1.),kmadd(gtu11,kmadd(Gt111,ToReal(-2.),kmul(cdphi1,ToReal(6.))),kmadd(gtu12,kmadd(Gt112,ToReal(-3.),kmul(cdphi2,ToReal(6.))),kmul(gtu13,kmadd(Gt113,ToReal(-3.),kmul(cdphi3,ToReal(6.)))))))),kmul(At12L,kmadd(Gt213,kmul(gtu13,ToReal(-3.)),kmadd(kmadd(Gt211,gtu11,kmul(Gt223,gtu23)),ToReal(-2.),kmadd(Gt233,kmul(gtu33,ToReal(-1.)),kmadd(gtu12,kmadd(Gt212,ToReal(-3.),kmadd(Gt111,ToReal(-1.),kmul(cdphi1,ToReal(6.)))),kmadd(gtu22,kmadd(kadd(Gt112,Gt222),ToReal(-1.),kmul(cdphi2,ToReal(6.))),kmul(gtu23,kmadd(Gt113,ToReal(-1.),kmul(cdphi3,ToReal(6.)))))))))))))))))))))); + kmadd(gtu11,JacPDstandardNth1At11,kmadd(gtu12,JacPDstandardNth2At11,kmadd(gtu22,JacPDstandardNth2At12,kmadd(gtu23,JacPDstandardNth2At13,kmadd(gtu13,JacPDstandardNth3At11,kmadd(gtu23,JacPDstandardNth3At12,kmadd(gtu33,JacPDstandardNth3At13,kmadd(gtu12,kadd(JacPDstandardNth1At12,kmsub(At13L,kmul(Gt312,ToReal(-3)),kmul(At22L,Gt211))),kmadd(gtu13,kadd(JacPDstandardNth1At13,kmsub(At13L,kmul(Gt313,ToReal(-3)),kmul(At23L,Gt211))),knmsub(gtu22,kmadd(At23L,Gt312,kmul(At22L,Gt212)),kmadd(Gt311,kmsub(At13L,kmul(gtu11,ToReal(-2)),kmadd(At33L,gtu13,kmul(At23L,gtu12))),kmadd(At13L,kmsub(Gt323,kmul(gtu23,ToReal(-2)),kmul(Gt113,gtu33)),kmadd(JacPDstandardNth1trK,ToReal(-0.666666666666666666666666666667),knmsub(At12L,kmadd(Gt111,gtu12,kmadd(Gt112,gtu22,kmadd(Gt222,gtu22,kmadd(Gt113,gtu23,kmadd(Gt233,gtu33,kmadd(cdphi1,kmul(gtu12,ToReal(-6)),kmadd(cdphi2,kmul(gtu22,ToReal(-6)),kmadd(cdphi3,kmul(gtu23,ToReal(-6)),kmadd(Gt211,kmul(gtu11,ToReal(2)),kmadd(Gt223,kmul(gtu23,ToReal(2)),kmadd(Gt212,kmul(gtu12,ToReal(3)),kmul(Gt213,kmul(gtu13,ToReal(3)))))))))))))),kmadd(gtu23,knmsub(At22L,Gt213,knmsub(At33L,Gt312,kmsub(At13L,kmul(cdphi2,ToReal(6)),kmul(At23L,kadd(Gt313,Gt212))))),kmadd(gtu33,knmsub(At23L,Gt213,kmsub(At13L,kmul(cdphi3,ToReal(6)),kmul(At33L,Gt313))),kmadd(At11L,kmadd(Gt112,kmul(gtu12,ToReal(-3)),kmadd(Gt113,kmul(gtu13,ToReal(-3)),kmadd(Gt111,kmul(gtu11,ToReal(-2)),kmadd(Gt123,kmul(gtu23,ToReal(-2)),knmsub(Gt122,gtu22,knmsub(Gt133,gtu33,kmadd(cdphi1,kmul(gtu11,ToReal(6)),kmadd(cdphi2,kmul(gtu12,ToReal(6)),kmul(cdphi3,kmul(gtu13,ToReal(6))))))))))),kmadd(At13L,knmsub(Gt322,gtu22,knmsub(Gt112,gtu23,kmsub(gtu13,kmsub(cdphi1,ToReal(6),Gt111),kmul(Gt333,gtu33)))),kmul(S1,kmul(ToReal(-8),ToReal(Pi))))))))))))))))))))); CCTK_REAL_VEC M2L = - kmadd(gtu11,JacPDstandardNth1At12,kmadd(gtu12,kadd(JacPDstandardNth1At22,JacPDstandardNth2At12),kmadd(gtu22,JacPDstandardNth2At22,kmadd(gtu33,JacPDstandardNth3At23,kmadd(S2,ToReal(-25.13274122871834590770114706623602307358),kmadd(kmadd(kmadd(At22L,Gt211,kmadd(At23L,Gt311,kmul(At13L,Gt312))),gtu11,kmadd(kmadd(At23L,Gt212,kmul(At33L,Gt312)),gtu13,kmadd(At11L,kmadd(Gt112,gtu11,kmadd(Gt122,gtu12,kmul(Gt123,gtu13))),kmadd(kmadd(At23L,Gt223,kmul(At33L,Gt323)),gtu33,kmul(At13L,kmadd(Gt322,gtu12,kmadd(Gt112,gtu13,kmadd(Gt122,gtu23,kmul(Gt123,gtu33))))))))),ToReal(-1.),kmadd(gtu23,kadd(JacPDstandardNth2At23,kadd(JacPDstandardNth3At22,kmadd(kmadd(At22L,Gt223,kmul(At23L,Gt323)),ToReal(-3.),kmul(kmadd(At23L,Gt222,kmul(At33L,Gt322)),ToReal(-1.))))),kmadd(gtu13,kadd(JacPDstandardNth1At23,kadd(JacPDstandardNth3At12,kmadd(At23L,kmul(Gt313,ToReal(-2.)),kmul(At13L,kmul(Gt323,ToReal(-1.)))))),kmadd(JacPDstandardNth2trK,ToReal(-0.6666666666666666666666666666666666666667),kmadd(At23L,kmadd(Gt312,kmul(gtu12,ToReal(-3.)),kmadd(Gt322,kmul(gtu22,ToReal(-2.)),kmadd(Gt333,kmul(gtu33,ToReal(-1.)),kmul(kmadd(cdphi1,gtu13,kmadd(cdphi2,gtu23,kmul(cdphi3,gtu33))),ToReal(6.))))),kmadd(At22L,kmadd(kmadd(Gt213,gtu13,kmul(Gt222,gtu22)),ToReal(-2.),kmadd(Gt233,kmul(gtu33,ToReal(-1.)),kmadd(kmadd(cdphi2,gtu22,kmul(cdphi3,gtu23)),ToReal(6.),kmul(gtu12,kmadd(Gt212,ToReal(-3.),kmul(cdphi1,ToReal(6.))))))),kmul(At12L,kmadd(Gt123,kmul(gtu23,ToReal(-3.)),kmadd(Gt122,kmul(gtu22,ToReal(-2.)),kmadd(Gt133,kmul(gtu33,ToReal(-1.)),kmadd(gtu11,kmadd(kadd(Gt111,Gt212),ToReal(-1.),kmul(cdphi1,ToReal(6.))),kmadd(gtu12,kmadd(Gt112,ToReal(-3.),kmadd(Gt222,ToReal(-1.),kmul(cdphi2,ToReal(6.)))),kmul(gtu13,kmadd(Gt113,ToReal(-2.),kmadd(Gt223,ToReal(-1.),kmul(cdphi3,ToReal(6.)))))))))))))))))))))); + kmadd(gtu11,JacPDstandardNth1At12,kmadd(gtu12,JacPDstandardNth1At22,kmadd(gtu13,JacPDstandardNth1At23,kmadd(gtu12,JacPDstandardNth2At12,kmadd(gtu22,JacPDstandardNth2At22,kmadd(gtu23,JacPDstandardNth2At23,kmadd(gtu13,JacPDstandardNth3At12,kmadd(gtu23,JacPDstandardNth3At22,kmadd(gtu33,JacPDstandardNth3At23,kmadd(At22L,kmul(Gt222,kmul(gtu22,ToReal(-2))),knmsub(At11L,kmadd(Gt112,gtu11,kmadd(Gt122,gtu12,kmul(Gt123,gtu13))),knmsub(gtu11,kmadd(At22L,Gt211,kmadd(At13L,Gt312,kmul(At23L,Gt311))),kmadd(Gt212,kmsub(At22L,kmul(gtu12,ToReal(-3)),kmul(At23L,gtu13)),kmadd(Gt312,kmsub(At23L,kmul(gtu12,ToReal(-3)),kmul(At33L,gtu13)),kmadd(Gt322,kmsub(At23L,kmul(gtu22,ToReal(-2)),kmul(At33L,gtu23)),kmadd(Gt223,kmsub(At22L,kmul(gtu23,ToReal(-3)),kmul(At23L,gtu33)),kmadd(Gt323,kmsub(At23L,kmul(gtu23,ToReal(-3)),kmul(At33L,gtu33)),kmadd(At22L,kmsub(Gt213,kmul(gtu13,ToReal(-2)),kmul(Gt233,gtu33)),kmadd(At23L,kmsub(Gt313,kmul(gtu13,ToReal(-2)),kmadd(Gt333,gtu33,kmul(Gt222,gtu23))),kmadd(JacPDstandardNth2trK,ToReal(-0.666666666666666666666666666667),kmadd(At22L,kmul(cdphi2,kmul(gtu22,ToReal(6))),kmadd(At22L,kmul(cdphi3,kmul(gtu23,ToReal(6))),kmadd(gtu12,kmsub(At22L,kmul(cdphi1,ToReal(6)),kmul(At13L,Gt322)),kmadd(gtu13,kmsub(At23L,kmul(cdphi1,ToReal(6)),kmul(At13L,kadd(Gt323,Gt112))),kmadd(gtu23,kmsub(At23L,kmul(cdphi2,ToReal(6)),kmul(At13L,Gt122)),kmadd(gtu33,kmsub(At23L,kmul(cdphi3,ToReal(6)),kmul(At13L,Gt123)),kmadd(At12L,kmadd(Gt112,kmul(gtu12,ToReal(-3)),kmadd(Gt123,kmul(gtu23,ToReal(-3)),kmadd(Gt113,kmul(gtu13,ToReal(-2)),kmadd(Gt122,kmul(gtu22,ToReal(-2)),knmsub(Gt133,gtu33,kmadd(gtu11,ksub(kmsub(cdphi1,ToReal(6),Gt212),Gt111),kmadd(gtu12,kmsub(cdphi2,ToReal(6),Gt222),kmul(gtu13,kmsub(cdphi3,ToReal(6),Gt223))))))))),kmul(S2,kmul(ToReal(-8),ToReal(Pi)))))))))))))))))))))))))))))); CCTK_REAL_VEC M3L = - kmadd(gtu11,JacPDstandardNth1At13,kmadd(gtu22,JacPDstandardNth2At23,kmadd(gtu13,kadd(JacPDstandardNth1At33,JacPDstandardNth3At13),kmadd(gtu33,JacPDstandardNth3At33,kmadd(S3,ToReal(-25.13274122871834590770114706623602307358),kmadd(kmadd(kmadd(At23L,Gt211,kmadd(At12L,Gt213,kmul(At33L,Gt311))),gtu11,kmadd(kmadd(At22L,Gt213,kmul(At12L,kadd(Gt113,Gt223))),gtu12,kmadd(At11L,kmadd(Gt113,gtu11,kmadd(Gt123,gtu12,kmul(Gt133,gtu13))),kmadd(kmadd(At23L,Gt222,kmul(At22L,Gt223)),gtu22,kmul(At12L,kmadd(Gt233,gtu13,kmadd(Gt123,gtu22,kmul(Gt133,gtu23)))))))),ToReal(-1.),kmadd(gtu12,kadd(JacPDstandardNth1At23,kadd(JacPDstandardNth2At13,kmadd(At33L,kmul(Gt312,ToReal(-2.)),kmul(At23L,kmul(Gt313,ToReal(-1.)))))),kmadd(gtu23,kadd(JacPDstandardNth2At33,kadd(JacPDstandardNth3At23,kmadd(kmadd(At23L,Gt223,kmul(At33L,Gt323)),ToReal(-3.),kmul(kmadd(At22L,Gt233,kmul(At23L,Gt333)),ToReal(-1.))))),kmadd(JacPDstandardNth3trK,ToReal(-0.6666666666666666666666666666666666666667),kmadd(At33L,kmadd(Gt333,kmul(gtu33,ToReal(-2.)),kmadd(Gt322,kmul(gtu22,ToReal(-1.)),kmadd(kmadd(cdphi2,gtu23,kmul(cdphi3,gtu33)),ToReal(6.),kmul(gtu13,kmadd(Gt313,ToReal(-3.),kmul(cdphi1,ToReal(6.))))))),kmadd(At23L,kmadd(Gt213,kmul(gtu13,ToReal(-3.)),kmadd(Gt233,kmul(gtu33,ToReal(-2.)),kmadd(cdphi3,kmul(gtu23,ToReal(6.)),kmadd(gtu12,kmadd(Gt212,ToReal(-2.),kmul(cdphi1,ToReal(6.))),kmul(gtu22,kmadd(Gt323,ToReal(-1.),kmul(cdphi2,ToReal(6.)))))))),kmul(At13L,kmadd(Gt123,kmul(gtu23,ToReal(-3.)),kmadd(Gt133,kmul(gtu33,ToReal(-2.)),kmadd(Gt122,kmul(gtu22,ToReal(-1.)),kmadd(gtu11,kmadd(kadd(Gt111,Gt313),ToReal(-1.),kmul(cdphi1,ToReal(6.))),kmadd(gtu12,kmadd(Gt112,ToReal(-2.),kmadd(Gt323,ToReal(-1.),kmul(cdphi2,ToReal(6.)))),kmul(gtu13,kmadd(Gt113,ToReal(-3.),kmadd(Gt333,ToReal(-1.),kmul(cdphi3,ToReal(6.)))))))))))))))))))))); + kmadd(gtu11,JacPDstandardNth1At13,kmadd(gtu12,JacPDstandardNth1At23,kmadd(gtu13,JacPDstandardNth1At33,kmadd(gtu12,JacPDstandardNth2At13,kmadd(gtu22,JacPDstandardNth2At23,kmadd(gtu23,JacPDstandardNth2At33,kmadd(gtu13,JacPDstandardNth3At13,kmadd(gtu23,JacPDstandardNth3At23,kmadd(gtu33,JacPDstandardNth3At33,kmadd(At33L,kmul(Gt313,kmul(gtu13,ToReal(-3))),kmadd(At23L,kmul(Gt223,kmul(gtu23,ToReal(-3))),kmadd(At33L,kmul(Gt323,kmul(gtu23,ToReal(-3))),kmadd(At23L,kmul(Gt233,kmul(gtu33,ToReal(-2))),kmadd(At33L,kmul(Gt333,kmul(gtu33,ToReal(-2))),knmsub(At11L,kmadd(Gt113,gtu11,kmadd(Gt123,gtu12,kmul(Gt133,gtu13))),kmadd(gtu12,kmsub(At23L,kmul(Gt212,ToReal(-2)),kmul(At12L,Gt223)),knmsub(gtu11,kmadd(At23L,Gt211,kmadd(At33L,Gt311,kmul(At12L,Gt213))),kmadd(At33L,kmsub(Gt312,kmul(gtu12,ToReal(-2)),kmul(Gt322,gtu22)),kmadd(At23L,kmsub(Gt213,kmul(gtu13,ToReal(-3)),kmadd(Gt333,gtu23,kmul(Gt222,gtu22))),kmadd(JacPDstandardNth3trK,ToReal(-0.666666666666666666666666666667),kmadd(At33L,kmul(cdphi3,kmul(gtu33,ToReal(6))),kmadd(gtu13,kmsub(At33L,kmul(cdphi1,ToReal(6)),kmul(At12L,Gt233)),kmadd(gtu23,kmsub(At33L,kmul(cdphi2,ToReal(6)),kmul(At12L,Gt133)),kmadd(gtu23,kmsub(At23L,kmul(cdphi3,ToReal(6)),kmul(At22L,Gt233)),kmadd(gtu12,knmsub(At12L,Gt113,kmsub(At23L,kmsub(cdphi1,ToReal(6),Gt313),kmul(At22L,Gt213))),kmadd(gtu22,knmsub(At12L,Gt123,kmsub(At23L,kmsub(cdphi2,ToReal(6),Gt323),kmul(At22L,Gt223))),kmadd(At13L,kmadd(Gt113,kmul(gtu13,ToReal(-3)),kmadd(Gt123,kmul(gtu23,ToReal(-3)),kmadd(Gt112,kmul(gtu12,ToReal(-2)),kmadd(Gt133,kmul(gtu33,ToReal(-2)),knmsub(Gt122,gtu22,kmadd(gtu11,ksub(kmsub(cdphi1,ToReal(6),Gt313),Gt111),kmadd(gtu12,kmsub(cdphi2,ToReal(6),Gt323),kmul(gtu13,kmsub(cdphi3,ToReal(6),Gt333))))))))),kmul(S3,kmul(ToReal(-8),ToReal(Pi)))))))))))))))))))))))))))))); CCTK_REAL_VEC cSL = klog(detgt); @@ -930,64 +932,18 @@ static void ML_BSSN_MP_O8_constraints2_Body(cGH const * restrict const cctkGH, i CCTK_REAL_VEC cAL = kmadd(At11L,gtu11,kmadd(At22L,gtu22,kmadd(At33L,gtu33,kmul(kmadd(At12L,gtu12,kmadd(At13L,gtu13,kmul(At23L,gtu23))),ToReal(2))))); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(cA[index],cAL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cS[index],cSL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cXt1[index],cXt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cXt2[index],cXt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cXt3[index],cXt3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(M1[index],M1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(M2[index],M2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(M3[index],M3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(cA[index],cAL,elt_count); - vec_store_nta_partial_hi(cS[index],cSL,elt_count); - vec_store_nta_partial_hi(cXt1[index],cXt1L,elt_count); - vec_store_nta_partial_hi(cXt2[index],cXt2L,elt_count); - vec_store_nta_partial_hi(cXt3[index],cXt3L,elt_count); - vec_store_nta_partial_hi(M1[index],M1L,elt_count); - vec_store_nta_partial_hi(M2[index],M2L,elt_count); - vec_store_nta_partial_hi(M3[index],M3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(cA[index],cAL,elt_count); - vec_store_nta_partial_lo(cS[index],cSL,elt_count); - vec_store_nta_partial_lo(cXt1[index],cXt1L,elt_count); - vec_store_nta_partial_lo(cXt2[index],cXt2L,elt_count); - vec_store_nta_partial_lo(cXt3[index],cXt3L,elt_count); - vec_store_nta_partial_lo(M1[index],M1L,elt_count); - vec_store_nta_partial_lo(M2[index],M2L,elt_count); - vec_store_nta_partial_lo(M3[index],M3L,elt_count); - break; - } - vec_store_nta(cA[index],cAL); - vec_store_nta(cS[index],cSL); - vec_store_nta(cXt1[index],cXt1L); - vec_store_nta(cXt2[index],cXt2L); - vec_store_nta(cXt3[index],cXt3L); - vec_store_nta(M1[index],M1L); - vec_store_nta(M2[index],M2L); - vec_store_nta(M3[index],M3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(cA[index],cAL); + vec_store_nta_partial(cS[index],cSL); + vec_store_nta_partial(cXt1[index],cXt1L); + vec_store_nta_partial(cXt2[index],cXt2L); + vec_store_nta_partial(cXt3[index],cXt3L); + vec_store_nta_partial(M1[index],M1L); + vec_store_nta_partial(M2[index],M2L); + vec_store_nta_partial(M3[index],M3L); } - LC_ENDLOOP3VEC (ML_BSSN_MP_O8_constraints2); + LC_ENDLOOP3VEC(ML_BSSN_MP_O8_constraints2); } extern "C" void ML_BSSN_MP_O8_constraints2(CCTK_ARGUMENTS) @@ -1006,7 +962,18 @@ extern "C" void ML_BSSN_MP_O8_constraints2(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP_O8::ML_cons_detg","ML_BSSN_MP_O8::ML_cons_Gamma","ML_BSSN_MP_O8::ML_cons_traceA","ML_BSSN_MP_O8::ML_curv","ML_BSSN_MP_O8::ML_Gamma","ML_BSSN_MP_O8::ML_lapse","ML_BSSN_MP_O8::ML_log_confac","ML_BSSN_MP_O8::ML_metric","ML_BSSN_MP_O8::ML_mom","ML_BSSN_MP_O8::ML_shift","ML_BSSN_MP_O8::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_MP_O8::ML_cons_detg", + "ML_BSSN_MP_O8::ML_cons_Gamma", + "ML_BSSN_MP_O8::ML_cons_traceA", + "ML_BSSN_MP_O8::ML_curv", + "ML_BSSN_MP_O8::ML_Gamma", + "ML_BSSN_MP_O8::ML_lapse", + "ML_BSSN_MP_O8::ML_log_confac", + "ML_BSSN_MP_O8::ML_metric", + "ML_BSSN_MP_O8::ML_mom", + "ML_BSSN_MP_O8::ML_shift", + "ML_BSSN_MP_O8::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_O8_constraints2", 11, groups); switch(fdOrder) @@ -1028,7 +995,7 @@ extern "C" void ML_BSSN_MP_O8_constraints2(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_MP_O8_constraints2_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_MP_O8_constraints2_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertFromADMBase.cc b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertFromADMBase.cc index 667a19d..2cf4397 100644 --- a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertFromADMBase.cc +++ b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertFromADMBase.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_MP_O8_convertFromADMBase_Body(cGH const * restrict const cct DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_MP_O8_convertFromADMBase_Body(cGH const * restrict const cct CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_MP_O8_convertFromADMBase_Body(cGH const * restrict const cct CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_MP_O8_convertFromADMBase_Body(cGH const * restrict const cct CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_MP_O8_convertFromADMBase_Body(cGH const * restrict const cct /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_O8_convertFromADMBase, + LC_LOOP3VEC(ML_BSSN_MP_O8_convertFromADMBase, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -296,25 +295,25 @@ static void ML_BSSN_MP_O8_convertFromADMBase_Body(cGH const * restrict const cct CCTK_REAL_VEC gt33L = kmul(em4phi,g33); trKL = - kmadd(gu11,kxxL,kmadd(gu22,kyyL,kmadd(gu33,kzzL,kmul(kmadd(gu12,kxyL,kmadd(gu13,kxzL,kmul(gu23,kyzL))),ToReal(2))))); + kmadd(kxxL,gu11,kmadd(kyyL,gu22,kmadd(kzzL,gu33,kmul(kmadd(kxyL,gu12,kmadd(kxzL,gu13,kmul(kyzL,gu23))),ToReal(2))))); CCTK_REAL_VEC At11L = - kmul(em4phi,kmadd(g11,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kxxL)); + kmul(em4phi,kmadd(trKL,kmul(g11,ToReal(-0.333333333333333333333333333333)),kxxL)); CCTK_REAL_VEC At12L = - kmul(em4phi,kmadd(g12,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kxyL)); + kmul(em4phi,kmadd(trKL,kmul(g12,ToReal(-0.333333333333333333333333333333)),kxyL)); CCTK_REAL_VEC At13L = - kmul(em4phi,kmadd(g13,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kxzL)); + kmul(em4phi,kmadd(trKL,kmul(g13,ToReal(-0.333333333333333333333333333333)),kxzL)); CCTK_REAL_VEC At22L = - kmul(em4phi,kmadd(g22,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kyyL)); + kmul(em4phi,kmadd(trKL,kmul(g22,ToReal(-0.333333333333333333333333333333)),kyyL)); CCTK_REAL_VEC At23L = - kmul(em4phi,kmadd(g23,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kyzL)); + kmul(em4phi,kmadd(trKL,kmul(g23,ToReal(-0.333333333333333333333333333333)),kyzL)); CCTK_REAL_VEC At33L = - kmul(em4phi,kmadd(g33,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kzzL)); + kmul(em4phi,kmadd(trKL,kmul(g33,ToReal(-0.333333333333333333333333333333)),kzzL)); CCTK_REAL_VEC alphaL = alpL; @@ -324,104 +323,28 @@ static void ML_BSSN_MP_O8_convertFromADMBase_Body(cGH const * restrict const cct CCTK_REAL_VEC beta3L = betazL; - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - vec_store_nta_partial_hi(beta1[index],beta1L,elt_count); - vec_store_nta_partial_hi(beta2[index],beta2L,elt_count); - vec_store_nta_partial_hi(beta3[index],beta3L,elt_count); - vec_store_nta_partial_hi(gt11[index],gt11L,elt_count); - vec_store_nta_partial_hi(gt12[index],gt12L,elt_count); - vec_store_nta_partial_hi(gt13[index],gt13L,elt_count); - vec_store_nta_partial_hi(gt22[index],gt22L,elt_count); - vec_store_nta_partial_hi(gt23[index],gt23L,elt_count); - vec_store_nta_partial_hi(gt33[index],gt33L,elt_count); - vec_store_nta_partial_hi(phi[index],phiL,elt_count); - vec_store_nta_partial_hi(trK[index],trKL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - vec_store_nta_partial_lo(beta1[index],beta1L,elt_count); - vec_store_nta_partial_lo(beta2[index],beta2L,elt_count); - vec_store_nta_partial_lo(beta3[index],beta3L,elt_count); - vec_store_nta_partial_lo(gt11[index],gt11L,elt_count); - vec_store_nta_partial_lo(gt12[index],gt12L,elt_count); - vec_store_nta_partial_lo(gt13[index],gt13L,elt_count); - vec_store_nta_partial_lo(gt22[index],gt22L,elt_count); - vec_store_nta_partial_lo(gt23[index],gt23L,elt_count); - vec_store_nta_partial_lo(gt33[index],gt33L,elt_count); - vec_store_nta_partial_lo(phi[index],phiL,elt_count); - vec_store_nta_partial_lo(trK[index],trKL,elt_count); - break; - } - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); - vec_store_nta(beta1[index],beta1L); - vec_store_nta(beta2[index],beta2L); - vec_store_nta(beta3[index],beta3L); - vec_store_nta(gt11[index],gt11L); - vec_store_nta(gt12[index],gt12L); - vec_store_nta(gt13[index],gt13L); - vec_store_nta(gt22[index],gt22L); - vec_store_nta(gt23[index],gt23L); - vec_store_nta(gt33[index],gt33L); - vec_store_nta(phi[index],phiL); - vec_store_nta(trK[index],trKL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); + vec_store_nta_partial(beta1[index],beta1L); + vec_store_nta_partial(beta2[index],beta2L); + vec_store_nta_partial(beta3[index],beta3L); + vec_store_nta_partial(gt11[index],gt11L); + vec_store_nta_partial(gt12[index],gt12L); + vec_store_nta_partial(gt13[index],gt13L); + vec_store_nta_partial(gt22[index],gt22L); + vec_store_nta_partial(gt23[index],gt23L); + vec_store_nta_partial(gt33[index],gt33L); + vec_store_nta_partial(phi[index],phiL); + vec_store_nta_partial(trK[index],trKL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_O8_convertFromADMBase); + LC_ENDLOOP3VEC(ML_BSSN_MP_O8_convertFromADMBase); } extern "C" void ML_BSSN_MP_O8_convertFromADMBase(CCTK_ARGUMENTS) @@ -440,7 +363,17 @@ extern "C" void ML_BSSN_MP_O8_convertFromADMBase(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::curv","ADMBase::lapse","ADMBase::metric","ADMBase::shift","ML_BSSN_MP_O8::ML_curv","ML_BSSN_MP_O8::ML_lapse","ML_BSSN_MP_O8::ML_log_confac","ML_BSSN_MP_O8::ML_metric","ML_BSSN_MP_O8::ML_shift","ML_BSSN_MP_O8::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::curv", + "ADMBase::lapse", + "ADMBase::metric", + "ADMBase::shift", + "ML_BSSN_MP_O8::ML_curv", + "ML_BSSN_MP_O8::ML_lapse", + "ML_BSSN_MP_O8::ML_log_confac", + "ML_BSSN_MP_O8::ML_metric", + "ML_BSSN_MP_O8::ML_shift", + "ML_BSSN_MP_O8::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_O8_convertFromADMBase", 10, groups); switch(fdOrder) @@ -458,7 +391,7 @@ extern "C" void ML_BSSN_MP_O8_convertFromADMBase(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_MP_O8_convertFromADMBase_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_MP_O8_convertFromADMBase_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertFromADMBaseGamma.cc b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertFromADMBaseGamma.cc index 76896a8..a5a0820 100644 --- a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertFromADMBaseGamma.cc +++ b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertFromADMBaseGamma.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -46,8 +47,6 @@ static void ML_BSSN_MP_O8_convertFromADMBaseGamma_Body(cGH const * restrict cons DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -84,9 +83,9 @@ static void ML_BSSN_MP_O8_convertFromADMBaseGamma_Body(cGH const * restrict cons CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -105,14 +104,14 @@ static void ML_BSSN_MP_O8_convertFromADMBaseGamma_Body(cGH const * restrict cons CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -126,9 +125,9 @@ static void ML_BSSN_MP_O8_convertFromADMBaseGamma_Body(cGH const * restrict cons CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -209,7 +208,7 @@ static void ML_BSSN_MP_O8_convertFromADMBaseGamma_Body(cGH const * restrict cons /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_O8_convertFromADMBaseGamma, + LC_LOOP3VEC(ML_BSSN_MP_O8_convertFromADMBaseGamma, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -746,7 +745,8 @@ static void ML_BSSN_MP_O8_convertFromADMBaseGamma_Body(cGH const * restrict cons CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -754,12 +754,14 @@ static void ML_BSSN_MP_O8_convertFromADMBaseGamma_Body(cGH const * restrict cons CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC Gt111 = kmul(ToReal(0.5),kmadd(gtu11,JacPDstandardNth1gt11,knmsub(gtu12,JacPDstandardNth2gt11,kmsub(kmadd(gtu12,JacPDstandardNth1gt12,kmul(gtu13,JacPDstandardNth1gt13)),ToReal(2),kmul(gtu13,JacPDstandardNth3gt11))))); @@ -837,13 +839,13 @@ static void ML_BSSN_MP_O8_convertFromADMBaseGamma_Body(cGH const * restrict cons if (ShiftBCoeff*ShiftGammaCoeff != 0) { B1L = - kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta1,kmadd(beta2L,JacPDupwindNthAnti2beta1,kmadd(beta3L,JacPDupwindNthAnti3beta1,kmadd(JacPDupwindNthSymm1beta1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta1,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetaxL))); + kmul(INV(kmul(theta,ToReal(ShiftGammaCoeff))),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta1,kmadd(beta2L,JacPDupwindNthAnti2beta1,kmadd(beta3L,JacPDupwindNthAnti3beta1,kmadd(JacPDupwindNthSymm1beta1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta1,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetaxL)); B2L = - kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta2,kmadd(beta2L,JacPDupwindNthAnti2beta2,kmadd(beta3L,JacPDupwindNthAnti3beta2,kmadd(JacPDupwindNthSymm1beta2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta2,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetayL))); + kmul(INV(kmul(theta,ToReal(ShiftGammaCoeff))),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta2,kmadd(beta2L,JacPDupwindNthAnti2beta2,kmadd(beta3L,JacPDupwindNthAnti3beta2,kmadd(JacPDupwindNthSymm1beta2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta2,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetayL)); B3L = - kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetazL))); + kmul(INV(kmul(theta,ToReal(ShiftGammaCoeff))),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetazL)); } else { @@ -854,60 +856,17 @@ static void ML_BSSN_MP_O8_convertFromADMBaseGamma_Body(cGH const * restrict cons B3L = ToReal(0); } - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_MP_O8_convertFromADMBaseGamma); + LC_ENDLOOP3VEC(ML_BSSN_MP_O8_convertFromADMBaseGamma); } extern "C" void ML_BSSN_MP_O8_convertFromADMBaseGamma(CCTK_ARGUMENTS) @@ -926,7 +885,17 @@ extern "C" void ML_BSSN_MP_O8_convertFromADMBaseGamma(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN_MP_O8::ML_dtlapse","ML_BSSN_MP_O8::ML_dtshift","ML_BSSN_MP_O8::ML_Gamma","ML_BSSN_MP_O8::ML_lapse","ML_BSSN_MP_O8::ML_metric","ML_BSSN_MP_O8::ML_shift"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_MP_O8::ML_dtlapse", + "ML_BSSN_MP_O8::ML_dtshift", + "ML_BSSN_MP_O8::ML_Gamma", + "ML_BSSN_MP_O8::ML_lapse", + "ML_BSSN_MP_O8::ML_metric", + "ML_BSSN_MP_O8::ML_shift"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_O8_convertFromADMBaseGamma", 10, groups); switch(fdOrder) @@ -948,7 +917,7 @@ extern "C" void ML_BSSN_MP_O8_convertFromADMBaseGamma(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_MP_O8_convertFromADMBaseGamma_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_MP_O8_convertFromADMBaseGamma_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertToADMBase.cc b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertToADMBase.cc index 22bc331..f5b2043 100644 --- a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertToADMBase.cc +++ b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertToADMBase.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_MP_O8_convertToADMBase_Body(cGH const * restrict const cctkG DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_MP_O8_convertToADMBase_Body(cGH const * restrict const cctkG CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_MP_O8_convertToADMBase_Body(cGH const * restrict const cctkG CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_MP_O8_convertToADMBase_Body(cGH const * restrict const cctkG CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_MP_O8_convertToADMBase_Body(cGH const * restrict const cctkG /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_O8_convertToADMBase, + LC_LOOP3VEC(ML_BSSN_MP_O8_convertToADMBase, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -250,17 +249,17 @@ static void ML_BSSN_MP_O8_convertToADMBase_Body(cGH const * restrict const cctkG CCTK_REAL_VEC e4phi = IfThen(conformalMethod,INV(SQR(phiL)),kexp(kmul(phiL,ToReal(4)))); - gxxL = kmul(e4phi,gt11L); + gxxL = kmul(gt11L,e4phi); - gxyL = kmul(e4phi,gt12L); + gxyL = kmul(gt12L,e4phi); - gxzL = kmul(e4phi,gt13L); + gxzL = kmul(gt13L,e4phi); - gyyL = kmul(e4phi,gt22L); + gyyL = kmul(gt22L,e4phi); - gyzL = kmul(e4phi,gt23L); + gyzL = kmul(gt23L,e4phi); - gzzL = kmul(e4phi,gt33L); + gzzL = kmul(gt33L,e4phi); CCTK_REAL_VEC kxxL = kmadd(At11L,e4phi,kmul(gxxL,kmul(trKL,ToReal(0.333333333333333333333333333333)))); @@ -288,96 +287,26 @@ static void ML_BSSN_MP_O8_convertToADMBase_Body(cGH const * restrict const cctkG CCTK_REAL_VEC betazL = beta3L; - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alp[index],alpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(betax[index],betaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(betay[index],betayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(betaz[index],betazL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gxx[index],gxxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gxy[index],gxyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gxz[index],gxzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gyy[index],gyyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gyz[index],gyzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gzz[index],gzzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kxx[index],kxxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kxy[index],kxyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kxz[index],kxzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kyy[index],kyyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kyz[index],kyzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kzz[index],kzzL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alp[index],alpL,elt_count); - vec_store_nta_partial_hi(betax[index],betaxL,elt_count); - vec_store_nta_partial_hi(betay[index],betayL,elt_count); - vec_store_nta_partial_hi(betaz[index],betazL,elt_count); - vec_store_nta_partial_hi(gxx[index],gxxL,elt_count); - vec_store_nta_partial_hi(gxy[index],gxyL,elt_count); - vec_store_nta_partial_hi(gxz[index],gxzL,elt_count); - vec_store_nta_partial_hi(gyy[index],gyyL,elt_count); - vec_store_nta_partial_hi(gyz[index],gyzL,elt_count); - vec_store_nta_partial_hi(gzz[index],gzzL,elt_count); - vec_store_nta_partial_hi(kxx[index],kxxL,elt_count); - vec_store_nta_partial_hi(kxy[index],kxyL,elt_count); - vec_store_nta_partial_hi(kxz[index],kxzL,elt_count); - vec_store_nta_partial_hi(kyy[index],kyyL,elt_count); - vec_store_nta_partial_hi(kyz[index],kyzL,elt_count); - vec_store_nta_partial_hi(kzz[index],kzzL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alp[index],alpL,elt_count); - vec_store_nta_partial_lo(betax[index],betaxL,elt_count); - vec_store_nta_partial_lo(betay[index],betayL,elt_count); - vec_store_nta_partial_lo(betaz[index],betazL,elt_count); - vec_store_nta_partial_lo(gxx[index],gxxL,elt_count); - vec_store_nta_partial_lo(gxy[index],gxyL,elt_count); - vec_store_nta_partial_lo(gxz[index],gxzL,elt_count); - vec_store_nta_partial_lo(gyy[index],gyyL,elt_count); - vec_store_nta_partial_lo(gyz[index],gyzL,elt_count); - vec_store_nta_partial_lo(gzz[index],gzzL,elt_count); - vec_store_nta_partial_lo(kxx[index],kxxL,elt_count); - vec_store_nta_partial_lo(kxy[index],kxyL,elt_count); - vec_store_nta_partial_lo(kxz[index],kxzL,elt_count); - vec_store_nta_partial_lo(kyy[index],kyyL,elt_count); - vec_store_nta_partial_lo(kyz[index],kyzL,elt_count); - vec_store_nta_partial_lo(kzz[index],kzzL,elt_count); - break; - } - vec_store_nta(alp[index],alpL); - vec_store_nta(betax[index],betaxL); - vec_store_nta(betay[index],betayL); - vec_store_nta(betaz[index],betazL); - vec_store_nta(gxx[index],gxxL); - vec_store_nta(gxy[index],gxyL); - vec_store_nta(gxz[index],gxzL); - vec_store_nta(gyy[index],gyyL); - vec_store_nta(gyz[index],gyzL); - vec_store_nta(gzz[index],gzzL); - vec_store_nta(kxx[index],kxxL); - vec_store_nta(kxy[index],kxyL); - vec_store_nta(kxz[index],kxzL); - vec_store_nta(kyy[index],kyyL); - vec_store_nta(kyz[index],kyzL); - vec_store_nta(kzz[index],kzzL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alp[index],alpL); + vec_store_nta_partial(betax[index],betaxL); + vec_store_nta_partial(betay[index],betayL); + vec_store_nta_partial(betaz[index],betazL); + vec_store_nta_partial(gxx[index],gxxL); + vec_store_nta_partial(gxy[index],gxyL); + vec_store_nta_partial(gxz[index],gxzL); + vec_store_nta_partial(gyy[index],gyyL); + vec_store_nta_partial(gyz[index],gyzL); + vec_store_nta_partial(gzz[index],gzzL); + vec_store_nta_partial(kxx[index],kxxL); + vec_store_nta_partial(kxy[index],kxyL); + vec_store_nta_partial(kxz[index],kxzL); + vec_store_nta_partial(kyy[index],kyyL); + vec_store_nta_partial(kyz[index],kyzL); + vec_store_nta_partial(kzz[index],kzzL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_O8_convertToADMBase); + LC_ENDLOOP3VEC(ML_BSSN_MP_O8_convertToADMBase); } extern "C" void ML_BSSN_MP_O8_convertToADMBase(CCTK_ARGUMENTS) @@ -396,7 +325,17 @@ extern "C" void ML_BSSN_MP_O8_convertToADMBase(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::curv","ADMBase::lapse","ADMBase::metric","ADMBase::shift","ML_BSSN_MP_O8::ML_curv","ML_BSSN_MP_O8::ML_lapse","ML_BSSN_MP_O8::ML_log_confac","ML_BSSN_MP_O8::ML_metric","ML_BSSN_MP_O8::ML_shift","ML_BSSN_MP_O8::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::curv", + "ADMBase::lapse", + "ADMBase::metric", + "ADMBase::shift", + "ML_BSSN_MP_O8::ML_curv", + "ML_BSSN_MP_O8::ML_lapse", + "ML_BSSN_MP_O8::ML_log_confac", + "ML_BSSN_MP_O8::ML_metric", + "ML_BSSN_MP_O8::ML_shift", + "ML_BSSN_MP_O8::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_O8_convertToADMBase", 10, groups); switch(fdOrder) @@ -414,7 +353,7 @@ extern "C" void ML_BSSN_MP_O8_convertToADMBase(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_MP_O8_convertToADMBase_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_MP_O8_convertToADMBase_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertToADMBaseDtLapseShift.cc b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertToADMBaseDtLapseShift.cc index b804915..ceecbd6 100644 --- a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertToADMBaseDtLapseShift.cc +++ b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertToADMBaseDtLapseShift.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -43,8 +44,6 @@ static void ML_BSSN_MP_O8_convertToADMBaseDtLapseShift_Body(cGH const * restrict DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -81,9 +80,9 @@ static void ML_BSSN_MP_O8_convertToADMBaseDtLapseShift_Body(cGH const * restrict CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -102,14 +101,14 @@ static void ML_BSSN_MP_O8_convertToADMBaseDtLapseShift_Body(cGH const * restrict CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -123,9 +122,9 @@ static void ML_BSSN_MP_O8_convertToADMBaseDtLapseShift_Body(cGH const * restrict CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -206,7 +205,7 @@ static void ML_BSSN_MP_O8_convertToADMBaseDtLapseShift_Body(cGH const * restrict /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_O8_convertToADMBaseDtLapseShift, + LC_LOOP3VEC(ML_BSSN_MP_O8_convertToADMBaseDtLapseShift, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -814,7 +813,8 @@ static void ML_BSSN_MP_O8_convertToADMBaseDtLapseShift_Body(cGH const * restrict CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -822,15 +822,17 @@ static void ML_BSSN_MP_O8_convertToADMBaseDtLapseShift_Body(cGH const * restrict CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC eta = - kfmin(ToReal(1),kmul(INV(rL),ToReal(SpatialBetaDriverRadius))); + kmul(INV(kfmax(rL,ToReal(SpatialBetaDriverRadius))),ToReal(SpatialBetaDriverRadius)); CCTK_REAL_VEC theta = kfmin(ToReal(1),kexp(knmsub(rL,INV(ToReal(SpatialShiftGammaCoeffRadius)),ToReal(1)))); @@ -839,62 +841,22 @@ static void ML_BSSN_MP_O8_convertToADMBaseDtLapseShift_Body(cGH const * restrict kmsub(kmadd(beta1L,JacPDupwindNthAnti1alpha,kmadd(beta2L,JacPDupwindNthAnti2alpha,kmadd(beta3L,JacPDupwindNthAnti3alpha,kmadd(JacPDupwindNthSymm1alpha,kfabs(beta1L),kmadd(JacPDupwindNthSymm2alpha,kfabs(beta2L),kmul(JacPDupwindNthSymm3alpha,kfabs(beta3L))))))),ToReal(LapseAdvectionCoeff),kmul(kpow(alphaL,harmonicN),kmul(ToReal(harmonicF),kmadd(ksub(AL,trKL),ToReal(LapseACoeff),trKL)))); CCTK_REAL_VEC dtbetaxL = - kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta1,kmadd(beta2L,JacPDupwindNthAnti2beta1,kmadd(beta3L,JacPDupwindNthAnti3beta1,kmadd(JacPDupwindNthSymm1beta1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta1,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(kmadd(gtu11,JacPDstandardNth1alpha,kmadd(gtu12,JacPDstandardNth2alpha,kmul(gtu13,JacPDstandardNth3alpha))),kmul(phiL,ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth1gt11,SQR(gtu11),kmul(JacPDstandardNth1gt22,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu13,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu13,JacPDstandardNth1gt33,kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu22,JacPDstandardNth3gt22))))),kmadd(gtu11,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu12,JacPDstandardNth2gt11,kmadd(gtu13,JacPDstandardNth3gt11,kmadd(gtu23,kmul(JacPDstandardNth1gt23,ToReal(-2)),knmsub(gtu22,JacPDstandardNth1gt22,kmadd(kmadd(gtu12,JacPDstandardNth1gt12,kmadd(gtu13,JacPDstandardNth1gt13,kmul(gtu22,JacPDstandardNth2gt12))),ToReal(2),kmadd(gtu23,kmul(JacPDstandardNth2gt13,ToReal(2)),kmadd(gtu23,kmul(JacPDstandardNth3gt12,ToReal(2)),kmul(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33))))))))))),kmul(gtu12,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu23,kmul(JacPDstandardNth3gt22,ToReal(2)),kmadd(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33),kmul(gtu13,kmul(JacPDstandardNth1gt23,ToReal(4))))))))))))))))),kmul(theta,kmul(kadd(Xt1L,kmadd(beta1L,kmul(eta,ToReal(BetaDriver*(-1 - + - ShiftBCoeff))),kmul(ksub(B1L,Xt1L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))))); + kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta1,kmadd(beta2L,JacPDupwindNthAnti2beta1,kmadd(beta3L,JacPDupwindNthAnti3beta1,kmadd(JacPDupwindNthSymm1beta1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta1,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu11,JacPDstandardNth1alpha,kmadd(gtu12,JacPDstandardNth2alpha,kmul(gtu13,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth1gt11,SQR(gtu11),kmul(JacPDstandardNth1gt22,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu13,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu13,JacPDstandardNth1gt33,kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu22,JacPDstandardNth3gt22))))),kmadd(gtu11,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu12,JacPDstandardNth2gt11,kmadd(gtu13,JacPDstandardNth3gt11,kmadd(gtu23,kmul(JacPDstandardNth1gt23,ToReal(-2)),knmsub(gtu22,JacPDstandardNth1gt22,kmadd(kmadd(gtu12,JacPDstandardNth1gt12,kmadd(gtu13,JacPDstandardNth1gt13,kmul(gtu22,JacPDstandardNth2gt12))),ToReal(2),kmadd(gtu23,kmul(JacPDstandardNth2gt13,ToReal(2)),kmadd(gtu23,kmul(JacPDstandardNth3gt12,ToReal(2)),kmul(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33))))))))))),kmul(gtu12,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu23,kmul(JacPDstandardNth3gt22,ToReal(2)),kmadd(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33),kmul(gtu13,kmul(JacPDstandardNth1gt23,ToReal(4))))))))))))))))),kmul(theta,kmul(kadd(Xt1L,kmadd(ksub(B1L,Xt1L),ToReal(ShiftBCoeff),kmul(beta1L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))))); CCTK_REAL_VEC dtbetayL = - kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta2,kmadd(beta2L,JacPDupwindNthAnti2beta2,kmadd(beta3L,JacPDupwindNthAnti3beta2,kmadd(JacPDupwindNthSymm1beta2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta2,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(kmadd(gtu12,JacPDstandardNth1alpha,kmadd(gtu22,JacPDstandardNth2alpha,kmul(gtu23,JacPDstandardNth3alpha))),kmul(phiL,ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth2gt22,SQR(gtu22),kmul(JacPDstandardNth2gt11,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu23,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt13,kmadd(gtu13,JacPDstandardNth1gt33,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu11,JacPDstandardNth3gt11))))),kmadd(gtu22,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu23,JacPDstandardNth3gt22,kmadd(kmadd(gtu23,JacPDstandardNth2gt23,kmul(gtu13,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth3gt12,JacPDstandardNth2gt13)))),ToReal(2),kmadd(gtu11,kmsub(JacPDstandardNth1gt12,ToReal(2),JacPDstandardNth2gt11),kmul(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33))))))),kmul(gtu12,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu13,kmul(JacPDstandardNth3gt11,ToReal(2)),kmadd(gtu22,kmadd(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth2gt13,ToReal(4)))))))))))))))))),kmul(theta,kmul(kadd(Xt2L,kmadd(beta2L,kmul(eta,ToReal(BetaDriver*(-1 - + - ShiftBCoeff))),kmul(ksub(B2L,Xt2L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))))); + kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta2,kmadd(beta2L,JacPDupwindNthAnti2beta2,kmadd(beta3L,JacPDupwindNthAnti3beta2,kmadd(JacPDupwindNthSymm1beta2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta2,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu12,JacPDstandardNth1alpha,kmadd(gtu22,JacPDstandardNth2alpha,kmul(gtu23,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth2gt22,SQR(gtu22),kmul(JacPDstandardNth2gt11,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu23,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt13,kmadd(gtu13,JacPDstandardNth1gt33,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu11,JacPDstandardNth3gt11))))),kmadd(gtu22,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu23,JacPDstandardNth3gt22,kmadd(kmadd(gtu23,JacPDstandardNth2gt23,kmul(gtu13,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth3gt12,JacPDstandardNth2gt13)))),ToReal(2),kmadd(gtu11,kmsub(JacPDstandardNth1gt12,ToReal(2),JacPDstandardNth2gt11),kmul(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33))))))),kmul(gtu12,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu13,kmul(JacPDstandardNth3gt11,ToReal(2)),kmadd(gtu22,kmadd(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth2gt13,ToReal(4)))))))))))))))))),kmul(theta,kmul(kadd(Xt2L,kmadd(ksub(B2L,Xt2L),ToReal(ShiftBCoeff),kmul(beta2L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))))); CCTK_REAL_VEC dtbetazL = - kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(kmadd(gtu13,JacPDstandardNth1alpha,kmadd(gtu23,JacPDstandardNth2alpha,kmul(gtu33,JacPDstandardNth3alpha))),kmul(phiL,ToReal(-2)),kmul(alphaL,kmadd(phiL,kmul(kmadd(JacPDstandardNth3gt11,SQR(gtu13),kmul(JacPDstandardNth3gt22,SQR(gtu23))),ToReal(2)),kmadd(gtu23,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu33,JacPDstandardNth2gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt12,kmadd(gtu12,JacPDstandardNth1gt22,kmul(gtu33,JacPDstandardNth3gt23))),ToReal(2),kmul(gtu11,JacPDstandardNth2gt11)))))),kmadd(gtu33,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,knmsub(gtu22,JacPDstandardNth3gt22,kmadd(kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu12,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth2gt13,JacPDstandardNth3gt12)))),ToReal(2),kmul(gtu11,kmsub(JacPDstandardNth1gt13,ToReal(2),JacPDstandardNth3gt11))))))),kmul(gtu13,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu12,kmul(JacPDstandardNth2gt11,ToReal(2)),kmadd(gtu22,kmsub(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmadd(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth3gt12,ToReal(4)))))))))))))))))),kmul(theta,kmul(kadd(Xt3L,kmadd(beta3L,kmul(eta,ToReal(BetaDriver*(-1 - + - ShiftBCoeff))),kmul(ksub(B3L,Xt3L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))))); - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count); - break; - } - vec_store_nta(dtalp[index],dtalpL); - vec_store_nta(dtbetax[index],dtbetaxL); - vec_store_nta(dtbetay[index],dtbetayL); - vec_store_nta(dtbetaz[index],dtbetazL); + kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu13,JacPDstandardNth1alpha,kmadd(gtu23,JacPDstandardNth2alpha,kmul(gtu33,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmul(kmadd(JacPDstandardNth3gt11,SQR(gtu13),kmul(JacPDstandardNth3gt22,SQR(gtu23))),ToReal(2)),kmadd(gtu23,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu33,JacPDstandardNth2gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt12,kmadd(gtu12,JacPDstandardNth1gt22,kmul(gtu33,JacPDstandardNth3gt23))),ToReal(2),kmul(gtu11,JacPDstandardNth2gt11)))))),kmadd(gtu33,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,knmsub(gtu22,JacPDstandardNth3gt22,kmadd(kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu12,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth2gt13,JacPDstandardNth3gt12)))),ToReal(2),kmul(gtu11,kmsub(JacPDstandardNth1gt13,ToReal(2),JacPDstandardNth3gt11))))))),kmul(gtu13,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu12,kmul(JacPDstandardNth2gt11,ToReal(2)),kmadd(gtu22,kmsub(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmadd(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth3gt12,ToReal(4)))))))))))))))))),kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))))); + + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(dtalp[index],dtalpL); + vec_store_nta_partial(dtbetax[index],dtbetaxL); + vec_store_nta_partial(dtbetay[index],dtbetayL); + vec_store_nta_partial(dtbetaz[index],dtbetazL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_O8_convertToADMBaseDtLapseShift); + LC_ENDLOOP3VEC(ML_BSSN_MP_O8_convertToADMBaseDtLapseShift); } extern "C" void ML_BSSN_MP_O8_convertToADMBaseDtLapseShift(CCTK_ARGUMENTS) @@ -913,7 +875,19 @@ extern "C" void ML_BSSN_MP_O8_convertToADMBaseDtLapseShift(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN_MP_O8::ML_dtlapse","ML_BSSN_MP_O8::ML_dtshift","ML_BSSN_MP_O8::ML_Gamma","ML_BSSN_MP_O8::ML_lapse","ML_BSSN_MP_O8::ML_log_confac","ML_BSSN_MP_O8::ML_metric","ML_BSSN_MP_O8::ML_shift","ML_BSSN_MP_O8::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_MP_O8::ML_dtlapse", + "ML_BSSN_MP_O8::ML_dtshift", + "ML_BSSN_MP_O8::ML_Gamma", + "ML_BSSN_MP_O8::ML_lapse", + "ML_BSSN_MP_O8::ML_log_confac", + "ML_BSSN_MP_O8::ML_metric", + "ML_BSSN_MP_O8::ML_shift", + "ML_BSSN_MP_O8::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_O8_convertToADMBaseDtLapseShift", 12, groups); switch(fdOrder) @@ -935,7 +909,7 @@ extern "C" void ML_BSSN_MP_O8_convertToADMBaseDtLapseShift(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_MP_O8_convertToADMBaseDtLapseShift_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_MP_O8_convertToADMBaseDtLapseShift_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary.cc b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary.cc index 4354c9a..2d6b9cf 100644 --- a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary.cc +++ b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -43,8 +44,6 @@ static void ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -81,9 +80,9 @@ static void ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -102,14 +101,14 @@ static void ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -123,9 +122,9 @@ static void ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -206,7 +205,7 @@ static void ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary, + LC_LOOP3VEC(ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -252,7 +251,7 @@ static void ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * /* Calculate temporaries and grid functions */ CCTK_REAL_VEC eta = - kfmin(ToReal(1),kmul(INV(rL),ToReal(SpatialBetaDriverRadius))); + kmul(INV(kfmax(rL,ToReal(SpatialBetaDriverRadius))),ToReal(SpatialBetaDriverRadius)); CCTK_REAL_VEC theta = kfmin(ToReal(1),kexp(knmsub(rL,INV(ToReal(SpatialShiftGammaCoeffRadius)),ToReal(1)))); @@ -275,60 +274,23 @@ static void ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * else { dtbetaxL = - kmul(theta,kmul(kadd(Xt1L,kmadd(beta1L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B1L,Xt1L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt1L,kmadd(ksub(B1L,Xt1L),ToReal(ShiftBCoeff),kmul(beta1L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); dtbetayL = - kmul(theta,kmul(kadd(Xt2L,kmadd(beta2L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B2L,Xt2L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt2L,kmadd(ksub(B2L,Xt2L),ToReal(ShiftBCoeff),kmul(beta2L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); dtbetazL = - kmul(theta,kmul(kadd(Xt3L,kmadd(beta3L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B3L,Xt3L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count); - continue; + kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); } - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count); - break; - } - vec_store_nta(dtalp[index],dtalpL); - vec_store_nta(dtbetax[index],dtbetaxL); - vec_store_nta(dtbetay[index],dtbetayL); - vec_store_nta(dtbetaz[index],dtbetazL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(dtalp[index],dtalpL); + vec_store_nta_partial(dtbetax[index],dtbetaxL); + vec_store_nta_partial(dtbetay[index],dtbetayL); + vec_store_nta_partial(dtbetaz[index],dtbetazL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary); + LC_ENDLOOP3VEC(ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary); } extern "C" void ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary(CCTK_ARGUMENTS) @@ -347,7 +309,17 @@ extern "C" void ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary(CCTK_ARGUMENT return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN_MP_O8::ML_dtlapse","ML_BSSN_MP_O8::ML_dtshift","ML_BSSN_MP_O8::ML_Gamma","ML_BSSN_MP_O8::ML_lapse","ML_BSSN_MP_O8::ML_shift","ML_BSSN_MP_O8::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_MP_O8::ML_dtlapse", + "ML_BSSN_MP_O8::ML_dtshift", + "ML_BSSN_MP_O8::ML_Gamma", + "ML_BSSN_MP_O8::ML_lapse", + "ML_BSSN_MP_O8::ML_shift", + "ML_BSSN_MP_O8::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary", 10, groups); switch(fdOrder) @@ -365,7 +337,7 @@ extern "C" void ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary(CCTK_ARGUMENT break; } - GenericFD_LoopOverBoundaryWithGhosts(cctkGH, &ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary_Body); + GenericFD_LoopOverBoundaryWithGhosts(cctkGH, ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift.cc b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift.cc index 6e61e8e..e299e55 100644 --- a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift.cc +++ b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift_Body(cGH const * rest DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift_Body(cGH const * rest CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift_Body(cGH const * rest CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift_Body(cGH const * rest CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift_Body(cGH const * rest /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift, + LC_LOOP3VEC(ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -237,7 +236,7 @@ static void ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift_Body(cGH const * rest /* Calculate temporaries and grid functions */ CCTK_REAL_VEC eta = - kfmin(ToReal(1),kmul(INV(rL),ToReal(SpatialBetaDriverRadius))); + kmul(INV(kfmax(rL,ToReal(SpatialBetaDriverRadius))),ToReal(SpatialBetaDriverRadius)); CCTK_REAL_VEC theta = kfmin(ToReal(1),kexp(knmsub(rL,INV(ToReal(SpatialShiftGammaCoeffRadius)),ToReal(1)))); @@ -260,60 +259,23 @@ static void ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift_Body(cGH const * rest else { dtbetaxL = - kmul(theta,kmul(kadd(Xt1L,kmadd(beta1L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B1L,Xt1L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt1L,kmadd(ksub(B1L,Xt1L),ToReal(ShiftBCoeff),kmul(beta1L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); dtbetayL = - kmul(theta,kmul(kadd(Xt2L,kmadd(beta2L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B2L,Xt2L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt2L,kmadd(ksub(B2L,Xt2L),ToReal(ShiftBCoeff),kmul(beta2L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); dtbetazL = - kmul(theta,kmul(kadd(Xt3L,kmadd(beta3L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B3L,Xt3L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count); - continue; + kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); } - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count); - break; - } - vec_store_nta(dtalp[index],dtalpL); - vec_store_nta(dtbetax[index],dtbetaxL); - vec_store_nta(dtbetay[index],dtbetayL); - vec_store_nta(dtbetaz[index],dtbetazL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(dtalp[index],dtalpL); + vec_store_nta_partial(dtbetax[index],dtbetaxL); + vec_store_nta_partial(dtbetay[index],dtbetayL); + vec_store_nta_partial(dtbetaz[index],dtbetazL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift); + LC_ENDLOOP3VEC(ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift); } extern "C" void ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS) @@ -332,7 +294,17 @@ extern "C" void ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN_MP_O8::ML_dtlapse","ML_BSSN_MP_O8::ML_dtshift","ML_BSSN_MP_O8::ML_Gamma","ML_BSSN_MP_O8::ML_lapse","ML_BSSN_MP_O8::ML_shift","ML_BSSN_MP_O8::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_MP_O8::ML_dtlapse", + "ML_BSSN_MP_O8::ML_dtshift", + "ML_BSSN_MP_O8::ML_Gamma", + "ML_BSSN_MP_O8::ML_lapse", + "ML_BSSN_MP_O8::ML_shift", + "ML_BSSN_MP_O8::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift", 10, groups); switch(fdOrder) @@ -350,7 +322,7 @@ extern "C" void ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_enforce.cc b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_enforce.cc index eb47e9b..03a2d5a 100644 --- a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_enforce.cc +++ b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_enforce.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_MP_O8_enforce_Body(cGH const * restrict const cctkGH, int co DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_MP_O8_enforce_Body(cGH const * restrict const cctkGH, int co CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_MP_O8_enforce_Body(cGH const * restrict const cctkGH, int co CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_MP_O8_enforce_Body(cGH const * restrict const cctkGH, int co CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_MP_O8_enforce_Body(cGH const * restrict const cctkGH, int co /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_O8_enforce, + LC_LOOP3VEC(ML_BSSN_MP_O8_enforce, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -238,7 +237,8 @@ static void ML_BSSN_MP_O8_enforce_Body(cGH const * restrict const cctkGH, int co /* Calculate temporaries and grid functions */ CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -246,12 +246,14 @@ static void ML_BSSN_MP_O8_enforce_Body(cGH const * restrict const cctkGH, int co CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC trAt = kmadd(At11L,gtu11,kmadd(At22L,gtu22,kmadd(At33L,gtu33,kmul(kmadd(At12L,gtu12,kmadd(At13L,gtu13,kmul(At23L,gtu23))),ToReal(2))))); @@ -276,60 +278,17 @@ static void ML_BSSN_MP_O8_enforce_Body(cGH const * restrict const cctkGH, int co alphaL = kfmax(alphaL,ToReal(MinimumLapse)); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - break; - } - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); } - LC_ENDLOOP3VEC (ML_BSSN_MP_O8_enforce); + LC_ENDLOOP3VEC(ML_BSSN_MP_O8_enforce); } extern "C" void ML_BSSN_MP_O8_enforce(CCTK_ARGUMENTS) @@ -348,7 +307,10 @@ extern "C" void ML_BSSN_MP_O8_enforce(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP_O8::ML_curv","ML_BSSN_MP_O8::ML_lapse","ML_BSSN_MP_O8::ML_metric"}; + const char *const groups[] = { + "ML_BSSN_MP_O8::ML_curv", + "ML_BSSN_MP_O8::ML_lapse", + "ML_BSSN_MP_O8::ML_metric"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_O8_enforce", 3, groups); switch(fdOrder) @@ -366,7 +328,7 @@ extern "C" void ML_BSSN_MP_O8_enforce(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_MP_O8_enforce_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_MP_O8_enforce_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP_O8/src/make.code.defn b/ML_BSSN_MP_O8/src/make.code.defn index 16b178f..7492ae5 100644 --- a/ML_BSSN_MP_O8/src/make.code.defn +++ b/ML_BSSN_MP_O8/src/make.code.defn @@ -1,3 +1,3 @@ # File produced by Kranc -SRCS = Startup.cc RegisterMoL.cc RegisterSymmetries.cc ML_BSSN_MP_O8_Minkowski.cc ML_BSSN_MP_O8_convertFromADMBase.cc ML_BSSN_MP_O8_InitGamma.cc ML_BSSN_MP_O8_convertFromADMBaseGamma.cc ML_BSSN_MP_O8_RHS1.cc ML_BSSN_MP_O8_RHS2.cc ML_BSSN_MP_O8_Dissipation.cc ML_BSSN_MP_O8_Advect.cc ML_BSSN_MP_O8_InitRHS.cc ML_BSSN_MP_O8_RHSStaticBoundary.cc ML_BSSN_MP_O8_enforce.cc ML_BSSN_MP_O8_boundary.cc ML_BSSN_MP_O8_convertToADMBase.cc ML_BSSN_MP_O8_convertToADMBaseDtLapseShift.cc ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary.cc ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift.cc ML_BSSN_MP_O8_constraints1.cc ML_BSSN_MP_O8_constraints2.cc Boundaries.cc +SRCS = Startup.cc RegisterSymmetries.cc RegisterMoL.cc ML_BSSN_MP_O8_Minkowski.cc ML_BSSN_MP_O8_convertFromADMBase.cc ML_BSSN_MP_O8_InitGamma.cc ML_BSSN_MP_O8_convertFromADMBaseGamma.cc ML_BSSN_MP_O8_RHS1.cc ML_BSSN_MP_O8_RHS2.cc ML_BSSN_MP_O8_Dissipation.cc ML_BSSN_MP_O8_Advect.cc ML_BSSN_MP_O8_InitRHS.cc ML_BSSN_MP_O8_RHSStaticBoundary.cc ML_BSSN_MP_O8_enforce.cc ML_BSSN_MP_O8_boundary.cc ML_BSSN_MP_O8_convertToADMBase.cc ML_BSSN_MP_O8_convertToADMBaseDtLapseShift.cc ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary.cc ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift.cc ML_BSSN_MP_O8_constraints1.cc ML_BSSN_MP_O8_constraints2.cc Boundaries.cc diff --git a/ML_BSSN_O2/param.ccl b/ML_BSSN_O2/param.ccl index caaefef..3fdf2b9 100644 --- a/ML_BSSN_O2/param.ccl +++ b/ML_BSSN_O2/param.ccl @@ -90,7 +90,7 @@ CCTK_REAL BetaDriver "BetaDriver" } 0 restricted: -CCTK_REAL LapseAdvectionCoeff "Factor in front of the shift advection terms in 1+log" +CCTK_REAL LapseAdvectionCoeff "Factor in front of the lapse advection terms in 1+log" { "*:*" :: "" } 1 @@ -237,6 +237,12 @@ CCTK_INT rhs_timelevels "Number of active RHS timelevels" STEERABLE=RECOVER } 1 restricted: +CCTK_INT other_timelevels "Number of active timelevels for non-evolved grid functions" STEERABLE=RECOVER +{ + 0:3 :: "" +} 1 + +restricted: CCTK_INT ML_BSSN_O2_Minkowski_calc_every "ML_BSSN_O2_Minkowski_calc_every" STEERABLE=ALWAYS { *:* :: "" diff --git a/ML_BSSN_O2/schedule.ccl b/ML_BSSN_O2/schedule.ccl index 0b05f01..b607ca6 100644 --- a/ML_BSSN_O2/schedule.ccl +++ b/ML_BSSN_O2/schedule.ccl @@ -1,15 +1,30 @@ # File produced by Kranc -STORAGE: ML_cons_detg[1] +if (other_timelevels == 1) +{ + STORAGE: ML_cons_detg[1] +} -STORAGE: ML_cons_Gamma[1] +if (other_timelevels == 1) +{ + STORAGE: ML_cons_Gamma[1] +} -STORAGE: ML_cons_traceA[1] +if (other_timelevels == 1) +{ + STORAGE: ML_cons_traceA[1] +} -STORAGE: ML_Ham[1] +if (other_timelevels == 1) +{ + STORAGE: ML_Ham[1] +} -STORAGE: ML_mom[1] +if (other_timelevels == 1) +{ + STORAGE: ML_mom[1] +} if (timelevels == 1) { @@ -251,12 +266,6 @@ schedule ML_BSSN_O2_Startup at STARTUP OPTIONS: meta } "create banner" -schedule ML_BSSN_O2_RegisterVars in MoL_Register -{ - LANG: C - OPTIONS: meta -} "Register Variables for MoL" - schedule ML_BSSN_O2_RegisterSymmetries in SymmetryRegister { LANG: C @@ -269,6 +278,15 @@ if (CCTK_EQUALS(my_initial_data, "Minkowski")) schedule ML_BSSN_O2_Minkowski IN ADMBase_InitialData { LANG: C + WRITES: ML_BSSN_O2::ML_curv + WRITES: ML_BSSN_O2::ML_dtlapse + WRITES: ML_BSSN_O2::ML_dtshift + WRITES: ML_BSSN_O2::ML_Gamma + WRITES: ML_BSSN_O2::ML_lapse + WRITES: ML_BSSN_O2::ML_log_confac + WRITES: ML_BSSN_O2::ML_metric + WRITES: ML_BSSN_O2::ML_shift + WRITES: ML_BSSN_O2::ML_trace_curv } "ML_BSSN_O2_Minkowski" } @@ -278,6 +296,18 @@ if (CCTK_EQUALS(my_initial_data, "ADMBase")) schedule ML_BSSN_O2_convertFromADMBase AT initial AFTER ADMBase_PostInitial { LANG: C + READS: ADMBase::curv + READS: ADMBase::lapse + READS: ADMBase::metric + READS: ADMBase::shift + READS: ML_BSSN_O2::ML_log_confac + READS: ML_BSSN_O2::ML_trace_curv + WRITES: ML_BSSN_O2::ML_curv + WRITES: ML_BSSN_O2::ML_lapse + WRITES: ML_BSSN_O2::ML_log_confac + WRITES: ML_BSSN_O2::ML_metric + WRITES: ML_BSSN_O2::ML_shift + WRITES: ML_BSSN_O2::ML_trace_curv } "ML_BSSN_O2_convertFromADMBase" } @@ -287,6 +317,9 @@ if (CCTK_EQUALS(my_initial_data, "ADMBase")) schedule ML_BSSN_O2_InitGamma AT initial BEFORE ML_BSSN_O2_convertFromADMBaseGamma { LANG: C + WRITES: ML_BSSN_O2::ML_dtlapse + WRITES: ML_BSSN_O2::ML_dtshift + WRITES: ML_BSSN_O2::ML_Gamma } "ML_BSSN_O2_InitGamma" } @@ -299,17 +332,62 @@ if (CCTK_EQUALS(my_initial_data, "ADMBase")) SYNC: ML_dtlapse SYNC: ML_dtshift SYNC: ML_Gamma + READS: ADMBase::dtlapse + READS: ADMBase::dtshift + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_O2::ML_lapse + READS: ML_BSSN_O2::ML_metric + READS: ML_BSSN_O2::ML_shift + WRITES: ML_BSSN_O2::ML_dtlapse + WRITES: ML_BSSN_O2::ML_dtshift + WRITES: ML_BSSN_O2::ML_Gamma } "ML_BSSN_O2_convertFromADMBaseGamma" } schedule ML_BSSN_O2_RHS1 IN ML_BSSN_O2_evolCalcGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_O2::ML_curv + READS: ML_BSSN_O2::ML_dtlapse + READS: ML_BSSN_O2::ML_dtshift + READS: ML_BSSN_O2::ML_Gamma + READS: ML_BSSN_O2::ML_lapse + READS: ML_BSSN_O2::ML_log_confac + READS: ML_BSSN_O2::ML_metric + READS: ML_BSSN_O2::ML_shift + READS: ML_BSSN_O2::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN_O2::ML_dtlapserhs + WRITES: ML_BSSN_O2::ML_dtshiftrhs + WRITES: ML_BSSN_O2::ML_Gammarhs + WRITES: ML_BSSN_O2::ML_lapserhs + WRITES: ML_BSSN_O2::ML_log_confacrhs + WRITES: ML_BSSN_O2::ML_metricrhs + WRITES: ML_BSSN_O2::ML_shiftrhs + WRITES: ML_BSSN_O2::ML_trace_curvrhs } "ML_BSSN_O2_RHS1" schedule ML_BSSN_O2_RHS2 IN ML_BSSN_O2_evolCalcGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_O2::ML_curv + READS: ML_BSSN_O2::ML_Gamma + READS: ML_BSSN_O2::ML_lapse + READS: ML_BSSN_O2::ML_log_confac + READS: ML_BSSN_O2::ML_metric + READS: ML_BSSN_O2::ML_shift + READS: ML_BSSN_O2::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN_O2::ML_curvrhs } "ML_BSSN_O2_RHS2" @@ -318,17 +396,80 @@ if (CCTK_EQUALS(apply_dissipation, "always")) schedule ML_BSSN_O2_Dissipation IN ML_BSSN_O2_evolCalcGroup AFTER (ML_BSSN_O2_RHS1 ML_BSSN_O2_RHS2) { LANG: C + READS: ML_BSSN_O2::ML_curv + READS: ML_BSSN_O2::ML_curvrhs + READS: ML_BSSN_O2::ML_dtlapse + READS: ML_BSSN_O2::ML_dtlapserhs + READS: ML_BSSN_O2::ML_dtshift + READS: ML_BSSN_O2::ML_dtshiftrhs + READS: ML_BSSN_O2::ML_Gamma + READS: ML_BSSN_O2::ML_Gammarhs + READS: ML_BSSN_O2::ML_lapse + READS: ML_BSSN_O2::ML_lapserhs + READS: ML_BSSN_O2::ML_log_confac + READS: ML_BSSN_O2::ML_log_confacrhs + READS: ML_BSSN_O2::ML_metric + READS: ML_BSSN_O2::ML_metricrhs + READS: ML_BSSN_O2::ML_shift + READS: ML_BSSN_O2::ML_shiftrhs + READS: ML_BSSN_O2::ML_trace_curv + READS: ML_BSSN_O2::ML_trace_curvrhs + WRITES: ML_BSSN_O2::ML_curvrhs + WRITES: ML_BSSN_O2::ML_dtlapserhs + WRITES: ML_BSSN_O2::ML_dtshiftrhs + WRITES: ML_BSSN_O2::ML_Gammarhs + WRITES: ML_BSSN_O2::ML_lapserhs + WRITES: ML_BSSN_O2::ML_log_confacrhs + WRITES: ML_BSSN_O2::ML_metricrhs + WRITES: ML_BSSN_O2::ML_shiftrhs + WRITES: ML_BSSN_O2::ML_trace_curvrhs } "ML_BSSN_O2_Dissipation" } schedule ML_BSSN_O2_Advect IN ML_BSSN_O2_evolCalcGroup AFTER (ML_BSSN_O2_RHS1 ML_BSSN_O2_RHS2) { LANG: C + READS: ML_BSSN_O2::ML_curv + READS: ML_BSSN_O2::ML_curvrhs + READS: ML_BSSN_O2::ML_dtlapse + READS: ML_BSSN_O2::ML_dtlapserhs + READS: ML_BSSN_O2::ML_dtshift + READS: ML_BSSN_O2::ML_dtshiftrhs + READS: ML_BSSN_O2::ML_Gamma + READS: ML_BSSN_O2::ML_Gammarhs + READS: ML_BSSN_O2::ML_lapse + READS: ML_BSSN_O2::ML_lapserhs + READS: ML_BSSN_O2::ML_log_confac + READS: ML_BSSN_O2::ML_log_confacrhs + READS: ML_BSSN_O2::ML_metric + READS: ML_BSSN_O2::ML_metricrhs + READS: ML_BSSN_O2::ML_shift + READS: ML_BSSN_O2::ML_shiftrhs + READS: ML_BSSN_O2::ML_trace_curv + READS: ML_BSSN_O2::ML_trace_curvrhs + WRITES: ML_BSSN_O2::ML_curvrhs + WRITES: ML_BSSN_O2::ML_dtlapserhs + WRITES: ML_BSSN_O2::ML_dtshiftrhs + WRITES: ML_BSSN_O2::ML_Gammarhs + WRITES: ML_BSSN_O2::ML_lapserhs + WRITES: ML_BSSN_O2::ML_log_confacrhs + WRITES: ML_BSSN_O2::ML_metricrhs + WRITES: ML_BSSN_O2::ML_shiftrhs + WRITES: ML_BSSN_O2::ML_trace_curvrhs } "ML_BSSN_O2_Advect" schedule ML_BSSN_O2_InitRHS AT analysis BEFORE ML_BSSN_O2_evolCalcGroup { LANG: C + WRITES: ML_BSSN_O2::ML_curvrhs + WRITES: ML_BSSN_O2::ML_dtlapserhs + WRITES: ML_BSSN_O2::ML_dtshiftrhs + WRITES: ML_BSSN_O2::ML_Gammarhs + WRITES: ML_BSSN_O2::ML_lapserhs + WRITES: ML_BSSN_O2::ML_log_confacrhs + WRITES: ML_BSSN_O2::ML_metricrhs + WRITES: ML_BSSN_O2::ML_shiftrhs + WRITES: ML_BSSN_O2::ML_trace_curvrhs } "ML_BSSN_O2_InitRHS" @@ -337,12 +478,26 @@ if (CCTK_EQUALS(my_rhs_boundary_condition, "static")) schedule ML_BSSN_O2_RHSStaticBoundary IN MoL_CalcRHS { LANG: C + WRITES: ML_BSSN_O2::ML_curvrhs + WRITES: ML_BSSN_O2::ML_dtlapserhs + WRITES: ML_BSSN_O2::ML_dtshiftrhs + WRITES: ML_BSSN_O2::ML_Gammarhs + WRITES: ML_BSSN_O2::ML_lapserhs + WRITES: ML_BSSN_O2::ML_log_confacrhs + WRITES: ML_BSSN_O2::ML_metricrhs + WRITES: ML_BSSN_O2::ML_shiftrhs + WRITES: ML_BSSN_O2::ML_trace_curvrhs } "ML_BSSN_O2_RHSStaticBoundary" } schedule ML_BSSN_O2_enforce IN MoL_PostStepModify { LANG: C + READS: ML_BSSN_O2::ML_curv + READS: ML_BSSN_O2::ML_lapse + READS: ML_BSSN_O2::ML_metric + WRITES: ML_BSSN_O2::ML_curv + WRITES: ML_BSSN_O2::ML_lapse } "ML_BSSN_O2_enforce" @@ -351,12 +506,32 @@ if (CCTK_EQUALS(my_boundary_condition, "Minkowski")) schedule ML_BSSN_O2_boundary IN MoL_PostStep { LANG: C + WRITES: ML_BSSN_O2::ML_curv + WRITES: ML_BSSN_O2::ML_dtlapse + WRITES: ML_BSSN_O2::ML_dtshift + WRITES: ML_BSSN_O2::ML_Gamma + WRITES: ML_BSSN_O2::ML_lapse + WRITES: ML_BSSN_O2::ML_log_confac + WRITES: ML_BSSN_O2::ML_metric + WRITES: ML_BSSN_O2::ML_shift + WRITES: ML_BSSN_O2::ML_trace_curv } "ML_BSSN_O2_boundary" } schedule ML_BSSN_O2_convertToADMBase IN ML_BSSN_O2_convertToADMBaseGroup { LANG: C + READS: ADMBase::metric + READS: ML_BSSN_O2::ML_curv + READS: ML_BSSN_O2::ML_lapse + READS: ML_BSSN_O2::ML_log_confac + READS: ML_BSSN_O2::ML_metric + READS: ML_BSSN_O2::ML_shift + READS: ML_BSSN_O2::ML_trace_curv + WRITES: ADMBase::curv + WRITES: ADMBase::lapse + WRITES: ADMBase::metric + WRITES: ADMBase::shift } "ML_BSSN_O2_convertToADMBase" @@ -367,6 +542,18 @@ if (CCTK_EQUALS(dt_lapse_shift_method, "correct")) LANG: C SYNC: ADMBase::dtlapse SYNC: ADMBase::dtshift + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_O2::ML_dtlapse + READS: ML_BSSN_O2::ML_dtshift + READS: ML_BSSN_O2::ML_Gamma + READS: ML_BSSN_O2::ML_lapse + READS: ML_BSSN_O2::ML_log_confac + READS: ML_BSSN_O2::ML_metric + READS: ML_BSSN_O2::ML_shift + READS: ML_BSSN_O2::ML_trace_curv + WRITES: ADMBase::dtlapse + WRITES: ADMBase::dtshift } "ML_BSSN_O2_convertToADMBaseDtLapseShift" } @@ -376,6 +563,17 @@ if (CCTK_EQUALS(dt_lapse_shift_method, "correct")) schedule ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary IN ML_BSSN_O2_convertToADMBaseGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_O2::ML_dtlapse + READS: ML_BSSN_O2::ML_dtshift + READS: ML_BSSN_O2::ML_Gamma + READS: ML_BSSN_O2::ML_lapse + READS: ML_BSSN_O2::ML_metric + READS: ML_BSSN_O2::ML_shift + READS: ML_BSSN_O2::ML_trace_curv + WRITES: ADMBase::dtlapse + WRITES: ADMBase::dtshift } "ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary" } @@ -385,6 +583,17 @@ if (CCTK_EQUALS(dt_lapse_shift_method, "noLapseShiftAdvection")) schedule ML_BSSN_O2_convertToADMBaseFakeDtLapseShift IN ML_BSSN_O2_convertToADMBaseGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_O2::ML_dtlapse + READS: ML_BSSN_O2::ML_dtshift + READS: ML_BSSN_O2::ML_Gamma + READS: ML_BSSN_O2::ML_lapse + READS: ML_BSSN_O2::ML_metric + READS: ML_BSSN_O2::ML_shift + READS: ML_BSSN_O2::ML_trace_curv + WRITES: ADMBase::dtlapse + WRITES: ADMBase::dtshift } "ML_BSSN_O2_convertToADMBaseFakeDtLapseShift" } @@ -396,6 +605,17 @@ schedule group ML_BSSN_O2_constraints1_group in MoL_PseudoEvolution after MoL_Po schedule ML_BSSN_O2_constraints1 in ML_BSSN_O2_constraints1_group { LANG: C + READS: ML_BSSN_O2::ML_curv + READS: ML_BSSN_O2::ML_Gamma + READS: ML_BSSN_O2::ML_lapse + READS: ML_BSSN_O2::ML_log_confac + READS: ML_BSSN_O2::ML_metric + READS: ML_BSSN_O2::ML_shift + READS: ML_BSSN_O2::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN_O2::ML_Ham } "ML_BSSN_O2_constraints1" schedule ML_BSSN_O2_constraints1_SelectBCs in ML_BSSN_O2_constraints1_bc_group @@ -428,6 +648,20 @@ schedule group ML_BSSN_O2_constraints2_group in MoL_PseudoEvolution after MoL_Po schedule ML_BSSN_O2_constraints2 in ML_BSSN_O2_constraints2_group { LANG: C + READS: ML_BSSN_O2::ML_curv + READS: ML_BSSN_O2::ML_Gamma + READS: ML_BSSN_O2::ML_lapse + READS: ML_BSSN_O2::ML_log_confac + READS: ML_BSSN_O2::ML_metric + READS: ML_BSSN_O2::ML_shift + READS: ML_BSSN_O2::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN_O2::ML_cons_detg + WRITES: ML_BSSN_O2::ML_cons_Gamma + WRITES: ML_BSSN_O2::ML_cons_traceA + WRITES: ML_BSSN_O2::ML_mom } "ML_BSSN_O2_constraints2" schedule ML_BSSN_O2_constraints2_SelectBCs in ML_BSSN_O2_constraints2_bc_group @@ -476,6 +710,12 @@ schedule ML_BSSN_O2_CheckBoundaries at BASEGRID OPTIONS: meta } "check boundaries treatment" +schedule ML_BSSN_O2_RegisterVars in MoL_Register +{ + LANG: C + OPTIONS: meta +} "Register Variables for MoL" + schedule group ApplyBCs as ML_BSSN_O2_ApplyBCs in MoL_PostStep after ML_BSSN_O2_SelectBoundConds { # no language specified diff --git a/ML_BSSN_O2/src/ML_BSSN_O2_Advect.cc b/ML_BSSN_O2/src/ML_BSSN_O2_Advect.cc index 48b0610..e0c80a1 100644 --- a/ML_BSSN_O2/src/ML_BSSN_O2_Advect.cc +++ b/ML_BSSN_O2/src/ML_BSSN_O2_Advect.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -64,8 +65,6 @@ static void ML_BSSN_O2_Advect_Body(cGH const * restrict const cctkGH, int const DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -102,9 +101,9 @@ static void ML_BSSN_O2_Advect_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -123,14 +122,14 @@ static void ML_BSSN_O2_Advect_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -144,9 +143,9 @@ static void ML_BSSN_O2_Advect_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -227,7 +226,7 @@ static void ML_BSSN_O2_Advect_Body(cGH const * restrict const cctkGH, int const /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O2_Advect, + LC_LOOP3VEC(ML_BSSN_O2_Advect, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1986,7 +1985,7 @@ static void ML_BSSN_O2_Advect_Body(cGH const * restrict const cctkGH, int const } phirhsL = - kmadd(beta1L,JacPDupwindNthAnti1phi,kmadd(beta2L,JacPDupwindNthAnti2phi,kmadd(beta3L,JacPDupwindNthAnti3phi,kadd(phirhsL,kmadd(JacPDupwindNthSymm1phi,kfabs(beta1L),kmadd(JacPDupwindNthSymm2phi,kfabs(beta2L),kmul(JacPDupwindNthSymm3phi,kfabs(beta3L)))))))); + kadd(phirhsL,kmadd(beta1L,JacPDupwindNthAnti1phi,kmadd(beta2L,JacPDupwindNthAnti2phi,kmadd(beta3L,JacPDupwindNthAnti3phi,kmadd(JacPDupwindNthSymm1phi,kfabs(beta1L),kmadd(JacPDupwindNthSymm2phi,kfabs(beta2L),kmul(JacPDupwindNthSymm3phi,kfabs(beta3L)))))))); gt11rhsL = kadd(gt11rhsL,kmadd(beta1L,JacPDupwindNthAnti1gt11,kmadd(beta2L,JacPDupwindNthAnti2gt11,kmadd(beta3L,JacPDupwindNthAnti3gt11,kmadd(JacPDupwindNthSymm1gt11,kfabs(beta1L),kmadd(JacPDupwindNthSymm2gt11,kfabs(beta2L),kmul(JacPDupwindNthSymm3gt11,kfabs(beta3L)))))))); @@ -2007,16 +2006,16 @@ static void ML_BSSN_O2_Advect_Body(cGH const * restrict const cctkGH, int const kadd(gt33rhsL,kmadd(beta1L,JacPDupwindNthAnti1gt33,kmadd(beta2L,JacPDupwindNthAnti2gt33,kmadd(beta3L,JacPDupwindNthAnti3gt33,kmadd(JacPDupwindNthSymm1gt33,kfabs(beta1L),kmadd(JacPDupwindNthSymm2gt33,kfabs(beta2L),kmul(JacPDupwindNthSymm3gt33,kfabs(beta3L)))))))); Xt1rhsL = - kmadd(beta1L,JacPDupwindNthAnti1Xt1,kmadd(beta2L,JacPDupwindNthAnti2Xt1,kmadd(beta3L,JacPDupwindNthAnti3Xt1,kadd(Xt1rhsL,kmadd(JacPDupwindNthSymm1Xt1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt1,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt1,kfabs(beta3L)))))))); + kadd(Xt1rhsL,kmadd(beta1L,JacPDupwindNthAnti1Xt1,kmadd(beta2L,JacPDupwindNthAnti2Xt1,kmadd(beta3L,JacPDupwindNthAnti3Xt1,kmadd(JacPDupwindNthSymm1Xt1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt1,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt1,kfabs(beta3L)))))))); Xt2rhsL = - kmadd(beta1L,JacPDupwindNthAnti1Xt2,kmadd(beta2L,JacPDupwindNthAnti2Xt2,kmadd(beta3L,JacPDupwindNthAnti3Xt2,kadd(Xt2rhsL,kmadd(JacPDupwindNthSymm1Xt2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt2,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt2,kfabs(beta3L)))))))); + kadd(Xt2rhsL,kmadd(beta1L,JacPDupwindNthAnti1Xt2,kmadd(beta2L,JacPDupwindNthAnti2Xt2,kmadd(beta3L,JacPDupwindNthAnti3Xt2,kmadd(JacPDupwindNthSymm1Xt2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt2,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt2,kfabs(beta3L)))))))); Xt3rhsL = - kmadd(beta1L,JacPDupwindNthAnti1Xt3,kmadd(beta2L,JacPDupwindNthAnti2Xt3,kmadd(beta3L,JacPDupwindNthAnti3Xt3,kadd(Xt3rhsL,kmadd(JacPDupwindNthSymm1Xt3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt3,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt3,kfabs(beta3L)))))))); + kadd(Xt3rhsL,kmadd(beta1L,JacPDupwindNthAnti1Xt3,kmadd(beta2L,JacPDupwindNthAnti2Xt3,kmadd(beta3L,JacPDupwindNthAnti3Xt3,kmadd(JacPDupwindNthSymm1Xt3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt3,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt3,kfabs(beta3L)))))))); trKrhsL = - kmadd(beta1L,JacPDupwindNthAnti1trK,kmadd(beta2L,JacPDupwindNthAnti2trK,kmadd(beta3L,JacPDupwindNthAnti3trK,kadd(trKrhsL,kmadd(JacPDupwindNthSymm1trK,kfabs(beta1L),kmadd(JacPDupwindNthSymm2trK,kfabs(beta2L),kmul(JacPDupwindNthSymm3trK,kfabs(beta3L)))))))); + kadd(trKrhsL,kmadd(beta1L,JacPDupwindNthAnti1trK,kmadd(beta2L,JacPDupwindNthAnti2trK,kmadd(beta3L,JacPDupwindNthAnti3trK,kmadd(JacPDupwindNthSymm1trK,kfabs(beta1L),kmadd(JacPDupwindNthSymm2trK,kfabs(beta2L),kmul(JacPDupwindNthSymm3trK,kfabs(beta3L)))))))); At11rhsL = kadd(At11rhsL,kmadd(beta1L,JacPDupwindNthAnti1At11,kmadd(beta2L,JacPDupwindNthAnti2At11,kmadd(beta3L,JacPDupwindNthAnti3At11,kmadd(JacPDupwindNthSymm1At11,kfabs(beta1L),kmadd(JacPDupwindNthSymm2At11,kfabs(beta2L),kmul(JacPDupwindNthSymm3At11,kfabs(beta3L)))))))); @@ -2040,7 +2039,7 @@ static void ML_BSSN_O2_Advect_Body(cGH const * restrict const cctkGH, int const kmadd(kmadd(beta1L,JacPDupwindNthAnti1alpha,kmadd(beta2L,JacPDupwindNthAnti2alpha,kmadd(beta3L,JacPDupwindNthAnti3alpha,kmadd(JacPDupwindNthSymm1alpha,kfabs(beta1L),kmadd(JacPDupwindNthSymm2alpha,kfabs(beta2L),kmul(JacPDupwindNthSymm3alpha,kfabs(beta3L))))))),ToReal(LapseAdvectionCoeff),alpharhsL); ArhsL = - kmadd(kmadd(beta1L,JacPDupwindNthAnti1A,kmadd(beta2L,JacPDupwindNthAnti2A,kmadd(beta3L,JacPDupwindNthAnti3A,kmadd(JacPDupwindNthSymm1A,kfabs(beta1L),kmadd(JacPDupwindNthSymm2A,kfabs(beta2L),kmul(JacPDupwindNthSymm3A,kfabs(beta3L))))))),ToReal(LapseAdvectionCoeff),ArhsL); + kmadd(ToReal(LapseACoeff),kmsub(kmadd(beta1L,JacPDupwindNthAnti1A,kmadd(beta2L,JacPDupwindNthAnti2A,kmadd(beta3L,JacPDupwindNthAnti3A,kmadd(JacPDupwindNthSymm1A,kfabs(beta1L),kmadd(JacPDupwindNthSymm2A,kfabs(beta2L),kmul(JacPDupwindNthSymm3A,kfabs(beta3L))))))),ToReal(LapseAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1trK,kmadd(beta2L,JacPDupwindNthAnti2trK,kmadd(beta3L,JacPDupwindNthAnti3trK,kmadd(JacPDupwindNthSymm1trK,kfabs(beta1L),kmadd(JacPDupwindNthSymm2trK,kfabs(beta2L),kmul(JacPDupwindNthSymm3trK,kfabs(beta3L))))))),kadd(ToReal(-1),ToReal(LapseAdvectionCoeff)))),ArhsL); beta1rhsL = kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta1,kmadd(beta2L,JacPDupwindNthAnti2beta1,kmadd(beta3L,JacPDupwindNthAnti3beta1,kmadd(JacPDupwindNthSymm1beta1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta1,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),beta1rhsL); @@ -2052,140 +2051,43 @@ static void ML_BSSN_O2_Advect_Body(cGH const * restrict const cctkGH, int const kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),beta3rhsL); B1rhsL = - kadd(B1rhsL,kmadd(kmadd(beta1L,ksub(JacPDupwindNthAnti1B1,JacPDupwindNthAnti1Xt1),kmadd(beta2L,ksub(JacPDupwindNthAnti2B1,JacPDupwindNthAnti2Xt1),kmadd(beta3L,ksub(JacPDupwindNthAnti3B1,JacPDupwindNthAnti3Xt1),kmadd(kfabs(beta1L),ksub(JacPDupwindNthSymm1B1,JacPDupwindNthSymm1Xt1),kmadd(kfabs(beta2L),ksub(JacPDupwindNthSymm2B1,JacPDupwindNthSymm2Xt1),kmul(kfabs(beta3L),ksub(JacPDupwindNthSymm3B1,JacPDupwindNthSymm3Xt1))))))),ToReal(ShiftAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1Xt1,kmadd(beta2L,JacPDupwindNthAnti2Xt1,kmadd(beta3L,JacPDupwindNthAnti3Xt1,kmadd(JacPDupwindNthSymm1Xt1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt1,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt1,kfabs(beta3L))))))),ToReal(ShiftBCoeff)))); + kmadd(kmsub(kmadd(beta1L,JacPDupwindNthAnti1B1,kmadd(beta2L,JacPDupwindNthAnti2B1,kmadd(beta3L,JacPDupwindNthAnti3B1,kmadd(JacPDupwindNthSymm1B1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2B1,kfabs(beta2L),kmul(JacPDupwindNthSymm3B1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1Xt1,kmadd(beta2L,JacPDupwindNthAnti2Xt1,kmadd(beta3L,JacPDupwindNthAnti3Xt1,kmadd(JacPDupwindNthSymm1Xt1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt1,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt1,kfabs(beta3L))))))),kadd(ToReal(-1),ToReal(ShiftAdvectionCoeff)))),ToReal(ShiftBCoeff),B1rhsL); B2rhsL = - kadd(B2rhsL,kmadd(kmadd(beta1L,ksub(JacPDupwindNthAnti1B2,JacPDupwindNthAnti1Xt2),kmadd(beta2L,ksub(JacPDupwindNthAnti2B2,JacPDupwindNthAnti2Xt2),kmadd(beta3L,ksub(JacPDupwindNthAnti3B2,JacPDupwindNthAnti3Xt2),kmadd(kfabs(beta1L),ksub(JacPDupwindNthSymm1B2,JacPDupwindNthSymm1Xt2),kmadd(kfabs(beta2L),ksub(JacPDupwindNthSymm2B2,JacPDupwindNthSymm2Xt2),kmul(kfabs(beta3L),ksub(JacPDupwindNthSymm3B2,JacPDupwindNthSymm3Xt2))))))),ToReal(ShiftAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1Xt2,kmadd(beta2L,JacPDupwindNthAnti2Xt2,kmadd(beta3L,JacPDupwindNthAnti3Xt2,kmadd(JacPDupwindNthSymm1Xt2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt2,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt2,kfabs(beta3L))))))),ToReal(ShiftBCoeff)))); + kmadd(kmsub(kmadd(beta1L,JacPDupwindNthAnti1B2,kmadd(beta2L,JacPDupwindNthAnti2B2,kmadd(beta3L,JacPDupwindNthAnti3B2,kmadd(JacPDupwindNthSymm1B2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2B2,kfabs(beta2L),kmul(JacPDupwindNthSymm3B2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1Xt2,kmadd(beta2L,JacPDupwindNthAnti2Xt2,kmadd(beta3L,JacPDupwindNthAnti3Xt2,kmadd(JacPDupwindNthSymm1Xt2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt2,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt2,kfabs(beta3L))))))),kadd(ToReal(-1),ToReal(ShiftAdvectionCoeff)))),ToReal(ShiftBCoeff),B2rhsL); B3rhsL = - kadd(B3rhsL,kmadd(kmadd(beta1L,ksub(JacPDupwindNthAnti1B3,JacPDupwindNthAnti1Xt3),kmadd(beta2L,ksub(JacPDupwindNthAnti2B3,JacPDupwindNthAnti2Xt3),kmadd(beta3L,ksub(JacPDupwindNthAnti3B3,JacPDupwindNthAnti3Xt3),kmadd(kfabs(beta1L),ksub(JacPDupwindNthSymm1B3,JacPDupwindNthSymm1Xt3),kmadd(kfabs(beta2L),ksub(JacPDupwindNthSymm2B3,JacPDupwindNthSymm2Xt3),kmul(kfabs(beta3L),ksub(JacPDupwindNthSymm3B3,JacPDupwindNthSymm3Xt3))))))),ToReal(ShiftAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1Xt3,kmadd(beta2L,JacPDupwindNthAnti2Xt3,kmadd(beta3L,JacPDupwindNthAnti3Xt3,kmadd(JacPDupwindNthSymm1Xt3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt3,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt3,kfabs(beta3L))))))),ToReal(ShiftBCoeff)))); - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ + kmadd(kmsub(kmadd(beta1L,JacPDupwindNthAnti1B3,kmadd(beta2L,JacPDupwindNthAnti2B3,kmadd(beta3L,JacPDupwindNthAnti3B3,kmadd(JacPDupwindNthSymm1B3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2B3,kfabs(beta2L),kmul(JacPDupwindNthSymm3B3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1Xt3,kmadd(beta2L,JacPDupwindNthAnti2Xt3,kmadd(beta3L,JacPDupwindNthAnti3Xt3,kmadd(JacPDupwindNthSymm1Xt3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt3,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt3,kfabs(beta3L))))))),kadd(ToReal(-1),ToReal(ShiftAdvectionCoeff)))),ToReal(ShiftBCoeff),B3rhsL); - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_O2_Advect); + LC_ENDLOOP3VEC(ML_BSSN_O2_Advect); } extern "C" void ML_BSSN_O2_Advect(CCTK_ARGUMENTS) @@ -2204,7 +2106,25 @@ extern "C" void ML_BSSN_O2_Advect(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O2::ML_curv","ML_BSSN_O2::ML_curvrhs","ML_BSSN_O2::ML_dtlapse","ML_BSSN_O2::ML_dtlapserhs","ML_BSSN_O2::ML_dtshift","ML_BSSN_O2::ML_dtshiftrhs","ML_BSSN_O2::ML_Gamma","ML_BSSN_O2::ML_Gammarhs","ML_BSSN_O2::ML_lapse","ML_BSSN_O2::ML_lapserhs","ML_BSSN_O2::ML_log_confac","ML_BSSN_O2::ML_log_confacrhs","ML_BSSN_O2::ML_metric","ML_BSSN_O2::ML_metricrhs","ML_BSSN_O2::ML_shift","ML_BSSN_O2::ML_shiftrhs","ML_BSSN_O2::ML_trace_curv","ML_BSSN_O2::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN_O2::ML_curv", + "ML_BSSN_O2::ML_curvrhs", + "ML_BSSN_O2::ML_dtlapse", + "ML_BSSN_O2::ML_dtlapserhs", + "ML_BSSN_O2::ML_dtshift", + "ML_BSSN_O2::ML_dtshiftrhs", + "ML_BSSN_O2::ML_Gamma", + "ML_BSSN_O2::ML_Gammarhs", + "ML_BSSN_O2::ML_lapse", + "ML_BSSN_O2::ML_lapserhs", + "ML_BSSN_O2::ML_log_confac", + "ML_BSSN_O2::ML_log_confacrhs", + "ML_BSSN_O2::ML_metric", + "ML_BSSN_O2::ML_metricrhs", + "ML_BSSN_O2::ML_shift", + "ML_BSSN_O2::ML_shiftrhs", + "ML_BSSN_O2::ML_trace_curv", + "ML_BSSN_O2::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O2_Advect", 18, groups); switch(fdOrder) @@ -2226,7 +2146,7 @@ extern "C" void ML_BSSN_O2_Advect(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_O2_Advect_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_O2_Advect_Body); if (verbose > 1) { diff --git a/ML_BSSN_O2/src/ML_BSSN_O2_Dissipation.cc b/ML_BSSN_O2/src/ML_BSSN_O2_Dissipation.cc index f257d4c..255a4d1 100644 --- a/ML_BSSN_O2/src/ML_BSSN_O2_Dissipation.cc +++ b/ML_BSSN_O2/src/ML_BSSN_O2_Dissipation.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -64,8 +65,6 @@ static void ML_BSSN_O2_Dissipation_Body(cGH const * restrict const cctkGH, int c DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -102,9 +101,9 @@ static void ML_BSSN_O2_Dissipation_Body(cGH const * restrict const cctkGH, int c CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -123,14 +122,14 @@ static void ML_BSSN_O2_Dissipation_Body(cGH const * restrict const cctkGH, int c CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -144,9 +143,9 @@ static void ML_BSSN_O2_Dissipation_Body(cGH const * restrict const cctkGH, int c CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -227,7 +226,7 @@ static void ML_BSSN_O2_Dissipation_Body(cGH const * restrict const cctkGH, int c /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O2_Dissipation, + LC_LOOP3VEC(ML_BSSN_O2_Dissipation, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1161,7 +1160,7 @@ static void ML_BSSN_O2_Dissipation_Body(cGH const * restrict const cctkGH, int c CCTK_REAL_VEC epsdiss3 = ToReal(EpsDiss); phirhsL = - kmadd(epsdiss1,JacPDdissipationNth1phi,kmadd(epsdiss2,JacPDdissipationNth2phi,kmadd(epsdiss3,JacPDdissipationNth3phi,phirhsL))); + kadd(phirhsL,kmadd(epsdiss1,JacPDdissipationNth1phi,kmadd(epsdiss2,JacPDdissipationNth2phi,kmul(epsdiss3,JacPDdissipationNth3phi)))); gt11rhsL = kadd(gt11rhsL,kmadd(epsdiss1,JacPDdissipationNth1gt11,kmadd(epsdiss2,JacPDdissipationNth2gt11,kmul(epsdiss3,JacPDdissipationNth3gt11)))); @@ -1182,16 +1181,16 @@ static void ML_BSSN_O2_Dissipation_Body(cGH const * restrict const cctkGH, int c kadd(gt33rhsL,kmadd(epsdiss1,JacPDdissipationNth1gt33,kmadd(epsdiss2,JacPDdissipationNth2gt33,kmul(epsdiss3,JacPDdissipationNth3gt33)))); Xt1rhsL = - kmadd(epsdiss1,JacPDdissipationNth1Xt1,kmadd(epsdiss2,JacPDdissipationNth2Xt1,kmadd(epsdiss3,JacPDdissipationNth3Xt1,Xt1rhsL))); + kadd(Xt1rhsL,kmadd(epsdiss1,JacPDdissipationNth1Xt1,kmadd(epsdiss2,JacPDdissipationNth2Xt1,kmul(epsdiss3,JacPDdissipationNth3Xt1)))); Xt2rhsL = - kmadd(epsdiss1,JacPDdissipationNth1Xt2,kmadd(epsdiss2,JacPDdissipationNth2Xt2,kmadd(epsdiss3,JacPDdissipationNth3Xt2,Xt2rhsL))); + kadd(Xt2rhsL,kmadd(epsdiss1,JacPDdissipationNth1Xt2,kmadd(epsdiss2,JacPDdissipationNth2Xt2,kmul(epsdiss3,JacPDdissipationNth3Xt2)))); Xt3rhsL = - kmadd(epsdiss1,JacPDdissipationNth1Xt3,kmadd(epsdiss2,JacPDdissipationNth2Xt3,kmadd(epsdiss3,JacPDdissipationNth3Xt3,Xt3rhsL))); + kadd(Xt3rhsL,kmadd(epsdiss1,JacPDdissipationNth1Xt3,kmadd(epsdiss2,JacPDdissipationNth2Xt3,kmul(epsdiss3,JacPDdissipationNth3Xt3)))); trKrhsL = - kmadd(epsdiss1,JacPDdissipationNth1trK,kmadd(epsdiss2,JacPDdissipationNth2trK,kmadd(epsdiss3,JacPDdissipationNth3trK,trKrhsL))); + kadd(trKrhsL,kmadd(epsdiss1,JacPDdissipationNth1trK,kmadd(epsdiss2,JacPDdissipationNth2trK,kmul(epsdiss3,JacPDdissipationNth3trK)))); At11rhsL = kadd(At11rhsL,kmadd(epsdiss1,JacPDdissipationNth1At11,kmadd(epsdiss2,JacPDdissipationNth2At11,kmul(epsdiss3,JacPDdissipationNth3At11)))); @@ -1235,132 +1234,35 @@ static void ML_BSSN_O2_Dissipation_Body(cGH const * restrict const cctkGH, int c B3rhsL = kadd(B3rhsL,kmadd(epsdiss1,JacPDdissipationNth1B3,kmadd(epsdiss2,JacPDdissipationNth2B3,kmul(epsdiss3,JacPDdissipationNth3B3)))); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_O2_Dissipation); + LC_ENDLOOP3VEC(ML_BSSN_O2_Dissipation); } extern "C" void ML_BSSN_O2_Dissipation(CCTK_ARGUMENTS) @@ -1379,7 +1281,25 @@ extern "C" void ML_BSSN_O2_Dissipation(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O2::ML_curv","ML_BSSN_O2::ML_curvrhs","ML_BSSN_O2::ML_dtlapse","ML_BSSN_O2::ML_dtlapserhs","ML_BSSN_O2::ML_dtshift","ML_BSSN_O2::ML_dtshiftrhs","ML_BSSN_O2::ML_Gamma","ML_BSSN_O2::ML_Gammarhs","ML_BSSN_O2::ML_lapse","ML_BSSN_O2::ML_lapserhs","ML_BSSN_O2::ML_log_confac","ML_BSSN_O2::ML_log_confacrhs","ML_BSSN_O2::ML_metric","ML_BSSN_O2::ML_metricrhs","ML_BSSN_O2::ML_shift","ML_BSSN_O2::ML_shiftrhs","ML_BSSN_O2::ML_trace_curv","ML_BSSN_O2::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN_O2::ML_curv", + "ML_BSSN_O2::ML_curvrhs", + "ML_BSSN_O2::ML_dtlapse", + "ML_BSSN_O2::ML_dtlapserhs", + "ML_BSSN_O2::ML_dtshift", + "ML_BSSN_O2::ML_dtshiftrhs", + "ML_BSSN_O2::ML_Gamma", + "ML_BSSN_O2::ML_Gammarhs", + "ML_BSSN_O2::ML_lapse", + "ML_BSSN_O2::ML_lapserhs", + "ML_BSSN_O2::ML_log_confac", + "ML_BSSN_O2::ML_log_confacrhs", + "ML_BSSN_O2::ML_metric", + "ML_BSSN_O2::ML_metricrhs", + "ML_BSSN_O2::ML_shift", + "ML_BSSN_O2::ML_shiftrhs", + "ML_BSSN_O2::ML_trace_curv", + "ML_BSSN_O2::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O2_Dissipation", 18, groups); switch(fdOrder) @@ -1401,7 +1321,7 @@ extern "C" void ML_BSSN_O2_Dissipation(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_O2_Dissipation_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_O2_Dissipation_Body); if (verbose > 1) { diff --git a/ML_BSSN_O2/src/ML_BSSN_O2_InitGamma.cc b/ML_BSSN_O2/src/ML_BSSN_O2_InitGamma.cc index df0e4bf..6ac41e5 100644 --- a/ML_BSSN_O2/src/ML_BSSN_O2_InitGamma.cc +++ b/ML_BSSN_O2/src/ML_BSSN_O2_InitGamma.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_O2_InitGamma_Body(cGH const * restrict const cctkGH, int con DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_O2_InitGamma_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_O2_InitGamma_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_O2_InitGamma_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_O2_InitGamma_Body(cGH const * restrict const cctkGH, int con /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O2_InitGamma, + LC_LOOP3VEC(ML_BSSN_O2_InitGamma, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -237,60 +236,17 @@ static void ML_BSSN_O2_InitGamma_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC B3L = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_O2_InitGamma); + LC_ENDLOOP3VEC(ML_BSSN_O2_InitGamma); } extern "C" void ML_BSSN_O2_InitGamma(CCTK_ARGUMENTS) @@ -309,7 +265,10 @@ extern "C" void ML_BSSN_O2_InitGamma(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O2::ML_dtlapse","ML_BSSN_O2::ML_dtshift","ML_BSSN_O2::ML_Gamma"}; + const char *const groups[] = { + "ML_BSSN_O2::ML_dtlapse", + "ML_BSSN_O2::ML_dtshift", + "ML_BSSN_O2::ML_Gamma"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O2_InitGamma", 3, groups); switch(fdOrder) @@ -327,7 +286,7 @@ extern "C" void ML_BSSN_O2_InitGamma(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_O2_InitGamma_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_O2_InitGamma_Body); if (verbose > 1) { diff --git a/ML_BSSN_O2/src/ML_BSSN_O2_InitRHS.cc b/ML_BSSN_O2/src/ML_BSSN_O2_InitRHS.cc index 9c6c6a9..9e86ac6 100644 --- a/ML_BSSN_O2/src/ML_BSSN_O2_InitRHS.cc +++ b/ML_BSSN_O2/src/ML_BSSN_O2_InitRHS.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_O2_InitRHS_Body(cGH const * restrict const cctkGH, int const DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_O2_InitRHS_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_O2_InitRHS_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_O2_InitRHS_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_O2_InitRHS_Body(cGH const * restrict const cctkGH, int const /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O2_InitRHS, + LC_LOOP3VEC(ML_BSSN_O2_InitRHS, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -273,132 +272,35 @@ static void ML_BSSN_O2_InitRHS_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC B3rhsL = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_O2_InitRHS); + LC_ENDLOOP3VEC(ML_BSSN_O2_InitRHS); } extern "C" void ML_BSSN_O2_InitRHS(CCTK_ARGUMENTS) @@ -417,7 +319,16 @@ extern "C" void ML_BSSN_O2_InitRHS(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O2::ML_curvrhs","ML_BSSN_O2::ML_dtlapserhs","ML_BSSN_O2::ML_dtshiftrhs","ML_BSSN_O2::ML_Gammarhs","ML_BSSN_O2::ML_lapserhs","ML_BSSN_O2::ML_log_confacrhs","ML_BSSN_O2::ML_metricrhs","ML_BSSN_O2::ML_shiftrhs","ML_BSSN_O2::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN_O2::ML_curvrhs", + "ML_BSSN_O2::ML_dtlapserhs", + "ML_BSSN_O2::ML_dtshiftrhs", + "ML_BSSN_O2::ML_Gammarhs", + "ML_BSSN_O2::ML_lapserhs", + "ML_BSSN_O2::ML_log_confacrhs", + "ML_BSSN_O2::ML_metricrhs", + "ML_BSSN_O2::ML_shiftrhs", + "ML_BSSN_O2::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O2_InitRHS", 9, groups); switch(fdOrder) @@ -435,7 +346,7 @@ extern "C" void ML_BSSN_O2_InitRHS(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_O2_InitRHS_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_O2_InitRHS_Body); if (verbose > 1) { diff --git a/ML_BSSN_O2/src/ML_BSSN_O2_Minkowski.cc b/ML_BSSN_O2/src/ML_BSSN_O2_Minkowski.cc index 7c15ee6..3a9e014 100644 --- a/ML_BSSN_O2/src/ML_BSSN_O2_Minkowski.cc +++ b/ML_BSSN_O2/src/ML_BSSN_O2_Minkowski.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_O2_Minkowski_Body(cGH const * restrict const cctkGH, int con DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_O2_Minkowski_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_O2_Minkowski_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_O2_Minkowski_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_O2_Minkowski_Body(cGH const * restrict const cctkGH, int con /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O2_Minkowski, + LC_LOOP3VEC(ML_BSSN_O2_Minkowski, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -273,132 +272,35 @@ static void ML_BSSN_O2_Minkowski_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC B3L = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(beta1[index],beta1L,elt_count); - vec_store_nta_partial_hi(beta2[index],beta2L,elt_count); - vec_store_nta_partial_hi(beta3[index],beta3L,elt_count); - vec_store_nta_partial_hi(gt11[index],gt11L,elt_count); - vec_store_nta_partial_hi(gt12[index],gt12L,elt_count); - vec_store_nta_partial_hi(gt13[index],gt13L,elt_count); - vec_store_nta_partial_hi(gt22[index],gt22L,elt_count); - vec_store_nta_partial_hi(gt23[index],gt23L,elt_count); - vec_store_nta_partial_hi(gt33[index],gt33L,elt_count); - vec_store_nta_partial_hi(phi[index],phiL,elt_count); - vec_store_nta_partial_hi(trK[index],trKL,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(beta1[index],beta1L,elt_count); - vec_store_nta_partial_lo(beta2[index],beta2L,elt_count); - vec_store_nta_partial_lo(beta3[index],beta3L,elt_count); - vec_store_nta_partial_lo(gt11[index],gt11L,elt_count); - vec_store_nta_partial_lo(gt12[index],gt12L,elt_count); - vec_store_nta_partial_lo(gt13[index],gt13L,elt_count); - vec_store_nta_partial_lo(gt22[index],gt22L,elt_count); - vec_store_nta_partial_lo(gt23[index],gt23L,elt_count); - vec_store_nta_partial_lo(gt33[index],gt33L,elt_count); - vec_store_nta_partial_lo(phi[index],phiL,elt_count); - vec_store_nta_partial_lo(trK[index],trKL,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(beta1[index],beta1L); - vec_store_nta(beta2[index],beta2L); - vec_store_nta(beta3[index],beta3L); - vec_store_nta(gt11[index],gt11L); - vec_store_nta(gt12[index],gt12L); - vec_store_nta(gt13[index],gt13L); - vec_store_nta(gt22[index],gt22L); - vec_store_nta(gt23[index],gt23L); - vec_store_nta(gt33[index],gt33L); - vec_store_nta(phi[index],phiL); - vec_store_nta(trK[index],trKL); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(beta1[index],beta1L); + vec_store_nta_partial(beta2[index],beta2L); + vec_store_nta_partial(beta3[index],beta3L); + vec_store_nta_partial(gt11[index],gt11L); + vec_store_nta_partial(gt12[index],gt12L); + vec_store_nta_partial(gt13[index],gt13L); + vec_store_nta_partial(gt22[index],gt22L); + vec_store_nta_partial(gt23[index],gt23L); + vec_store_nta_partial(gt33[index],gt33L); + vec_store_nta_partial(phi[index],phiL); + vec_store_nta_partial(trK[index],trKL); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_O2_Minkowski); + LC_ENDLOOP3VEC(ML_BSSN_O2_Minkowski); } extern "C" void ML_BSSN_O2_Minkowski(CCTK_ARGUMENTS) @@ -417,7 +319,16 @@ extern "C" void ML_BSSN_O2_Minkowski(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O2::ML_curv","ML_BSSN_O2::ML_dtlapse","ML_BSSN_O2::ML_dtshift","ML_BSSN_O2::ML_Gamma","ML_BSSN_O2::ML_lapse","ML_BSSN_O2::ML_log_confac","ML_BSSN_O2::ML_metric","ML_BSSN_O2::ML_shift","ML_BSSN_O2::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_O2::ML_curv", + "ML_BSSN_O2::ML_dtlapse", + "ML_BSSN_O2::ML_dtshift", + "ML_BSSN_O2::ML_Gamma", + "ML_BSSN_O2::ML_lapse", + "ML_BSSN_O2::ML_log_confac", + "ML_BSSN_O2::ML_metric", + "ML_BSSN_O2::ML_shift", + "ML_BSSN_O2::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O2_Minkowski", 9, groups); switch(fdOrder) @@ -435,7 +346,7 @@ extern "C" void ML_BSSN_O2_Minkowski(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_O2_Minkowski_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_O2_Minkowski_Body); if (verbose > 1) { diff --git a/ML_BSSN_O2/src/ML_BSSN_O2_RHS1.cc b/ML_BSSN_O2/src/ML_BSSN_O2_RHS1.cc index 8f56d98..89602bb 100644 --- a/ML_BSSN_O2/src/ML_BSSN_O2_RHS1.cc +++ b/ML_BSSN_O2/src/ML_BSSN_O2_RHS1.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -61,8 +62,6 @@ static void ML_BSSN_O2_RHS1_Body(cGH const * restrict const cctkGH, int const di DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -99,9 +98,9 @@ static void ML_BSSN_O2_RHS1_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -120,14 +119,14 @@ static void ML_BSSN_O2_RHS1_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -141,9 +140,9 @@ static void ML_BSSN_O2_RHS1_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -224,7 +223,7 @@ static void ML_BSSN_O2_RHS1_Body(cGH const * restrict const cctkGH, int const di /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O2_RHS1, + LC_LOOP3VEC(ML_BSSN_O2_RHS1, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1088,7 +1087,8 @@ static void ML_BSSN_O2_RHS1_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -1096,12 +1096,14 @@ static void ML_BSSN_O2_RHS1_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC Gtl111 = kmul(JacPDstandardNth1gt11,ToReal(0.5)); @@ -1307,13 +1309,13 @@ static void ML_BSSN_O2_RHS1_Body(cGH const * restrict const cctkGH, int const di kmul(ToReal(-0.666666666666666666666666666667),kmadd(kmadd(gt13L,JacPDstandardNth3beta1,kmul(gt23L,JacPDstandardNth3beta2)),ToReal(-3),kmadd(gt33L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth3beta3,ToReal(-2),JacPDstandardNth2beta2)),kmul(alphaL,kmul(At33L,ToReal(3)))))); CCTK_REAL_VEC dotXt1 = - kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu11,JacPDstandardNth1alpha,kmadd(Atu12,JacPDstandardNth2alpha,kmul(Atu13,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth2beta1,Xtn2,kmul(JacPDstandardNth3beta1,Xtn3)),ToReal(-3),kmadd(Xtn1,kmsub(kadd(JacPDstandardNth2beta2,JacPDstandardNth3beta3),ToReal(2),JacPDstandardNth1beta1),kmadd(kmadd(gtu12,JacPDstandardNth12beta1,kmadd(gtu13,JacPDstandardNth13beta1,kmadd(gtu22,JacPDstandardNth22beta1,kmadd(gtu23,kadd(JacPDstandardNth23beta1,JacPDstandardNth32beta1),kmul(gtu33,JacPDstandardNth33beta1))))),ToReal(3),kmadd(gtu11,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth11beta1,ToReal(4),JacPDstandardNth13beta3)),kmadd(gtu12,kadd(JacPDstandardNth22beta2,kmadd(JacPDstandardNth21beta1,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu13,kadd(JacPDstandardNth32beta2,kmadd(JacPDstandardNth31beta1,ToReal(4),JacPDstandardNth33beta3)),kmul(alphaL,kmadd(kmadd(gtu11,S1,kmadd(gtu12,S2,kmul(gtu13,S3))),ToReal(-150.7964473723100754462068823974161384415),kmadd(kmadd(gtu11,JacPDstandardNth1trK,kmadd(gtu12,JacPDstandardNth2trK,kmul(gtu13,JacPDstandardNth3trK))),ToReal(-4),kmadd(ToReal(6),kmadd(Atu22,Gt122,kmadd(Atu33,Gt133,kmul(Atu11,kmadd(cdphi1,ToReal(6),Gt111)))),kmadd(kmadd(Atu23,Gt123,kmul(Atu12,kmadd(cdphi2,ToReal(3),Gt112))),ToReal(12),kmul(Atu13,kmadd(Gt113,ToReal(12),kmul(cdphi3,ToReal(36))))))))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu11,JacPDstandardNth1alpha,kmadd(Atu12,JacPDstandardNth2alpha,kmul(Atu13,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth2beta1,Xtn2,kmul(JacPDstandardNth3beta1,Xtn3)),ToReal(-3),kmadd(Xtn1,kmsub(JacPDstandardNth3beta3,ToReal(2),JacPDstandardNth1beta1),kmadd(kmadd(gtu12,JacPDstandardNth12beta1,kmadd(gtu13,JacPDstandardNth13beta1,kmadd(gtu22,JacPDstandardNth22beta1,kmadd(gtu23,kadd(JacPDstandardNth23beta1,JacPDstandardNth32beta1),kmul(gtu33,JacPDstandardNth33beta1))))),ToReal(3),kmadd(gtu11,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth11beta1,ToReal(4),JacPDstandardNth13beta3)),kmadd(gtu12,kadd(JacPDstandardNth22beta2,kmadd(JacPDstandardNth21beta1,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu13,kadd(JacPDstandardNth32beta2,kmadd(JacPDstandardNth31beta1,ToReal(4),JacPDstandardNth33beta3)),kmul(ToReal(2),kmadd(JacPDstandardNth2beta2,Xtn1,kmul(alphaL,kmadd(kmadd(gtu11,JacPDstandardNth1trK,kmadd(gtu12,JacPDstandardNth2trK,kmul(gtu13,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu23,Gt123,kmul(Atu12,kmadd(cdphi2,ToReal(3),Gt112))),ToReal(6),kmadd(ToReal(3),kmadd(Atu22,Gt122,kmadd(Atu33,Gt133,kmul(Atu11,kmadd(cdphi1,ToReal(6),Gt111)))),kmadd(Atu13,kmadd(Gt113,ToReal(6),kmul(cdphi3,ToReal(18))),kmul(kmadd(gtu11,S1,kmadd(gtu12,S2,kmul(gtu13,S3))),kmul(ToReal(-24),ToReal(Pi)))))))))))))))))); CCTK_REAL_VEC dotXt2 = - kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu12,JacPDstandardNth1alpha,kmadd(Atu22,JacPDstandardNth2alpha,kmul(Atu23,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta2,Xtn1,kmul(JacPDstandardNth3beta2,Xtn3)),ToReal(-3),kmadd(Xtn2,kmsub(kadd(JacPDstandardNth1beta1,JacPDstandardNth3beta3),ToReal(2),JacPDstandardNth2beta2),kmadd(kmadd(gtu11,JacPDstandardNth11beta2,kmadd(gtu23,JacPDstandardNth23beta2,kmadd(gtu13,kadd(JacPDstandardNth13beta2,JacPDstandardNth31beta2),kmul(gtu33,JacPDstandardNth33beta2)))),ToReal(3),kmadd(gtu12,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth13beta3,kmadd(JacPDstandardNth21beta2,ToReal(3),kmul(JacPDstandardNth12beta2,ToReal(4))))),kmadd(gtu22,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth22beta2,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu23,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth32beta2,ToReal(4),JacPDstandardNth33beta3)),kmul(alphaL,kmadd(kmadd(gtu12,S1,kmadd(gtu22,S2,kmul(gtu23,S3))),ToReal(-150.7964473723100754462068823974161384415),kmadd(kmadd(gtu12,JacPDstandardNth1trK,kmadd(gtu22,JacPDstandardNth2trK,kmul(gtu23,JacPDstandardNth3trK))),ToReal(-4),kmadd(ToReal(6),kmadd(Atu11,Gt211,kmadd(Atu33,Gt233,kmul(Atu22,kmadd(cdphi2,ToReal(6),Gt222)))),kmadd(kmadd(Atu13,Gt213,kmul(Atu12,kmadd(cdphi1,ToReal(3),Gt212))),ToReal(12),kmul(Atu23,kmadd(Gt223,ToReal(12),kmul(cdphi3,ToReal(36))))))))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu12,JacPDstandardNth1alpha,kmadd(Atu22,JacPDstandardNth2alpha,kmul(Atu23,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta2,Xtn1,kmul(JacPDstandardNth3beta2,Xtn3)),ToReal(-3),kmadd(Xtn2,kmsub(JacPDstandardNth3beta3,ToReal(2),JacPDstandardNth2beta2),kmadd(kmadd(gtu11,JacPDstandardNth11beta2,kmadd(gtu23,JacPDstandardNth23beta2,kmadd(gtu13,kadd(JacPDstandardNth13beta2,JacPDstandardNth31beta2),kmul(gtu33,JacPDstandardNth33beta2)))),ToReal(3),kmadd(gtu12,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth13beta3,kmadd(JacPDstandardNth21beta2,ToReal(3),kmul(JacPDstandardNth12beta2,ToReal(4))))),kmadd(gtu22,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth22beta2,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu23,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth32beta2,ToReal(4),JacPDstandardNth33beta3)),kmul(ToReal(2),kmadd(JacPDstandardNth1beta1,Xtn2,kmul(alphaL,kmadd(kmadd(gtu12,JacPDstandardNth1trK,kmadd(gtu22,JacPDstandardNth2trK,kmul(gtu23,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu13,Gt213,kmul(Atu12,kmadd(cdphi1,ToReal(3),Gt212))),ToReal(6),kmadd(ToReal(3),kmadd(Atu11,Gt211,kmadd(Atu33,Gt233,kmul(Atu22,kmadd(cdphi2,ToReal(6),Gt222)))),kmadd(Atu23,kmadd(Gt223,ToReal(6),kmul(cdphi3,ToReal(18))),kmul(kmadd(gtu12,S1,kmadd(gtu22,S2,kmul(gtu23,S3))),kmul(ToReal(-24),ToReal(Pi)))))))))))))))))); CCTK_REAL_VEC dotXt3 = - kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu13,JacPDstandardNth1alpha,kmadd(Atu23,JacPDstandardNth2alpha,kmul(Atu33,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta3,Xtn1,kmul(JacPDstandardNth2beta3,Xtn2)),ToReal(-3),kmadd(Xtn3,kmsub(kadd(JacPDstandardNth1beta1,JacPDstandardNth2beta2),ToReal(2),JacPDstandardNth3beta3),kmadd(kmadd(gtu11,JacPDstandardNth11beta3,kmadd(gtu12,kadd(JacPDstandardNth12beta3,JacPDstandardNth21beta3),kmadd(gtu22,JacPDstandardNth22beta3,kmul(gtu23,JacPDstandardNth32beta3)))),ToReal(3),kmadd(gtu13,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth31beta3,ToReal(3),kmul(JacPDstandardNth13beta3,ToReal(4))))),kmadd(gtu23,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth23beta3,ToReal(4),JacPDstandardNth22beta2)),kmadd(gtu33,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth33beta3,ToReal(4),JacPDstandardNth32beta2)),kmul(alphaL,kmadd(kmadd(gtu13,S1,kmadd(gtu23,S2,kmul(gtu33,S3))),ToReal(-150.7964473723100754462068823974161384415),kmadd(kmadd(gtu13,JacPDstandardNth1trK,kmadd(gtu23,JacPDstandardNth2trK,kmul(gtu33,JacPDstandardNth3trK))),ToReal(-4),kmadd(kmadd(Atu11,Gt311,kmul(Atu22,Gt322)),ToReal(6),kmadd(kmadd(Atu12,Gt312,kmadd(Atu13,kmadd(cdphi1,ToReal(3),Gt313),kmul(Atu23,kmadd(cdphi2,ToReal(3),Gt323)))),ToReal(12),kmul(Atu33,kmadd(Gt333,ToReal(6),kmul(cdphi3,ToReal(36))))))))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu13,JacPDstandardNth1alpha,kmadd(Atu23,JacPDstandardNth2alpha,kmul(Atu33,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta3,Xtn1,kmul(JacPDstandardNth2beta3,Xtn2)),ToReal(-3),kmadd(Xtn3,kmsub(JacPDstandardNth2beta2,ToReal(2),JacPDstandardNth3beta3),kmadd(kmadd(gtu11,JacPDstandardNth11beta3,kmadd(gtu12,kadd(JacPDstandardNth12beta3,JacPDstandardNth21beta3),kmadd(gtu22,JacPDstandardNth22beta3,kmul(gtu23,JacPDstandardNth32beta3)))),ToReal(3),kmadd(gtu13,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth31beta3,ToReal(3),kmul(JacPDstandardNth13beta3,ToReal(4))))),kmadd(gtu23,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth23beta3,ToReal(4),JacPDstandardNth22beta2)),kmadd(gtu33,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth33beta3,ToReal(4),JacPDstandardNth32beta2)),kmul(ToReal(2),kmadd(JacPDstandardNth1beta1,Xtn3,kmul(alphaL,kmadd(kmadd(gtu13,JacPDstandardNth1trK,kmadd(gtu23,JacPDstandardNth2trK,kmul(gtu33,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu11,Gt311,kmul(Atu22,Gt322)),ToReal(3),kmadd(kmadd(Atu12,Gt312,kmadd(Atu13,kmadd(cdphi1,ToReal(3),Gt313),kmul(Atu23,kmadd(cdphi2,ToReal(3),Gt323)))),ToReal(6),kmadd(Atu33,kmadd(Gt333,ToReal(3),kmul(cdphi3,ToReal(18))),kmul(kmadd(gtu13,S1,kmadd(gtu23,S2,kmul(gtu33,S3))),kmul(ToReal(-24),ToReal(Pi)))))))))))))))))); CCTK_REAL_VEC Xt1rhsL = dotXt1; @@ -1322,18 +1324,18 @@ static void ML_BSSN_O2_RHS1_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC Xt3rhsL = dotXt3; CCTK_REAL_VEC dottrK = - kmsub(alphaL,kadd(SQR(Atm11),kadd(SQR(Atm22),kadd(SQR(Atm33),kmadd(SQR(trKL),ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(2),kmul(kadd(rho,trS),ToReal(12.56637061435917295385057353311801153679))))))),kmul(em4phi,knmsub(JacPDstandardNth1alpha,Xtn1,knmsub(JacPDstandardNth2alpha,Xtn2,knmsub(JacPDstandardNth3alpha,Xtn3,kmadd(gtu11,kmadd(cdphi1,kmul(JacPDstandardNth1alpha,ToReal(2)),JacPDstandardNth11alpha),kmadd(gtu12,kadd(JacPDstandardNth12alpha,kadd(JacPDstandardNth21alpha,kmadd(cdphi2,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth2alpha,ToReal(2)))))),kmadd(gtu22,kmadd(cdphi2,kmul(JacPDstandardNth2alpha,ToReal(2)),JacPDstandardNth22alpha),kmadd(gtu13,kadd(JacPDstandardNth13alpha,kadd(JacPDstandardNth31alpha,kmadd(cdphi3,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmadd(gtu23,kadd(JacPDstandardNth23alpha,kadd(JacPDstandardNth32alpha,kmadd(cdphi3,kmul(JacPDstandardNth2alpha,ToReal(2)),kmul(cdphi2,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmul(gtu33,kmadd(cdphi3,kmul(JacPDstandardNth3alpha,ToReal(2)),JacPDstandardNth33alpha)))))))))))); + kmsub(alphaL,kadd(SQR(Atm11),kadd(SQR(Atm22),kadd(SQR(Atm33),kmadd(SQR(trKL),ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(2),kmul(kadd(rho,trS),kmul(ToReal(4),ToReal(Pi)))))))),kmul(em4phi,knmsub(JacPDstandardNth1alpha,Xtn1,knmsub(JacPDstandardNth2alpha,Xtn2,knmsub(JacPDstandardNth3alpha,Xtn3,kmadd(gtu11,kmadd(cdphi1,kmul(JacPDstandardNth1alpha,ToReal(2)),JacPDstandardNth11alpha),kmadd(gtu12,kadd(JacPDstandardNth12alpha,kadd(JacPDstandardNth21alpha,kmadd(cdphi2,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth2alpha,ToReal(2)))))),kmadd(gtu22,kmadd(cdphi2,kmul(JacPDstandardNth2alpha,ToReal(2)),JacPDstandardNth22alpha),kmadd(gtu13,kadd(JacPDstandardNth13alpha,kadd(JacPDstandardNth31alpha,kmadd(cdphi3,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmadd(gtu23,kadd(JacPDstandardNth23alpha,kadd(JacPDstandardNth32alpha,kmadd(cdphi3,kmul(JacPDstandardNth2alpha,ToReal(2)),kmul(cdphi2,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmul(gtu33,kmadd(cdphi3,kmul(JacPDstandardNth3alpha,ToReal(2)),JacPDstandardNth33alpha)))))))))))); CCTK_REAL_VEC trKrhsL = dottrK; CCTK_REAL_VEC alpharhsL = - kneg(kmul(kpow(alphaL,harmonicN),kmul(ToReal(harmonicF),kmadd(ksub(AL,trKL),ToReal(LapseACoeff),trKL)))); + kneg(kmul(kpow(alphaL,harmonicN),kmul(ToReal(harmonicF),kmadd(AL,ToReal(LapseACoeff),kmul(kmadd(kadd(alphaL,ToReal(-1)),ToReal(AlphaDriver),trKL),ksub(ToReal(1),ToReal(LapseACoeff))))))); CCTK_REAL_VEC ArhsL = kmul(knmsub(AL,ToReal(AlphaDriver),dottrK),ToReal(LapseACoeff)); CCTK_REAL_VEC eta = - kfmin(ToReal(1),kmul(INV(rL),ToReal(SpatialBetaDriverRadius))); + kmul(INV(kfmax(rL,ToReal(SpatialBetaDriverRadius))),ToReal(SpatialBetaDriverRadius)); CCTK_REAL_VEC theta = kfmin(ToReal(1),kexp(knmsub(rL,INV(ToReal(SpatialShiftGammaCoeffRadius)),ToReal(1)))); @@ -1345,27 +1347,24 @@ static void ML_BSSN_O2_RHS1_Body(cGH const * restrict const cctkGH, int const di if (harmonicShift) { beta1rhsL = - kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(kmadd(gtu11,JacPDstandardNth1alpha,kmadd(gtu12,JacPDstandardNth2alpha,kmul(gtu13,JacPDstandardNth3alpha))),kmul(phiL,ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth1gt11,SQR(gtu11),kmul(JacPDstandardNth1gt22,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu13,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu13,JacPDstandardNth1gt33,kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu22,JacPDstandardNth3gt22))))),kmadd(gtu11,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu12,JacPDstandardNth2gt11,kmadd(gtu13,JacPDstandardNth3gt11,kmadd(gtu23,kmul(JacPDstandardNth1gt23,ToReal(-2)),knmsub(gtu22,JacPDstandardNth1gt22,kmadd(kmadd(gtu12,JacPDstandardNth1gt12,kmadd(gtu13,JacPDstandardNth1gt13,kmul(gtu22,JacPDstandardNth2gt12))),ToReal(2),kmadd(gtu23,kmul(JacPDstandardNth2gt13,ToReal(2)),kmadd(gtu23,kmul(JacPDstandardNth3gt12,ToReal(2)),kmul(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33))))))))))),kmul(gtu12,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu23,kmul(JacPDstandardNth3gt22,ToReal(2)),kmadd(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33),kmul(gtu13,kmul(JacPDstandardNth1gt23,ToReal(4))))))))))))))))); + kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu11,JacPDstandardNth1alpha,kmadd(gtu12,JacPDstandardNth2alpha,kmul(gtu13,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth1gt11,SQR(gtu11),kmul(JacPDstandardNth1gt22,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu13,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu13,JacPDstandardNth1gt33,kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu22,JacPDstandardNth3gt22))))),kmadd(gtu11,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu12,JacPDstandardNth2gt11,kmadd(gtu13,JacPDstandardNth3gt11,kmadd(gtu23,kmul(JacPDstandardNth1gt23,ToReal(-2)),knmsub(gtu22,JacPDstandardNth1gt22,kmadd(kmadd(gtu12,JacPDstandardNth1gt12,kmadd(gtu13,JacPDstandardNth1gt13,kmul(gtu22,JacPDstandardNth2gt12))),ToReal(2),kmadd(gtu23,kmul(JacPDstandardNth2gt13,ToReal(2)),kmadd(gtu23,kmul(JacPDstandardNth3gt12,ToReal(2)),kmul(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33))))))))))),kmul(gtu12,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu23,kmul(JacPDstandardNth3gt22,ToReal(2)),kmadd(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33),kmul(gtu13,kmul(JacPDstandardNth1gt23,ToReal(4))))))))))))))))); beta2rhsL = - kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(kmadd(gtu12,JacPDstandardNth1alpha,kmadd(gtu22,JacPDstandardNth2alpha,kmul(gtu23,JacPDstandardNth3alpha))),kmul(phiL,ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth2gt22,SQR(gtu22),kmul(JacPDstandardNth2gt11,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu23,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt13,kmadd(gtu13,JacPDstandardNth1gt33,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu11,JacPDstandardNth3gt11))))),kmadd(gtu22,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu23,JacPDstandardNth3gt22,kmadd(kmadd(gtu23,JacPDstandardNth2gt23,kmul(gtu13,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth3gt12,JacPDstandardNth2gt13)))),ToReal(2),kmadd(gtu11,kmsub(JacPDstandardNth1gt12,ToReal(2),JacPDstandardNth2gt11),kmul(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33))))))),kmul(gtu12,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu13,kmul(JacPDstandardNth3gt11,ToReal(2)),kmadd(gtu22,kmadd(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth2gt13,ToReal(4)))))))))))))))))); + kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu12,JacPDstandardNth1alpha,kmadd(gtu22,JacPDstandardNth2alpha,kmul(gtu23,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth2gt22,SQR(gtu22),kmul(JacPDstandardNth2gt11,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu23,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt13,kmadd(gtu13,JacPDstandardNth1gt33,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu11,JacPDstandardNth3gt11))))),kmadd(gtu22,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu23,JacPDstandardNth3gt22,kmadd(kmadd(gtu23,JacPDstandardNth2gt23,kmul(gtu13,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth3gt12,JacPDstandardNth2gt13)))),ToReal(2),kmadd(gtu11,kmsub(JacPDstandardNth1gt12,ToReal(2),JacPDstandardNth2gt11),kmul(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33))))))),kmul(gtu12,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu13,kmul(JacPDstandardNth3gt11,ToReal(2)),kmadd(gtu22,kmadd(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth2gt13,ToReal(4)))))))))))))))))); beta3rhsL = - kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(kmadd(gtu13,JacPDstandardNth1alpha,kmadd(gtu23,JacPDstandardNth2alpha,kmul(gtu33,JacPDstandardNth3alpha))),kmul(phiL,ToReal(-2)),kmul(alphaL,kmadd(phiL,kmul(kmadd(JacPDstandardNth3gt11,SQR(gtu13),kmul(JacPDstandardNth3gt22,SQR(gtu23))),ToReal(2)),kmadd(gtu23,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu33,JacPDstandardNth2gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt12,kmadd(gtu12,JacPDstandardNth1gt22,kmul(gtu33,JacPDstandardNth3gt23))),ToReal(2),kmul(gtu11,JacPDstandardNth2gt11)))))),kmadd(gtu33,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,knmsub(gtu22,JacPDstandardNth3gt22,kmadd(kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu12,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth2gt13,JacPDstandardNth3gt12)))),ToReal(2),kmul(gtu11,kmsub(JacPDstandardNth1gt13,ToReal(2),JacPDstandardNth3gt11))))))),kmul(gtu13,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu12,kmul(JacPDstandardNth2gt11,ToReal(2)),kmadd(gtu22,kmsub(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmadd(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth3gt12,ToReal(4)))))))))))))))))); + kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu13,JacPDstandardNth1alpha,kmadd(gtu23,JacPDstandardNth2alpha,kmul(gtu33,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmul(kmadd(JacPDstandardNth3gt11,SQR(gtu13),kmul(JacPDstandardNth3gt22,SQR(gtu23))),ToReal(2)),kmadd(gtu23,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu33,JacPDstandardNth2gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt12,kmadd(gtu12,JacPDstandardNth1gt22,kmul(gtu33,JacPDstandardNth3gt23))),ToReal(2),kmul(gtu11,JacPDstandardNth2gt11)))))),kmadd(gtu33,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,knmsub(gtu22,JacPDstandardNth3gt22,kmadd(kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu12,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth2gt13,JacPDstandardNth3gt12)))),ToReal(2),kmul(gtu11,kmsub(JacPDstandardNth1gt13,ToReal(2),JacPDstandardNth3gt11))))))),kmul(gtu13,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu12,kmul(JacPDstandardNth2gt11,ToReal(2)),kmadd(gtu22,kmsub(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmadd(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth3gt12,ToReal(4)))))))))))))))))); } else { beta1rhsL = - kmul(theta,kmul(kadd(Xt1L,kmadd(beta1L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B1L,Xt1L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt1L,kmadd(ksub(B1L,Xt1L),ToReal(ShiftBCoeff),kmul(beta1L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); beta2rhsL = - kmul(theta,kmul(kadd(Xt2L,kmadd(beta2L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B2L,Xt2L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt2L,kmadd(ksub(B2L,Xt2L),ToReal(ShiftBCoeff),kmul(beta2L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); beta3rhsL = - kmul(theta,kmul(kadd(Xt3L,kmadd(beta3L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B3L,Xt3L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); } CCTK_REAL_VEC B1rhsL = @@ -1377,108 +1376,29 @@ static void ML_BSSN_O2_RHS1_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC B3rhsL = kmul(knmsub(B3L,kmul(eta,ToReal(BetaDriver)),dotXt3),ToReal(ShiftBCoeff)); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_O2_RHS1); + LC_ENDLOOP3VEC(ML_BSSN_O2_RHS1); } extern "C" void ML_BSSN_O2_RHS1(CCTK_ARGUMENTS) @@ -1497,7 +1417,26 @@ extern "C" void ML_BSSN_O2_RHS1(CCTK_ARGUMENTS) return; } - const char *groups[] = {"grid::coordinates","Grid::coordinates","ML_BSSN_O2::ML_curv","ML_BSSN_O2::ML_dtlapse","ML_BSSN_O2::ML_dtlapserhs","ML_BSSN_O2::ML_dtshift","ML_BSSN_O2::ML_dtshiftrhs","ML_BSSN_O2::ML_Gamma","ML_BSSN_O2::ML_Gammarhs","ML_BSSN_O2::ML_lapse","ML_BSSN_O2::ML_lapserhs","ML_BSSN_O2::ML_log_confac","ML_BSSN_O2::ML_log_confacrhs","ML_BSSN_O2::ML_metric","ML_BSSN_O2::ML_metricrhs","ML_BSSN_O2::ML_shift","ML_BSSN_O2::ML_shiftrhs","ML_BSSN_O2::ML_trace_curv","ML_BSSN_O2::ML_trace_curvrhs"}; + const char *const groups[] = { + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_O2::ML_curv", + "ML_BSSN_O2::ML_dtlapse", + "ML_BSSN_O2::ML_dtlapserhs", + "ML_BSSN_O2::ML_dtshift", + "ML_BSSN_O2::ML_dtshiftrhs", + "ML_BSSN_O2::ML_Gamma", + "ML_BSSN_O2::ML_Gammarhs", + "ML_BSSN_O2::ML_lapse", + "ML_BSSN_O2::ML_lapserhs", + "ML_BSSN_O2::ML_log_confac", + "ML_BSSN_O2::ML_log_confacrhs", + "ML_BSSN_O2::ML_metric", + "ML_BSSN_O2::ML_metricrhs", + "ML_BSSN_O2::ML_shift", + "ML_BSSN_O2::ML_shiftrhs", + "ML_BSSN_O2::ML_trace_curv", + "ML_BSSN_O2::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O2_RHS1", 19, groups); switch(fdOrder) @@ -1519,7 +1458,7 @@ extern "C" void ML_BSSN_O2_RHS1(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_O2_RHS1_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_O2_RHS1_Body); if (verbose > 1) { diff --git a/ML_BSSN_O2/src/ML_BSSN_O2_RHS2.cc b/ML_BSSN_O2/src/ML_BSSN_O2_RHS2.cc index 5e2b239..d92ff3d 100644 --- a/ML_BSSN_O2/src/ML_BSSN_O2_RHS2.cc +++ b/ML_BSSN_O2/src/ML_BSSN_O2_RHS2.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -40,8 +41,6 @@ static void ML_BSSN_O2_RHS2_Body(cGH const * restrict const cctkGH, int const di DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -78,9 +77,9 @@ static void ML_BSSN_O2_RHS2_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -99,14 +98,14 @@ static void ML_BSSN_O2_RHS2_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -120,9 +119,9 @@ static void ML_BSSN_O2_RHS2_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -203,7 +202,7 @@ static void ML_BSSN_O2_RHS2_Body(cGH const * restrict const cctkGH, int const di /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O2_RHS2, + LC_LOOP3VEC(ML_BSSN_O2_RHS2, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1420,7 +1419,8 @@ static void ML_BSSN_O2_RHS2_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -1428,12 +1428,14 @@ static void ML_BSSN_O2_RHS2_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC Gtl111 = kmul(JacPDstandardNth1gt11,ToReal(0.5)); @@ -1676,16 +1678,16 @@ static void ML_BSSN_O2_RHS2_Body(cGH const * restrict const cctkGH, int const di kmul(ToReal(-2),kadd(cdphi211,kmadd(SQR(cdphi1),kmul(kmadd(gt11L,gtu11,ToReal(-1)),ToReal(2)),kmul(gt11L,kmadd(cdphi211,gtu11,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu33,SQR(cdphi3))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmul(kmadd(cdphi1,kmadd(cdphi2,gtu12,kmul(cdphi3,gtu13)),kmul(cdphi2,kmul(cdphi3,gtu23))),ToReal(4)))))))))); CCTK_REAL_VEC Rphi12 = - kmul(ToReal(-2),kadd(cdphi212,kmadd(gt12L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(cdphi3,kmul(gt12L,kmul(gtu13,ToReal(4))),kmul(cdphi2,kmadd(gt12L,kmul(gtu12,ToReal(4)),ToReal(-2)))))))); + kmul(ToReal(-2),kadd(cdphi212,kmadd(gt12L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(gt12L,kmul(cdphi3,kmul(gtu13,ToReal(4))),kmul(cdphi2,kmadd(gt12L,kmul(gtu12,ToReal(4)),ToReal(-2)))))))); CCTK_REAL_VEC Rphi13 = - kmul(ToReal(-2),kadd(cdphi213,kmadd(gt13L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(cdphi2,kmul(gt13L,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt13L,kmul(gtu13,ToReal(4)),ToReal(-2)))))))); + kmul(ToReal(-2),kadd(cdphi213,kmadd(gt13L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(gt13L,kmul(cdphi2,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt13L,kmul(gtu13,ToReal(4)),ToReal(-2)))))))); CCTK_REAL_VEC Rphi22 = kmul(ToReal(-2),kadd(cdphi222,kmadd(SQR(cdphi2),kmul(kmadd(gt22L,gtu22,ToReal(-1)),ToReal(2)),kmul(gt22L,kmadd(cdphi222,gtu22,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu33,SQR(cdphi3))))),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmul(kmadd(cdphi1,kmul(cdphi3,gtu13),kmul(cdphi2,kmadd(cdphi1,gtu12,kmul(cdphi3,gtu23)))),ToReal(4)))))))))); CCTK_REAL_VEC Rphi23 = - kmul(ToReal(-2),kadd(cdphi223,kmadd(gt23L,kmadd(cdphi222,gtu22,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu22,SQR(cdphi2))))),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi1,kmul(cdphi3,kmul(gtu13,ToReal(4)))))))),kmul(cdphi2,kmadd(cdphi1,kmul(gt23L,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt23L,kmul(gtu23,ToReal(4)),ToReal(-2)))))))); + kmul(ToReal(-2),kadd(cdphi223,kmadd(gt23L,kmadd(cdphi222,gtu22,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu22,SQR(cdphi2))))),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi1,kmul(cdphi3,kmul(gtu13,ToReal(4)))))))),kmul(cdphi2,kmadd(gt23L,kmul(cdphi1,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt23L,kmul(gtu23,ToReal(4)),ToReal(-2)))))))); CCTK_REAL_VEC Rphi33 = kmul(ToReal(-2),kadd(cdphi233,kmadd(SQR(cdphi3),kmul(kmadd(gt33L,gtu33,ToReal(-1)),ToReal(2)),kmul(gt33L,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi213,gtu13,kmul(cdphi223,gtu23)),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(cdphi3,kmul(kmadd(cdphi1,gtu13,kmul(cdphi2,gtu23)),ToReal(4)),kmul(gtu12,kmadd(cdphi212,ToReal(2),kmul(cdphi1,kmul(cdphi2,ToReal(4)))))))))))))); @@ -1722,17 +1724,17 @@ static void ML_BSSN_O2_RHS2_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC em4phi = INV(e4phi); - CCTK_REAL_VEC g11 = kmul(e4phi,gt11L); + CCTK_REAL_VEC g11 = kmul(gt11L,e4phi); - CCTK_REAL_VEC g12 = kmul(e4phi,gt12L); + CCTK_REAL_VEC g12 = kmul(gt12L,e4phi); - CCTK_REAL_VEC g13 = kmul(e4phi,gt13L); + CCTK_REAL_VEC g13 = kmul(gt13L,e4phi); - CCTK_REAL_VEC g22 = kmul(e4phi,gt22L); + CCTK_REAL_VEC g22 = kmul(gt22L,e4phi); - CCTK_REAL_VEC g23 = kmul(e4phi,gt23L); + CCTK_REAL_VEC g23 = kmul(gt23L,e4phi); - CCTK_REAL_VEC g33 = kmul(e4phi,gt33L); + CCTK_REAL_VEC g33 = kmul(gt33L,e4phi); CCTK_REAL_VEC gu11 = kmul(em4phi,gtu11); @@ -1783,73 +1785,33 @@ static void ML_BSSN_O2_RHS2_Body(cGH const * restrict const cctkGH, int const di kmadd(Ats11,gu11,kmadd(Ats22,gu22,kmadd(Ats33,gu33,kmul(kmadd(Ats12,gu12,kmadd(Ats13,gu13,kmul(Ats23,gu23))),ToReal(2))))); CCTK_REAL_VEC At11rhsL = - kmadd(em4phi,kmadd(g11,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),Ats11),kmadd(At11L,kmadd(kadd(JacPDstandardNth2beta2,JacPDstandardNth3beta3),ToReal(-0.6666666666666666666666666666666666666667),kmul(JacPDstandardNth1beta1,ToReal(1.333333333333333333333333333333333333333))),kmadd(kmadd(At12L,JacPDstandardNth1beta2,kmul(At13L,JacPDstandardNth1beta3)),ToReal(2.),kmul(alphaL,kmadd(kmadd(At12L,Atm21,kmul(At13L,Atm31)),ToReal(-2.),kmadd(At11L,kmadd(Atm11,ToReal(-2.),trKL),kmul(em4phi,kmadd(eTxxL,ToReal(-25.13274122871834590770114706623602307358),kmul(g11,kmul(trS,ToReal(8.377580409572781969233715688745341024526))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats11,ToReal(3),kmul(g11,trAts)),kmadd(At11L,kmadd(kadd(JacPDstandardNth2beta2,JacPDstandardNth3beta3),ToReal(-2),kmul(JacPDstandardNth1beta1,ToReal(4))),kmsub(kmadd(At12L,JacPDstandardNth1beta2,kmul(At13L,JacPDstandardNth1beta3)),ToReal(6),kmul(alphaL,kmadd(kmadd(At12L,Atm21,kmul(At13L,Atm31)),ToReal(6),kmadd(At11L,kmadd(trKL,ToReal(-3),kmul(Atm11,ToReal(6))),kmul(em4phi,kmul(kmadd(g11,kmul(trS,ToReal(-8)),kmul(eTxxL,ToReal(24))),ToReal(Pi)))))))))); CCTK_REAL_VEC At12rhsL = - kmadd(At22L,JacPDstandardNth1beta2,kmadd(At23L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth2beta1,kmadd(At13L,JacPDstandardNth2beta3,kmadd(em4phi,kmadd(g12,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),Ats12),kmadd(At12L,kmadd(JacPDstandardNth3beta3,ToReal(-0.6666666666666666666666666666666666666667),kmul(kadd(JacPDstandardNth1beta1,JacPDstandardNth2beta2),ToReal(0.3333333333333333333333333333333333333333))),kmul(alphaL,kmadd(At12L,trKL,kmadd(kmadd(At11L,Atm12,kmadd(At12L,Atm22,kmul(At13L,Atm32))),ToReal(-2.),kmul(em4phi,kmadd(eTxyL,ToReal(-25.13274122871834590770114706623602307358),kmul(g12,kmul(trS,ToReal(8.377580409572781969233715688745341024526)))))))))))))); + kmadd(ToReal(0.333333333333333333333333333333),kmadd(At12L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth3beta3,ToReal(-2),JacPDstandardNth2beta2)),kmsub(kmadd(Ats12,em4phi,kmadd(At22L,JacPDstandardNth1beta2,kmadd(At23L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth2beta1,kmul(At13L,JacPDstandardNth2beta3))))),ToReal(3),kmul(em4phi,kmul(g12,trAts)))),kmul(alphaL,kmadd(kmadd(At11L,Atm12,kmul(At13L,Atm32)),ToReal(-2),kmadd(At12L,kmadd(Atm22,ToReal(-2),trKL),kmul(em4phi,kmadd(g12,kmul(trS,ToReal(8.37758040957278196923371568875)),kmul(eTxyL,kmul(ToReal(-8),ToReal(Pi))))))))); CCTK_REAL_VEC At13rhsL = - kmadd(At23L,JacPDstandardNth1beta2,kmadd(At33L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth3beta1,kmadd(At12L,JacPDstandardNth3beta2,kmadd(em4phi,kmadd(g13,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),Ats13),kmadd(At13L,kmadd(JacPDstandardNth2beta2,ToReal(-0.6666666666666666666666666666666666666667),kmul(kadd(JacPDstandardNth1beta1,JacPDstandardNth3beta3),ToReal(0.3333333333333333333333333333333333333333))),kmul(alphaL,kmadd(At13L,trKL,kmadd(kmadd(At11L,Atm13,kmadd(At12L,Atm23,kmul(At13L,Atm33))),ToReal(-2.),kmul(em4phi,kmadd(eTxzL,ToReal(-25.13274122871834590770114706623602307358),kmul(g13,kmul(trS,ToReal(8.377580409572781969233715688745341024526)))))))))))))); + kmadd(ToReal(0.333333333333333333333333333333),kmadd(At13L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth2beta2,ToReal(-2),JacPDstandardNth3beta3)),kmsub(kmadd(Ats13,em4phi,kmadd(At23L,JacPDstandardNth1beta2,kmadd(At33L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth3beta1,kmul(At12L,JacPDstandardNth3beta2))))),ToReal(3),kmul(em4phi,kmul(g13,trAts)))),kmul(alphaL,kmadd(kmadd(At11L,Atm13,kmul(At12L,Atm23)),ToReal(-2),kmadd(At13L,kmadd(Atm33,ToReal(-2),trKL),kmul(em4phi,kmadd(g13,kmul(trS,ToReal(8.37758040957278196923371568875)),kmul(eTxzL,kmul(ToReal(-8),ToReal(Pi))))))))); CCTK_REAL_VEC At22rhsL = - kmadd(em4phi,kmadd(g22,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),Ats22),kmadd(At22L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth3beta3),ToReal(-0.6666666666666666666666666666666666666667),kmul(JacPDstandardNth2beta2,ToReal(1.333333333333333333333333333333333333333))),kmadd(kmadd(At12L,JacPDstandardNth2beta1,kmul(At23L,JacPDstandardNth2beta3)),ToReal(2.),kmul(alphaL,kmadd(At22L,trKL,kmadd(kmadd(At12L,Atm12,kmadd(At22L,Atm22,kmul(At23L,Atm32))),ToReal(-2.),kmul(em4phi,kmadd(eTyyL,ToReal(-25.13274122871834590770114706623602307358),kmul(g22,kmul(trS,ToReal(8.377580409572781969233715688745341024526))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats22,ToReal(3),kmul(g22,trAts)),kmadd(At22L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth3beta3),ToReal(-2),kmul(JacPDstandardNth2beta2,ToReal(4))),kmsub(kmadd(At12L,JacPDstandardNth2beta1,kmul(At23L,JacPDstandardNth2beta3)),ToReal(6),kmul(alphaL,kmadd(kmadd(At12L,Atm12,kmul(At23L,Atm32)),ToReal(6),kmadd(At22L,kmadd(trKL,ToReal(-3),kmul(Atm22,ToReal(6))),kmul(em4phi,kmul(kmadd(g22,kmul(trS,ToReal(-8)),kmul(eTyyL,ToReal(24))),ToReal(Pi)))))))))); CCTK_REAL_VEC At23rhsL = - kmadd(At13L,JacPDstandardNth2beta1,kmadd(At33L,JacPDstandardNth2beta3,kmadd(At12L,JacPDstandardNth3beta1,kmadd(At22L,JacPDstandardNth3beta2,kmadd(em4phi,kmadd(g23,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),Ats23),kmadd(At23L,kmadd(JacPDstandardNth1beta1,ToReal(-0.6666666666666666666666666666666666666667),kmul(kadd(JacPDstandardNth2beta2,JacPDstandardNth3beta3),ToReal(0.3333333333333333333333333333333333333333))),kmul(alphaL,kmadd(At23L,trKL,kmadd(kmadd(At12L,Atm13,kmadd(At22L,Atm23,kmul(At23L,Atm33))),ToReal(-2.),kmul(em4phi,kmadd(eTyzL,ToReal(-25.13274122871834590770114706623602307358),kmul(g23,kmul(trS,ToReal(8.377580409572781969233715688745341024526)))))))))))))); + kmadd(ToReal(0.333333333333333333333333333333),kmadd(At23L,kadd(JacPDstandardNth2beta2,kmadd(JacPDstandardNth1beta1,ToReal(-2),JacPDstandardNth3beta3)),kmsub(kmadd(Ats23,em4phi,kmadd(At13L,JacPDstandardNth2beta1,kmadd(At33L,JacPDstandardNth2beta3,kmadd(At12L,JacPDstandardNth3beta1,kmul(At22L,JacPDstandardNth3beta2))))),ToReal(3),kmul(em4phi,kmul(g23,trAts)))),kmul(alphaL,kmadd(kmadd(At12L,Atm13,kmul(At22L,Atm23)),ToReal(-2),kmadd(At23L,kmadd(Atm33,ToReal(-2),trKL),kmul(em4phi,kmadd(g23,kmul(trS,ToReal(8.37758040957278196923371568875)),kmul(eTyzL,kmul(ToReal(-8),ToReal(Pi))))))))); CCTK_REAL_VEC At33rhsL = - kmadd(em4phi,kmadd(g33,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),Ats33),kmadd(At33L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth2beta2),ToReal(-0.6666666666666666666666666666666666666667),kmul(JacPDstandardNth3beta3,ToReal(1.333333333333333333333333333333333333333))),kmadd(kmadd(At13L,JacPDstandardNth3beta1,kmul(At23L,JacPDstandardNth3beta2)),ToReal(2.),kmul(alphaL,kmadd(At33L,trKL,kmadd(kmadd(At13L,Atm13,kmadd(At23L,Atm23,kmul(At33L,Atm33))),ToReal(-2.),kmul(em4phi,kmadd(eTzzL,ToReal(-25.13274122871834590770114706623602307358),kmul(g33,kmul(trS,ToReal(8.377580409572781969233715688745341024526))))))))))); - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - break; - } - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); + kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats33,ToReal(3),kmul(g33,trAts)),kmadd(At33L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth2beta2),ToReal(-2),kmul(JacPDstandardNth3beta3,ToReal(4))),kmsub(kmadd(At13L,JacPDstandardNth3beta1,kmul(At23L,JacPDstandardNth3beta2)),ToReal(6),kmul(alphaL,kmadd(kmadd(At13L,Atm13,kmul(At23L,Atm23)),ToReal(6),kmadd(At33L,kmadd(trKL,ToReal(-3),kmul(Atm33,ToReal(6))),kmul(em4phi,kmul(kmadd(g33,kmul(trS,ToReal(-8)),kmul(eTzzL,ToReal(24))),ToReal(Pi)))))))))); + + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_O2_RHS2); + LC_ENDLOOP3VEC(ML_BSSN_O2_RHS2); } extern "C" void ML_BSSN_O2_RHS2(CCTK_ARGUMENTS) @@ -1868,7 +1830,15 @@ extern "C" void ML_BSSN_O2_RHS2(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O2::ML_curv","ML_BSSN_O2::ML_curvrhs","ML_BSSN_O2::ML_Gamma","ML_BSSN_O2::ML_lapse","ML_BSSN_O2::ML_log_confac","ML_BSSN_O2::ML_metric","ML_BSSN_O2::ML_shift","ML_BSSN_O2::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_O2::ML_curv", + "ML_BSSN_O2::ML_curvrhs", + "ML_BSSN_O2::ML_Gamma", + "ML_BSSN_O2::ML_lapse", + "ML_BSSN_O2::ML_log_confac", + "ML_BSSN_O2::ML_metric", + "ML_BSSN_O2::ML_shift", + "ML_BSSN_O2::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O2_RHS2", 8, groups); switch(fdOrder) @@ -1890,7 +1860,7 @@ extern "C" void ML_BSSN_O2_RHS2(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_O2_RHS2_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_O2_RHS2_Body); if (verbose > 1) { diff --git a/ML_BSSN_O2/src/ML_BSSN_O2_RHSStaticBoundary.cc b/ML_BSSN_O2/src/ML_BSSN_O2_RHSStaticBoundary.cc index ff0f1ff..49724d8 100644 --- a/ML_BSSN_O2/src/ML_BSSN_O2_RHSStaticBoundary.cc +++ b/ML_BSSN_O2/src/ML_BSSN_O2_RHSStaticBoundary.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -64,8 +65,6 @@ static void ML_BSSN_O2_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -102,9 +101,9 @@ static void ML_BSSN_O2_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -123,14 +122,14 @@ static void ML_BSSN_O2_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -144,9 +143,9 @@ static void ML_BSSN_O2_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -227,7 +226,7 @@ static void ML_BSSN_O2_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O2_RHSStaticBoundary, + LC_LOOP3VEC(ML_BSSN_O2_RHSStaticBoundary, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -309,132 +308,35 @@ static void ML_BSSN_O2_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC B3rhsL = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_O2_RHSStaticBoundary); + LC_ENDLOOP3VEC(ML_BSSN_O2_RHSStaticBoundary); } extern "C" void ML_BSSN_O2_RHSStaticBoundary(CCTK_ARGUMENTS) @@ -453,7 +355,16 @@ extern "C" void ML_BSSN_O2_RHSStaticBoundary(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O2::ML_curvrhs","ML_BSSN_O2::ML_dtlapserhs","ML_BSSN_O2::ML_dtshiftrhs","ML_BSSN_O2::ML_Gammarhs","ML_BSSN_O2::ML_lapserhs","ML_BSSN_O2::ML_log_confacrhs","ML_BSSN_O2::ML_metricrhs","ML_BSSN_O2::ML_shiftrhs","ML_BSSN_O2::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN_O2::ML_curvrhs", + "ML_BSSN_O2::ML_dtlapserhs", + "ML_BSSN_O2::ML_dtshiftrhs", + "ML_BSSN_O2::ML_Gammarhs", + "ML_BSSN_O2::ML_lapserhs", + "ML_BSSN_O2::ML_log_confacrhs", + "ML_BSSN_O2::ML_metricrhs", + "ML_BSSN_O2::ML_shiftrhs", + "ML_BSSN_O2::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O2_RHSStaticBoundary", 9, groups); switch(fdOrder) @@ -471,7 +382,7 @@ extern "C" void ML_BSSN_O2_RHSStaticBoundary(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverBoundary(cctkGH, &ML_BSSN_O2_RHSStaticBoundary_Body); + GenericFD_LoopOverBoundary(cctkGH, ML_BSSN_O2_RHSStaticBoundary_Body); if (verbose > 1) { diff --git a/ML_BSSN_O2/src/ML_BSSN_O2_boundary.cc b/ML_BSSN_O2/src/ML_BSSN_O2_boundary.cc index 18f666f..a985413 100644 --- a/ML_BSSN_O2/src/ML_BSSN_O2_boundary.cc +++ b/ML_BSSN_O2/src/ML_BSSN_O2_boundary.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -64,8 +65,6 @@ static void ML_BSSN_O2_boundary_Body(cGH const * restrict const cctkGH, int cons DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -102,9 +101,9 @@ static void ML_BSSN_O2_boundary_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -123,14 +122,14 @@ static void ML_BSSN_O2_boundary_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -144,9 +143,9 @@ static void ML_BSSN_O2_boundary_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -227,7 +226,7 @@ static void ML_BSSN_O2_boundary_Body(cGH const * restrict const cctkGH, int cons /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O2_boundary, + LC_LOOP3VEC(ML_BSSN_O2_boundary, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -309,132 +308,35 @@ static void ML_BSSN_O2_boundary_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC B3L = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(beta1[index],beta1L,elt_count); - vec_store_nta_partial_hi(beta2[index],beta2L,elt_count); - vec_store_nta_partial_hi(beta3[index],beta3L,elt_count); - vec_store_nta_partial_hi(gt11[index],gt11L,elt_count); - vec_store_nta_partial_hi(gt12[index],gt12L,elt_count); - vec_store_nta_partial_hi(gt13[index],gt13L,elt_count); - vec_store_nta_partial_hi(gt22[index],gt22L,elt_count); - vec_store_nta_partial_hi(gt23[index],gt23L,elt_count); - vec_store_nta_partial_hi(gt33[index],gt33L,elt_count); - vec_store_nta_partial_hi(phi[index],phiL,elt_count); - vec_store_nta_partial_hi(trK[index],trKL,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(beta1[index],beta1L,elt_count); - vec_store_nta_partial_lo(beta2[index],beta2L,elt_count); - vec_store_nta_partial_lo(beta3[index],beta3L,elt_count); - vec_store_nta_partial_lo(gt11[index],gt11L,elt_count); - vec_store_nta_partial_lo(gt12[index],gt12L,elt_count); - vec_store_nta_partial_lo(gt13[index],gt13L,elt_count); - vec_store_nta_partial_lo(gt22[index],gt22L,elt_count); - vec_store_nta_partial_lo(gt23[index],gt23L,elt_count); - vec_store_nta_partial_lo(gt33[index],gt33L,elt_count); - vec_store_nta_partial_lo(phi[index],phiL,elt_count); - vec_store_nta_partial_lo(trK[index],trKL,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(beta1[index],beta1L); - vec_store_nta(beta2[index],beta2L); - vec_store_nta(beta3[index],beta3L); - vec_store_nta(gt11[index],gt11L); - vec_store_nta(gt12[index],gt12L); - vec_store_nta(gt13[index],gt13L); - vec_store_nta(gt22[index],gt22L); - vec_store_nta(gt23[index],gt23L); - vec_store_nta(gt33[index],gt33L); - vec_store_nta(phi[index],phiL); - vec_store_nta(trK[index],trKL); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(beta1[index],beta1L); + vec_store_nta_partial(beta2[index],beta2L); + vec_store_nta_partial(beta3[index],beta3L); + vec_store_nta_partial(gt11[index],gt11L); + vec_store_nta_partial(gt12[index],gt12L); + vec_store_nta_partial(gt13[index],gt13L); + vec_store_nta_partial(gt22[index],gt22L); + vec_store_nta_partial(gt23[index],gt23L); + vec_store_nta_partial(gt33[index],gt33L); + vec_store_nta_partial(phi[index],phiL); + vec_store_nta_partial(trK[index],trKL); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_O2_boundary); + LC_ENDLOOP3VEC(ML_BSSN_O2_boundary); } extern "C" void ML_BSSN_O2_boundary(CCTK_ARGUMENTS) @@ -453,7 +355,16 @@ extern "C" void ML_BSSN_O2_boundary(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O2::ML_curv","ML_BSSN_O2::ML_dtlapse","ML_BSSN_O2::ML_dtshift","ML_BSSN_O2::ML_Gamma","ML_BSSN_O2::ML_lapse","ML_BSSN_O2::ML_log_confac","ML_BSSN_O2::ML_metric","ML_BSSN_O2::ML_shift","ML_BSSN_O2::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_O2::ML_curv", + "ML_BSSN_O2::ML_dtlapse", + "ML_BSSN_O2::ML_dtshift", + "ML_BSSN_O2::ML_Gamma", + "ML_BSSN_O2::ML_lapse", + "ML_BSSN_O2::ML_log_confac", + "ML_BSSN_O2::ML_metric", + "ML_BSSN_O2::ML_shift", + "ML_BSSN_O2::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O2_boundary", 9, groups); switch(fdOrder) @@ -471,7 +382,7 @@ extern "C" void ML_BSSN_O2_boundary(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverBoundaryWithGhosts(cctkGH, &ML_BSSN_O2_boundary_Body); + GenericFD_LoopOverBoundaryWithGhosts(cctkGH, ML_BSSN_O2_boundary_Body); if (verbose > 1) { diff --git a/ML_BSSN_O2/src/ML_BSSN_O2_constraints1.cc b/ML_BSSN_O2/src/ML_BSSN_O2_constraints1.cc index 0c54cb0..20233ae 100644 --- a/ML_BSSN_O2/src/ML_BSSN_O2_constraints1.cc +++ b/ML_BSSN_O2/src/ML_BSSN_O2_constraints1.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -40,8 +41,6 @@ static void ML_BSSN_O2_constraints1_Body(cGH const * restrict const cctkGH, int DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -78,9 +77,9 @@ static void ML_BSSN_O2_constraints1_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -99,14 +98,14 @@ static void ML_BSSN_O2_constraints1_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -120,9 +119,9 @@ static void ML_BSSN_O2_constraints1_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -203,7 +202,7 @@ static void ML_BSSN_O2_constraints1_Body(cGH const * restrict const cctkGH, int /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O2_constraints1, + LC_LOOP3VEC(ML_BSSN_O2_constraints1, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1224,7 +1223,8 @@ static void ML_BSSN_O2_constraints1_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -1232,12 +1232,14 @@ static void ML_BSSN_O2_constraints1_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC Gtl111 = kmul(JacPDstandardNth1gt11,ToReal(0.5)); @@ -1480,16 +1482,16 @@ static void ML_BSSN_O2_constraints1_Body(cGH const * restrict const cctkGH, int kmul(ToReal(-2),kadd(cdphi211,kmadd(SQR(cdphi1),kmul(kmadd(gt11L,gtu11,ToReal(-1)),ToReal(2)),kmul(gt11L,kmadd(cdphi211,gtu11,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu33,SQR(cdphi3))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmul(kmadd(cdphi1,kmadd(cdphi2,gtu12,kmul(cdphi3,gtu13)),kmul(cdphi2,kmul(cdphi3,gtu23))),ToReal(4)))))))))); CCTK_REAL_VEC Rphi12 = - kmul(ToReal(-2),kadd(cdphi212,kmadd(gt12L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(cdphi3,kmul(gt12L,kmul(gtu13,ToReal(4))),kmul(cdphi2,kmadd(gt12L,kmul(gtu12,ToReal(4)),ToReal(-2)))))))); + kmul(ToReal(-2),kadd(cdphi212,kmadd(gt12L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(gt12L,kmul(cdphi3,kmul(gtu13,ToReal(4))),kmul(cdphi2,kmadd(gt12L,kmul(gtu12,ToReal(4)),ToReal(-2)))))))); CCTK_REAL_VEC Rphi13 = - kmul(ToReal(-2),kadd(cdphi213,kmadd(gt13L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(cdphi2,kmul(gt13L,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt13L,kmul(gtu13,ToReal(4)),ToReal(-2)))))))); + kmul(ToReal(-2),kadd(cdphi213,kmadd(gt13L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(gt13L,kmul(cdphi2,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt13L,kmul(gtu13,ToReal(4)),ToReal(-2)))))))); CCTK_REAL_VEC Rphi22 = kmul(ToReal(-2),kadd(cdphi222,kmadd(SQR(cdphi2),kmul(kmadd(gt22L,gtu22,ToReal(-1)),ToReal(2)),kmul(gt22L,kmadd(cdphi222,gtu22,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu33,SQR(cdphi3))))),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmul(kmadd(cdphi1,kmul(cdphi3,gtu13),kmul(cdphi2,kmadd(cdphi1,gtu12,kmul(cdphi3,gtu23)))),ToReal(4)))))))))); CCTK_REAL_VEC Rphi23 = - kmul(ToReal(-2),kadd(cdphi223,kmadd(gt23L,kmadd(cdphi222,gtu22,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu22,SQR(cdphi2))))),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi1,kmul(cdphi3,kmul(gtu13,ToReal(4)))))))),kmul(cdphi2,kmadd(cdphi1,kmul(gt23L,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt23L,kmul(gtu23,ToReal(4)),ToReal(-2)))))))); + kmul(ToReal(-2),kadd(cdphi223,kmadd(gt23L,kmadd(cdphi222,gtu22,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu22,SQR(cdphi2))))),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi1,kmul(cdphi3,kmul(gtu13,ToReal(4)))))))),kmul(cdphi2,kmadd(gt23L,kmul(cdphi1,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt23L,kmul(gtu23,ToReal(4)),ToReal(-2)))))))); CCTK_REAL_VEC Rphi33 = kmul(ToReal(-2),kadd(cdphi233,kmadd(SQR(cdphi3),kmul(kmadd(gt33L,gtu33,ToReal(-1)),ToReal(2)),kmul(gt33L,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi213,gtu13,kmul(cdphi223,gtu23)),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(cdphi3,kmul(kmadd(cdphi1,gtu13,kmul(cdphi2,gtu23)),ToReal(4)),kmul(gtu12,kmadd(cdphi212,ToReal(2),kmul(cdphi1,kmul(cdphi2,ToReal(4)))))))))))))); @@ -1557,38 +1559,13 @@ static void ML_BSSN_O2_constraints1_Body(cGH const * restrict const cctkGH, int kmul(INV(SQR(alphaL)),kadd(eTttL,kmadd(eTxxL,SQR(beta1L),kmadd(eTyyL,SQR(beta2L),kmadd(eTzzL,SQR(beta3L),kmadd(kmadd(beta2L,eTtyL,kmul(beta3L,eTtzL)),ToReal(-2),kmul(kmadd(beta2L,kmul(beta3L,eTyzL),kmul(beta1L,kmadd(beta2L,eTxyL,kmsub(beta3L,eTxzL,eTtxL)))),ToReal(2)))))))); CCTK_REAL_VEC HL = - kadd(trR,kmadd(rho,ToReal(-50.26548245743669181540229413247204614715),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(-2.),kmadd(kadd(SQR(Atm11),kadd(SQR(Atm22),SQR(Atm33))),ToReal(-1.),kmul(SQR(trKL),ToReal(0.6666666666666666666666666666666666666667)))))); - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(H[index],HL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ + kadd(trR,kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(-2),ksub(ksub(ksub(kmadd(SQR(trKL),ToReal(0.666666666666666666666666666667),kmul(rho,kmul(ToReal(-16),ToReal(Pi)))),SQR(Atm33)),SQR(Atm22)),SQR(Atm11)))); - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(H[index],HL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(H[index],HL,elt_count); - break; - } - vec_store_nta(H[index],HL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(H[index],HL); } - LC_ENDLOOP3VEC (ML_BSSN_O2_constraints1); + LC_ENDLOOP3VEC(ML_BSSN_O2_constraints1); } extern "C" void ML_BSSN_O2_constraints1(CCTK_ARGUMENTS) @@ -1607,7 +1584,15 @@ extern "C" void ML_BSSN_O2_constraints1(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O2::ML_curv","ML_BSSN_O2::ML_Gamma","ML_BSSN_O2::ML_Ham","ML_BSSN_O2::ML_lapse","ML_BSSN_O2::ML_log_confac","ML_BSSN_O2::ML_metric","ML_BSSN_O2::ML_shift","ML_BSSN_O2::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_O2::ML_curv", + "ML_BSSN_O2::ML_Gamma", + "ML_BSSN_O2::ML_Ham", + "ML_BSSN_O2::ML_lapse", + "ML_BSSN_O2::ML_log_confac", + "ML_BSSN_O2::ML_metric", + "ML_BSSN_O2::ML_shift", + "ML_BSSN_O2::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O2_constraints1", 8, groups); switch(fdOrder) @@ -1629,7 +1614,7 @@ extern "C" void ML_BSSN_O2_constraints1(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_O2_constraints1_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_O2_constraints1_Body); if (verbose > 1) { diff --git a/ML_BSSN_O2/src/ML_BSSN_O2_constraints2.cc b/ML_BSSN_O2/src/ML_BSSN_O2_constraints2.cc index b17e55a..e180c4c 100644 --- a/ML_BSSN_O2/src/ML_BSSN_O2_constraints2.cc +++ b/ML_BSSN_O2/src/ML_BSSN_O2_constraints2.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -49,8 +50,6 @@ static void ML_BSSN_O2_constraints2_Body(cGH const * restrict const cctkGH, int DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -87,9 +86,9 @@ static void ML_BSSN_O2_constraints2_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -108,14 +107,14 @@ static void ML_BSSN_O2_constraints2_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -129,9 +128,9 @@ static void ML_BSSN_O2_constraints2_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -212,7 +211,7 @@ static void ML_BSSN_O2_constraints2_Body(cGH const * restrict const cctkGH, int /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O2_constraints2, + LC_LOOP3VEC(ML_BSSN_O2_constraints2, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -775,7 +774,8 @@ static void ML_BSSN_O2_constraints2_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -783,12 +783,14 @@ static void ML_BSSN_O2_constraints2_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC Gtl111 = kmul(JacPDstandardNth1gt11,ToReal(0.5)); @@ -908,13 +910,13 @@ static void ML_BSSN_O2_constraints2_Body(cGH const * restrict const cctkGH, int kmul(INV(alphaL),kmadd(beta1L,eTxzL,kmadd(beta2L,eTyzL,kmsub(beta3L,eTzzL,eTtzL)))); CCTK_REAL_VEC M1L = - kmadd(gtu11,JacPDstandardNth1At11,kmadd(gtu22,JacPDstandardNth2At12,kmadd(gtu23,kadd(JacPDstandardNth2At13,JacPDstandardNth3At12),kmadd(gtu33,JacPDstandardNth3At13,kmadd(S1,ToReal(-25.13274122871834590770114706623602307358),kmadd(kmadd(kmadd(At22L,Gt212,kmul(At23L,Gt312)),gtu22,kmadd(kmadd(At13L,Gt112,kmadd(At22L,Gt213,kmadd(At33L,Gt312,kmul(At23L,kadd(Gt212,Gt313))))),gtu23,kmul(kmadd(At13L,Gt113,kmadd(At23L,Gt213,kmul(At33L,Gt313))),gtu33))),ToReal(-1.),kmadd(gtu12,kadd(JacPDstandardNth1At12,kadd(JacPDstandardNth2At11,kmadd(At13L,kmul(Gt312,ToReal(-3.)),kmul(At22L,kmul(Gt211,ToReal(-1.)))))),kmadd(gtu13,kadd(JacPDstandardNth1At13,kadd(JacPDstandardNth3At11,kmadd(At13L,kmul(Gt313,ToReal(-3.)),kmul(At23L,kmul(Gt211,ToReal(-1.)))))),kmadd(Gt311,kmadd(At13L,kmul(gtu11,ToReal(-2.)),kmul(kmadd(At23L,gtu12,kmul(At33L,gtu13)),ToReal(-1.))),kmadd(JacPDstandardNth1trK,ToReal(-0.6666666666666666666666666666666666666667),kmadd(At13L,kmadd(kmadd(Gt322,gtu22,kmul(Gt333,gtu33)),ToReal(-1.),kmadd(cdphi3,kmul(gtu33,ToReal(6.)),kmadd(gtu13,kmadd(Gt111,ToReal(-1.),kmul(cdphi1,ToReal(6.))),kmul(gtu23,kmadd(Gt323,ToReal(-2.),kmul(cdphi2,ToReal(6.))))))),kmadd(At11L,kmadd(Gt123,kmul(gtu23,ToReal(-2.)),kmadd(kmadd(Gt122,gtu22,kmul(Gt133,gtu33)),ToReal(-1.),kmadd(gtu11,kmadd(Gt111,ToReal(-2.),kmul(cdphi1,ToReal(6.))),kmadd(gtu12,kmadd(Gt112,ToReal(-3.),kmul(cdphi2,ToReal(6.))),kmul(gtu13,kmadd(Gt113,ToReal(-3.),kmul(cdphi3,ToReal(6.)))))))),kmul(At12L,kmadd(Gt213,kmul(gtu13,ToReal(-3.)),kmadd(kmadd(Gt211,gtu11,kmul(Gt223,gtu23)),ToReal(-2.),kmadd(Gt233,kmul(gtu33,ToReal(-1.)),kmadd(gtu12,kmadd(Gt212,ToReal(-3.),kmadd(Gt111,ToReal(-1.),kmul(cdphi1,ToReal(6.)))),kmadd(gtu22,kmadd(kadd(Gt112,Gt222),ToReal(-1.),kmul(cdphi2,ToReal(6.))),kmul(gtu23,kmadd(Gt113,ToReal(-1.),kmul(cdphi3,ToReal(6.)))))))))))))))))))))); + kmadd(gtu11,JacPDstandardNth1At11,kmadd(gtu12,JacPDstandardNth2At11,kmadd(gtu22,JacPDstandardNth2At12,kmadd(gtu23,JacPDstandardNth2At13,kmadd(gtu13,JacPDstandardNth3At11,kmadd(gtu23,JacPDstandardNth3At12,kmadd(gtu33,JacPDstandardNth3At13,kmadd(gtu12,kadd(JacPDstandardNth1At12,kmsub(At13L,kmul(Gt312,ToReal(-3)),kmul(At22L,Gt211))),kmadd(gtu13,kadd(JacPDstandardNth1At13,kmsub(At13L,kmul(Gt313,ToReal(-3)),kmul(At23L,Gt211))),knmsub(gtu22,kmadd(At23L,Gt312,kmul(At22L,Gt212)),kmadd(Gt311,kmsub(At13L,kmul(gtu11,ToReal(-2)),kmadd(At33L,gtu13,kmul(At23L,gtu12))),kmadd(At13L,kmsub(Gt323,kmul(gtu23,ToReal(-2)),kmul(Gt113,gtu33)),kmadd(JacPDstandardNth1trK,ToReal(-0.666666666666666666666666666667),knmsub(At12L,kmadd(Gt111,gtu12,kmadd(Gt112,gtu22,kmadd(Gt222,gtu22,kmadd(Gt113,gtu23,kmadd(Gt233,gtu33,kmadd(cdphi1,kmul(gtu12,ToReal(-6)),kmadd(cdphi2,kmul(gtu22,ToReal(-6)),kmadd(cdphi3,kmul(gtu23,ToReal(-6)),kmadd(Gt211,kmul(gtu11,ToReal(2)),kmadd(Gt223,kmul(gtu23,ToReal(2)),kmadd(Gt212,kmul(gtu12,ToReal(3)),kmul(Gt213,kmul(gtu13,ToReal(3)))))))))))))),kmadd(gtu23,knmsub(At22L,Gt213,knmsub(At33L,Gt312,kmsub(At13L,kmul(cdphi2,ToReal(6)),kmul(At23L,kadd(Gt313,Gt212))))),kmadd(gtu33,knmsub(At23L,Gt213,kmsub(At13L,kmul(cdphi3,ToReal(6)),kmul(At33L,Gt313))),kmadd(At11L,kmadd(Gt112,kmul(gtu12,ToReal(-3)),kmadd(Gt113,kmul(gtu13,ToReal(-3)),kmadd(Gt111,kmul(gtu11,ToReal(-2)),kmadd(Gt123,kmul(gtu23,ToReal(-2)),knmsub(Gt122,gtu22,knmsub(Gt133,gtu33,kmadd(cdphi1,kmul(gtu11,ToReal(6)),kmadd(cdphi2,kmul(gtu12,ToReal(6)),kmul(cdphi3,kmul(gtu13,ToReal(6))))))))))),kmadd(At13L,knmsub(Gt322,gtu22,knmsub(Gt112,gtu23,kmsub(gtu13,kmsub(cdphi1,ToReal(6),Gt111),kmul(Gt333,gtu33)))),kmul(S1,kmul(ToReal(-8),ToReal(Pi))))))))))))))))))))); CCTK_REAL_VEC M2L = - kmadd(gtu11,JacPDstandardNth1At12,kmadd(gtu12,kadd(JacPDstandardNth1At22,JacPDstandardNth2At12),kmadd(gtu22,JacPDstandardNth2At22,kmadd(gtu33,JacPDstandardNth3At23,kmadd(S2,ToReal(-25.13274122871834590770114706623602307358),kmadd(kmadd(kmadd(At22L,Gt211,kmadd(At23L,Gt311,kmul(At13L,Gt312))),gtu11,kmadd(kmadd(At23L,Gt212,kmul(At33L,Gt312)),gtu13,kmadd(At11L,kmadd(Gt112,gtu11,kmadd(Gt122,gtu12,kmul(Gt123,gtu13))),kmadd(kmadd(At23L,Gt223,kmul(At33L,Gt323)),gtu33,kmul(At13L,kmadd(Gt322,gtu12,kmadd(Gt112,gtu13,kmadd(Gt122,gtu23,kmul(Gt123,gtu33))))))))),ToReal(-1.),kmadd(gtu23,kadd(JacPDstandardNth2At23,kadd(JacPDstandardNth3At22,kmadd(kmadd(At22L,Gt223,kmul(At23L,Gt323)),ToReal(-3.),kmul(kmadd(At23L,Gt222,kmul(At33L,Gt322)),ToReal(-1.))))),kmadd(gtu13,kadd(JacPDstandardNth1At23,kadd(JacPDstandardNth3At12,kmadd(At23L,kmul(Gt313,ToReal(-2.)),kmul(At13L,kmul(Gt323,ToReal(-1.)))))),kmadd(JacPDstandardNth2trK,ToReal(-0.6666666666666666666666666666666666666667),kmadd(At23L,kmadd(Gt312,kmul(gtu12,ToReal(-3.)),kmadd(Gt322,kmul(gtu22,ToReal(-2.)),kmadd(Gt333,kmul(gtu33,ToReal(-1.)),kmul(kmadd(cdphi1,gtu13,kmadd(cdphi2,gtu23,kmul(cdphi3,gtu33))),ToReal(6.))))),kmadd(At22L,kmadd(kmadd(Gt213,gtu13,kmul(Gt222,gtu22)),ToReal(-2.),kmadd(Gt233,kmul(gtu33,ToReal(-1.)),kmadd(kmadd(cdphi2,gtu22,kmul(cdphi3,gtu23)),ToReal(6.),kmul(gtu12,kmadd(Gt212,ToReal(-3.),kmul(cdphi1,ToReal(6.))))))),kmul(At12L,kmadd(Gt123,kmul(gtu23,ToReal(-3.)),kmadd(Gt122,kmul(gtu22,ToReal(-2.)),kmadd(Gt133,kmul(gtu33,ToReal(-1.)),kmadd(gtu11,kmadd(kadd(Gt111,Gt212),ToReal(-1.),kmul(cdphi1,ToReal(6.))),kmadd(gtu12,kmadd(Gt112,ToReal(-3.),kmadd(Gt222,ToReal(-1.),kmul(cdphi2,ToReal(6.)))),kmul(gtu13,kmadd(Gt113,ToReal(-2.),kmadd(Gt223,ToReal(-1.),kmul(cdphi3,ToReal(6.)))))))))))))))))))))); + kmadd(gtu11,JacPDstandardNth1At12,kmadd(gtu12,JacPDstandardNth1At22,kmadd(gtu13,JacPDstandardNth1At23,kmadd(gtu12,JacPDstandardNth2At12,kmadd(gtu22,JacPDstandardNth2At22,kmadd(gtu23,JacPDstandardNth2At23,kmadd(gtu13,JacPDstandardNth3At12,kmadd(gtu23,JacPDstandardNth3At22,kmadd(gtu33,JacPDstandardNth3At23,kmadd(At22L,kmul(Gt222,kmul(gtu22,ToReal(-2))),knmsub(At11L,kmadd(Gt112,gtu11,kmadd(Gt122,gtu12,kmul(Gt123,gtu13))),knmsub(gtu11,kmadd(At22L,Gt211,kmadd(At13L,Gt312,kmul(At23L,Gt311))),kmadd(Gt212,kmsub(At22L,kmul(gtu12,ToReal(-3)),kmul(At23L,gtu13)),kmadd(Gt312,kmsub(At23L,kmul(gtu12,ToReal(-3)),kmul(At33L,gtu13)),kmadd(Gt322,kmsub(At23L,kmul(gtu22,ToReal(-2)),kmul(At33L,gtu23)),kmadd(Gt223,kmsub(At22L,kmul(gtu23,ToReal(-3)),kmul(At23L,gtu33)),kmadd(Gt323,kmsub(At23L,kmul(gtu23,ToReal(-3)),kmul(At33L,gtu33)),kmadd(At22L,kmsub(Gt213,kmul(gtu13,ToReal(-2)),kmul(Gt233,gtu33)),kmadd(At23L,kmsub(Gt313,kmul(gtu13,ToReal(-2)),kmadd(Gt333,gtu33,kmul(Gt222,gtu23))),kmadd(JacPDstandardNth2trK,ToReal(-0.666666666666666666666666666667),kmadd(At22L,kmul(cdphi2,kmul(gtu22,ToReal(6))),kmadd(At22L,kmul(cdphi3,kmul(gtu23,ToReal(6))),kmadd(gtu12,kmsub(At22L,kmul(cdphi1,ToReal(6)),kmul(At13L,Gt322)),kmadd(gtu13,kmsub(At23L,kmul(cdphi1,ToReal(6)),kmul(At13L,kadd(Gt323,Gt112))),kmadd(gtu23,kmsub(At23L,kmul(cdphi2,ToReal(6)),kmul(At13L,Gt122)),kmadd(gtu33,kmsub(At23L,kmul(cdphi3,ToReal(6)),kmul(At13L,Gt123)),kmadd(At12L,kmadd(Gt112,kmul(gtu12,ToReal(-3)),kmadd(Gt123,kmul(gtu23,ToReal(-3)),kmadd(Gt113,kmul(gtu13,ToReal(-2)),kmadd(Gt122,kmul(gtu22,ToReal(-2)),knmsub(Gt133,gtu33,kmadd(gtu11,ksub(kmsub(cdphi1,ToReal(6),Gt212),Gt111),kmadd(gtu12,kmsub(cdphi2,ToReal(6),Gt222),kmul(gtu13,kmsub(cdphi3,ToReal(6),Gt223))))))))),kmul(S2,kmul(ToReal(-8),ToReal(Pi)))))))))))))))))))))))))))))); CCTK_REAL_VEC M3L = - kmadd(gtu11,JacPDstandardNth1At13,kmadd(gtu22,JacPDstandardNth2At23,kmadd(gtu13,kadd(JacPDstandardNth1At33,JacPDstandardNth3At13),kmadd(gtu33,JacPDstandardNth3At33,kmadd(S3,ToReal(-25.13274122871834590770114706623602307358),kmadd(kmadd(kmadd(At23L,Gt211,kmadd(At12L,Gt213,kmul(At33L,Gt311))),gtu11,kmadd(kmadd(At22L,Gt213,kmul(At12L,kadd(Gt113,Gt223))),gtu12,kmadd(At11L,kmadd(Gt113,gtu11,kmadd(Gt123,gtu12,kmul(Gt133,gtu13))),kmadd(kmadd(At23L,Gt222,kmul(At22L,Gt223)),gtu22,kmul(At12L,kmadd(Gt233,gtu13,kmadd(Gt123,gtu22,kmul(Gt133,gtu23)))))))),ToReal(-1.),kmadd(gtu12,kadd(JacPDstandardNth1At23,kadd(JacPDstandardNth2At13,kmadd(At33L,kmul(Gt312,ToReal(-2.)),kmul(At23L,kmul(Gt313,ToReal(-1.)))))),kmadd(gtu23,kadd(JacPDstandardNth2At33,kadd(JacPDstandardNth3At23,kmadd(kmadd(At23L,Gt223,kmul(At33L,Gt323)),ToReal(-3.),kmul(kmadd(At22L,Gt233,kmul(At23L,Gt333)),ToReal(-1.))))),kmadd(JacPDstandardNth3trK,ToReal(-0.6666666666666666666666666666666666666667),kmadd(At33L,kmadd(Gt333,kmul(gtu33,ToReal(-2.)),kmadd(Gt322,kmul(gtu22,ToReal(-1.)),kmadd(kmadd(cdphi2,gtu23,kmul(cdphi3,gtu33)),ToReal(6.),kmul(gtu13,kmadd(Gt313,ToReal(-3.),kmul(cdphi1,ToReal(6.))))))),kmadd(At23L,kmadd(Gt213,kmul(gtu13,ToReal(-3.)),kmadd(Gt233,kmul(gtu33,ToReal(-2.)),kmadd(cdphi3,kmul(gtu23,ToReal(6.)),kmadd(gtu12,kmadd(Gt212,ToReal(-2.),kmul(cdphi1,ToReal(6.))),kmul(gtu22,kmadd(Gt323,ToReal(-1.),kmul(cdphi2,ToReal(6.)))))))),kmul(At13L,kmadd(Gt123,kmul(gtu23,ToReal(-3.)),kmadd(Gt133,kmul(gtu33,ToReal(-2.)),kmadd(Gt122,kmul(gtu22,ToReal(-1.)),kmadd(gtu11,kmadd(kadd(Gt111,Gt313),ToReal(-1.),kmul(cdphi1,ToReal(6.))),kmadd(gtu12,kmadd(Gt112,ToReal(-2.),kmadd(Gt323,ToReal(-1.),kmul(cdphi2,ToReal(6.)))),kmul(gtu13,kmadd(Gt113,ToReal(-3.),kmadd(Gt333,ToReal(-1.),kmul(cdphi3,ToReal(6.)))))))))))))))))))))); + kmadd(gtu11,JacPDstandardNth1At13,kmadd(gtu12,JacPDstandardNth1At23,kmadd(gtu13,JacPDstandardNth1At33,kmadd(gtu12,JacPDstandardNth2At13,kmadd(gtu22,JacPDstandardNth2At23,kmadd(gtu23,JacPDstandardNth2At33,kmadd(gtu13,JacPDstandardNth3At13,kmadd(gtu23,JacPDstandardNth3At23,kmadd(gtu33,JacPDstandardNth3At33,kmadd(At33L,kmul(Gt313,kmul(gtu13,ToReal(-3))),kmadd(At23L,kmul(Gt223,kmul(gtu23,ToReal(-3))),kmadd(At33L,kmul(Gt323,kmul(gtu23,ToReal(-3))),kmadd(At23L,kmul(Gt233,kmul(gtu33,ToReal(-2))),kmadd(At33L,kmul(Gt333,kmul(gtu33,ToReal(-2))),knmsub(At11L,kmadd(Gt113,gtu11,kmadd(Gt123,gtu12,kmul(Gt133,gtu13))),kmadd(gtu12,kmsub(At23L,kmul(Gt212,ToReal(-2)),kmul(At12L,Gt223)),knmsub(gtu11,kmadd(At23L,Gt211,kmadd(At33L,Gt311,kmul(At12L,Gt213))),kmadd(At33L,kmsub(Gt312,kmul(gtu12,ToReal(-2)),kmul(Gt322,gtu22)),kmadd(At23L,kmsub(Gt213,kmul(gtu13,ToReal(-3)),kmadd(Gt333,gtu23,kmul(Gt222,gtu22))),kmadd(JacPDstandardNth3trK,ToReal(-0.666666666666666666666666666667),kmadd(At33L,kmul(cdphi3,kmul(gtu33,ToReal(6))),kmadd(gtu13,kmsub(At33L,kmul(cdphi1,ToReal(6)),kmul(At12L,Gt233)),kmadd(gtu23,kmsub(At33L,kmul(cdphi2,ToReal(6)),kmul(At12L,Gt133)),kmadd(gtu23,kmsub(At23L,kmul(cdphi3,ToReal(6)),kmul(At22L,Gt233)),kmadd(gtu12,knmsub(At12L,Gt113,kmsub(At23L,kmsub(cdphi1,ToReal(6),Gt313),kmul(At22L,Gt213))),kmadd(gtu22,knmsub(At12L,Gt123,kmsub(At23L,kmsub(cdphi2,ToReal(6),Gt323),kmul(At22L,Gt223))),kmadd(At13L,kmadd(Gt113,kmul(gtu13,ToReal(-3)),kmadd(Gt123,kmul(gtu23,ToReal(-3)),kmadd(Gt112,kmul(gtu12,ToReal(-2)),kmadd(Gt133,kmul(gtu33,ToReal(-2)),knmsub(Gt122,gtu22,kmadd(gtu11,ksub(kmsub(cdphi1,ToReal(6),Gt313),Gt111),kmadd(gtu12,kmsub(cdphi2,ToReal(6),Gt323),kmul(gtu13,kmsub(cdphi3,ToReal(6),Gt333))))))))),kmul(S3,kmul(ToReal(-8),ToReal(Pi)))))))))))))))))))))))))))))); CCTK_REAL_VEC cSL = klog(detgt); @@ -930,64 +932,18 @@ static void ML_BSSN_O2_constraints2_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC cAL = kmadd(At11L,gtu11,kmadd(At22L,gtu22,kmadd(At33L,gtu33,kmul(kmadd(At12L,gtu12,kmadd(At13L,gtu13,kmul(At23L,gtu23))),ToReal(2))))); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(cA[index],cAL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cS[index],cSL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cXt1[index],cXt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cXt2[index],cXt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cXt3[index],cXt3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(M1[index],M1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(M2[index],M2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(M3[index],M3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(cA[index],cAL,elt_count); - vec_store_nta_partial_hi(cS[index],cSL,elt_count); - vec_store_nta_partial_hi(cXt1[index],cXt1L,elt_count); - vec_store_nta_partial_hi(cXt2[index],cXt2L,elt_count); - vec_store_nta_partial_hi(cXt3[index],cXt3L,elt_count); - vec_store_nta_partial_hi(M1[index],M1L,elt_count); - vec_store_nta_partial_hi(M2[index],M2L,elt_count); - vec_store_nta_partial_hi(M3[index],M3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(cA[index],cAL,elt_count); - vec_store_nta_partial_lo(cS[index],cSL,elt_count); - vec_store_nta_partial_lo(cXt1[index],cXt1L,elt_count); - vec_store_nta_partial_lo(cXt2[index],cXt2L,elt_count); - vec_store_nta_partial_lo(cXt3[index],cXt3L,elt_count); - vec_store_nta_partial_lo(M1[index],M1L,elt_count); - vec_store_nta_partial_lo(M2[index],M2L,elt_count); - vec_store_nta_partial_lo(M3[index],M3L,elt_count); - break; - } - vec_store_nta(cA[index],cAL); - vec_store_nta(cS[index],cSL); - vec_store_nta(cXt1[index],cXt1L); - vec_store_nta(cXt2[index],cXt2L); - vec_store_nta(cXt3[index],cXt3L); - vec_store_nta(M1[index],M1L); - vec_store_nta(M2[index],M2L); - vec_store_nta(M3[index],M3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(cA[index],cAL); + vec_store_nta_partial(cS[index],cSL); + vec_store_nta_partial(cXt1[index],cXt1L); + vec_store_nta_partial(cXt2[index],cXt2L); + vec_store_nta_partial(cXt3[index],cXt3L); + vec_store_nta_partial(M1[index],M1L); + vec_store_nta_partial(M2[index],M2L); + vec_store_nta_partial(M3[index],M3L); } - LC_ENDLOOP3VEC (ML_BSSN_O2_constraints2); + LC_ENDLOOP3VEC(ML_BSSN_O2_constraints2); } extern "C" void ML_BSSN_O2_constraints2(CCTK_ARGUMENTS) @@ -1006,7 +962,18 @@ extern "C" void ML_BSSN_O2_constraints2(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O2::ML_cons_detg","ML_BSSN_O2::ML_cons_Gamma","ML_BSSN_O2::ML_cons_traceA","ML_BSSN_O2::ML_curv","ML_BSSN_O2::ML_Gamma","ML_BSSN_O2::ML_lapse","ML_BSSN_O2::ML_log_confac","ML_BSSN_O2::ML_metric","ML_BSSN_O2::ML_mom","ML_BSSN_O2::ML_shift","ML_BSSN_O2::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_O2::ML_cons_detg", + "ML_BSSN_O2::ML_cons_Gamma", + "ML_BSSN_O2::ML_cons_traceA", + "ML_BSSN_O2::ML_curv", + "ML_BSSN_O2::ML_Gamma", + "ML_BSSN_O2::ML_lapse", + "ML_BSSN_O2::ML_log_confac", + "ML_BSSN_O2::ML_metric", + "ML_BSSN_O2::ML_mom", + "ML_BSSN_O2::ML_shift", + "ML_BSSN_O2::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O2_constraints2", 11, groups); switch(fdOrder) @@ -1028,7 +995,7 @@ extern "C" void ML_BSSN_O2_constraints2(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_O2_constraints2_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_O2_constraints2_Body); if (verbose > 1) { diff --git a/ML_BSSN_O2/src/ML_BSSN_O2_convertFromADMBase.cc b/ML_BSSN_O2/src/ML_BSSN_O2_convertFromADMBase.cc index e965987..a2b3d26 100644 --- a/ML_BSSN_O2/src/ML_BSSN_O2_convertFromADMBase.cc +++ b/ML_BSSN_O2/src/ML_BSSN_O2_convertFromADMBase.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_O2_convertFromADMBase_Body(cGH const * restrict const cctkGH DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_O2_convertFromADMBase_Body(cGH const * restrict const cctkGH CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_O2_convertFromADMBase_Body(cGH const * restrict const cctkGH CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_O2_convertFromADMBase_Body(cGH const * restrict const cctkGH CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_O2_convertFromADMBase_Body(cGH const * restrict const cctkGH /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O2_convertFromADMBase, + LC_LOOP3VEC(ML_BSSN_O2_convertFromADMBase, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -296,25 +295,25 @@ static void ML_BSSN_O2_convertFromADMBase_Body(cGH const * restrict const cctkGH CCTK_REAL_VEC gt33L = kmul(em4phi,g33); trKL = - kmadd(gu11,kxxL,kmadd(gu22,kyyL,kmadd(gu33,kzzL,kmul(kmadd(gu12,kxyL,kmadd(gu13,kxzL,kmul(gu23,kyzL))),ToReal(2))))); + kmadd(kxxL,gu11,kmadd(kyyL,gu22,kmadd(kzzL,gu33,kmul(kmadd(kxyL,gu12,kmadd(kxzL,gu13,kmul(kyzL,gu23))),ToReal(2))))); CCTK_REAL_VEC At11L = - kmul(em4phi,kmadd(g11,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kxxL)); + kmul(em4phi,kmadd(trKL,kmul(g11,ToReal(-0.333333333333333333333333333333)),kxxL)); CCTK_REAL_VEC At12L = - kmul(em4phi,kmadd(g12,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kxyL)); + kmul(em4phi,kmadd(trKL,kmul(g12,ToReal(-0.333333333333333333333333333333)),kxyL)); CCTK_REAL_VEC At13L = - kmul(em4phi,kmadd(g13,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kxzL)); + kmul(em4phi,kmadd(trKL,kmul(g13,ToReal(-0.333333333333333333333333333333)),kxzL)); CCTK_REAL_VEC At22L = - kmul(em4phi,kmadd(g22,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kyyL)); + kmul(em4phi,kmadd(trKL,kmul(g22,ToReal(-0.333333333333333333333333333333)),kyyL)); CCTK_REAL_VEC At23L = - kmul(em4phi,kmadd(g23,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kyzL)); + kmul(em4phi,kmadd(trKL,kmul(g23,ToReal(-0.333333333333333333333333333333)),kyzL)); CCTK_REAL_VEC At33L = - kmul(em4phi,kmadd(g33,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kzzL)); + kmul(em4phi,kmadd(trKL,kmul(g33,ToReal(-0.333333333333333333333333333333)),kzzL)); CCTK_REAL_VEC alphaL = alpL; @@ -324,104 +323,28 @@ static void ML_BSSN_O2_convertFromADMBase_Body(cGH const * restrict const cctkGH CCTK_REAL_VEC beta3L = betazL; - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - vec_store_nta_partial_hi(beta1[index],beta1L,elt_count); - vec_store_nta_partial_hi(beta2[index],beta2L,elt_count); - vec_store_nta_partial_hi(beta3[index],beta3L,elt_count); - vec_store_nta_partial_hi(gt11[index],gt11L,elt_count); - vec_store_nta_partial_hi(gt12[index],gt12L,elt_count); - vec_store_nta_partial_hi(gt13[index],gt13L,elt_count); - vec_store_nta_partial_hi(gt22[index],gt22L,elt_count); - vec_store_nta_partial_hi(gt23[index],gt23L,elt_count); - vec_store_nta_partial_hi(gt33[index],gt33L,elt_count); - vec_store_nta_partial_hi(phi[index],phiL,elt_count); - vec_store_nta_partial_hi(trK[index],trKL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - vec_store_nta_partial_lo(beta1[index],beta1L,elt_count); - vec_store_nta_partial_lo(beta2[index],beta2L,elt_count); - vec_store_nta_partial_lo(beta3[index],beta3L,elt_count); - vec_store_nta_partial_lo(gt11[index],gt11L,elt_count); - vec_store_nta_partial_lo(gt12[index],gt12L,elt_count); - vec_store_nta_partial_lo(gt13[index],gt13L,elt_count); - vec_store_nta_partial_lo(gt22[index],gt22L,elt_count); - vec_store_nta_partial_lo(gt23[index],gt23L,elt_count); - vec_store_nta_partial_lo(gt33[index],gt33L,elt_count); - vec_store_nta_partial_lo(phi[index],phiL,elt_count); - vec_store_nta_partial_lo(trK[index],trKL,elt_count); - break; - } - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); - vec_store_nta(beta1[index],beta1L); - vec_store_nta(beta2[index],beta2L); - vec_store_nta(beta3[index],beta3L); - vec_store_nta(gt11[index],gt11L); - vec_store_nta(gt12[index],gt12L); - vec_store_nta(gt13[index],gt13L); - vec_store_nta(gt22[index],gt22L); - vec_store_nta(gt23[index],gt23L); - vec_store_nta(gt33[index],gt33L); - vec_store_nta(phi[index],phiL); - vec_store_nta(trK[index],trKL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); + vec_store_nta_partial(beta1[index],beta1L); + vec_store_nta_partial(beta2[index],beta2L); + vec_store_nta_partial(beta3[index],beta3L); + vec_store_nta_partial(gt11[index],gt11L); + vec_store_nta_partial(gt12[index],gt12L); + vec_store_nta_partial(gt13[index],gt13L); + vec_store_nta_partial(gt22[index],gt22L); + vec_store_nta_partial(gt23[index],gt23L); + vec_store_nta_partial(gt33[index],gt33L); + vec_store_nta_partial(phi[index],phiL); + vec_store_nta_partial(trK[index],trKL); } - LC_ENDLOOP3VEC (ML_BSSN_O2_convertFromADMBase); + LC_ENDLOOP3VEC(ML_BSSN_O2_convertFromADMBase); } extern "C" void ML_BSSN_O2_convertFromADMBase(CCTK_ARGUMENTS) @@ -440,7 +363,17 @@ extern "C" void ML_BSSN_O2_convertFromADMBase(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::curv","ADMBase::lapse","ADMBase::metric","ADMBase::shift","ML_BSSN_O2::ML_curv","ML_BSSN_O2::ML_lapse","ML_BSSN_O2::ML_log_confac","ML_BSSN_O2::ML_metric","ML_BSSN_O2::ML_shift","ML_BSSN_O2::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::curv", + "ADMBase::lapse", + "ADMBase::metric", + "ADMBase::shift", + "ML_BSSN_O2::ML_curv", + "ML_BSSN_O2::ML_lapse", + "ML_BSSN_O2::ML_log_confac", + "ML_BSSN_O2::ML_metric", + "ML_BSSN_O2::ML_shift", + "ML_BSSN_O2::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O2_convertFromADMBase", 10, groups); switch(fdOrder) @@ -458,7 +391,7 @@ extern "C" void ML_BSSN_O2_convertFromADMBase(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_O2_convertFromADMBase_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_O2_convertFromADMBase_Body); if (verbose > 1) { diff --git a/ML_BSSN_O2/src/ML_BSSN_O2_convertFromADMBaseGamma.cc b/ML_BSSN_O2/src/ML_BSSN_O2_convertFromADMBaseGamma.cc index b5796ee..8a4134d 100644 --- a/ML_BSSN_O2/src/ML_BSSN_O2_convertFromADMBaseGamma.cc +++ b/ML_BSSN_O2/src/ML_BSSN_O2_convertFromADMBaseGamma.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -46,8 +47,6 @@ static void ML_BSSN_O2_convertFromADMBaseGamma_Body(cGH const * restrict const c DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -84,9 +83,9 @@ static void ML_BSSN_O2_convertFromADMBaseGamma_Body(cGH const * restrict const c CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -105,14 +104,14 @@ static void ML_BSSN_O2_convertFromADMBaseGamma_Body(cGH const * restrict const c CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -126,9 +125,9 @@ static void ML_BSSN_O2_convertFromADMBaseGamma_Body(cGH const * restrict const c CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -209,7 +208,7 @@ static void ML_BSSN_O2_convertFromADMBaseGamma_Body(cGH const * restrict const c /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O2_convertFromADMBaseGamma, + LC_LOOP3VEC(ML_BSSN_O2_convertFromADMBaseGamma, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -746,7 +745,8 @@ static void ML_BSSN_O2_convertFromADMBaseGamma_Body(cGH const * restrict const c CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -754,12 +754,14 @@ static void ML_BSSN_O2_convertFromADMBaseGamma_Body(cGH const * restrict const c CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC Gt111 = kmul(ToReal(0.5),kmadd(gtu11,JacPDstandardNth1gt11,knmsub(gtu12,JacPDstandardNth2gt11,kmsub(kmadd(gtu12,JacPDstandardNth1gt12,kmul(gtu13,JacPDstandardNth1gt13)),ToReal(2),kmul(gtu13,JacPDstandardNth3gt11))))); @@ -837,13 +839,13 @@ static void ML_BSSN_O2_convertFromADMBaseGamma_Body(cGH const * restrict const c if (ShiftBCoeff*ShiftGammaCoeff != 0) { B1L = - kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta1,kmadd(beta2L,JacPDupwindNthAnti2beta1,kmadd(beta3L,JacPDupwindNthAnti3beta1,kmadd(JacPDupwindNthSymm1beta1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta1,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetaxL))); + kmul(INV(kmul(theta,ToReal(ShiftGammaCoeff))),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta1,kmadd(beta2L,JacPDupwindNthAnti2beta1,kmadd(beta3L,JacPDupwindNthAnti3beta1,kmadd(JacPDupwindNthSymm1beta1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta1,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetaxL)); B2L = - kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta2,kmadd(beta2L,JacPDupwindNthAnti2beta2,kmadd(beta3L,JacPDupwindNthAnti3beta2,kmadd(JacPDupwindNthSymm1beta2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta2,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetayL))); + kmul(INV(kmul(theta,ToReal(ShiftGammaCoeff))),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta2,kmadd(beta2L,JacPDupwindNthAnti2beta2,kmadd(beta3L,JacPDupwindNthAnti3beta2,kmadd(JacPDupwindNthSymm1beta2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta2,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetayL)); B3L = - kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetazL))); + kmul(INV(kmul(theta,ToReal(ShiftGammaCoeff))),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetazL)); } else { @@ -854,60 +856,17 @@ static void ML_BSSN_O2_convertFromADMBaseGamma_Body(cGH const * restrict const c B3L = ToReal(0); } - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_O2_convertFromADMBaseGamma); + LC_ENDLOOP3VEC(ML_BSSN_O2_convertFromADMBaseGamma); } extern "C" void ML_BSSN_O2_convertFromADMBaseGamma(CCTK_ARGUMENTS) @@ -926,7 +885,17 @@ extern "C" void ML_BSSN_O2_convertFromADMBaseGamma(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN_O2::ML_dtlapse","ML_BSSN_O2::ML_dtshift","ML_BSSN_O2::ML_Gamma","ML_BSSN_O2::ML_lapse","ML_BSSN_O2::ML_metric","ML_BSSN_O2::ML_shift"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_O2::ML_dtlapse", + "ML_BSSN_O2::ML_dtshift", + "ML_BSSN_O2::ML_Gamma", + "ML_BSSN_O2::ML_lapse", + "ML_BSSN_O2::ML_metric", + "ML_BSSN_O2::ML_shift"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O2_convertFromADMBaseGamma", 10, groups); switch(fdOrder) @@ -948,7 +917,7 @@ extern "C" void ML_BSSN_O2_convertFromADMBaseGamma(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_O2_convertFromADMBaseGamma_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_O2_convertFromADMBaseGamma_Body); if (verbose > 1) { diff --git a/ML_BSSN_O2/src/ML_BSSN_O2_convertToADMBase.cc b/ML_BSSN_O2/src/ML_BSSN_O2_convertToADMBase.cc index f61afd8..7b47457 100644 --- a/ML_BSSN_O2/src/ML_BSSN_O2_convertToADMBase.cc +++ b/ML_BSSN_O2/src/ML_BSSN_O2_convertToADMBase.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_O2_convertToADMBase_Body(cGH const * restrict const cctkGH, DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_O2_convertToADMBase_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_O2_convertToADMBase_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_O2_convertToADMBase_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_O2_convertToADMBase_Body(cGH const * restrict const cctkGH, /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O2_convertToADMBase, + LC_LOOP3VEC(ML_BSSN_O2_convertToADMBase, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -250,17 +249,17 @@ static void ML_BSSN_O2_convertToADMBase_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC e4phi = IfThen(conformalMethod,INV(SQR(phiL)),kexp(kmul(phiL,ToReal(4)))); - gxxL = kmul(e4phi,gt11L); + gxxL = kmul(gt11L,e4phi); - gxyL = kmul(e4phi,gt12L); + gxyL = kmul(gt12L,e4phi); - gxzL = kmul(e4phi,gt13L); + gxzL = kmul(gt13L,e4phi); - gyyL = kmul(e4phi,gt22L); + gyyL = kmul(gt22L,e4phi); - gyzL = kmul(e4phi,gt23L); + gyzL = kmul(gt23L,e4phi); - gzzL = kmul(e4phi,gt33L); + gzzL = kmul(gt33L,e4phi); CCTK_REAL_VEC kxxL = kmadd(At11L,e4phi,kmul(gxxL,kmul(trKL,ToReal(0.333333333333333333333333333333)))); @@ -288,96 +287,26 @@ static void ML_BSSN_O2_convertToADMBase_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC betazL = beta3L; - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alp[index],alpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(betax[index],betaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(betay[index],betayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(betaz[index],betazL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gxx[index],gxxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gxy[index],gxyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gxz[index],gxzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gyy[index],gyyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gyz[index],gyzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gzz[index],gzzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kxx[index],kxxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kxy[index],kxyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kxz[index],kxzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kyy[index],kyyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kyz[index],kyzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kzz[index],kzzL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alp[index],alpL,elt_count); - vec_store_nta_partial_hi(betax[index],betaxL,elt_count); - vec_store_nta_partial_hi(betay[index],betayL,elt_count); - vec_store_nta_partial_hi(betaz[index],betazL,elt_count); - vec_store_nta_partial_hi(gxx[index],gxxL,elt_count); - vec_store_nta_partial_hi(gxy[index],gxyL,elt_count); - vec_store_nta_partial_hi(gxz[index],gxzL,elt_count); - vec_store_nta_partial_hi(gyy[index],gyyL,elt_count); - vec_store_nta_partial_hi(gyz[index],gyzL,elt_count); - vec_store_nta_partial_hi(gzz[index],gzzL,elt_count); - vec_store_nta_partial_hi(kxx[index],kxxL,elt_count); - vec_store_nta_partial_hi(kxy[index],kxyL,elt_count); - vec_store_nta_partial_hi(kxz[index],kxzL,elt_count); - vec_store_nta_partial_hi(kyy[index],kyyL,elt_count); - vec_store_nta_partial_hi(kyz[index],kyzL,elt_count); - vec_store_nta_partial_hi(kzz[index],kzzL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alp[index],alpL,elt_count); - vec_store_nta_partial_lo(betax[index],betaxL,elt_count); - vec_store_nta_partial_lo(betay[index],betayL,elt_count); - vec_store_nta_partial_lo(betaz[index],betazL,elt_count); - vec_store_nta_partial_lo(gxx[index],gxxL,elt_count); - vec_store_nta_partial_lo(gxy[index],gxyL,elt_count); - vec_store_nta_partial_lo(gxz[index],gxzL,elt_count); - vec_store_nta_partial_lo(gyy[index],gyyL,elt_count); - vec_store_nta_partial_lo(gyz[index],gyzL,elt_count); - vec_store_nta_partial_lo(gzz[index],gzzL,elt_count); - vec_store_nta_partial_lo(kxx[index],kxxL,elt_count); - vec_store_nta_partial_lo(kxy[index],kxyL,elt_count); - vec_store_nta_partial_lo(kxz[index],kxzL,elt_count); - vec_store_nta_partial_lo(kyy[index],kyyL,elt_count); - vec_store_nta_partial_lo(kyz[index],kyzL,elt_count); - vec_store_nta_partial_lo(kzz[index],kzzL,elt_count); - break; - } - vec_store_nta(alp[index],alpL); - vec_store_nta(betax[index],betaxL); - vec_store_nta(betay[index],betayL); - vec_store_nta(betaz[index],betazL); - vec_store_nta(gxx[index],gxxL); - vec_store_nta(gxy[index],gxyL); - vec_store_nta(gxz[index],gxzL); - vec_store_nta(gyy[index],gyyL); - vec_store_nta(gyz[index],gyzL); - vec_store_nta(gzz[index],gzzL); - vec_store_nta(kxx[index],kxxL); - vec_store_nta(kxy[index],kxyL); - vec_store_nta(kxz[index],kxzL); - vec_store_nta(kyy[index],kyyL); - vec_store_nta(kyz[index],kyzL); - vec_store_nta(kzz[index],kzzL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alp[index],alpL); + vec_store_nta_partial(betax[index],betaxL); + vec_store_nta_partial(betay[index],betayL); + vec_store_nta_partial(betaz[index],betazL); + vec_store_nta_partial(gxx[index],gxxL); + vec_store_nta_partial(gxy[index],gxyL); + vec_store_nta_partial(gxz[index],gxzL); + vec_store_nta_partial(gyy[index],gyyL); + vec_store_nta_partial(gyz[index],gyzL); + vec_store_nta_partial(gzz[index],gzzL); + vec_store_nta_partial(kxx[index],kxxL); + vec_store_nta_partial(kxy[index],kxyL); + vec_store_nta_partial(kxz[index],kxzL); + vec_store_nta_partial(kyy[index],kyyL); + vec_store_nta_partial(kyz[index],kyzL); + vec_store_nta_partial(kzz[index],kzzL); } - LC_ENDLOOP3VEC (ML_BSSN_O2_convertToADMBase); + LC_ENDLOOP3VEC(ML_BSSN_O2_convertToADMBase); } extern "C" void ML_BSSN_O2_convertToADMBase(CCTK_ARGUMENTS) @@ -396,7 +325,17 @@ extern "C" void ML_BSSN_O2_convertToADMBase(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::curv","ADMBase::lapse","ADMBase::metric","ADMBase::shift","ML_BSSN_O2::ML_curv","ML_BSSN_O2::ML_lapse","ML_BSSN_O2::ML_log_confac","ML_BSSN_O2::ML_metric","ML_BSSN_O2::ML_shift","ML_BSSN_O2::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::curv", + "ADMBase::lapse", + "ADMBase::metric", + "ADMBase::shift", + "ML_BSSN_O2::ML_curv", + "ML_BSSN_O2::ML_lapse", + "ML_BSSN_O2::ML_log_confac", + "ML_BSSN_O2::ML_metric", + "ML_BSSN_O2::ML_shift", + "ML_BSSN_O2::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O2_convertToADMBase", 10, groups); switch(fdOrder) @@ -414,7 +353,7 @@ extern "C" void ML_BSSN_O2_convertToADMBase(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_O2_convertToADMBase_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_O2_convertToADMBase_Body); if (verbose > 1) { diff --git a/ML_BSSN_O2/src/ML_BSSN_O2_convertToADMBaseDtLapseShift.cc b/ML_BSSN_O2/src/ML_BSSN_O2_convertToADMBaseDtLapseShift.cc index 0e83bb5..1882fa9 100644 --- a/ML_BSSN_O2/src/ML_BSSN_O2_convertToADMBaseDtLapseShift.cc +++ b/ML_BSSN_O2/src/ML_BSSN_O2_convertToADMBaseDtLapseShift.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -43,8 +44,6 @@ static void ML_BSSN_O2_convertToADMBaseDtLapseShift_Body(cGH const * restrict co DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -81,9 +80,9 @@ static void ML_BSSN_O2_convertToADMBaseDtLapseShift_Body(cGH const * restrict co CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -102,14 +101,14 @@ static void ML_BSSN_O2_convertToADMBaseDtLapseShift_Body(cGH const * restrict co CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -123,9 +122,9 @@ static void ML_BSSN_O2_convertToADMBaseDtLapseShift_Body(cGH const * restrict co CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -206,7 +205,7 @@ static void ML_BSSN_O2_convertToADMBaseDtLapseShift_Body(cGH const * restrict co /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O2_convertToADMBaseDtLapseShift, + LC_LOOP3VEC(ML_BSSN_O2_convertToADMBaseDtLapseShift, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -814,7 +813,8 @@ static void ML_BSSN_O2_convertToADMBaseDtLapseShift_Body(cGH const * restrict co CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -822,15 +822,17 @@ static void ML_BSSN_O2_convertToADMBaseDtLapseShift_Body(cGH const * restrict co CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC eta = - kfmin(ToReal(1),kmul(INV(rL),ToReal(SpatialBetaDriverRadius))); + kmul(INV(kfmax(rL,ToReal(SpatialBetaDriverRadius))),ToReal(SpatialBetaDriverRadius)); CCTK_REAL_VEC theta = kfmin(ToReal(1),kexp(knmsub(rL,INV(ToReal(SpatialShiftGammaCoeffRadius)),ToReal(1)))); @@ -839,62 +841,22 @@ static void ML_BSSN_O2_convertToADMBaseDtLapseShift_Body(cGH const * restrict co kmsub(kmadd(beta1L,JacPDupwindNthAnti1alpha,kmadd(beta2L,JacPDupwindNthAnti2alpha,kmadd(beta3L,JacPDupwindNthAnti3alpha,kmadd(JacPDupwindNthSymm1alpha,kfabs(beta1L),kmadd(JacPDupwindNthSymm2alpha,kfabs(beta2L),kmul(JacPDupwindNthSymm3alpha,kfabs(beta3L))))))),ToReal(LapseAdvectionCoeff),kmul(kpow(alphaL,harmonicN),kmul(ToReal(harmonicF),kmadd(ksub(AL,trKL),ToReal(LapseACoeff),trKL)))); CCTK_REAL_VEC dtbetaxL = - kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta1,kmadd(beta2L,JacPDupwindNthAnti2beta1,kmadd(beta3L,JacPDupwindNthAnti3beta1,kmadd(JacPDupwindNthSymm1beta1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta1,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(kmadd(gtu11,JacPDstandardNth1alpha,kmadd(gtu12,JacPDstandardNth2alpha,kmul(gtu13,JacPDstandardNth3alpha))),kmul(phiL,ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth1gt11,SQR(gtu11),kmul(JacPDstandardNth1gt22,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu13,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu13,JacPDstandardNth1gt33,kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu22,JacPDstandardNth3gt22))))),kmadd(gtu11,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu12,JacPDstandardNth2gt11,kmadd(gtu13,JacPDstandardNth3gt11,kmadd(gtu23,kmul(JacPDstandardNth1gt23,ToReal(-2)),knmsub(gtu22,JacPDstandardNth1gt22,kmadd(kmadd(gtu12,JacPDstandardNth1gt12,kmadd(gtu13,JacPDstandardNth1gt13,kmul(gtu22,JacPDstandardNth2gt12))),ToReal(2),kmadd(gtu23,kmul(JacPDstandardNth2gt13,ToReal(2)),kmadd(gtu23,kmul(JacPDstandardNth3gt12,ToReal(2)),kmul(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33))))))))))),kmul(gtu12,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu23,kmul(JacPDstandardNth3gt22,ToReal(2)),kmadd(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33),kmul(gtu13,kmul(JacPDstandardNth1gt23,ToReal(4))))))))))))))))),kmul(theta,kmul(kadd(Xt1L,kmadd(beta1L,kmul(eta,ToReal(BetaDriver*(-1 - + - ShiftBCoeff))),kmul(ksub(B1L,Xt1L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))))); + kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta1,kmadd(beta2L,JacPDupwindNthAnti2beta1,kmadd(beta3L,JacPDupwindNthAnti3beta1,kmadd(JacPDupwindNthSymm1beta1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta1,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu11,JacPDstandardNth1alpha,kmadd(gtu12,JacPDstandardNth2alpha,kmul(gtu13,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth1gt11,SQR(gtu11),kmul(JacPDstandardNth1gt22,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu13,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu13,JacPDstandardNth1gt33,kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu22,JacPDstandardNth3gt22))))),kmadd(gtu11,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu12,JacPDstandardNth2gt11,kmadd(gtu13,JacPDstandardNth3gt11,kmadd(gtu23,kmul(JacPDstandardNth1gt23,ToReal(-2)),knmsub(gtu22,JacPDstandardNth1gt22,kmadd(kmadd(gtu12,JacPDstandardNth1gt12,kmadd(gtu13,JacPDstandardNth1gt13,kmul(gtu22,JacPDstandardNth2gt12))),ToReal(2),kmadd(gtu23,kmul(JacPDstandardNth2gt13,ToReal(2)),kmadd(gtu23,kmul(JacPDstandardNth3gt12,ToReal(2)),kmul(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33))))))))))),kmul(gtu12,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu23,kmul(JacPDstandardNth3gt22,ToReal(2)),kmadd(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33),kmul(gtu13,kmul(JacPDstandardNth1gt23,ToReal(4))))))))))))))))),kmul(theta,kmul(kadd(Xt1L,kmadd(ksub(B1L,Xt1L),ToReal(ShiftBCoeff),kmul(beta1L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))))); CCTK_REAL_VEC dtbetayL = - kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta2,kmadd(beta2L,JacPDupwindNthAnti2beta2,kmadd(beta3L,JacPDupwindNthAnti3beta2,kmadd(JacPDupwindNthSymm1beta2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta2,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(kmadd(gtu12,JacPDstandardNth1alpha,kmadd(gtu22,JacPDstandardNth2alpha,kmul(gtu23,JacPDstandardNth3alpha))),kmul(phiL,ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth2gt22,SQR(gtu22),kmul(JacPDstandardNth2gt11,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu23,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt13,kmadd(gtu13,JacPDstandardNth1gt33,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu11,JacPDstandardNth3gt11))))),kmadd(gtu22,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu23,JacPDstandardNth3gt22,kmadd(kmadd(gtu23,JacPDstandardNth2gt23,kmul(gtu13,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth3gt12,JacPDstandardNth2gt13)))),ToReal(2),kmadd(gtu11,kmsub(JacPDstandardNth1gt12,ToReal(2),JacPDstandardNth2gt11),kmul(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33))))))),kmul(gtu12,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu13,kmul(JacPDstandardNth3gt11,ToReal(2)),kmadd(gtu22,kmadd(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth2gt13,ToReal(4)))))))))))))))))),kmul(theta,kmul(kadd(Xt2L,kmadd(beta2L,kmul(eta,ToReal(BetaDriver*(-1 - + - ShiftBCoeff))),kmul(ksub(B2L,Xt2L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))))); + kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta2,kmadd(beta2L,JacPDupwindNthAnti2beta2,kmadd(beta3L,JacPDupwindNthAnti3beta2,kmadd(JacPDupwindNthSymm1beta2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta2,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu12,JacPDstandardNth1alpha,kmadd(gtu22,JacPDstandardNth2alpha,kmul(gtu23,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth2gt22,SQR(gtu22),kmul(JacPDstandardNth2gt11,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu23,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt13,kmadd(gtu13,JacPDstandardNth1gt33,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu11,JacPDstandardNth3gt11))))),kmadd(gtu22,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu23,JacPDstandardNth3gt22,kmadd(kmadd(gtu23,JacPDstandardNth2gt23,kmul(gtu13,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth3gt12,JacPDstandardNth2gt13)))),ToReal(2),kmadd(gtu11,kmsub(JacPDstandardNth1gt12,ToReal(2),JacPDstandardNth2gt11),kmul(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33))))))),kmul(gtu12,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu13,kmul(JacPDstandardNth3gt11,ToReal(2)),kmadd(gtu22,kmadd(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth2gt13,ToReal(4)))))))))))))))))),kmul(theta,kmul(kadd(Xt2L,kmadd(ksub(B2L,Xt2L),ToReal(ShiftBCoeff),kmul(beta2L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))))); CCTK_REAL_VEC dtbetazL = - kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(kmadd(gtu13,JacPDstandardNth1alpha,kmadd(gtu23,JacPDstandardNth2alpha,kmul(gtu33,JacPDstandardNth3alpha))),kmul(phiL,ToReal(-2)),kmul(alphaL,kmadd(phiL,kmul(kmadd(JacPDstandardNth3gt11,SQR(gtu13),kmul(JacPDstandardNth3gt22,SQR(gtu23))),ToReal(2)),kmadd(gtu23,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu33,JacPDstandardNth2gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt12,kmadd(gtu12,JacPDstandardNth1gt22,kmul(gtu33,JacPDstandardNth3gt23))),ToReal(2),kmul(gtu11,JacPDstandardNth2gt11)))))),kmadd(gtu33,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,knmsub(gtu22,JacPDstandardNth3gt22,kmadd(kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu12,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth2gt13,JacPDstandardNth3gt12)))),ToReal(2),kmul(gtu11,kmsub(JacPDstandardNth1gt13,ToReal(2),JacPDstandardNth3gt11))))))),kmul(gtu13,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu12,kmul(JacPDstandardNth2gt11,ToReal(2)),kmadd(gtu22,kmsub(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmadd(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth3gt12,ToReal(4)))))))))))))))))),kmul(theta,kmul(kadd(Xt3L,kmadd(beta3L,kmul(eta,ToReal(BetaDriver*(-1 - + - ShiftBCoeff))),kmul(ksub(B3L,Xt3L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))))); - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count); - break; - } - vec_store_nta(dtalp[index],dtalpL); - vec_store_nta(dtbetax[index],dtbetaxL); - vec_store_nta(dtbetay[index],dtbetayL); - vec_store_nta(dtbetaz[index],dtbetazL); + kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu13,JacPDstandardNth1alpha,kmadd(gtu23,JacPDstandardNth2alpha,kmul(gtu33,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmul(kmadd(JacPDstandardNth3gt11,SQR(gtu13),kmul(JacPDstandardNth3gt22,SQR(gtu23))),ToReal(2)),kmadd(gtu23,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu33,JacPDstandardNth2gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt12,kmadd(gtu12,JacPDstandardNth1gt22,kmul(gtu33,JacPDstandardNth3gt23))),ToReal(2),kmul(gtu11,JacPDstandardNth2gt11)))))),kmadd(gtu33,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,knmsub(gtu22,JacPDstandardNth3gt22,kmadd(kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu12,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth2gt13,JacPDstandardNth3gt12)))),ToReal(2),kmul(gtu11,kmsub(JacPDstandardNth1gt13,ToReal(2),JacPDstandardNth3gt11))))))),kmul(gtu13,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu12,kmul(JacPDstandardNth2gt11,ToReal(2)),kmadd(gtu22,kmsub(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmadd(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth3gt12,ToReal(4)))))))))))))))))),kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))))); + + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(dtalp[index],dtalpL); + vec_store_nta_partial(dtbetax[index],dtbetaxL); + vec_store_nta_partial(dtbetay[index],dtbetayL); + vec_store_nta_partial(dtbetaz[index],dtbetazL); } - LC_ENDLOOP3VEC (ML_BSSN_O2_convertToADMBaseDtLapseShift); + LC_ENDLOOP3VEC(ML_BSSN_O2_convertToADMBaseDtLapseShift); } extern "C" void ML_BSSN_O2_convertToADMBaseDtLapseShift(CCTK_ARGUMENTS) @@ -913,7 +875,19 @@ extern "C" void ML_BSSN_O2_convertToADMBaseDtLapseShift(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN_O2::ML_dtlapse","ML_BSSN_O2::ML_dtshift","ML_BSSN_O2::ML_Gamma","ML_BSSN_O2::ML_lapse","ML_BSSN_O2::ML_log_confac","ML_BSSN_O2::ML_metric","ML_BSSN_O2::ML_shift","ML_BSSN_O2::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_O2::ML_dtlapse", + "ML_BSSN_O2::ML_dtshift", + "ML_BSSN_O2::ML_Gamma", + "ML_BSSN_O2::ML_lapse", + "ML_BSSN_O2::ML_log_confac", + "ML_BSSN_O2::ML_metric", + "ML_BSSN_O2::ML_shift", + "ML_BSSN_O2::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O2_convertToADMBaseDtLapseShift", 12, groups); switch(fdOrder) @@ -935,7 +909,7 @@ extern "C" void ML_BSSN_O2_convertToADMBaseDtLapseShift(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_O2_convertToADMBaseDtLapseShift_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_O2_convertToADMBaseDtLapseShift_Body); if (verbose > 1) { diff --git a/ML_BSSN_O2/src/ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary.cc b/ML_BSSN_O2/src/ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary.cc index 471d244..ccbdae5 100644 --- a/ML_BSSN_O2/src/ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary.cc +++ b/ML_BSSN_O2/src/ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -43,8 +44,6 @@ static void ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * res DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -81,9 +80,9 @@ static void ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * res CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -102,14 +101,14 @@ static void ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * res CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -123,9 +122,9 @@ static void ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * res CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -206,7 +205,7 @@ static void ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * res /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary, + LC_LOOP3VEC(ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -252,7 +251,7 @@ static void ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * res /* Calculate temporaries and grid functions */ CCTK_REAL_VEC eta = - kfmin(ToReal(1),kmul(INV(rL),ToReal(SpatialBetaDriverRadius))); + kmul(INV(kfmax(rL,ToReal(SpatialBetaDriverRadius))),ToReal(SpatialBetaDriverRadius)); CCTK_REAL_VEC theta = kfmin(ToReal(1),kexp(knmsub(rL,INV(ToReal(SpatialShiftGammaCoeffRadius)),ToReal(1)))); @@ -275,60 +274,23 @@ static void ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * res else { dtbetaxL = - kmul(theta,kmul(kadd(Xt1L,kmadd(beta1L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B1L,Xt1L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt1L,kmadd(ksub(B1L,Xt1L),ToReal(ShiftBCoeff),kmul(beta1L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); dtbetayL = - kmul(theta,kmul(kadd(Xt2L,kmadd(beta2L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B2L,Xt2L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt2L,kmadd(ksub(B2L,Xt2L),ToReal(ShiftBCoeff),kmul(beta2L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); dtbetazL = - kmul(theta,kmul(kadd(Xt3L,kmadd(beta3L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B3L,Xt3L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count); - continue; + kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); } - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count); - break; - } - vec_store_nta(dtalp[index],dtalpL); - vec_store_nta(dtbetax[index],dtbetaxL); - vec_store_nta(dtbetay[index],dtbetayL); - vec_store_nta(dtbetaz[index],dtbetazL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(dtalp[index],dtalpL); + vec_store_nta_partial(dtbetax[index],dtbetaxL); + vec_store_nta_partial(dtbetay[index],dtbetayL); + vec_store_nta_partial(dtbetaz[index],dtbetazL); } - LC_ENDLOOP3VEC (ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary); + LC_ENDLOOP3VEC(ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary); } extern "C" void ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary(CCTK_ARGUMENTS) @@ -347,7 +309,17 @@ extern "C" void ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN_O2::ML_dtlapse","ML_BSSN_O2::ML_dtshift","ML_BSSN_O2::ML_Gamma","ML_BSSN_O2::ML_lapse","ML_BSSN_O2::ML_shift","ML_BSSN_O2::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_O2::ML_dtlapse", + "ML_BSSN_O2::ML_dtshift", + "ML_BSSN_O2::ML_Gamma", + "ML_BSSN_O2::ML_lapse", + "ML_BSSN_O2::ML_shift", + "ML_BSSN_O2::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary", 10, groups); switch(fdOrder) @@ -365,7 +337,7 @@ extern "C" void ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverBoundaryWithGhosts(cctkGH, &ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary_Body); + GenericFD_LoopOverBoundaryWithGhosts(cctkGH, ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary_Body); if (verbose > 1) { diff --git a/ML_BSSN_O2/src/ML_BSSN_O2_convertToADMBaseFakeDtLapseShift.cc b/ML_BSSN_O2/src/ML_BSSN_O2_convertToADMBaseFakeDtLapseShift.cc index a9774f4..572ec21 100644 --- a/ML_BSSN_O2/src/ML_BSSN_O2_convertToADMBaseFakeDtLapseShift.cc +++ b/ML_BSSN_O2/src/ML_BSSN_O2_convertToADMBaseFakeDtLapseShift.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_O2_convertToADMBaseFakeDtLapseShift_Body(cGH const * restric DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_O2_convertToADMBaseFakeDtLapseShift_Body(cGH const * restric CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_O2_convertToADMBaseFakeDtLapseShift_Body(cGH const * restric CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_O2_convertToADMBaseFakeDtLapseShift_Body(cGH const * restric CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_O2_convertToADMBaseFakeDtLapseShift_Body(cGH const * restric /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O2_convertToADMBaseFakeDtLapseShift, + LC_LOOP3VEC(ML_BSSN_O2_convertToADMBaseFakeDtLapseShift, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -237,7 +236,7 @@ static void ML_BSSN_O2_convertToADMBaseFakeDtLapseShift_Body(cGH const * restric /* Calculate temporaries and grid functions */ CCTK_REAL_VEC eta = - kfmin(ToReal(1),kmul(INV(rL),ToReal(SpatialBetaDriverRadius))); + kmul(INV(kfmax(rL,ToReal(SpatialBetaDriverRadius))),ToReal(SpatialBetaDriverRadius)); CCTK_REAL_VEC theta = kfmin(ToReal(1),kexp(knmsub(rL,INV(ToReal(SpatialShiftGammaCoeffRadius)),ToReal(1)))); @@ -260,60 +259,23 @@ static void ML_BSSN_O2_convertToADMBaseFakeDtLapseShift_Body(cGH const * restric else { dtbetaxL = - kmul(theta,kmul(kadd(Xt1L,kmadd(beta1L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B1L,Xt1L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt1L,kmadd(ksub(B1L,Xt1L),ToReal(ShiftBCoeff),kmul(beta1L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); dtbetayL = - kmul(theta,kmul(kadd(Xt2L,kmadd(beta2L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B2L,Xt2L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt2L,kmadd(ksub(B2L,Xt2L),ToReal(ShiftBCoeff),kmul(beta2L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); dtbetazL = - kmul(theta,kmul(kadd(Xt3L,kmadd(beta3L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B3L,Xt3L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count); - continue; + kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); } - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count); - break; - } - vec_store_nta(dtalp[index],dtalpL); - vec_store_nta(dtbetax[index],dtbetaxL); - vec_store_nta(dtbetay[index],dtbetayL); - vec_store_nta(dtbetaz[index],dtbetazL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(dtalp[index],dtalpL); + vec_store_nta_partial(dtbetax[index],dtbetaxL); + vec_store_nta_partial(dtbetay[index],dtbetayL); + vec_store_nta_partial(dtbetaz[index],dtbetazL); } - LC_ENDLOOP3VEC (ML_BSSN_O2_convertToADMBaseFakeDtLapseShift); + LC_ENDLOOP3VEC(ML_BSSN_O2_convertToADMBaseFakeDtLapseShift); } extern "C" void ML_BSSN_O2_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS) @@ -332,7 +294,17 @@ extern "C" void ML_BSSN_O2_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN_O2::ML_dtlapse","ML_BSSN_O2::ML_dtshift","ML_BSSN_O2::ML_Gamma","ML_BSSN_O2::ML_lapse","ML_BSSN_O2::ML_shift","ML_BSSN_O2::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_O2::ML_dtlapse", + "ML_BSSN_O2::ML_dtshift", + "ML_BSSN_O2::ML_Gamma", + "ML_BSSN_O2::ML_lapse", + "ML_BSSN_O2::ML_shift", + "ML_BSSN_O2::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O2_convertToADMBaseFakeDtLapseShift", 10, groups); switch(fdOrder) @@ -350,7 +322,7 @@ extern "C" void ML_BSSN_O2_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_O2_convertToADMBaseFakeDtLapseShift_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_O2_convertToADMBaseFakeDtLapseShift_Body); if (verbose > 1) { diff --git a/ML_BSSN_O2/src/ML_BSSN_O2_enforce.cc b/ML_BSSN_O2/src/ML_BSSN_O2_enforce.cc index 88ce028..7ad6d3b 100644 --- a/ML_BSSN_O2/src/ML_BSSN_O2_enforce.cc +++ b/ML_BSSN_O2/src/ML_BSSN_O2_enforce.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_O2_enforce_Body(cGH const * restrict const cctkGH, int const DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_O2_enforce_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_O2_enforce_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_O2_enforce_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_O2_enforce_Body(cGH const * restrict const cctkGH, int const /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O2_enforce, + LC_LOOP3VEC(ML_BSSN_O2_enforce, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -238,7 +237,8 @@ static void ML_BSSN_O2_enforce_Body(cGH const * restrict const cctkGH, int const /* Calculate temporaries and grid functions */ CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -246,12 +246,14 @@ static void ML_BSSN_O2_enforce_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC trAt = kmadd(At11L,gtu11,kmadd(At22L,gtu22,kmadd(At33L,gtu33,kmul(kmadd(At12L,gtu12,kmadd(At13L,gtu13,kmul(At23L,gtu23))),ToReal(2))))); @@ -276,60 +278,17 @@ static void ML_BSSN_O2_enforce_Body(cGH const * restrict const cctkGH, int const alphaL = kfmax(alphaL,ToReal(MinimumLapse)); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - break; - } - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); } - LC_ENDLOOP3VEC (ML_BSSN_O2_enforce); + LC_ENDLOOP3VEC(ML_BSSN_O2_enforce); } extern "C" void ML_BSSN_O2_enforce(CCTK_ARGUMENTS) @@ -348,7 +307,10 @@ extern "C" void ML_BSSN_O2_enforce(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O2::ML_curv","ML_BSSN_O2::ML_lapse","ML_BSSN_O2::ML_metric"}; + const char *const groups[] = { + "ML_BSSN_O2::ML_curv", + "ML_BSSN_O2::ML_lapse", + "ML_BSSN_O2::ML_metric"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O2_enforce", 3, groups); switch(fdOrder) @@ -366,7 +328,7 @@ extern "C" void ML_BSSN_O2_enforce(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_O2_enforce_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_O2_enforce_Body); if (verbose > 1) { diff --git a/ML_BSSN_O2/src/make.code.defn b/ML_BSSN_O2/src/make.code.defn index a98a680..f71475a 100644 --- a/ML_BSSN_O2/src/make.code.defn +++ b/ML_BSSN_O2/src/make.code.defn @@ -1,3 +1,3 @@ # File produced by Kranc -SRCS = Startup.cc RegisterMoL.cc RegisterSymmetries.cc ML_BSSN_O2_Minkowski.cc ML_BSSN_O2_convertFromADMBase.cc ML_BSSN_O2_InitGamma.cc ML_BSSN_O2_convertFromADMBaseGamma.cc ML_BSSN_O2_RHS1.cc ML_BSSN_O2_RHS2.cc ML_BSSN_O2_Dissipation.cc ML_BSSN_O2_Advect.cc ML_BSSN_O2_InitRHS.cc ML_BSSN_O2_RHSStaticBoundary.cc ML_BSSN_O2_enforce.cc ML_BSSN_O2_boundary.cc ML_BSSN_O2_convertToADMBase.cc ML_BSSN_O2_convertToADMBaseDtLapseShift.cc ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary.cc ML_BSSN_O2_convertToADMBaseFakeDtLapseShift.cc ML_BSSN_O2_constraints1.cc ML_BSSN_O2_constraints2.cc Boundaries.cc +SRCS = Startup.cc RegisterSymmetries.cc RegisterMoL.cc ML_BSSN_O2_Minkowski.cc ML_BSSN_O2_convertFromADMBase.cc ML_BSSN_O2_InitGamma.cc ML_BSSN_O2_convertFromADMBaseGamma.cc ML_BSSN_O2_RHS1.cc ML_BSSN_O2_RHS2.cc ML_BSSN_O2_Dissipation.cc ML_BSSN_O2_Advect.cc ML_BSSN_O2_InitRHS.cc ML_BSSN_O2_RHSStaticBoundary.cc ML_BSSN_O2_enforce.cc ML_BSSN_O2_boundary.cc ML_BSSN_O2_convertToADMBase.cc ML_BSSN_O2_convertToADMBaseDtLapseShift.cc ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary.cc ML_BSSN_O2_convertToADMBaseFakeDtLapseShift.cc ML_BSSN_O2_constraints1.cc ML_BSSN_O2_constraints2.cc Boundaries.cc diff --git a/ML_BSSN_O8/param.ccl b/ML_BSSN_O8/param.ccl index 4d931a5..2073d46 100644 --- a/ML_BSSN_O8/param.ccl +++ b/ML_BSSN_O8/param.ccl @@ -90,7 +90,7 @@ CCTK_REAL BetaDriver "BetaDriver" } 0 restricted: -CCTK_REAL LapseAdvectionCoeff "Factor in front of the shift advection terms in 1+log" +CCTK_REAL LapseAdvectionCoeff "Factor in front of the lapse advection terms in 1+log" { "*:*" :: "" } 1 @@ -237,6 +237,12 @@ CCTK_INT rhs_timelevels "Number of active RHS timelevels" STEERABLE=RECOVER } 1 restricted: +CCTK_INT other_timelevels "Number of active timelevels for non-evolved grid functions" STEERABLE=RECOVER +{ + 0:3 :: "" +} 1 + +restricted: CCTK_INT ML_BSSN_O8_Minkowski_calc_every "ML_BSSN_O8_Minkowski_calc_every" STEERABLE=ALWAYS { *:* :: "" diff --git a/ML_BSSN_O8/schedule.ccl b/ML_BSSN_O8/schedule.ccl index e068b7c..d734ed8 100644 --- a/ML_BSSN_O8/schedule.ccl +++ b/ML_BSSN_O8/schedule.ccl @@ -1,15 +1,30 @@ # File produced by Kranc -STORAGE: ML_cons_detg[1] +if (other_timelevels == 1) +{ + STORAGE: ML_cons_detg[1] +} -STORAGE: ML_cons_Gamma[1] +if (other_timelevels == 1) +{ + STORAGE: ML_cons_Gamma[1] +} -STORAGE: ML_cons_traceA[1] +if (other_timelevels == 1) +{ + STORAGE: ML_cons_traceA[1] +} -STORAGE: ML_Ham[1] +if (other_timelevels == 1) +{ + STORAGE: ML_Ham[1] +} -STORAGE: ML_mom[1] +if (other_timelevels == 1) +{ + STORAGE: ML_mom[1] +} if (timelevels == 1) { @@ -251,12 +266,6 @@ schedule ML_BSSN_O8_Startup at STARTUP OPTIONS: meta } "create banner" -schedule ML_BSSN_O8_RegisterVars in MoL_Register -{ - LANG: C - OPTIONS: meta -} "Register Variables for MoL" - schedule ML_BSSN_O8_RegisterSymmetries in SymmetryRegister { LANG: C @@ -269,6 +278,15 @@ if (CCTK_EQUALS(my_initial_data, "Minkowski")) schedule ML_BSSN_O8_Minkowski IN ADMBase_InitialData { LANG: C + WRITES: ML_BSSN_O8::ML_curv + WRITES: ML_BSSN_O8::ML_dtlapse + WRITES: ML_BSSN_O8::ML_dtshift + WRITES: ML_BSSN_O8::ML_Gamma + WRITES: ML_BSSN_O8::ML_lapse + WRITES: ML_BSSN_O8::ML_log_confac + WRITES: ML_BSSN_O8::ML_metric + WRITES: ML_BSSN_O8::ML_shift + WRITES: ML_BSSN_O8::ML_trace_curv } "ML_BSSN_O8_Minkowski" } @@ -278,6 +296,18 @@ if (CCTK_EQUALS(my_initial_data, "ADMBase")) schedule ML_BSSN_O8_convertFromADMBase AT initial AFTER ADMBase_PostInitial { LANG: C + READS: ADMBase::curv + READS: ADMBase::lapse + READS: ADMBase::metric + READS: ADMBase::shift + READS: ML_BSSN_O8::ML_log_confac + READS: ML_BSSN_O8::ML_trace_curv + WRITES: ML_BSSN_O8::ML_curv + WRITES: ML_BSSN_O8::ML_lapse + WRITES: ML_BSSN_O8::ML_log_confac + WRITES: ML_BSSN_O8::ML_metric + WRITES: ML_BSSN_O8::ML_shift + WRITES: ML_BSSN_O8::ML_trace_curv } "ML_BSSN_O8_convertFromADMBase" } @@ -287,6 +317,9 @@ if (CCTK_EQUALS(my_initial_data, "ADMBase")) schedule ML_BSSN_O8_InitGamma AT initial BEFORE ML_BSSN_O8_convertFromADMBaseGamma { LANG: C + WRITES: ML_BSSN_O8::ML_dtlapse + WRITES: ML_BSSN_O8::ML_dtshift + WRITES: ML_BSSN_O8::ML_Gamma } "ML_BSSN_O8_InitGamma" } @@ -299,17 +332,62 @@ if (CCTK_EQUALS(my_initial_data, "ADMBase")) SYNC: ML_dtlapse SYNC: ML_dtshift SYNC: ML_Gamma + READS: ADMBase::dtlapse + READS: ADMBase::dtshift + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_O8::ML_lapse + READS: ML_BSSN_O8::ML_metric + READS: ML_BSSN_O8::ML_shift + WRITES: ML_BSSN_O8::ML_dtlapse + WRITES: ML_BSSN_O8::ML_dtshift + WRITES: ML_BSSN_O8::ML_Gamma } "ML_BSSN_O8_convertFromADMBaseGamma" } schedule ML_BSSN_O8_RHS1 IN ML_BSSN_O8_evolCalcGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_O8::ML_curv + READS: ML_BSSN_O8::ML_dtlapse + READS: ML_BSSN_O8::ML_dtshift + READS: ML_BSSN_O8::ML_Gamma + READS: ML_BSSN_O8::ML_lapse + READS: ML_BSSN_O8::ML_log_confac + READS: ML_BSSN_O8::ML_metric + READS: ML_BSSN_O8::ML_shift + READS: ML_BSSN_O8::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN_O8::ML_dtlapserhs + WRITES: ML_BSSN_O8::ML_dtshiftrhs + WRITES: ML_BSSN_O8::ML_Gammarhs + WRITES: ML_BSSN_O8::ML_lapserhs + WRITES: ML_BSSN_O8::ML_log_confacrhs + WRITES: ML_BSSN_O8::ML_metricrhs + WRITES: ML_BSSN_O8::ML_shiftrhs + WRITES: ML_BSSN_O8::ML_trace_curvrhs } "ML_BSSN_O8_RHS1" schedule ML_BSSN_O8_RHS2 IN ML_BSSN_O8_evolCalcGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_O8::ML_curv + READS: ML_BSSN_O8::ML_Gamma + READS: ML_BSSN_O8::ML_lapse + READS: ML_BSSN_O8::ML_log_confac + READS: ML_BSSN_O8::ML_metric + READS: ML_BSSN_O8::ML_shift + READS: ML_BSSN_O8::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN_O8::ML_curvrhs } "ML_BSSN_O8_RHS2" @@ -318,17 +396,80 @@ if (CCTK_EQUALS(apply_dissipation, "always")) schedule ML_BSSN_O8_Dissipation IN ML_BSSN_O8_evolCalcGroup AFTER (ML_BSSN_O8_RHS1 ML_BSSN_O8_RHS2) { LANG: C + READS: ML_BSSN_O8::ML_curv + READS: ML_BSSN_O8::ML_curvrhs + READS: ML_BSSN_O8::ML_dtlapse + READS: ML_BSSN_O8::ML_dtlapserhs + READS: ML_BSSN_O8::ML_dtshift + READS: ML_BSSN_O8::ML_dtshiftrhs + READS: ML_BSSN_O8::ML_Gamma + READS: ML_BSSN_O8::ML_Gammarhs + READS: ML_BSSN_O8::ML_lapse + READS: ML_BSSN_O8::ML_lapserhs + READS: ML_BSSN_O8::ML_log_confac + READS: ML_BSSN_O8::ML_log_confacrhs + READS: ML_BSSN_O8::ML_metric + READS: ML_BSSN_O8::ML_metricrhs + READS: ML_BSSN_O8::ML_shift + READS: ML_BSSN_O8::ML_shiftrhs + READS: ML_BSSN_O8::ML_trace_curv + READS: ML_BSSN_O8::ML_trace_curvrhs + WRITES: ML_BSSN_O8::ML_curvrhs + WRITES: ML_BSSN_O8::ML_dtlapserhs + WRITES: ML_BSSN_O8::ML_dtshiftrhs + WRITES: ML_BSSN_O8::ML_Gammarhs + WRITES: ML_BSSN_O8::ML_lapserhs + WRITES: ML_BSSN_O8::ML_log_confacrhs + WRITES: ML_BSSN_O8::ML_metricrhs + WRITES: ML_BSSN_O8::ML_shiftrhs + WRITES: ML_BSSN_O8::ML_trace_curvrhs } "ML_BSSN_O8_Dissipation" } schedule ML_BSSN_O8_Advect IN ML_BSSN_O8_evolCalcGroup AFTER (ML_BSSN_O8_RHS1 ML_BSSN_O8_RHS2) { LANG: C + READS: ML_BSSN_O8::ML_curv + READS: ML_BSSN_O8::ML_curvrhs + READS: ML_BSSN_O8::ML_dtlapse + READS: ML_BSSN_O8::ML_dtlapserhs + READS: ML_BSSN_O8::ML_dtshift + READS: ML_BSSN_O8::ML_dtshiftrhs + READS: ML_BSSN_O8::ML_Gamma + READS: ML_BSSN_O8::ML_Gammarhs + READS: ML_BSSN_O8::ML_lapse + READS: ML_BSSN_O8::ML_lapserhs + READS: ML_BSSN_O8::ML_log_confac + READS: ML_BSSN_O8::ML_log_confacrhs + READS: ML_BSSN_O8::ML_metric + READS: ML_BSSN_O8::ML_metricrhs + READS: ML_BSSN_O8::ML_shift + READS: ML_BSSN_O8::ML_shiftrhs + READS: ML_BSSN_O8::ML_trace_curv + READS: ML_BSSN_O8::ML_trace_curvrhs + WRITES: ML_BSSN_O8::ML_curvrhs + WRITES: ML_BSSN_O8::ML_dtlapserhs + WRITES: ML_BSSN_O8::ML_dtshiftrhs + WRITES: ML_BSSN_O8::ML_Gammarhs + WRITES: ML_BSSN_O8::ML_lapserhs + WRITES: ML_BSSN_O8::ML_log_confacrhs + WRITES: ML_BSSN_O8::ML_metricrhs + WRITES: ML_BSSN_O8::ML_shiftrhs + WRITES: ML_BSSN_O8::ML_trace_curvrhs } "ML_BSSN_O8_Advect" schedule ML_BSSN_O8_InitRHS AT analysis BEFORE ML_BSSN_O8_evolCalcGroup { LANG: C + WRITES: ML_BSSN_O8::ML_curvrhs + WRITES: ML_BSSN_O8::ML_dtlapserhs + WRITES: ML_BSSN_O8::ML_dtshiftrhs + WRITES: ML_BSSN_O8::ML_Gammarhs + WRITES: ML_BSSN_O8::ML_lapserhs + WRITES: ML_BSSN_O8::ML_log_confacrhs + WRITES: ML_BSSN_O8::ML_metricrhs + WRITES: ML_BSSN_O8::ML_shiftrhs + WRITES: ML_BSSN_O8::ML_trace_curvrhs } "ML_BSSN_O8_InitRHS" @@ -337,12 +478,26 @@ if (CCTK_EQUALS(my_rhs_boundary_condition, "static")) schedule ML_BSSN_O8_RHSStaticBoundary IN MoL_CalcRHS { LANG: C + WRITES: ML_BSSN_O8::ML_curvrhs + WRITES: ML_BSSN_O8::ML_dtlapserhs + WRITES: ML_BSSN_O8::ML_dtshiftrhs + WRITES: ML_BSSN_O8::ML_Gammarhs + WRITES: ML_BSSN_O8::ML_lapserhs + WRITES: ML_BSSN_O8::ML_log_confacrhs + WRITES: ML_BSSN_O8::ML_metricrhs + WRITES: ML_BSSN_O8::ML_shiftrhs + WRITES: ML_BSSN_O8::ML_trace_curvrhs } "ML_BSSN_O8_RHSStaticBoundary" } schedule ML_BSSN_O8_enforce IN MoL_PostStepModify { LANG: C + READS: ML_BSSN_O8::ML_curv + READS: ML_BSSN_O8::ML_lapse + READS: ML_BSSN_O8::ML_metric + WRITES: ML_BSSN_O8::ML_curv + WRITES: ML_BSSN_O8::ML_lapse } "ML_BSSN_O8_enforce" @@ -351,12 +506,32 @@ if (CCTK_EQUALS(my_boundary_condition, "Minkowski")) schedule ML_BSSN_O8_boundary IN MoL_PostStep { LANG: C + WRITES: ML_BSSN_O8::ML_curv + WRITES: ML_BSSN_O8::ML_dtlapse + WRITES: ML_BSSN_O8::ML_dtshift + WRITES: ML_BSSN_O8::ML_Gamma + WRITES: ML_BSSN_O8::ML_lapse + WRITES: ML_BSSN_O8::ML_log_confac + WRITES: ML_BSSN_O8::ML_metric + WRITES: ML_BSSN_O8::ML_shift + WRITES: ML_BSSN_O8::ML_trace_curv } "ML_BSSN_O8_boundary" } schedule ML_BSSN_O8_convertToADMBase IN ML_BSSN_O8_convertToADMBaseGroup { LANG: C + READS: ADMBase::metric + READS: ML_BSSN_O8::ML_curv + READS: ML_BSSN_O8::ML_lapse + READS: ML_BSSN_O8::ML_log_confac + READS: ML_BSSN_O8::ML_metric + READS: ML_BSSN_O8::ML_shift + READS: ML_BSSN_O8::ML_trace_curv + WRITES: ADMBase::curv + WRITES: ADMBase::lapse + WRITES: ADMBase::metric + WRITES: ADMBase::shift } "ML_BSSN_O8_convertToADMBase" @@ -367,6 +542,18 @@ if (CCTK_EQUALS(dt_lapse_shift_method, "correct")) LANG: C SYNC: ADMBase::dtlapse SYNC: ADMBase::dtshift + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_O8::ML_dtlapse + READS: ML_BSSN_O8::ML_dtshift + READS: ML_BSSN_O8::ML_Gamma + READS: ML_BSSN_O8::ML_lapse + READS: ML_BSSN_O8::ML_log_confac + READS: ML_BSSN_O8::ML_metric + READS: ML_BSSN_O8::ML_shift + READS: ML_BSSN_O8::ML_trace_curv + WRITES: ADMBase::dtlapse + WRITES: ADMBase::dtshift } "ML_BSSN_O8_convertToADMBaseDtLapseShift" } @@ -376,6 +563,17 @@ if (CCTK_EQUALS(dt_lapse_shift_method, "correct")) schedule ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary IN ML_BSSN_O8_convertToADMBaseGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_O8::ML_dtlapse + READS: ML_BSSN_O8::ML_dtshift + READS: ML_BSSN_O8::ML_Gamma + READS: ML_BSSN_O8::ML_lapse + READS: ML_BSSN_O8::ML_metric + READS: ML_BSSN_O8::ML_shift + READS: ML_BSSN_O8::ML_trace_curv + WRITES: ADMBase::dtlapse + WRITES: ADMBase::dtshift } "ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary" } @@ -385,6 +583,17 @@ if (CCTK_EQUALS(dt_lapse_shift_method, "noLapseShiftAdvection")) schedule ML_BSSN_O8_convertToADMBaseFakeDtLapseShift IN ML_BSSN_O8_convertToADMBaseGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_O8::ML_dtlapse + READS: ML_BSSN_O8::ML_dtshift + READS: ML_BSSN_O8::ML_Gamma + READS: ML_BSSN_O8::ML_lapse + READS: ML_BSSN_O8::ML_metric + READS: ML_BSSN_O8::ML_shift + READS: ML_BSSN_O8::ML_trace_curv + WRITES: ADMBase::dtlapse + WRITES: ADMBase::dtshift } "ML_BSSN_O8_convertToADMBaseFakeDtLapseShift" } @@ -396,6 +605,17 @@ schedule group ML_BSSN_O8_constraints1_group in MoL_PseudoEvolution after MoL_Po schedule ML_BSSN_O8_constraints1 in ML_BSSN_O8_constraints1_group { LANG: C + READS: ML_BSSN_O8::ML_curv + READS: ML_BSSN_O8::ML_Gamma + READS: ML_BSSN_O8::ML_lapse + READS: ML_BSSN_O8::ML_log_confac + READS: ML_BSSN_O8::ML_metric + READS: ML_BSSN_O8::ML_shift + READS: ML_BSSN_O8::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN_O8::ML_Ham } "ML_BSSN_O8_constraints1" schedule ML_BSSN_O8_constraints1_SelectBCs in ML_BSSN_O8_constraints1_bc_group @@ -428,6 +648,20 @@ schedule group ML_BSSN_O8_constraints2_group in MoL_PseudoEvolution after MoL_Po schedule ML_BSSN_O8_constraints2 in ML_BSSN_O8_constraints2_group { LANG: C + READS: ML_BSSN_O8::ML_curv + READS: ML_BSSN_O8::ML_Gamma + READS: ML_BSSN_O8::ML_lapse + READS: ML_BSSN_O8::ML_log_confac + READS: ML_BSSN_O8::ML_metric + READS: ML_BSSN_O8::ML_shift + READS: ML_BSSN_O8::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN_O8::ML_cons_detg + WRITES: ML_BSSN_O8::ML_cons_Gamma + WRITES: ML_BSSN_O8::ML_cons_traceA + WRITES: ML_BSSN_O8::ML_mom } "ML_BSSN_O8_constraints2" schedule ML_BSSN_O8_constraints2_SelectBCs in ML_BSSN_O8_constraints2_bc_group @@ -476,6 +710,12 @@ schedule ML_BSSN_O8_CheckBoundaries at BASEGRID OPTIONS: meta } "check boundaries treatment" +schedule ML_BSSN_O8_RegisterVars in MoL_Register +{ + LANG: C + OPTIONS: meta +} "Register Variables for MoL" + schedule group ApplyBCs as ML_BSSN_O8_ApplyBCs in MoL_PostStep after ML_BSSN_O8_SelectBoundConds { # no language specified diff --git a/ML_BSSN_O8/src/ML_BSSN_O8_Advect.cc b/ML_BSSN_O8/src/ML_BSSN_O8_Advect.cc index 78dbbb4..0ecbeec 100644 --- a/ML_BSSN_O8/src/ML_BSSN_O8_Advect.cc +++ b/ML_BSSN_O8/src/ML_BSSN_O8_Advect.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -64,8 +65,6 @@ static void ML_BSSN_O8_Advect_Body(cGH const * restrict const cctkGH, int const DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -102,9 +101,9 @@ static void ML_BSSN_O8_Advect_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -123,14 +122,14 @@ static void ML_BSSN_O8_Advect_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -144,9 +143,9 @@ static void ML_BSSN_O8_Advect_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -227,7 +226,7 @@ static void ML_BSSN_O8_Advect_Body(cGH const * restrict const cctkGH, int const /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O8_Advect, + LC_LOOP3VEC(ML_BSSN_O8_Advect, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1986,7 +1985,7 @@ static void ML_BSSN_O8_Advect_Body(cGH const * restrict const cctkGH, int const } phirhsL = - kmadd(beta1L,JacPDupwindNthAnti1phi,kmadd(beta2L,JacPDupwindNthAnti2phi,kmadd(beta3L,JacPDupwindNthAnti3phi,kadd(phirhsL,kmadd(JacPDupwindNthSymm1phi,kfabs(beta1L),kmadd(JacPDupwindNthSymm2phi,kfabs(beta2L),kmul(JacPDupwindNthSymm3phi,kfabs(beta3L)))))))); + kadd(phirhsL,kmadd(beta1L,JacPDupwindNthAnti1phi,kmadd(beta2L,JacPDupwindNthAnti2phi,kmadd(beta3L,JacPDupwindNthAnti3phi,kmadd(JacPDupwindNthSymm1phi,kfabs(beta1L),kmadd(JacPDupwindNthSymm2phi,kfabs(beta2L),kmul(JacPDupwindNthSymm3phi,kfabs(beta3L)))))))); gt11rhsL = kadd(gt11rhsL,kmadd(beta1L,JacPDupwindNthAnti1gt11,kmadd(beta2L,JacPDupwindNthAnti2gt11,kmadd(beta3L,JacPDupwindNthAnti3gt11,kmadd(JacPDupwindNthSymm1gt11,kfabs(beta1L),kmadd(JacPDupwindNthSymm2gt11,kfabs(beta2L),kmul(JacPDupwindNthSymm3gt11,kfabs(beta3L)))))))); @@ -2007,16 +2006,16 @@ static void ML_BSSN_O8_Advect_Body(cGH const * restrict const cctkGH, int const kadd(gt33rhsL,kmadd(beta1L,JacPDupwindNthAnti1gt33,kmadd(beta2L,JacPDupwindNthAnti2gt33,kmadd(beta3L,JacPDupwindNthAnti3gt33,kmadd(JacPDupwindNthSymm1gt33,kfabs(beta1L),kmadd(JacPDupwindNthSymm2gt33,kfabs(beta2L),kmul(JacPDupwindNthSymm3gt33,kfabs(beta3L)))))))); Xt1rhsL = - kmadd(beta1L,JacPDupwindNthAnti1Xt1,kmadd(beta2L,JacPDupwindNthAnti2Xt1,kmadd(beta3L,JacPDupwindNthAnti3Xt1,kadd(Xt1rhsL,kmadd(JacPDupwindNthSymm1Xt1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt1,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt1,kfabs(beta3L)))))))); + kadd(Xt1rhsL,kmadd(beta1L,JacPDupwindNthAnti1Xt1,kmadd(beta2L,JacPDupwindNthAnti2Xt1,kmadd(beta3L,JacPDupwindNthAnti3Xt1,kmadd(JacPDupwindNthSymm1Xt1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt1,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt1,kfabs(beta3L)))))))); Xt2rhsL = - kmadd(beta1L,JacPDupwindNthAnti1Xt2,kmadd(beta2L,JacPDupwindNthAnti2Xt2,kmadd(beta3L,JacPDupwindNthAnti3Xt2,kadd(Xt2rhsL,kmadd(JacPDupwindNthSymm1Xt2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt2,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt2,kfabs(beta3L)))))))); + kadd(Xt2rhsL,kmadd(beta1L,JacPDupwindNthAnti1Xt2,kmadd(beta2L,JacPDupwindNthAnti2Xt2,kmadd(beta3L,JacPDupwindNthAnti3Xt2,kmadd(JacPDupwindNthSymm1Xt2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt2,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt2,kfabs(beta3L)))))))); Xt3rhsL = - kmadd(beta1L,JacPDupwindNthAnti1Xt3,kmadd(beta2L,JacPDupwindNthAnti2Xt3,kmadd(beta3L,JacPDupwindNthAnti3Xt3,kadd(Xt3rhsL,kmadd(JacPDupwindNthSymm1Xt3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt3,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt3,kfabs(beta3L)))))))); + kadd(Xt3rhsL,kmadd(beta1L,JacPDupwindNthAnti1Xt3,kmadd(beta2L,JacPDupwindNthAnti2Xt3,kmadd(beta3L,JacPDupwindNthAnti3Xt3,kmadd(JacPDupwindNthSymm1Xt3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt3,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt3,kfabs(beta3L)))))))); trKrhsL = - kmadd(beta1L,JacPDupwindNthAnti1trK,kmadd(beta2L,JacPDupwindNthAnti2trK,kmadd(beta3L,JacPDupwindNthAnti3trK,kadd(trKrhsL,kmadd(JacPDupwindNthSymm1trK,kfabs(beta1L),kmadd(JacPDupwindNthSymm2trK,kfabs(beta2L),kmul(JacPDupwindNthSymm3trK,kfabs(beta3L)))))))); + kadd(trKrhsL,kmadd(beta1L,JacPDupwindNthAnti1trK,kmadd(beta2L,JacPDupwindNthAnti2trK,kmadd(beta3L,JacPDupwindNthAnti3trK,kmadd(JacPDupwindNthSymm1trK,kfabs(beta1L),kmadd(JacPDupwindNthSymm2trK,kfabs(beta2L),kmul(JacPDupwindNthSymm3trK,kfabs(beta3L)))))))); At11rhsL = kadd(At11rhsL,kmadd(beta1L,JacPDupwindNthAnti1At11,kmadd(beta2L,JacPDupwindNthAnti2At11,kmadd(beta3L,JacPDupwindNthAnti3At11,kmadd(JacPDupwindNthSymm1At11,kfabs(beta1L),kmadd(JacPDupwindNthSymm2At11,kfabs(beta2L),kmul(JacPDupwindNthSymm3At11,kfabs(beta3L)))))))); @@ -2040,7 +2039,7 @@ static void ML_BSSN_O8_Advect_Body(cGH const * restrict const cctkGH, int const kmadd(kmadd(beta1L,JacPDupwindNthAnti1alpha,kmadd(beta2L,JacPDupwindNthAnti2alpha,kmadd(beta3L,JacPDupwindNthAnti3alpha,kmadd(JacPDupwindNthSymm1alpha,kfabs(beta1L),kmadd(JacPDupwindNthSymm2alpha,kfabs(beta2L),kmul(JacPDupwindNthSymm3alpha,kfabs(beta3L))))))),ToReal(LapseAdvectionCoeff),alpharhsL); ArhsL = - kmadd(kmadd(beta1L,JacPDupwindNthAnti1A,kmadd(beta2L,JacPDupwindNthAnti2A,kmadd(beta3L,JacPDupwindNthAnti3A,kmadd(JacPDupwindNthSymm1A,kfabs(beta1L),kmadd(JacPDupwindNthSymm2A,kfabs(beta2L),kmul(JacPDupwindNthSymm3A,kfabs(beta3L))))))),ToReal(LapseAdvectionCoeff),ArhsL); + kmadd(ToReal(LapseACoeff),kmsub(kmadd(beta1L,JacPDupwindNthAnti1A,kmadd(beta2L,JacPDupwindNthAnti2A,kmadd(beta3L,JacPDupwindNthAnti3A,kmadd(JacPDupwindNthSymm1A,kfabs(beta1L),kmadd(JacPDupwindNthSymm2A,kfabs(beta2L),kmul(JacPDupwindNthSymm3A,kfabs(beta3L))))))),ToReal(LapseAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1trK,kmadd(beta2L,JacPDupwindNthAnti2trK,kmadd(beta3L,JacPDupwindNthAnti3trK,kmadd(JacPDupwindNthSymm1trK,kfabs(beta1L),kmadd(JacPDupwindNthSymm2trK,kfabs(beta2L),kmul(JacPDupwindNthSymm3trK,kfabs(beta3L))))))),kadd(ToReal(-1),ToReal(LapseAdvectionCoeff)))),ArhsL); beta1rhsL = kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta1,kmadd(beta2L,JacPDupwindNthAnti2beta1,kmadd(beta3L,JacPDupwindNthAnti3beta1,kmadd(JacPDupwindNthSymm1beta1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta1,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),beta1rhsL); @@ -2052,140 +2051,43 @@ static void ML_BSSN_O8_Advect_Body(cGH const * restrict const cctkGH, int const kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),beta3rhsL); B1rhsL = - kadd(B1rhsL,kmadd(kmadd(beta1L,ksub(JacPDupwindNthAnti1B1,JacPDupwindNthAnti1Xt1),kmadd(beta2L,ksub(JacPDupwindNthAnti2B1,JacPDupwindNthAnti2Xt1),kmadd(beta3L,ksub(JacPDupwindNthAnti3B1,JacPDupwindNthAnti3Xt1),kmadd(kfabs(beta1L),ksub(JacPDupwindNthSymm1B1,JacPDupwindNthSymm1Xt1),kmadd(kfabs(beta2L),ksub(JacPDupwindNthSymm2B1,JacPDupwindNthSymm2Xt1),kmul(kfabs(beta3L),ksub(JacPDupwindNthSymm3B1,JacPDupwindNthSymm3Xt1))))))),ToReal(ShiftAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1Xt1,kmadd(beta2L,JacPDupwindNthAnti2Xt1,kmadd(beta3L,JacPDupwindNthAnti3Xt1,kmadd(JacPDupwindNthSymm1Xt1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt1,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt1,kfabs(beta3L))))))),ToReal(ShiftBCoeff)))); + kmadd(kmsub(kmadd(beta1L,JacPDupwindNthAnti1B1,kmadd(beta2L,JacPDupwindNthAnti2B1,kmadd(beta3L,JacPDupwindNthAnti3B1,kmadd(JacPDupwindNthSymm1B1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2B1,kfabs(beta2L),kmul(JacPDupwindNthSymm3B1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1Xt1,kmadd(beta2L,JacPDupwindNthAnti2Xt1,kmadd(beta3L,JacPDupwindNthAnti3Xt1,kmadd(JacPDupwindNthSymm1Xt1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt1,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt1,kfabs(beta3L))))))),kadd(ToReal(-1),ToReal(ShiftAdvectionCoeff)))),ToReal(ShiftBCoeff),B1rhsL); B2rhsL = - kadd(B2rhsL,kmadd(kmadd(beta1L,ksub(JacPDupwindNthAnti1B2,JacPDupwindNthAnti1Xt2),kmadd(beta2L,ksub(JacPDupwindNthAnti2B2,JacPDupwindNthAnti2Xt2),kmadd(beta3L,ksub(JacPDupwindNthAnti3B2,JacPDupwindNthAnti3Xt2),kmadd(kfabs(beta1L),ksub(JacPDupwindNthSymm1B2,JacPDupwindNthSymm1Xt2),kmadd(kfabs(beta2L),ksub(JacPDupwindNthSymm2B2,JacPDupwindNthSymm2Xt2),kmul(kfabs(beta3L),ksub(JacPDupwindNthSymm3B2,JacPDupwindNthSymm3Xt2))))))),ToReal(ShiftAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1Xt2,kmadd(beta2L,JacPDupwindNthAnti2Xt2,kmadd(beta3L,JacPDupwindNthAnti3Xt2,kmadd(JacPDupwindNthSymm1Xt2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt2,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt2,kfabs(beta3L))))))),ToReal(ShiftBCoeff)))); + kmadd(kmsub(kmadd(beta1L,JacPDupwindNthAnti1B2,kmadd(beta2L,JacPDupwindNthAnti2B2,kmadd(beta3L,JacPDupwindNthAnti3B2,kmadd(JacPDupwindNthSymm1B2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2B2,kfabs(beta2L),kmul(JacPDupwindNthSymm3B2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1Xt2,kmadd(beta2L,JacPDupwindNthAnti2Xt2,kmadd(beta3L,JacPDupwindNthAnti3Xt2,kmadd(JacPDupwindNthSymm1Xt2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt2,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt2,kfabs(beta3L))))))),kadd(ToReal(-1),ToReal(ShiftAdvectionCoeff)))),ToReal(ShiftBCoeff),B2rhsL); B3rhsL = - kadd(B3rhsL,kmadd(kmadd(beta1L,ksub(JacPDupwindNthAnti1B3,JacPDupwindNthAnti1Xt3),kmadd(beta2L,ksub(JacPDupwindNthAnti2B3,JacPDupwindNthAnti2Xt3),kmadd(beta3L,ksub(JacPDupwindNthAnti3B3,JacPDupwindNthAnti3Xt3),kmadd(kfabs(beta1L),ksub(JacPDupwindNthSymm1B3,JacPDupwindNthSymm1Xt3),kmadd(kfabs(beta2L),ksub(JacPDupwindNthSymm2B3,JacPDupwindNthSymm2Xt3),kmul(kfabs(beta3L),ksub(JacPDupwindNthSymm3B3,JacPDupwindNthSymm3Xt3))))))),ToReal(ShiftAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1Xt3,kmadd(beta2L,JacPDupwindNthAnti2Xt3,kmadd(beta3L,JacPDupwindNthAnti3Xt3,kmadd(JacPDupwindNthSymm1Xt3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt3,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt3,kfabs(beta3L))))))),ToReal(ShiftBCoeff)))); - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ + kmadd(kmsub(kmadd(beta1L,JacPDupwindNthAnti1B3,kmadd(beta2L,JacPDupwindNthAnti2B3,kmadd(beta3L,JacPDupwindNthAnti3B3,kmadd(JacPDupwindNthSymm1B3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2B3,kfabs(beta2L),kmul(JacPDupwindNthSymm3B3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1Xt3,kmadd(beta2L,JacPDupwindNthAnti2Xt3,kmadd(beta3L,JacPDupwindNthAnti3Xt3,kmadd(JacPDupwindNthSymm1Xt3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt3,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt3,kfabs(beta3L))))))),kadd(ToReal(-1),ToReal(ShiftAdvectionCoeff)))),ToReal(ShiftBCoeff),B3rhsL); - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_O8_Advect); + LC_ENDLOOP3VEC(ML_BSSN_O8_Advect); } extern "C" void ML_BSSN_O8_Advect(CCTK_ARGUMENTS) @@ -2204,7 +2106,25 @@ extern "C" void ML_BSSN_O8_Advect(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O8::ML_curv","ML_BSSN_O8::ML_curvrhs","ML_BSSN_O8::ML_dtlapse","ML_BSSN_O8::ML_dtlapserhs","ML_BSSN_O8::ML_dtshift","ML_BSSN_O8::ML_dtshiftrhs","ML_BSSN_O8::ML_Gamma","ML_BSSN_O8::ML_Gammarhs","ML_BSSN_O8::ML_lapse","ML_BSSN_O8::ML_lapserhs","ML_BSSN_O8::ML_log_confac","ML_BSSN_O8::ML_log_confacrhs","ML_BSSN_O8::ML_metric","ML_BSSN_O8::ML_metricrhs","ML_BSSN_O8::ML_shift","ML_BSSN_O8::ML_shiftrhs","ML_BSSN_O8::ML_trace_curv","ML_BSSN_O8::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN_O8::ML_curv", + "ML_BSSN_O8::ML_curvrhs", + "ML_BSSN_O8::ML_dtlapse", + "ML_BSSN_O8::ML_dtlapserhs", + "ML_BSSN_O8::ML_dtshift", + "ML_BSSN_O8::ML_dtshiftrhs", + "ML_BSSN_O8::ML_Gamma", + "ML_BSSN_O8::ML_Gammarhs", + "ML_BSSN_O8::ML_lapse", + "ML_BSSN_O8::ML_lapserhs", + "ML_BSSN_O8::ML_log_confac", + "ML_BSSN_O8::ML_log_confacrhs", + "ML_BSSN_O8::ML_metric", + "ML_BSSN_O8::ML_metricrhs", + "ML_BSSN_O8::ML_shift", + "ML_BSSN_O8::ML_shiftrhs", + "ML_BSSN_O8::ML_trace_curv", + "ML_BSSN_O8::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O8_Advect", 18, groups); switch(fdOrder) @@ -2226,7 +2146,7 @@ extern "C" void ML_BSSN_O8_Advect(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_O8_Advect_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_O8_Advect_Body); if (verbose > 1) { diff --git a/ML_BSSN_O8/src/ML_BSSN_O8_Dissipation.cc b/ML_BSSN_O8/src/ML_BSSN_O8_Dissipation.cc index 402faa8..7965398 100644 --- a/ML_BSSN_O8/src/ML_BSSN_O8_Dissipation.cc +++ b/ML_BSSN_O8/src/ML_BSSN_O8_Dissipation.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -64,8 +65,6 @@ static void ML_BSSN_O8_Dissipation_Body(cGH const * restrict const cctkGH, int c DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -102,9 +101,9 @@ static void ML_BSSN_O8_Dissipation_Body(cGH const * restrict const cctkGH, int c CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -123,14 +122,14 @@ static void ML_BSSN_O8_Dissipation_Body(cGH const * restrict const cctkGH, int c CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -144,9 +143,9 @@ static void ML_BSSN_O8_Dissipation_Body(cGH const * restrict const cctkGH, int c CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -227,7 +226,7 @@ static void ML_BSSN_O8_Dissipation_Body(cGH const * restrict const cctkGH, int c /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O8_Dissipation, + LC_LOOP3VEC(ML_BSSN_O8_Dissipation, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1161,7 +1160,7 @@ static void ML_BSSN_O8_Dissipation_Body(cGH const * restrict const cctkGH, int c CCTK_REAL_VEC epsdiss3 = ToReal(EpsDiss); phirhsL = - kmadd(epsdiss1,JacPDdissipationNth1phi,kmadd(epsdiss2,JacPDdissipationNth2phi,kmadd(epsdiss3,JacPDdissipationNth3phi,phirhsL))); + kadd(phirhsL,kmadd(epsdiss1,JacPDdissipationNth1phi,kmadd(epsdiss2,JacPDdissipationNth2phi,kmul(epsdiss3,JacPDdissipationNth3phi)))); gt11rhsL = kadd(gt11rhsL,kmadd(epsdiss1,JacPDdissipationNth1gt11,kmadd(epsdiss2,JacPDdissipationNth2gt11,kmul(epsdiss3,JacPDdissipationNth3gt11)))); @@ -1182,16 +1181,16 @@ static void ML_BSSN_O8_Dissipation_Body(cGH const * restrict const cctkGH, int c kadd(gt33rhsL,kmadd(epsdiss1,JacPDdissipationNth1gt33,kmadd(epsdiss2,JacPDdissipationNth2gt33,kmul(epsdiss3,JacPDdissipationNth3gt33)))); Xt1rhsL = - kmadd(epsdiss1,JacPDdissipationNth1Xt1,kmadd(epsdiss2,JacPDdissipationNth2Xt1,kmadd(epsdiss3,JacPDdissipationNth3Xt1,Xt1rhsL))); + kadd(Xt1rhsL,kmadd(epsdiss1,JacPDdissipationNth1Xt1,kmadd(epsdiss2,JacPDdissipationNth2Xt1,kmul(epsdiss3,JacPDdissipationNth3Xt1)))); Xt2rhsL = - kmadd(epsdiss1,JacPDdissipationNth1Xt2,kmadd(epsdiss2,JacPDdissipationNth2Xt2,kmadd(epsdiss3,JacPDdissipationNth3Xt2,Xt2rhsL))); + kadd(Xt2rhsL,kmadd(epsdiss1,JacPDdissipationNth1Xt2,kmadd(epsdiss2,JacPDdissipationNth2Xt2,kmul(epsdiss3,JacPDdissipationNth3Xt2)))); Xt3rhsL = - kmadd(epsdiss1,JacPDdissipationNth1Xt3,kmadd(epsdiss2,JacPDdissipationNth2Xt3,kmadd(epsdiss3,JacPDdissipationNth3Xt3,Xt3rhsL))); + kadd(Xt3rhsL,kmadd(epsdiss1,JacPDdissipationNth1Xt3,kmadd(epsdiss2,JacPDdissipationNth2Xt3,kmul(epsdiss3,JacPDdissipationNth3Xt3)))); trKrhsL = - kmadd(epsdiss1,JacPDdissipationNth1trK,kmadd(epsdiss2,JacPDdissipationNth2trK,kmadd(epsdiss3,JacPDdissipationNth3trK,trKrhsL))); + kadd(trKrhsL,kmadd(epsdiss1,JacPDdissipationNth1trK,kmadd(epsdiss2,JacPDdissipationNth2trK,kmul(epsdiss3,JacPDdissipationNth3trK)))); At11rhsL = kadd(At11rhsL,kmadd(epsdiss1,JacPDdissipationNth1At11,kmadd(epsdiss2,JacPDdissipationNth2At11,kmul(epsdiss3,JacPDdissipationNth3At11)))); @@ -1235,132 +1234,35 @@ static void ML_BSSN_O8_Dissipation_Body(cGH const * restrict const cctkGH, int c B3rhsL = kadd(B3rhsL,kmadd(epsdiss1,JacPDdissipationNth1B3,kmadd(epsdiss2,JacPDdissipationNth2B3,kmul(epsdiss3,JacPDdissipationNth3B3)))); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_O8_Dissipation); + LC_ENDLOOP3VEC(ML_BSSN_O8_Dissipation); } extern "C" void ML_BSSN_O8_Dissipation(CCTK_ARGUMENTS) @@ -1379,7 +1281,25 @@ extern "C" void ML_BSSN_O8_Dissipation(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O8::ML_curv","ML_BSSN_O8::ML_curvrhs","ML_BSSN_O8::ML_dtlapse","ML_BSSN_O8::ML_dtlapserhs","ML_BSSN_O8::ML_dtshift","ML_BSSN_O8::ML_dtshiftrhs","ML_BSSN_O8::ML_Gamma","ML_BSSN_O8::ML_Gammarhs","ML_BSSN_O8::ML_lapse","ML_BSSN_O8::ML_lapserhs","ML_BSSN_O8::ML_log_confac","ML_BSSN_O8::ML_log_confacrhs","ML_BSSN_O8::ML_metric","ML_BSSN_O8::ML_metricrhs","ML_BSSN_O8::ML_shift","ML_BSSN_O8::ML_shiftrhs","ML_BSSN_O8::ML_trace_curv","ML_BSSN_O8::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN_O8::ML_curv", + "ML_BSSN_O8::ML_curvrhs", + "ML_BSSN_O8::ML_dtlapse", + "ML_BSSN_O8::ML_dtlapserhs", + "ML_BSSN_O8::ML_dtshift", + "ML_BSSN_O8::ML_dtshiftrhs", + "ML_BSSN_O8::ML_Gamma", + "ML_BSSN_O8::ML_Gammarhs", + "ML_BSSN_O8::ML_lapse", + "ML_BSSN_O8::ML_lapserhs", + "ML_BSSN_O8::ML_log_confac", + "ML_BSSN_O8::ML_log_confacrhs", + "ML_BSSN_O8::ML_metric", + "ML_BSSN_O8::ML_metricrhs", + "ML_BSSN_O8::ML_shift", + "ML_BSSN_O8::ML_shiftrhs", + "ML_BSSN_O8::ML_trace_curv", + "ML_BSSN_O8::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O8_Dissipation", 18, groups); switch(fdOrder) @@ -1401,7 +1321,7 @@ extern "C" void ML_BSSN_O8_Dissipation(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_O8_Dissipation_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_O8_Dissipation_Body); if (verbose > 1) { diff --git a/ML_BSSN_O8/src/ML_BSSN_O8_InitGamma.cc b/ML_BSSN_O8/src/ML_BSSN_O8_InitGamma.cc index d6c5130..4bfcc04 100644 --- a/ML_BSSN_O8/src/ML_BSSN_O8_InitGamma.cc +++ b/ML_BSSN_O8/src/ML_BSSN_O8_InitGamma.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_O8_InitGamma_Body(cGH const * restrict const cctkGH, int con DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_O8_InitGamma_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_O8_InitGamma_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_O8_InitGamma_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_O8_InitGamma_Body(cGH const * restrict const cctkGH, int con /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O8_InitGamma, + LC_LOOP3VEC(ML_BSSN_O8_InitGamma, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -237,60 +236,17 @@ static void ML_BSSN_O8_InitGamma_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC B3L = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_O8_InitGamma); + LC_ENDLOOP3VEC(ML_BSSN_O8_InitGamma); } extern "C" void ML_BSSN_O8_InitGamma(CCTK_ARGUMENTS) @@ -309,7 +265,10 @@ extern "C" void ML_BSSN_O8_InitGamma(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O8::ML_dtlapse","ML_BSSN_O8::ML_dtshift","ML_BSSN_O8::ML_Gamma"}; + const char *const groups[] = { + "ML_BSSN_O8::ML_dtlapse", + "ML_BSSN_O8::ML_dtshift", + "ML_BSSN_O8::ML_Gamma"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O8_InitGamma", 3, groups); switch(fdOrder) @@ -327,7 +286,7 @@ extern "C" void ML_BSSN_O8_InitGamma(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_O8_InitGamma_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_O8_InitGamma_Body); if (verbose > 1) { diff --git a/ML_BSSN_O8/src/ML_BSSN_O8_InitRHS.cc b/ML_BSSN_O8/src/ML_BSSN_O8_InitRHS.cc index b623b70..7b85d13 100644 --- a/ML_BSSN_O8/src/ML_BSSN_O8_InitRHS.cc +++ b/ML_BSSN_O8/src/ML_BSSN_O8_InitRHS.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_O8_InitRHS_Body(cGH const * restrict const cctkGH, int const DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_O8_InitRHS_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_O8_InitRHS_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_O8_InitRHS_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_O8_InitRHS_Body(cGH const * restrict const cctkGH, int const /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O8_InitRHS, + LC_LOOP3VEC(ML_BSSN_O8_InitRHS, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -273,132 +272,35 @@ static void ML_BSSN_O8_InitRHS_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC B3rhsL = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_O8_InitRHS); + LC_ENDLOOP3VEC(ML_BSSN_O8_InitRHS); } extern "C" void ML_BSSN_O8_InitRHS(CCTK_ARGUMENTS) @@ -417,7 +319,16 @@ extern "C" void ML_BSSN_O8_InitRHS(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O8::ML_curvrhs","ML_BSSN_O8::ML_dtlapserhs","ML_BSSN_O8::ML_dtshiftrhs","ML_BSSN_O8::ML_Gammarhs","ML_BSSN_O8::ML_lapserhs","ML_BSSN_O8::ML_log_confacrhs","ML_BSSN_O8::ML_metricrhs","ML_BSSN_O8::ML_shiftrhs","ML_BSSN_O8::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN_O8::ML_curvrhs", + "ML_BSSN_O8::ML_dtlapserhs", + "ML_BSSN_O8::ML_dtshiftrhs", + "ML_BSSN_O8::ML_Gammarhs", + "ML_BSSN_O8::ML_lapserhs", + "ML_BSSN_O8::ML_log_confacrhs", + "ML_BSSN_O8::ML_metricrhs", + "ML_BSSN_O8::ML_shiftrhs", + "ML_BSSN_O8::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O8_InitRHS", 9, groups); switch(fdOrder) @@ -435,7 +346,7 @@ extern "C" void ML_BSSN_O8_InitRHS(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_O8_InitRHS_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_O8_InitRHS_Body); if (verbose > 1) { diff --git a/ML_BSSN_O8/src/ML_BSSN_O8_Minkowski.cc b/ML_BSSN_O8/src/ML_BSSN_O8_Minkowski.cc index 5884f87..e96858a 100644 --- a/ML_BSSN_O8/src/ML_BSSN_O8_Minkowski.cc +++ b/ML_BSSN_O8/src/ML_BSSN_O8_Minkowski.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_O8_Minkowski_Body(cGH const * restrict const cctkGH, int con DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_O8_Minkowski_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_O8_Minkowski_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_O8_Minkowski_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_O8_Minkowski_Body(cGH const * restrict const cctkGH, int con /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O8_Minkowski, + LC_LOOP3VEC(ML_BSSN_O8_Minkowski, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -273,132 +272,35 @@ static void ML_BSSN_O8_Minkowski_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC B3L = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(beta1[index],beta1L,elt_count); - vec_store_nta_partial_hi(beta2[index],beta2L,elt_count); - vec_store_nta_partial_hi(beta3[index],beta3L,elt_count); - vec_store_nta_partial_hi(gt11[index],gt11L,elt_count); - vec_store_nta_partial_hi(gt12[index],gt12L,elt_count); - vec_store_nta_partial_hi(gt13[index],gt13L,elt_count); - vec_store_nta_partial_hi(gt22[index],gt22L,elt_count); - vec_store_nta_partial_hi(gt23[index],gt23L,elt_count); - vec_store_nta_partial_hi(gt33[index],gt33L,elt_count); - vec_store_nta_partial_hi(phi[index],phiL,elt_count); - vec_store_nta_partial_hi(trK[index],trKL,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(beta1[index],beta1L,elt_count); - vec_store_nta_partial_lo(beta2[index],beta2L,elt_count); - vec_store_nta_partial_lo(beta3[index],beta3L,elt_count); - vec_store_nta_partial_lo(gt11[index],gt11L,elt_count); - vec_store_nta_partial_lo(gt12[index],gt12L,elt_count); - vec_store_nta_partial_lo(gt13[index],gt13L,elt_count); - vec_store_nta_partial_lo(gt22[index],gt22L,elt_count); - vec_store_nta_partial_lo(gt23[index],gt23L,elt_count); - vec_store_nta_partial_lo(gt33[index],gt33L,elt_count); - vec_store_nta_partial_lo(phi[index],phiL,elt_count); - vec_store_nta_partial_lo(trK[index],trKL,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(beta1[index],beta1L); - vec_store_nta(beta2[index],beta2L); - vec_store_nta(beta3[index],beta3L); - vec_store_nta(gt11[index],gt11L); - vec_store_nta(gt12[index],gt12L); - vec_store_nta(gt13[index],gt13L); - vec_store_nta(gt22[index],gt22L); - vec_store_nta(gt23[index],gt23L); - vec_store_nta(gt33[index],gt33L); - vec_store_nta(phi[index],phiL); - vec_store_nta(trK[index],trKL); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(beta1[index],beta1L); + vec_store_nta_partial(beta2[index],beta2L); + vec_store_nta_partial(beta3[index],beta3L); + vec_store_nta_partial(gt11[index],gt11L); + vec_store_nta_partial(gt12[index],gt12L); + vec_store_nta_partial(gt13[index],gt13L); + vec_store_nta_partial(gt22[index],gt22L); + vec_store_nta_partial(gt23[index],gt23L); + vec_store_nta_partial(gt33[index],gt33L); + vec_store_nta_partial(phi[index],phiL); + vec_store_nta_partial(trK[index],trKL); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_O8_Minkowski); + LC_ENDLOOP3VEC(ML_BSSN_O8_Minkowski); } extern "C" void ML_BSSN_O8_Minkowski(CCTK_ARGUMENTS) @@ -417,7 +319,16 @@ extern "C" void ML_BSSN_O8_Minkowski(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O8::ML_curv","ML_BSSN_O8::ML_dtlapse","ML_BSSN_O8::ML_dtshift","ML_BSSN_O8::ML_Gamma","ML_BSSN_O8::ML_lapse","ML_BSSN_O8::ML_log_confac","ML_BSSN_O8::ML_metric","ML_BSSN_O8::ML_shift","ML_BSSN_O8::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_O8::ML_curv", + "ML_BSSN_O8::ML_dtlapse", + "ML_BSSN_O8::ML_dtshift", + "ML_BSSN_O8::ML_Gamma", + "ML_BSSN_O8::ML_lapse", + "ML_BSSN_O8::ML_log_confac", + "ML_BSSN_O8::ML_metric", + "ML_BSSN_O8::ML_shift", + "ML_BSSN_O8::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O8_Minkowski", 9, groups); switch(fdOrder) @@ -435,7 +346,7 @@ extern "C" void ML_BSSN_O8_Minkowski(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_O8_Minkowski_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_O8_Minkowski_Body); if (verbose > 1) { diff --git a/ML_BSSN_O8/src/ML_BSSN_O8_RHS1.cc b/ML_BSSN_O8/src/ML_BSSN_O8_RHS1.cc index 6467c7f..968fd06 100644 --- a/ML_BSSN_O8/src/ML_BSSN_O8_RHS1.cc +++ b/ML_BSSN_O8/src/ML_BSSN_O8_RHS1.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -61,8 +62,6 @@ static void ML_BSSN_O8_RHS1_Body(cGH const * restrict const cctkGH, int const di DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -99,9 +98,9 @@ static void ML_BSSN_O8_RHS1_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -120,14 +119,14 @@ static void ML_BSSN_O8_RHS1_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -141,9 +140,9 @@ static void ML_BSSN_O8_RHS1_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -224,7 +223,7 @@ static void ML_BSSN_O8_RHS1_Body(cGH const * restrict const cctkGH, int const di /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O8_RHS1, + LC_LOOP3VEC(ML_BSSN_O8_RHS1, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1088,7 +1087,8 @@ static void ML_BSSN_O8_RHS1_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -1096,12 +1096,14 @@ static void ML_BSSN_O8_RHS1_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC Gtl111 = kmul(JacPDstandardNth1gt11,ToReal(0.5)); @@ -1307,13 +1309,13 @@ static void ML_BSSN_O8_RHS1_Body(cGH const * restrict const cctkGH, int const di kmul(ToReal(-0.666666666666666666666666666667),kmadd(kmadd(gt13L,JacPDstandardNth3beta1,kmul(gt23L,JacPDstandardNth3beta2)),ToReal(-3),kmadd(gt33L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth3beta3,ToReal(-2),JacPDstandardNth2beta2)),kmul(alphaL,kmul(At33L,ToReal(3)))))); CCTK_REAL_VEC dotXt1 = - kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu11,JacPDstandardNth1alpha,kmadd(Atu12,JacPDstandardNth2alpha,kmul(Atu13,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth2beta1,Xtn2,kmul(JacPDstandardNth3beta1,Xtn3)),ToReal(-3),kmadd(Xtn1,kmsub(kadd(JacPDstandardNth2beta2,JacPDstandardNth3beta3),ToReal(2),JacPDstandardNth1beta1),kmadd(kmadd(gtu12,JacPDstandardNth12beta1,kmadd(gtu13,JacPDstandardNth13beta1,kmadd(gtu22,JacPDstandardNth22beta1,kmadd(gtu23,kadd(JacPDstandardNth23beta1,JacPDstandardNth32beta1),kmul(gtu33,JacPDstandardNth33beta1))))),ToReal(3),kmadd(gtu11,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth11beta1,ToReal(4),JacPDstandardNth13beta3)),kmadd(gtu12,kadd(JacPDstandardNth22beta2,kmadd(JacPDstandardNth21beta1,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu13,kadd(JacPDstandardNth32beta2,kmadd(JacPDstandardNth31beta1,ToReal(4),JacPDstandardNth33beta3)),kmul(alphaL,kmadd(kmadd(gtu11,S1,kmadd(gtu12,S2,kmul(gtu13,S3))),ToReal(-150.7964473723100754462068823974161384415),kmadd(kmadd(gtu11,JacPDstandardNth1trK,kmadd(gtu12,JacPDstandardNth2trK,kmul(gtu13,JacPDstandardNth3trK))),ToReal(-4),kmadd(ToReal(6),kmadd(Atu22,Gt122,kmadd(Atu33,Gt133,kmul(Atu11,kmadd(cdphi1,ToReal(6),Gt111)))),kmadd(kmadd(Atu23,Gt123,kmul(Atu12,kmadd(cdphi2,ToReal(3),Gt112))),ToReal(12),kmul(Atu13,kmadd(Gt113,ToReal(12),kmul(cdphi3,ToReal(36))))))))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu11,JacPDstandardNth1alpha,kmadd(Atu12,JacPDstandardNth2alpha,kmul(Atu13,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth2beta1,Xtn2,kmul(JacPDstandardNth3beta1,Xtn3)),ToReal(-3),kmadd(Xtn1,kmsub(JacPDstandardNth3beta3,ToReal(2),JacPDstandardNth1beta1),kmadd(kmadd(gtu12,JacPDstandardNth12beta1,kmadd(gtu13,JacPDstandardNth13beta1,kmadd(gtu22,JacPDstandardNth22beta1,kmadd(gtu23,kadd(JacPDstandardNth23beta1,JacPDstandardNth32beta1),kmul(gtu33,JacPDstandardNth33beta1))))),ToReal(3),kmadd(gtu11,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth11beta1,ToReal(4),JacPDstandardNth13beta3)),kmadd(gtu12,kadd(JacPDstandardNth22beta2,kmadd(JacPDstandardNth21beta1,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu13,kadd(JacPDstandardNth32beta2,kmadd(JacPDstandardNth31beta1,ToReal(4),JacPDstandardNth33beta3)),kmul(ToReal(2),kmadd(JacPDstandardNth2beta2,Xtn1,kmul(alphaL,kmadd(kmadd(gtu11,JacPDstandardNth1trK,kmadd(gtu12,JacPDstandardNth2trK,kmul(gtu13,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu23,Gt123,kmul(Atu12,kmadd(cdphi2,ToReal(3),Gt112))),ToReal(6),kmadd(ToReal(3),kmadd(Atu22,Gt122,kmadd(Atu33,Gt133,kmul(Atu11,kmadd(cdphi1,ToReal(6),Gt111)))),kmadd(Atu13,kmadd(Gt113,ToReal(6),kmul(cdphi3,ToReal(18))),kmul(kmadd(gtu11,S1,kmadd(gtu12,S2,kmul(gtu13,S3))),kmul(ToReal(-24),ToReal(Pi)))))))))))))))))); CCTK_REAL_VEC dotXt2 = - kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu12,JacPDstandardNth1alpha,kmadd(Atu22,JacPDstandardNth2alpha,kmul(Atu23,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta2,Xtn1,kmul(JacPDstandardNth3beta2,Xtn3)),ToReal(-3),kmadd(Xtn2,kmsub(kadd(JacPDstandardNth1beta1,JacPDstandardNth3beta3),ToReal(2),JacPDstandardNth2beta2),kmadd(kmadd(gtu11,JacPDstandardNth11beta2,kmadd(gtu23,JacPDstandardNth23beta2,kmadd(gtu13,kadd(JacPDstandardNth13beta2,JacPDstandardNth31beta2),kmul(gtu33,JacPDstandardNth33beta2)))),ToReal(3),kmadd(gtu12,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth13beta3,kmadd(JacPDstandardNth21beta2,ToReal(3),kmul(JacPDstandardNth12beta2,ToReal(4))))),kmadd(gtu22,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth22beta2,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu23,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth32beta2,ToReal(4),JacPDstandardNth33beta3)),kmul(alphaL,kmadd(kmadd(gtu12,S1,kmadd(gtu22,S2,kmul(gtu23,S3))),ToReal(-150.7964473723100754462068823974161384415),kmadd(kmadd(gtu12,JacPDstandardNth1trK,kmadd(gtu22,JacPDstandardNth2trK,kmul(gtu23,JacPDstandardNth3trK))),ToReal(-4),kmadd(ToReal(6),kmadd(Atu11,Gt211,kmadd(Atu33,Gt233,kmul(Atu22,kmadd(cdphi2,ToReal(6),Gt222)))),kmadd(kmadd(Atu13,Gt213,kmul(Atu12,kmadd(cdphi1,ToReal(3),Gt212))),ToReal(12),kmul(Atu23,kmadd(Gt223,ToReal(12),kmul(cdphi3,ToReal(36))))))))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu12,JacPDstandardNth1alpha,kmadd(Atu22,JacPDstandardNth2alpha,kmul(Atu23,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta2,Xtn1,kmul(JacPDstandardNth3beta2,Xtn3)),ToReal(-3),kmadd(Xtn2,kmsub(JacPDstandardNth3beta3,ToReal(2),JacPDstandardNth2beta2),kmadd(kmadd(gtu11,JacPDstandardNth11beta2,kmadd(gtu23,JacPDstandardNth23beta2,kmadd(gtu13,kadd(JacPDstandardNth13beta2,JacPDstandardNth31beta2),kmul(gtu33,JacPDstandardNth33beta2)))),ToReal(3),kmadd(gtu12,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth13beta3,kmadd(JacPDstandardNth21beta2,ToReal(3),kmul(JacPDstandardNth12beta2,ToReal(4))))),kmadd(gtu22,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth22beta2,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu23,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth32beta2,ToReal(4),JacPDstandardNth33beta3)),kmul(ToReal(2),kmadd(JacPDstandardNth1beta1,Xtn2,kmul(alphaL,kmadd(kmadd(gtu12,JacPDstandardNth1trK,kmadd(gtu22,JacPDstandardNth2trK,kmul(gtu23,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu13,Gt213,kmul(Atu12,kmadd(cdphi1,ToReal(3),Gt212))),ToReal(6),kmadd(ToReal(3),kmadd(Atu11,Gt211,kmadd(Atu33,Gt233,kmul(Atu22,kmadd(cdphi2,ToReal(6),Gt222)))),kmadd(Atu23,kmadd(Gt223,ToReal(6),kmul(cdphi3,ToReal(18))),kmul(kmadd(gtu12,S1,kmadd(gtu22,S2,kmul(gtu23,S3))),kmul(ToReal(-24),ToReal(Pi)))))))))))))))))); CCTK_REAL_VEC dotXt3 = - kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu13,JacPDstandardNth1alpha,kmadd(Atu23,JacPDstandardNth2alpha,kmul(Atu33,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta3,Xtn1,kmul(JacPDstandardNth2beta3,Xtn2)),ToReal(-3),kmadd(Xtn3,kmsub(kadd(JacPDstandardNth1beta1,JacPDstandardNth2beta2),ToReal(2),JacPDstandardNth3beta3),kmadd(kmadd(gtu11,JacPDstandardNth11beta3,kmadd(gtu12,kadd(JacPDstandardNth12beta3,JacPDstandardNth21beta3),kmadd(gtu22,JacPDstandardNth22beta3,kmul(gtu23,JacPDstandardNth32beta3)))),ToReal(3),kmadd(gtu13,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth31beta3,ToReal(3),kmul(JacPDstandardNth13beta3,ToReal(4))))),kmadd(gtu23,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth23beta3,ToReal(4),JacPDstandardNth22beta2)),kmadd(gtu33,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth33beta3,ToReal(4),JacPDstandardNth32beta2)),kmul(alphaL,kmadd(kmadd(gtu13,S1,kmadd(gtu23,S2,kmul(gtu33,S3))),ToReal(-150.7964473723100754462068823974161384415),kmadd(kmadd(gtu13,JacPDstandardNth1trK,kmadd(gtu23,JacPDstandardNth2trK,kmul(gtu33,JacPDstandardNth3trK))),ToReal(-4),kmadd(kmadd(Atu11,Gt311,kmul(Atu22,Gt322)),ToReal(6),kmadd(kmadd(Atu12,Gt312,kmadd(Atu13,kmadd(cdphi1,ToReal(3),Gt313),kmul(Atu23,kmadd(cdphi2,ToReal(3),Gt323)))),ToReal(12),kmul(Atu33,kmadd(Gt333,ToReal(6),kmul(cdphi3,ToReal(36))))))))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu13,JacPDstandardNth1alpha,kmadd(Atu23,JacPDstandardNth2alpha,kmul(Atu33,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta3,Xtn1,kmul(JacPDstandardNth2beta3,Xtn2)),ToReal(-3),kmadd(Xtn3,kmsub(JacPDstandardNth2beta2,ToReal(2),JacPDstandardNth3beta3),kmadd(kmadd(gtu11,JacPDstandardNth11beta3,kmadd(gtu12,kadd(JacPDstandardNth12beta3,JacPDstandardNth21beta3),kmadd(gtu22,JacPDstandardNth22beta3,kmul(gtu23,JacPDstandardNth32beta3)))),ToReal(3),kmadd(gtu13,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth31beta3,ToReal(3),kmul(JacPDstandardNth13beta3,ToReal(4))))),kmadd(gtu23,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth23beta3,ToReal(4),JacPDstandardNth22beta2)),kmadd(gtu33,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth33beta3,ToReal(4),JacPDstandardNth32beta2)),kmul(ToReal(2),kmadd(JacPDstandardNth1beta1,Xtn3,kmul(alphaL,kmadd(kmadd(gtu13,JacPDstandardNth1trK,kmadd(gtu23,JacPDstandardNth2trK,kmul(gtu33,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu11,Gt311,kmul(Atu22,Gt322)),ToReal(3),kmadd(kmadd(Atu12,Gt312,kmadd(Atu13,kmadd(cdphi1,ToReal(3),Gt313),kmul(Atu23,kmadd(cdphi2,ToReal(3),Gt323)))),ToReal(6),kmadd(Atu33,kmadd(Gt333,ToReal(3),kmul(cdphi3,ToReal(18))),kmul(kmadd(gtu13,S1,kmadd(gtu23,S2,kmul(gtu33,S3))),kmul(ToReal(-24),ToReal(Pi)))))))))))))))))); CCTK_REAL_VEC Xt1rhsL = dotXt1; @@ -1322,18 +1324,18 @@ static void ML_BSSN_O8_RHS1_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC Xt3rhsL = dotXt3; CCTK_REAL_VEC dottrK = - kmsub(alphaL,kadd(SQR(Atm11),kadd(SQR(Atm22),kadd(SQR(Atm33),kmadd(SQR(trKL),ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(2),kmul(kadd(rho,trS),ToReal(12.56637061435917295385057353311801153679))))))),kmul(em4phi,knmsub(JacPDstandardNth1alpha,Xtn1,knmsub(JacPDstandardNth2alpha,Xtn2,knmsub(JacPDstandardNth3alpha,Xtn3,kmadd(gtu11,kmadd(cdphi1,kmul(JacPDstandardNth1alpha,ToReal(2)),JacPDstandardNth11alpha),kmadd(gtu12,kadd(JacPDstandardNth12alpha,kadd(JacPDstandardNth21alpha,kmadd(cdphi2,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth2alpha,ToReal(2)))))),kmadd(gtu22,kmadd(cdphi2,kmul(JacPDstandardNth2alpha,ToReal(2)),JacPDstandardNth22alpha),kmadd(gtu13,kadd(JacPDstandardNth13alpha,kadd(JacPDstandardNth31alpha,kmadd(cdphi3,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmadd(gtu23,kadd(JacPDstandardNth23alpha,kadd(JacPDstandardNth32alpha,kmadd(cdphi3,kmul(JacPDstandardNth2alpha,ToReal(2)),kmul(cdphi2,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmul(gtu33,kmadd(cdphi3,kmul(JacPDstandardNth3alpha,ToReal(2)),JacPDstandardNth33alpha)))))))))))); + kmsub(alphaL,kadd(SQR(Atm11),kadd(SQR(Atm22),kadd(SQR(Atm33),kmadd(SQR(trKL),ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(2),kmul(kadd(rho,trS),kmul(ToReal(4),ToReal(Pi)))))))),kmul(em4phi,knmsub(JacPDstandardNth1alpha,Xtn1,knmsub(JacPDstandardNth2alpha,Xtn2,knmsub(JacPDstandardNth3alpha,Xtn3,kmadd(gtu11,kmadd(cdphi1,kmul(JacPDstandardNth1alpha,ToReal(2)),JacPDstandardNth11alpha),kmadd(gtu12,kadd(JacPDstandardNth12alpha,kadd(JacPDstandardNth21alpha,kmadd(cdphi2,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth2alpha,ToReal(2)))))),kmadd(gtu22,kmadd(cdphi2,kmul(JacPDstandardNth2alpha,ToReal(2)),JacPDstandardNth22alpha),kmadd(gtu13,kadd(JacPDstandardNth13alpha,kadd(JacPDstandardNth31alpha,kmadd(cdphi3,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmadd(gtu23,kadd(JacPDstandardNth23alpha,kadd(JacPDstandardNth32alpha,kmadd(cdphi3,kmul(JacPDstandardNth2alpha,ToReal(2)),kmul(cdphi2,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmul(gtu33,kmadd(cdphi3,kmul(JacPDstandardNth3alpha,ToReal(2)),JacPDstandardNth33alpha)))))))))))); CCTK_REAL_VEC trKrhsL = dottrK; CCTK_REAL_VEC alpharhsL = - kneg(kmul(kpow(alphaL,harmonicN),kmul(ToReal(harmonicF),kmadd(ksub(AL,trKL),ToReal(LapseACoeff),trKL)))); + kneg(kmul(kpow(alphaL,harmonicN),kmul(ToReal(harmonicF),kmadd(AL,ToReal(LapseACoeff),kmul(kmadd(kadd(alphaL,ToReal(-1)),ToReal(AlphaDriver),trKL),ksub(ToReal(1),ToReal(LapseACoeff))))))); CCTK_REAL_VEC ArhsL = kmul(knmsub(AL,ToReal(AlphaDriver),dottrK),ToReal(LapseACoeff)); CCTK_REAL_VEC eta = - kfmin(ToReal(1),kmul(INV(rL),ToReal(SpatialBetaDriverRadius))); + kmul(INV(kfmax(rL,ToReal(SpatialBetaDriverRadius))),ToReal(SpatialBetaDriverRadius)); CCTK_REAL_VEC theta = kfmin(ToReal(1),kexp(knmsub(rL,INV(ToReal(SpatialShiftGammaCoeffRadius)),ToReal(1)))); @@ -1345,27 +1347,24 @@ static void ML_BSSN_O8_RHS1_Body(cGH const * restrict const cctkGH, int const di if (harmonicShift) { beta1rhsL = - kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(kmadd(gtu11,JacPDstandardNth1alpha,kmadd(gtu12,JacPDstandardNth2alpha,kmul(gtu13,JacPDstandardNth3alpha))),kmul(phiL,ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth1gt11,SQR(gtu11),kmul(JacPDstandardNth1gt22,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu13,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu13,JacPDstandardNth1gt33,kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu22,JacPDstandardNth3gt22))))),kmadd(gtu11,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu12,JacPDstandardNth2gt11,kmadd(gtu13,JacPDstandardNth3gt11,kmadd(gtu23,kmul(JacPDstandardNth1gt23,ToReal(-2)),knmsub(gtu22,JacPDstandardNth1gt22,kmadd(kmadd(gtu12,JacPDstandardNth1gt12,kmadd(gtu13,JacPDstandardNth1gt13,kmul(gtu22,JacPDstandardNth2gt12))),ToReal(2),kmadd(gtu23,kmul(JacPDstandardNth2gt13,ToReal(2)),kmadd(gtu23,kmul(JacPDstandardNth3gt12,ToReal(2)),kmul(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33))))))))))),kmul(gtu12,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu23,kmul(JacPDstandardNth3gt22,ToReal(2)),kmadd(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33),kmul(gtu13,kmul(JacPDstandardNth1gt23,ToReal(4))))))))))))))))); + kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu11,JacPDstandardNth1alpha,kmadd(gtu12,JacPDstandardNth2alpha,kmul(gtu13,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth1gt11,SQR(gtu11),kmul(JacPDstandardNth1gt22,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu13,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu13,JacPDstandardNth1gt33,kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu22,JacPDstandardNth3gt22))))),kmadd(gtu11,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu12,JacPDstandardNth2gt11,kmadd(gtu13,JacPDstandardNth3gt11,kmadd(gtu23,kmul(JacPDstandardNth1gt23,ToReal(-2)),knmsub(gtu22,JacPDstandardNth1gt22,kmadd(kmadd(gtu12,JacPDstandardNth1gt12,kmadd(gtu13,JacPDstandardNth1gt13,kmul(gtu22,JacPDstandardNth2gt12))),ToReal(2),kmadd(gtu23,kmul(JacPDstandardNth2gt13,ToReal(2)),kmadd(gtu23,kmul(JacPDstandardNth3gt12,ToReal(2)),kmul(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33))))))))))),kmul(gtu12,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu23,kmul(JacPDstandardNth3gt22,ToReal(2)),kmadd(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33),kmul(gtu13,kmul(JacPDstandardNth1gt23,ToReal(4))))))))))))))))); beta2rhsL = - kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(kmadd(gtu12,JacPDstandardNth1alpha,kmadd(gtu22,JacPDstandardNth2alpha,kmul(gtu23,JacPDstandardNth3alpha))),kmul(phiL,ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth2gt22,SQR(gtu22),kmul(JacPDstandardNth2gt11,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu23,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt13,kmadd(gtu13,JacPDstandardNth1gt33,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu11,JacPDstandardNth3gt11))))),kmadd(gtu22,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu23,JacPDstandardNth3gt22,kmadd(kmadd(gtu23,JacPDstandardNth2gt23,kmul(gtu13,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth3gt12,JacPDstandardNth2gt13)))),ToReal(2),kmadd(gtu11,kmsub(JacPDstandardNth1gt12,ToReal(2),JacPDstandardNth2gt11),kmul(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33))))))),kmul(gtu12,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu13,kmul(JacPDstandardNth3gt11,ToReal(2)),kmadd(gtu22,kmadd(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth2gt13,ToReal(4)))))))))))))))))); + kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu12,JacPDstandardNth1alpha,kmadd(gtu22,JacPDstandardNth2alpha,kmul(gtu23,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth2gt22,SQR(gtu22),kmul(JacPDstandardNth2gt11,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu23,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt13,kmadd(gtu13,JacPDstandardNth1gt33,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu11,JacPDstandardNth3gt11))))),kmadd(gtu22,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu23,JacPDstandardNth3gt22,kmadd(kmadd(gtu23,JacPDstandardNth2gt23,kmul(gtu13,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth3gt12,JacPDstandardNth2gt13)))),ToReal(2),kmadd(gtu11,kmsub(JacPDstandardNth1gt12,ToReal(2),JacPDstandardNth2gt11),kmul(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33))))))),kmul(gtu12,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu13,kmul(JacPDstandardNth3gt11,ToReal(2)),kmadd(gtu22,kmadd(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth2gt13,ToReal(4)))))))))))))))))); beta3rhsL = - kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(kmadd(gtu13,JacPDstandardNth1alpha,kmadd(gtu23,JacPDstandardNth2alpha,kmul(gtu33,JacPDstandardNth3alpha))),kmul(phiL,ToReal(-2)),kmul(alphaL,kmadd(phiL,kmul(kmadd(JacPDstandardNth3gt11,SQR(gtu13),kmul(JacPDstandardNth3gt22,SQR(gtu23))),ToReal(2)),kmadd(gtu23,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu33,JacPDstandardNth2gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt12,kmadd(gtu12,JacPDstandardNth1gt22,kmul(gtu33,JacPDstandardNth3gt23))),ToReal(2),kmul(gtu11,JacPDstandardNth2gt11)))))),kmadd(gtu33,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,knmsub(gtu22,JacPDstandardNth3gt22,kmadd(kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu12,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth2gt13,JacPDstandardNth3gt12)))),ToReal(2),kmul(gtu11,kmsub(JacPDstandardNth1gt13,ToReal(2),JacPDstandardNth3gt11))))))),kmul(gtu13,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu12,kmul(JacPDstandardNth2gt11,ToReal(2)),kmadd(gtu22,kmsub(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmadd(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth3gt12,ToReal(4)))))))))))))))))); + kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu13,JacPDstandardNth1alpha,kmadd(gtu23,JacPDstandardNth2alpha,kmul(gtu33,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmul(kmadd(JacPDstandardNth3gt11,SQR(gtu13),kmul(JacPDstandardNth3gt22,SQR(gtu23))),ToReal(2)),kmadd(gtu23,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu33,JacPDstandardNth2gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt12,kmadd(gtu12,JacPDstandardNth1gt22,kmul(gtu33,JacPDstandardNth3gt23))),ToReal(2),kmul(gtu11,JacPDstandardNth2gt11)))))),kmadd(gtu33,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,knmsub(gtu22,JacPDstandardNth3gt22,kmadd(kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu12,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth2gt13,JacPDstandardNth3gt12)))),ToReal(2),kmul(gtu11,kmsub(JacPDstandardNth1gt13,ToReal(2),JacPDstandardNth3gt11))))))),kmul(gtu13,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu12,kmul(JacPDstandardNth2gt11,ToReal(2)),kmadd(gtu22,kmsub(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmadd(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth3gt12,ToReal(4)))))))))))))))))); } else { beta1rhsL = - kmul(theta,kmul(kadd(Xt1L,kmadd(beta1L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B1L,Xt1L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt1L,kmadd(ksub(B1L,Xt1L),ToReal(ShiftBCoeff),kmul(beta1L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); beta2rhsL = - kmul(theta,kmul(kadd(Xt2L,kmadd(beta2L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B2L,Xt2L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt2L,kmadd(ksub(B2L,Xt2L),ToReal(ShiftBCoeff),kmul(beta2L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); beta3rhsL = - kmul(theta,kmul(kadd(Xt3L,kmadd(beta3L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B3L,Xt3L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); } CCTK_REAL_VEC B1rhsL = @@ -1377,108 +1376,29 @@ static void ML_BSSN_O8_RHS1_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC B3rhsL = kmul(knmsub(B3L,kmul(eta,ToReal(BetaDriver)),dotXt3),ToReal(ShiftBCoeff)); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_O8_RHS1); + LC_ENDLOOP3VEC(ML_BSSN_O8_RHS1); } extern "C" void ML_BSSN_O8_RHS1(CCTK_ARGUMENTS) @@ -1497,7 +1417,26 @@ extern "C" void ML_BSSN_O8_RHS1(CCTK_ARGUMENTS) return; } - const char *groups[] = {"grid::coordinates","Grid::coordinates","ML_BSSN_O8::ML_curv","ML_BSSN_O8::ML_dtlapse","ML_BSSN_O8::ML_dtlapserhs","ML_BSSN_O8::ML_dtshift","ML_BSSN_O8::ML_dtshiftrhs","ML_BSSN_O8::ML_Gamma","ML_BSSN_O8::ML_Gammarhs","ML_BSSN_O8::ML_lapse","ML_BSSN_O8::ML_lapserhs","ML_BSSN_O8::ML_log_confac","ML_BSSN_O8::ML_log_confacrhs","ML_BSSN_O8::ML_metric","ML_BSSN_O8::ML_metricrhs","ML_BSSN_O8::ML_shift","ML_BSSN_O8::ML_shiftrhs","ML_BSSN_O8::ML_trace_curv","ML_BSSN_O8::ML_trace_curvrhs"}; + const char *const groups[] = { + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_O8::ML_curv", + "ML_BSSN_O8::ML_dtlapse", + "ML_BSSN_O8::ML_dtlapserhs", + "ML_BSSN_O8::ML_dtshift", + "ML_BSSN_O8::ML_dtshiftrhs", + "ML_BSSN_O8::ML_Gamma", + "ML_BSSN_O8::ML_Gammarhs", + "ML_BSSN_O8::ML_lapse", + "ML_BSSN_O8::ML_lapserhs", + "ML_BSSN_O8::ML_log_confac", + "ML_BSSN_O8::ML_log_confacrhs", + "ML_BSSN_O8::ML_metric", + "ML_BSSN_O8::ML_metricrhs", + "ML_BSSN_O8::ML_shift", + "ML_BSSN_O8::ML_shiftrhs", + "ML_BSSN_O8::ML_trace_curv", + "ML_BSSN_O8::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O8_RHS1", 19, groups); switch(fdOrder) @@ -1519,7 +1458,7 @@ extern "C" void ML_BSSN_O8_RHS1(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_O8_RHS1_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_O8_RHS1_Body); if (verbose > 1) { diff --git a/ML_BSSN_O8/src/ML_BSSN_O8_RHS2.cc b/ML_BSSN_O8/src/ML_BSSN_O8_RHS2.cc index 841675a..b57d3e9 100644 --- a/ML_BSSN_O8/src/ML_BSSN_O8_RHS2.cc +++ b/ML_BSSN_O8/src/ML_BSSN_O8_RHS2.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -40,8 +41,6 @@ static void ML_BSSN_O8_RHS2_Body(cGH const * restrict const cctkGH, int const di DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -78,9 +77,9 @@ static void ML_BSSN_O8_RHS2_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -99,14 +98,14 @@ static void ML_BSSN_O8_RHS2_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -120,9 +119,9 @@ static void ML_BSSN_O8_RHS2_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -203,7 +202,7 @@ static void ML_BSSN_O8_RHS2_Body(cGH const * restrict const cctkGH, int const di /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O8_RHS2, + LC_LOOP3VEC(ML_BSSN_O8_RHS2, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1420,7 +1419,8 @@ static void ML_BSSN_O8_RHS2_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -1428,12 +1428,14 @@ static void ML_BSSN_O8_RHS2_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC Gtl111 = kmul(JacPDstandardNth1gt11,ToReal(0.5)); @@ -1676,16 +1678,16 @@ static void ML_BSSN_O8_RHS2_Body(cGH const * restrict const cctkGH, int const di kmul(ToReal(-2),kadd(cdphi211,kmadd(SQR(cdphi1),kmul(kmadd(gt11L,gtu11,ToReal(-1)),ToReal(2)),kmul(gt11L,kmadd(cdphi211,gtu11,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu33,SQR(cdphi3))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmul(kmadd(cdphi1,kmadd(cdphi2,gtu12,kmul(cdphi3,gtu13)),kmul(cdphi2,kmul(cdphi3,gtu23))),ToReal(4)))))))))); CCTK_REAL_VEC Rphi12 = - kmul(ToReal(-2),kadd(cdphi212,kmadd(gt12L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(cdphi3,kmul(gt12L,kmul(gtu13,ToReal(4))),kmul(cdphi2,kmadd(gt12L,kmul(gtu12,ToReal(4)),ToReal(-2)))))))); + kmul(ToReal(-2),kadd(cdphi212,kmadd(gt12L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(gt12L,kmul(cdphi3,kmul(gtu13,ToReal(4))),kmul(cdphi2,kmadd(gt12L,kmul(gtu12,ToReal(4)),ToReal(-2)))))))); CCTK_REAL_VEC Rphi13 = - kmul(ToReal(-2),kadd(cdphi213,kmadd(gt13L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(cdphi2,kmul(gt13L,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt13L,kmul(gtu13,ToReal(4)),ToReal(-2)))))))); + kmul(ToReal(-2),kadd(cdphi213,kmadd(gt13L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(gt13L,kmul(cdphi2,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt13L,kmul(gtu13,ToReal(4)),ToReal(-2)))))))); CCTK_REAL_VEC Rphi22 = kmul(ToReal(-2),kadd(cdphi222,kmadd(SQR(cdphi2),kmul(kmadd(gt22L,gtu22,ToReal(-1)),ToReal(2)),kmul(gt22L,kmadd(cdphi222,gtu22,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu33,SQR(cdphi3))))),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmul(kmadd(cdphi1,kmul(cdphi3,gtu13),kmul(cdphi2,kmadd(cdphi1,gtu12,kmul(cdphi3,gtu23)))),ToReal(4)))))))))); CCTK_REAL_VEC Rphi23 = - kmul(ToReal(-2),kadd(cdphi223,kmadd(gt23L,kmadd(cdphi222,gtu22,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu22,SQR(cdphi2))))),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi1,kmul(cdphi3,kmul(gtu13,ToReal(4)))))))),kmul(cdphi2,kmadd(cdphi1,kmul(gt23L,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt23L,kmul(gtu23,ToReal(4)),ToReal(-2)))))))); + kmul(ToReal(-2),kadd(cdphi223,kmadd(gt23L,kmadd(cdphi222,gtu22,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu22,SQR(cdphi2))))),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi1,kmul(cdphi3,kmul(gtu13,ToReal(4)))))))),kmul(cdphi2,kmadd(gt23L,kmul(cdphi1,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt23L,kmul(gtu23,ToReal(4)),ToReal(-2)))))))); CCTK_REAL_VEC Rphi33 = kmul(ToReal(-2),kadd(cdphi233,kmadd(SQR(cdphi3),kmul(kmadd(gt33L,gtu33,ToReal(-1)),ToReal(2)),kmul(gt33L,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi213,gtu13,kmul(cdphi223,gtu23)),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(cdphi3,kmul(kmadd(cdphi1,gtu13,kmul(cdphi2,gtu23)),ToReal(4)),kmul(gtu12,kmadd(cdphi212,ToReal(2),kmul(cdphi1,kmul(cdphi2,ToReal(4)))))))))))))); @@ -1722,17 +1724,17 @@ static void ML_BSSN_O8_RHS2_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC em4phi = INV(e4phi); - CCTK_REAL_VEC g11 = kmul(e4phi,gt11L); + CCTK_REAL_VEC g11 = kmul(gt11L,e4phi); - CCTK_REAL_VEC g12 = kmul(e4phi,gt12L); + CCTK_REAL_VEC g12 = kmul(gt12L,e4phi); - CCTK_REAL_VEC g13 = kmul(e4phi,gt13L); + CCTK_REAL_VEC g13 = kmul(gt13L,e4phi); - CCTK_REAL_VEC g22 = kmul(e4phi,gt22L); + CCTK_REAL_VEC g22 = kmul(gt22L,e4phi); - CCTK_REAL_VEC g23 = kmul(e4phi,gt23L); + CCTK_REAL_VEC g23 = kmul(gt23L,e4phi); - CCTK_REAL_VEC g33 = kmul(e4phi,gt33L); + CCTK_REAL_VEC g33 = kmul(gt33L,e4phi); CCTK_REAL_VEC gu11 = kmul(em4phi,gtu11); @@ -1783,73 +1785,33 @@ static void ML_BSSN_O8_RHS2_Body(cGH const * restrict const cctkGH, int const di kmadd(Ats11,gu11,kmadd(Ats22,gu22,kmadd(Ats33,gu33,kmul(kmadd(Ats12,gu12,kmadd(Ats13,gu13,kmul(Ats23,gu23))),ToReal(2))))); CCTK_REAL_VEC At11rhsL = - kmadd(em4phi,kmadd(g11,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),Ats11),kmadd(At11L,kmadd(kadd(JacPDstandardNth2beta2,JacPDstandardNth3beta3),ToReal(-0.6666666666666666666666666666666666666667),kmul(JacPDstandardNth1beta1,ToReal(1.333333333333333333333333333333333333333))),kmadd(kmadd(At12L,JacPDstandardNth1beta2,kmul(At13L,JacPDstandardNth1beta3)),ToReal(2.),kmul(alphaL,kmadd(kmadd(At12L,Atm21,kmul(At13L,Atm31)),ToReal(-2.),kmadd(At11L,kmadd(Atm11,ToReal(-2.),trKL),kmul(em4phi,kmadd(eTxxL,ToReal(-25.13274122871834590770114706623602307358),kmul(g11,kmul(trS,ToReal(8.377580409572781969233715688745341024526))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats11,ToReal(3),kmul(g11,trAts)),kmadd(At11L,kmadd(kadd(JacPDstandardNth2beta2,JacPDstandardNth3beta3),ToReal(-2),kmul(JacPDstandardNth1beta1,ToReal(4))),kmsub(kmadd(At12L,JacPDstandardNth1beta2,kmul(At13L,JacPDstandardNth1beta3)),ToReal(6),kmul(alphaL,kmadd(kmadd(At12L,Atm21,kmul(At13L,Atm31)),ToReal(6),kmadd(At11L,kmadd(trKL,ToReal(-3),kmul(Atm11,ToReal(6))),kmul(em4phi,kmul(kmadd(g11,kmul(trS,ToReal(-8)),kmul(eTxxL,ToReal(24))),ToReal(Pi)))))))))); CCTK_REAL_VEC At12rhsL = - kmadd(At22L,JacPDstandardNth1beta2,kmadd(At23L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth2beta1,kmadd(At13L,JacPDstandardNth2beta3,kmadd(em4phi,kmadd(g12,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),Ats12),kmadd(At12L,kmadd(JacPDstandardNth3beta3,ToReal(-0.6666666666666666666666666666666666666667),kmul(kadd(JacPDstandardNth1beta1,JacPDstandardNth2beta2),ToReal(0.3333333333333333333333333333333333333333))),kmul(alphaL,kmadd(At12L,trKL,kmadd(kmadd(At11L,Atm12,kmadd(At12L,Atm22,kmul(At13L,Atm32))),ToReal(-2.),kmul(em4phi,kmadd(eTxyL,ToReal(-25.13274122871834590770114706623602307358),kmul(g12,kmul(trS,ToReal(8.377580409572781969233715688745341024526)))))))))))))); + kmadd(ToReal(0.333333333333333333333333333333),kmadd(At12L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth3beta3,ToReal(-2),JacPDstandardNth2beta2)),kmsub(kmadd(Ats12,em4phi,kmadd(At22L,JacPDstandardNth1beta2,kmadd(At23L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth2beta1,kmul(At13L,JacPDstandardNth2beta3))))),ToReal(3),kmul(em4phi,kmul(g12,trAts)))),kmul(alphaL,kmadd(kmadd(At11L,Atm12,kmul(At13L,Atm32)),ToReal(-2),kmadd(At12L,kmadd(Atm22,ToReal(-2),trKL),kmul(em4phi,kmadd(g12,kmul(trS,ToReal(8.37758040957278196923371568875)),kmul(eTxyL,kmul(ToReal(-8),ToReal(Pi))))))))); CCTK_REAL_VEC At13rhsL = - kmadd(At23L,JacPDstandardNth1beta2,kmadd(At33L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth3beta1,kmadd(At12L,JacPDstandardNth3beta2,kmadd(em4phi,kmadd(g13,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),Ats13),kmadd(At13L,kmadd(JacPDstandardNth2beta2,ToReal(-0.6666666666666666666666666666666666666667),kmul(kadd(JacPDstandardNth1beta1,JacPDstandardNth3beta3),ToReal(0.3333333333333333333333333333333333333333))),kmul(alphaL,kmadd(At13L,trKL,kmadd(kmadd(At11L,Atm13,kmadd(At12L,Atm23,kmul(At13L,Atm33))),ToReal(-2.),kmul(em4phi,kmadd(eTxzL,ToReal(-25.13274122871834590770114706623602307358),kmul(g13,kmul(trS,ToReal(8.377580409572781969233715688745341024526)))))))))))))); + kmadd(ToReal(0.333333333333333333333333333333),kmadd(At13L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth2beta2,ToReal(-2),JacPDstandardNth3beta3)),kmsub(kmadd(Ats13,em4phi,kmadd(At23L,JacPDstandardNth1beta2,kmadd(At33L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth3beta1,kmul(At12L,JacPDstandardNth3beta2))))),ToReal(3),kmul(em4phi,kmul(g13,trAts)))),kmul(alphaL,kmadd(kmadd(At11L,Atm13,kmul(At12L,Atm23)),ToReal(-2),kmadd(At13L,kmadd(Atm33,ToReal(-2),trKL),kmul(em4phi,kmadd(g13,kmul(trS,ToReal(8.37758040957278196923371568875)),kmul(eTxzL,kmul(ToReal(-8),ToReal(Pi))))))))); CCTK_REAL_VEC At22rhsL = - kmadd(em4phi,kmadd(g22,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),Ats22),kmadd(At22L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth3beta3),ToReal(-0.6666666666666666666666666666666666666667),kmul(JacPDstandardNth2beta2,ToReal(1.333333333333333333333333333333333333333))),kmadd(kmadd(At12L,JacPDstandardNth2beta1,kmul(At23L,JacPDstandardNth2beta3)),ToReal(2.),kmul(alphaL,kmadd(At22L,trKL,kmadd(kmadd(At12L,Atm12,kmadd(At22L,Atm22,kmul(At23L,Atm32))),ToReal(-2.),kmul(em4phi,kmadd(eTyyL,ToReal(-25.13274122871834590770114706623602307358),kmul(g22,kmul(trS,ToReal(8.377580409572781969233715688745341024526))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats22,ToReal(3),kmul(g22,trAts)),kmadd(At22L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth3beta3),ToReal(-2),kmul(JacPDstandardNth2beta2,ToReal(4))),kmsub(kmadd(At12L,JacPDstandardNth2beta1,kmul(At23L,JacPDstandardNth2beta3)),ToReal(6),kmul(alphaL,kmadd(kmadd(At12L,Atm12,kmul(At23L,Atm32)),ToReal(6),kmadd(At22L,kmadd(trKL,ToReal(-3),kmul(Atm22,ToReal(6))),kmul(em4phi,kmul(kmadd(g22,kmul(trS,ToReal(-8)),kmul(eTyyL,ToReal(24))),ToReal(Pi)))))))))); CCTK_REAL_VEC At23rhsL = - kmadd(At13L,JacPDstandardNth2beta1,kmadd(At33L,JacPDstandardNth2beta3,kmadd(At12L,JacPDstandardNth3beta1,kmadd(At22L,JacPDstandardNth3beta2,kmadd(em4phi,kmadd(g23,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),Ats23),kmadd(At23L,kmadd(JacPDstandardNth1beta1,ToReal(-0.6666666666666666666666666666666666666667),kmul(kadd(JacPDstandardNth2beta2,JacPDstandardNth3beta3),ToReal(0.3333333333333333333333333333333333333333))),kmul(alphaL,kmadd(At23L,trKL,kmadd(kmadd(At12L,Atm13,kmadd(At22L,Atm23,kmul(At23L,Atm33))),ToReal(-2.),kmul(em4phi,kmadd(eTyzL,ToReal(-25.13274122871834590770114706623602307358),kmul(g23,kmul(trS,ToReal(8.377580409572781969233715688745341024526)))))))))))))); + kmadd(ToReal(0.333333333333333333333333333333),kmadd(At23L,kadd(JacPDstandardNth2beta2,kmadd(JacPDstandardNth1beta1,ToReal(-2),JacPDstandardNth3beta3)),kmsub(kmadd(Ats23,em4phi,kmadd(At13L,JacPDstandardNth2beta1,kmadd(At33L,JacPDstandardNth2beta3,kmadd(At12L,JacPDstandardNth3beta1,kmul(At22L,JacPDstandardNth3beta2))))),ToReal(3),kmul(em4phi,kmul(g23,trAts)))),kmul(alphaL,kmadd(kmadd(At12L,Atm13,kmul(At22L,Atm23)),ToReal(-2),kmadd(At23L,kmadd(Atm33,ToReal(-2),trKL),kmul(em4phi,kmadd(g23,kmul(trS,ToReal(8.37758040957278196923371568875)),kmul(eTyzL,kmul(ToReal(-8),ToReal(Pi))))))))); CCTK_REAL_VEC At33rhsL = - kmadd(em4phi,kmadd(g33,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),Ats33),kmadd(At33L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth2beta2),ToReal(-0.6666666666666666666666666666666666666667),kmul(JacPDstandardNth3beta3,ToReal(1.333333333333333333333333333333333333333))),kmadd(kmadd(At13L,JacPDstandardNth3beta1,kmul(At23L,JacPDstandardNth3beta2)),ToReal(2.),kmul(alphaL,kmadd(At33L,trKL,kmadd(kmadd(At13L,Atm13,kmadd(At23L,Atm23,kmul(At33L,Atm33))),ToReal(-2.),kmul(em4phi,kmadd(eTzzL,ToReal(-25.13274122871834590770114706623602307358),kmul(g33,kmul(trS,ToReal(8.377580409572781969233715688745341024526))))))))))); - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - break; - } - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); + kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats33,ToReal(3),kmul(g33,trAts)),kmadd(At33L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth2beta2),ToReal(-2),kmul(JacPDstandardNth3beta3,ToReal(4))),kmsub(kmadd(At13L,JacPDstandardNth3beta1,kmul(At23L,JacPDstandardNth3beta2)),ToReal(6),kmul(alphaL,kmadd(kmadd(At13L,Atm13,kmul(At23L,Atm23)),ToReal(6),kmadd(At33L,kmadd(trKL,ToReal(-3),kmul(Atm33,ToReal(6))),kmul(em4phi,kmul(kmadd(g33,kmul(trS,ToReal(-8)),kmul(eTzzL,ToReal(24))),ToReal(Pi)))))))))); + + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_O8_RHS2); + LC_ENDLOOP3VEC(ML_BSSN_O8_RHS2); } extern "C" void ML_BSSN_O8_RHS2(CCTK_ARGUMENTS) @@ -1868,7 +1830,15 @@ extern "C" void ML_BSSN_O8_RHS2(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O8::ML_curv","ML_BSSN_O8::ML_curvrhs","ML_BSSN_O8::ML_Gamma","ML_BSSN_O8::ML_lapse","ML_BSSN_O8::ML_log_confac","ML_BSSN_O8::ML_metric","ML_BSSN_O8::ML_shift","ML_BSSN_O8::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_O8::ML_curv", + "ML_BSSN_O8::ML_curvrhs", + "ML_BSSN_O8::ML_Gamma", + "ML_BSSN_O8::ML_lapse", + "ML_BSSN_O8::ML_log_confac", + "ML_BSSN_O8::ML_metric", + "ML_BSSN_O8::ML_shift", + "ML_BSSN_O8::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O8_RHS2", 8, groups); switch(fdOrder) @@ -1890,7 +1860,7 @@ extern "C" void ML_BSSN_O8_RHS2(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_O8_RHS2_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_O8_RHS2_Body); if (verbose > 1) { diff --git a/ML_BSSN_O8/src/ML_BSSN_O8_RHSStaticBoundary.cc b/ML_BSSN_O8/src/ML_BSSN_O8_RHSStaticBoundary.cc index 6696248..387b7c4 100644 --- a/ML_BSSN_O8/src/ML_BSSN_O8_RHSStaticBoundary.cc +++ b/ML_BSSN_O8/src/ML_BSSN_O8_RHSStaticBoundary.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -64,8 +65,6 @@ static void ML_BSSN_O8_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -102,9 +101,9 @@ static void ML_BSSN_O8_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -123,14 +122,14 @@ static void ML_BSSN_O8_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -144,9 +143,9 @@ static void ML_BSSN_O8_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -227,7 +226,7 @@ static void ML_BSSN_O8_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O8_RHSStaticBoundary, + LC_LOOP3VEC(ML_BSSN_O8_RHSStaticBoundary, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -309,132 +308,35 @@ static void ML_BSSN_O8_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC B3rhsL = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_O8_RHSStaticBoundary); + LC_ENDLOOP3VEC(ML_BSSN_O8_RHSStaticBoundary); } extern "C" void ML_BSSN_O8_RHSStaticBoundary(CCTK_ARGUMENTS) @@ -453,7 +355,16 @@ extern "C" void ML_BSSN_O8_RHSStaticBoundary(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O8::ML_curvrhs","ML_BSSN_O8::ML_dtlapserhs","ML_BSSN_O8::ML_dtshiftrhs","ML_BSSN_O8::ML_Gammarhs","ML_BSSN_O8::ML_lapserhs","ML_BSSN_O8::ML_log_confacrhs","ML_BSSN_O8::ML_metricrhs","ML_BSSN_O8::ML_shiftrhs","ML_BSSN_O8::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN_O8::ML_curvrhs", + "ML_BSSN_O8::ML_dtlapserhs", + "ML_BSSN_O8::ML_dtshiftrhs", + "ML_BSSN_O8::ML_Gammarhs", + "ML_BSSN_O8::ML_lapserhs", + "ML_BSSN_O8::ML_log_confacrhs", + "ML_BSSN_O8::ML_metricrhs", + "ML_BSSN_O8::ML_shiftrhs", + "ML_BSSN_O8::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O8_RHSStaticBoundary", 9, groups); switch(fdOrder) @@ -471,7 +382,7 @@ extern "C" void ML_BSSN_O8_RHSStaticBoundary(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverBoundary(cctkGH, &ML_BSSN_O8_RHSStaticBoundary_Body); + GenericFD_LoopOverBoundary(cctkGH, ML_BSSN_O8_RHSStaticBoundary_Body); if (verbose > 1) { diff --git a/ML_BSSN_O8/src/ML_BSSN_O8_boundary.cc b/ML_BSSN_O8/src/ML_BSSN_O8_boundary.cc index afe203b..ea2dfd5 100644 --- a/ML_BSSN_O8/src/ML_BSSN_O8_boundary.cc +++ b/ML_BSSN_O8/src/ML_BSSN_O8_boundary.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -64,8 +65,6 @@ static void ML_BSSN_O8_boundary_Body(cGH const * restrict const cctkGH, int cons DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -102,9 +101,9 @@ static void ML_BSSN_O8_boundary_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -123,14 +122,14 @@ static void ML_BSSN_O8_boundary_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -144,9 +143,9 @@ static void ML_BSSN_O8_boundary_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -227,7 +226,7 @@ static void ML_BSSN_O8_boundary_Body(cGH const * restrict const cctkGH, int cons /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O8_boundary, + LC_LOOP3VEC(ML_BSSN_O8_boundary, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -309,132 +308,35 @@ static void ML_BSSN_O8_boundary_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC B3L = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(beta1[index],beta1L,elt_count); - vec_store_nta_partial_hi(beta2[index],beta2L,elt_count); - vec_store_nta_partial_hi(beta3[index],beta3L,elt_count); - vec_store_nta_partial_hi(gt11[index],gt11L,elt_count); - vec_store_nta_partial_hi(gt12[index],gt12L,elt_count); - vec_store_nta_partial_hi(gt13[index],gt13L,elt_count); - vec_store_nta_partial_hi(gt22[index],gt22L,elt_count); - vec_store_nta_partial_hi(gt23[index],gt23L,elt_count); - vec_store_nta_partial_hi(gt33[index],gt33L,elt_count); - vec_store_nta_partial_hi(phi[index],phiL,elt_count); - vec_store_nta_partial_hi(trK[index],trKL,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(beta1[index],beta1L,elt_count); - vec_store_nta_partial_lo(beta2[index],beta2L,elt_count); - vec_store_nta_partial_lo(beta3[index],beta3L,elt_count); - vec_store_nta_partial_lo(gt11[index],gt11L,elt_count); - vec_store_nta_partial_lo(gt12[index],gt12L,elt_count); - vec_store_nta_partial_lo(gt13[index],gt13L,elt_count); - vec_store_nta_partial_lo(gt22[index],gt22L,elt_count); - vec_store_nta_partial_lo(gt23[index],gt23L,elt_count); - vec_store_nta_partial_lo(gt33[index],gt33L,elt_count); - vec_store_nta_partial_lo(phi[index],phiL,elt_count); - vec_store_nta_partial_lo(trK[index],trKL,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(beta1[index],beta1L); - vec_store_nta(beta2[index],beta2L); - vec_store_nta(beta3[index],beta3L); - vec_store_nta(gt11[index],gt11L); - vec_store_nta(gt12[index],gt12L); - vec_store_nta(gt13[index],gt13L); - vec_store_nta(gt22[index],gt22L); - vec_store_nta(gt23[index],gt23L); - vec_store_nta(gt33[index],gt33L); - vec_store_nta(phi[index],phiL); - vec_store_nta(trK[index],trKL); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(beta1[index],beta1L); + vec_store_nta_partial(beta2[index],beta2L); + vec_store_nta_partial(beta3[index],beta3L); + vec_store_nta_partial(gt11[index],gt11L); + vec_store_nta_partial(gt12[index],gt12L); + vec_store_nta_partial(gt13[index],gt13L); + vec_store_nta_partial(gt22[index],gt22L); + vec_store_nta_partial(gt23[index],gt23L); + vec_store_nta_partial(gt33[index],gt33L); + vec_store_nta_partial(phi[index],phiL); + vec_store_nta_partial(trK[index],trKL); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_O8_boundary); + LC_ENDLOOP3VEC(ML_BSSN_O8_boundary); } extern "C" void ML_BSSN_O8_boundary(CCTK_ARGUMENTS) @@ -453,7 +355,16 @@ extern "C" void ML_BSSN_O8_boundary(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O8::ML_curv","ML_BSSN_O8::ML_dtlapse","ML_BSSN_O8::ML_dtshift","ML_BSSN_O8::ML_Gamma","ML_BSSN_O8::ML_lapse","ML_BSSN_O8::ML_log_confac","ML_BSSN_O8::ML_metric","ML_BSSN_O8::ML_shift","ML_BSSN_O8::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_O8::ML_curv", + "ML_BSSN_O8::ML_dtlapse", + "ML_BSSN_O8::ML_dtshift", + "ML_BSSN_O8::ML_Gamma", + "ML_BSSN_O8::ML_lapse", + "ML_BSSN_O8::ML_log_confac", + "ML_BSSN_O8::ML_metric", + "ML_BSSN_O8::ML_shift", + "ML_BSSN_O8::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O8_boundary", 9, groups); switch(fdOrder) @@ -471,7 +382,7 @@ extern "C" void ML_BSSN_O8_boundary(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverBoundaryWithGhosts(cctkGH, &ML_BSSN_O8_boundary_Body); + GenericFD_LoopOverBoundaryWithGhosts(cctkGH, ML_BSSN_O8_boundary_Body); if (verbose > 1) { diff --git a/ML_BSSN_O8/src/ML_BSSN_O8_constraints1.cc b/ML_BSSN_O8/src/ML_BSSN_O8_constraints1.cc index e1dbc8f..a4d0133 100644 --- a/ML_BSSN_O8/src/ML_BSSN_O8_constraints1.cc +++ b/ML_BSSN_O8/src/ML_BSSN_O8_constraints1.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -40,8 +41,6 @@ static void ML_BSSN_O8_constraints1_Body(cGH const * restrict const cctkGH, int DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -78,9 +77,9 @@ static void ML_BSSN_O8_constraints1_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -99,14 +98,14 @@ static void ML_BSSN_O8_constraints1_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -120,9 +119,9 @@ static void ML_BSSN_O8_constraints1_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -203,7 +202,7 @@ static void ML_BSSN_O8_constraints1_Body(cGH const * restrict const cctkGH, int /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O8_constraints1, + LC_LOOP3VEC(ML_BSSN_O8_constraints1, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1224,7 +1223,8 @@ static void ML_BSSN_O8_constraints1_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -1232,12 +1232,14 @@ static void ML_BSSN_O8_constraints1_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC Gtl111 = kmul(JacPDstandardNth1gt11,ToReal(0.5)); @@ -1480,16 +1482,16 @@ static void ML_BSSN_O8_constraints1_Body(cGH const * restrict const cctkGH, int kmul(ToReal(-2),kadd(cdphi211,kmadd(SQR(cdphi1),kmul(kmadd(gt11L,gtu11,ToReal(-1)),ToReal(2)),kmul(gt11L,kmadd(cdphi211,gtu11,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu33,SQR(cdphi3))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmul(kmadd(cdphi1,kmadd(cdphi2,gtu12,kmul(cdphi3,gtu13)),kmul(cdphi2,kmul(cdphi3,gtu23))),ToReal(4)))))))))); CCTK_REAL_VEC Rphi12 = - kmul(ToReal(-2),kadd(cdphi212,kmadd(gt12L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(cdphi3,kmul(gt12L,kmul(gtu13,ToReal(4))),kmul(cdphi2,kmadd(gt12L,kmul(gtu12,ToReal(4)),ToReal(-2)))))))); + kmul(ToReal(-2),kadd(cdphi212,kmadd(gt12L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(gt12L,kmul(cdphi3,kmul(gtu13,ToReal(4))),kmul(cdphi2,kmadd(gt12L,kmul(gtu12,ToReal(4)),ToReal(-2)))))))); CCTK_REAL_VEC Rphi13 = - kmul(ToReal(-2),kadd(cdphi213,kmadd(gt13L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(cdphi2,kmul(gt13L,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt13L,kmul(gtu13,ToReal(4)),ToReal(-2)))))))); + kmul(ToReal(-2),kadd(cdphi213,kmadd(gt13L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(gt13L,kmul(cdphi2,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt13L,kmul(gtu13,ToReal(4)),ToReal(-2)))))))); CCTK_REAL_VEC Rphi22 = kmul(ToReal(-2),kadd(cdphi222,kmadd(SQR(cdphi2),kmul(kmadd(gt22L,gtu22,ToReal(-1)),ToReal(2)),kmul(gt22L,kmadd(cdphi222,gtu22,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu33,SQR(cdphi3))))),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmul(kmadd(cdphi1,kmul(cdphi3,gtu13),kmul(cdphi2,kmadd(cdphi1,gtu12,kmul(cdphi3,gtu23)))),ToReal(4)))))))))); CCTK_REAL_VEC Rphi23 = - kmul(ToReal(-2),kadd(cdphi223,kmadd(gt23L,kmadd(cdphi222,gtu22,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu22,SQR(cdphi2))))),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi1,kmul(cdphi3,kmul(gtu13,ToReal(4)))))))),kmul(cdphi2,kmadd(cdphi1,kmul(gt23L,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt23L,kmul(gtu23,ToReal(4)),ToReal(-2)))))))); + kmul(ToReal(-2),kadd(cdphi223,kmadd(gt23L,kmadd(cdphi222,gtu22,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu22,SQR(cdphi2))))),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi1,kmul(cdphi3,kmul(gtu13,ToReal(4)))))))),kmul(cdphi2,kmadd(gt23L,kmul(cdphi1,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt23L,kmul(gtu23,ToReal(4)),ToReal(-2)))))))); CCTK_REAL_VEC Rphi33 = kmul(ToReal(-2),kadd(cdphi233,kmadd(SQR(cdphi3),kmul(kmadd(gt33L,gtu33,ToReal(-1)),ToReal(2)),kmul(gt33L,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi213,gtu13,kmul(cdphi223,gtu23)),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(cdphi3,kmul(kmadd(cdphi1,gtu13,kmul(cdphi2,gtu23)),ToReal(4)),kmul(gtu12,kmadd(cdphi212,ToReal(2),kmul(cdphi1,kmul(cdphi2,ToReal(4)))))))))))))); @@ -1557,38 +1559,13 @@ static void ML_BSSN_O8_constraints1_Body(cGH const * restrict const cctkGH, int kmul(INV(SQR(alphaL)),kadd(eTttL,kmadd(eTxxL,SQR(beta1L),kmadd(eTyyL,SQR(beta2L),kmadd(eTzzL,SQR(beta3L),kmadd(kmadd(beta2L,eTtyL,kmul(beta3L,eTtzL)),ToReal(-2),kmul(kmadd(beta2L,kmul(beta3L,eTyzL),kmul(beta1L,kmadd(beta2L,eTxyL,kmsub(beta3L,eTxzL,eTtxL)))),ToReal(2)))))))); CCTK_REAL_VEC HL = - kadd(trR,kmadd(rho,ToReal(-50.26548245743669181540229413247204614715),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(-2.),kmadd(kadd(SQR(Atm11),kadd(SQR(Atm22),SQR(Atm33))),ToReal(-1.),kmul(SQR(trKL),ToReal(0.6666666666666666666666666666666666666667)))))); - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(H[index],HL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ + kadd(trR,kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(-2),ksub(ksub(ksub(kmadd(SQR(trKL),ToReal(0.666666666666666666666666666667),kmul(rho,kmul(ToReal(-16),ToReal(Pi)))),SQR(Atm33)),SQR(Atm22)),SQR(Atm11)))); - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(H[index],HL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(H[index],HL,elt_count); - break; - } - vec_store_nta(H[index],HL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(H[index],HL); } - LC_ENDLOOP3VEC (ML_BSSN_O8_constraints1); + LC_ENDLOOP3VEC(ML_BSSN_O8_constraints1); } extern "C" void ML_BSSN_O8_constraints1(CCTK_ARGUMENTS) @@ -1607,7 +1584,15 @@ extern "C" void ML_BSSN_O8_constraints1(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O8::ML_curv","ML_BSSN_O8::ML_Gamma","ML_BSSN_O8::ML_Ham","ML_BSSN_O8::ML_lapse","ML_BSSN_O8::ML_log_confac","ML_BSSN_O8::ML_metric","ML_BSSN_O8::ML_shift","ML_BSSN_O8::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_O8::ML_curv", + "ML_BSSN_O8::ML_Gamma", + "ML_BSSN_O8::ML_Ham", + "ML_BSSN_O8::ML_lapse", + "ML_BSSN_O8::ML_log_confac", + "ML_BSSN_O8::ML_metric", + "ML_BSSN_O8::ML_shift", + "ML_BSSN_O8::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O8_constraints1", 8, groups); switch(fdOrder) @@ -1629,7 +1614,7 @@ extern "C" void ML_BSSN_O8_constraints1(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_O8_constraints1_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_O8_constraints1_Body); if (verbose > 1) { diff --git a/ML_BSSN_O8/src/ML_BSSN_O8_constraints2.cc b/ML_BSSN_O8/src/ML_BSSN_O8_constraints2.cc index b5a675e..3e84842 100644 --- a/ML_BSSN_O8/src/ML_BSSN_O8_constraints2.cc +++ b/ML_BSSN_O8/src/ML_BSSN_O8_constraints2.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -49,8 +50,6 @@ static void ML_BSSN_O8_constraints2_Body(cGH const * restrict const cctkGH, int DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -87,9 +86,9 @@ static void ML_BSSN_O8_constraints2_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -108,14 +107,14 @@ static void ML_BSSN_O8_constraints2_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -129,9 +128,9 @@ static void ML_BSSN_O8_constraints2_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -212,7 +211,7 @@ static void ML_BSSN_O8_constraints2_Body(cGH const * restrict const cctkGH, int /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O8_constraints2, + LC_LOOP3VEC(ML_BSSN_O8_constraints2, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -775,7 +774,8 @@ static void ML_BSSN_O8_constraints2_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -783,12 +783,14 @@ static void ML_BSSN_O8_constraints2_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC Gtl111 = kmul(JacPDstandardNth1gt11,ToReal(0.5)); @@ -908,13 +910,13 @@ static void ML_BSSN_O8_constraints2_Body(cGH const * restrict const cctkGH, int kmul(INV(alphaL),kmadd(beta1L,eTxzL,kmadd(beta2L,eTyzL,kmsub(beta3L,eTzzL,eTtzL)))); CCTK_REAL_VEC M1L = - kmadd(gtu11,JacPDstandardNth1At11,kmadd(gtu22,JacPDstandardNth2At12,kmadd(gtu23,kadd(JacPDstandardNth2At13,JacPDstandardNth3At12),kmadd(gtu33,JacPDstandardNth3At13,kmadd(S1,ToReal(-25.13274122871834590770114706623602307358),kmadd(kmadd(kmadd(At22L,Gt212,kmul(At23L,Gt312)),gtu22,kmadd(kmadd(At13L,Gt112,kmadd(At22L,Gt213,kmadd(At33L,Gt312,kmul(At23L,kadd(Gt212,Gt313))))),gtu23,kmul(kmadd(At13L,Gt113,kmadd(At23L,Gt213,kmul(At33L,Gt313))),gtu33))),ToReal(-1.),kmadd(gtu12,kadd(JacPDstandardNth1At12,kadd(JacPDstandardNth2At11,kmadd(At13L,kmul(Gt312,ToReal(-3.)),kmul(At22L,kmul(Gt211,ToReal(-1.)))))),kmadd(gtu13,kadd(JacPDstandardNth1At13,kadd(JacPDstandardNth3At11,kmadd(At13L,kmul(Gt313,ToReal(-3.)),kmul(At23L,kmul(Gt211,ToReal(-1.)))))),kmadd(Gt311,kmadd(At13L,kmul(gtu11,ToReal(-2.)),kmul(kmadd(At23L,gtu12,kmul(At33L,gtu13)),ToReal(-1.))),kmadd(JacPDstandardNth1trK,ToReal(-0.6666666666666666666666666666666666666667),kmadd(At13L,kmadd(kmadd(Gt322,gtu22,kmul(Gt333,gtu33)),ToReal(-1.),kmadd(cdphi3,kmul(gtu33,ToReal(6.)),kmadd(gtu13,kmadd(Gt111,ToReal(-1.),kmul(cdphi1,ToReal(6.))),kmul(gtu23,kmadd(Gt323,ToReal(-2.),kmul(cdphi2,ToReal(6.))))))),kmadd(At11L,kmadd(Gt123,kmul(gtu23,ToReal(-2.)),kmadd(kmadd(Gt122,gtu22,kmul(Gt133,gtu33)),ToReal(-1.),kmadd(gtu11,kmadd(Gt111,ToReal(-2.),kmul(cdphi1,ToReal(6.))),kmadd(gtu12,kmadd(Gt112,ToReal(-3.),kmul(cdphi2,ToReal(6.))),kmul(gtu13,kmadd(Gt113,ToReal(-3.),kmul(cdphi3,ToReal(6.)))))))),kmul(At12L,kmadd(Gt213,kmul(gtu13,ToReal(-3.)),kmadd(kmadd(Gt211,gtu11,kmul(Gt223,gtu23)),ToReal(-2.),kmadd(Gt233,kmul(gtu33,ToReal(-1.)),kmadd(gtu12,kmadd(Gt212,ToReal(-3.),kmadd(Gt111,ToReal(-1.),kmul(cdphi1,ToReal(6.)))),kmadd(gtu22,kmadd(kadd(Gt112,Gt222),ToReal(-1.),kmul(cdphi2,ToReal(6.))),kmul(gtu23,kmadd(Gt113,ToReal(-1.),kmul(cdphi3,ToReal(6.)))))))))))))))))))))); + kmadd(gtu11,JacPDstandardNth1At11,kmadd(gtu12,JacPDstandardNth2At11,kmadd(gtu22,JacPDstandardNth2At12,kmadd(gtu23,JacPDstandardNth2At13,kmadd(gtu13,JacPDstandardNth3At11,kmadd(gtu23,JacPDstandardNth3At12,kmadd(gtu33,JacPDstandardNth3At13,kmadd(gtu12,kadd(JacPDstandardNth1At12,kmsub(At13L,kmul(Gt312,ToReal(-3)),kmul(At22L,Gt211))),kmadd(gtu13,kadd(JacPDstandardNth1At13,kmsub(At13L,kmul(Gt313,ToReal(-3)),kmul(At23L,Gt211))),knmsub(gtu22,kmadd(At23L,Gt312,kmul(At22L,Gt212)),kmadd(Gt311,kmsub(At13L,kmul(gtu11,ToReal(-2)),kmadd(At33L,gtu13,kmul(At23L,gtu12))),kmadd(At13L,kmsub(Gt323,kmul(gtu23,ToReal(-2)),kmul(Gt113,gtu33)),kmadd(JacPDstandardNth1trK,ToReal(-0.666666666666666666666666666667),knmsub(At12L,kmadd(Gt111,gtu12,kmadd(Gt112,gtu22,kmadd(Gt222,gtu22,kmadd(Gt113,gtu23,kmadd(Gt233,gtu33,kmadd(cdphi1,kmul(gtu12,ToReal(-6)),kmadd(cdphi2,kmul(gtu22,ToReal(-6)),kmadd(cdphi3,kmul(gtu23,ToReal(-6)),kmadd(Gt211,kmul(gtu11,ToReal(2)),kmadd(Gt223,kmul(gtu23,ToReal(2)),kmadd(Gt212,kmul(gtu12,ToReal(3)),kmul(Gt213,kmul(gtu13,ToReal(3)))))))))))))),kmadd(gtu23,knmsub(At22L,Gt213,knmsub(At33L,Gt312,kmsub(At13L,kmul(cdphi2,ToReal(6)),kmul(At23L,kadd(Gt313,Gt212))))),kmadd(gtu33,knmsub(At23L,Gt213,kmsub(At13L,kmul(cdphi3,ToReal(6)),kmul(At33L,Gt313))),kmadd(At11L,kmadd(Gt112,kmul(gtu12,ToReal(-3)),kmadd(Gt113,kmul(gtu13,ToReal(-3)),kmadd(Gt111,kmul(gtu11,ToReal(-2)),kmadd(Gt123,kmul(gtu23,ToReal(-2)),knmsub(Gt122,gtu22,knmsub(Gt133,gtu33,kmadd(cdphi1,kmul(gtu11,ToReal(6)),kmadd(cdphi2,kmul(gtu12,ToReal(6)),kmul(cdphi3,kmul(gtu13,ToReal(6))))))))))),kmadd(At13L,knmsub(Gt322,gtu22,knmsub(Gt112,gtu23,kmsub(gtu13,kmsub(cdphi1,ToReal(6),Gt111),kmul(Gt333,gtu33)))),kmul(S1,kmul(ToReal(-8),ToReal(Pi))))))))))))))))))))); CCTK_REAL_VEC M2L = - kmadd(gtu11,JacPDstandardNth1At12,kmadd(gtu12,kadd(JacPDstandardNth1At22,JacPDstandardNth2At12),kmadd(gtu22,JacPDstandardNth2At22,kmadd(gtu33,JacPDstandardNth3At23,kmadd(S2,ToReal(-25.13274122871834590770114706623602307358),kmadd(kmadd(kmadd(At22L,Gt211,kmadd(At23L,Gt311,kmul(At13L,Gt312))),gtu11,kmadd(kmadd(At23L,Gt212,kmul(At33L,Gt312)),gtu13,kmadd(At11L,kmadd(Gt112,gtu11,kmadd(Gt122,gtu12,kmul(Gt123,gtu13))),kmadd(kmadd(At23L,Gt223,kmul(At33L,Gt323)),gtu33,kmul(At13L,kmadd(Gt322,gtu12,kmadd(Gt112,gtu13,kmadd(Gt122,gtu23,kmul(Gt123,gtu33))))))))),ToReal(-1.),kmadd(gtu23,kadd(JacPDstandardNth2At23,kadd(JacPDstandardNth3At22,kmadd(kmadd(At22L,Gt223,kmul(At23L,Gt323)),ToReal(-3.),kmul(kmadd(At23L,Gt222,kmul(At33L,Gt322)),ToReal(-1.))))),kmadd(gtu13,kadd(JacPDstandardNth1At23,kadd(JacPDstandardNth3At12,kmadd(At23L,kmul(Gt313,ToReal(-2.)),kmul(At13L,kmul(Gt323,ToReal(-1.)))))),kmadd(JacPDstandardNth2trK,ToReal(-0.6666666666666666666666666666666666666667),kmadd(At23L,kmadd(Gt312,kmul(gtu12,ToReal(-3.)),kmadd(Gt322,kmul(gtu22,ToReal(-2.)),kmadd(Gt333,kmul(gtu33,ToReal(-1.)),kmul(kmadd(cdphi1,gtu13,kmadd(cdphi2,gtu23,kmul(cdphi3,gtu33))),ToReal(6.))))),kmadd(At22L,kmadd(kmadd(Gt213,gtu13,kmul(Gt222,gtu22)),ToReal(-2.),kmadd(Gt233,kmul(gtu33,ToReal(-1.)),kmadd(kmadd(cdphi2,gtu22,kmul(cdphi3,gtu23)),ToReal(6.),kmul(gtu12,kmadd(Gt212,ToReal(-3.),kmul(cdphi1,ToReal(6.))))))),kmul(At12L,kmadd(Gt123,kmul(gtu23,ToReal(-3.)),kmadd(Gt122,kmul(gtu22,ToReal(-2.)),kmadd(Gt133,kmul(gtu33,ToReal(-1.)),kmadd(gtu11,kmadd(kadd(Gt111,Gt212),ToReal(-1.),kmul(cdphi1,ToReal(6.))),kmadd(gtu12,kmadd(Gt112,ToReal(-3.),kmadd(Gt222,ToReal(-1.),kmul(cdphi2,ToReal(6.)))),kmul(gtu13,kmadd(Gt113,ToReal(-2.),kmadd(Gt223,ToReal(-1.),kmul(cdphi3,ToReal(6.)))))))))))))))))))))); + kmadd(gtu11,JacPDstandardNth1At12,kmadd(gtu12,JacPDstandardNth1At22,kmadd(gtu13,JacPDstandardNth1At23,kmadd(gtu12,JacPDstandardNth2At12,kmadd(gtu22,JacPDstandardNth2At22,kmadd(gtu23,JacPDstandardNth2At23,kmadd(gtu13,JacPDstandardNth3At12,kmadd(gtu23,JacPDstandardNth3At22,kmadd(gtu33,JacPDstandardNth3At23,kmadd(At22L,kmul(Gt222,kmul(gtu22,ToReal(-2))),knmsub(At11L,kmadd(Gt112,gtu11,kmadd(Gt122,gtu12,kmul(Gt123,gtu13))),knmsub(gtu11,kmadd(At22L,Gt211,kmadd(At13L,Gt312,kmul(At23L,Gt311))),kmadd(Gt212,kmsub(At22L,kmul(gtu12,ToReal(-3)),kmul(At23L,gtu13)),kmadd(Gt312,kmsub(At23L,kmul(gtu12,ToReal(-3)),kmul(At33L,gtu13)),kmadd(Gt322,kmsub(At23L,kmul(gtu22,ToReal(-2)),kmul(At33L,gtu23)),kmadd(Gt223,kmsub(At22L,kmul(gtu23,ToReal(-3)),kmul(At23L,gtu33)),kmadd(Gt323,kmsub(At23L,kmul(gtu23,ToReal(-3)),kmul(At33L,gtu33)),kmadd(At22L,kmsub(Gt213,kmul(gtu13,ToReal(-2)),kmul(Gt233,gtu33)),kmadd(At23L,kmsub(Gt313,kmul(gtu13,ToReal(-2)),kmadd(Gt333,gtu33,kmul(Gt222,gtu23))),kmadd(JacPDstandardNth2trK,ToReal(-0.666666666666666666666666666667),kmadd(At22L,kmul(cdphi2,kmul(gtu22,ToReal(6))),kmadd(At22L,kmul(cdphi3,kmul(gtu23,ToReal(6))),kmadd(gtu12,kmsub(At22L,kmul(cdphi1,ToReal(6)),kmul(At13L,Gt322)),kmadd(gtu13,kmsub(At23L,kmul(cdphi1,ToReal(6)),kmul(At13L,kadd(Gt323,Gt112))),kmadd(gtu23,kmsub(At23L,kmul(cdphi2,ToReal(6)),kmul(At13L,Gt122)),kmadd(gtu33,kmsub(At23L,kmul(cdphi3,ToReal(6)),kmul(At13L,Gt123)),kmadd(At12L,kmadd(Gt112,kmul(gtu12,ToReal(-3)),kmadd(Gt123,kmul(gtu23,ToReal(-3)),kmadd(Gt113,kmul(gtu13,ToReal(-2)),kmadd(Gt122,kmul(gtu22,ToReal(-2)),knmsub(Gt133,gtu33,kmadd(gtu11,ksub(kmsub(cdphi1,ToReal(6),Gt212),Gt111),kmadd(gtu12,kmsub(cdphi2,ToReal(6),Gt222),kmul(gtu13,kmsub(cdphi3,ToReal(6),Gt223))))))))),kmul(S2,kmul(ToReal(-8),ToReal(Pi)))))))))))))))))))))))))))))); CCTK_REAL_VEC M3L = - kmadd(gtu11,JacPDstandardNth1At13,kmadd(gtu22,JacPDstandardNth2At23,kmadd(gtu13,kadd(JacPDstandardNth1At33,JacPDstandardNth3At13),kmadd(gtu33,JacPDstandardNth3At33,kmadd(S3,ToReal(-25.13274122871834590770114706623602307358),kmadd(kmadd(kmadd(At23L,Gt211,kmadd(At12L,Gt213,kmul(At33L,Gt311))),gtu11,kmadd(kmadd(At22L,Gt213,kmul(At12L,kadd(Gt113,Gt223))),gtu12,kmadd(At11L,kmadd(Gt113,gtu11,kmadd(Gt123,gtu12,kmul(Gt133,gtu13))),kmadd(kmadd(At23L,Gt222,kmul(At22L,Gt223)),gtu22,kmul(At12L,kmadd(Gt233,gtu13,kmadd(Gt123,gtu22,kmul(Gt133,gtu23)))))))),ToReal(-1.),kmadd(gtu12,kadd(JacPDstandardNth1At23,kadd(JacPDstandardNth2At13,kmadd(At33L,kmul(Gt312,ToReal(-2.)),kmul(At23L,kmul(Gt313,ToReal(-1.)))))),kmadd(gtu23,kadd(JacPDstandardNth2At33,kadd(JacPDstandardNth3At23,kmadd(kmadd(At23L,Gt223,kmul(At33L,Gt323)),ToReal(-3.),kmul(kmadd(At22L,Gt233,kmul(At23L,Gt333)),ToReal(-1.))))),kmadd(JacPDstandardNth3trK,ToReal(-0.6666666666666666666666666666666666666667),kmadd(At33L,kmadd(Gt333,kmul(gtu33,ToReal(-2.)),kmadd(Gt322,kmul(gtu22,ToReal(-1.)),kmadd(kmadd(cdphi2,gtu23,kmul(cdphi3,gtu33)),ToReal(6.),kmul(gtu13,kmadd(Gt313,ToReal(-3.),kmul(cdphi1,ToReal(6.))))))),kmadd(At23L,kmadd(Gt213,kmul(gtu13,ToReal(-3.)),kmadd(Gt233,kmul(gtu33,ToReal(-2.)),kmadd(cdphi3,kmul(gtu23,ToReal(6.)),kmadd(gtu12,kmadd(Gt212,ToReal(-2.),kmul(cdphi1,ToReal(6.))),kmul(gtu22,kmadd(Gt323,ToReal(-1.),kmul(cdphi2,ToReal(6.)))))))),kmul(At13L,kmadd(Gt123,kmul(gtu23,ToReal(-3.)),kmadd(Gt133,kmul(gtu33,ToReal(-2.)),kmadd(Gt122,kmul(gtu22,ToReal(-1.)),kmadd(gtu11,kmadd(kadd(Gt111,Gt313),ToReal(-1.),kmul(cdphi1,ToReal(6.))),kmadd(gtu12,kmadd(Gt112,ToReal(-2.),kmadd(Gt323,ToReal(-1.),kmul(cdphi2,ToReal(6.)))),kmul(gtu13,kmadd(Gt113,ToReal(-3.),kmadd(Gt333,ToReal(-1.),kmul(cdphi3,ToReal(6.)))))))))))))))))))))); + kmadd(gtu11,JacPDstandardNth1At13,kmadd(gtu12,JacPDstandardNth1At23,kmadd(gtu13,JacPDstandardNth1At33,kmadd(gtu12,JacPDstandardNth2At13,kmadd(gtu22,JacPDstandardNth2At23,kmadd(gtu23,JacPDstandardNth2At33,kmadd(gtu13,JacPDstandardNth3At13,kmadd(gtu23,JacPDstandardNth3At23,kmadd(gtu33,JacPDstandardNth3At33,kmadd(At33L,kmul(Gt313,kmul(gtu13,ToReal(-3))),kmadd(At23L,kmul(Gt223,kmul(gtu23,ToReal(-3))),kmadd(At33L,kmul(Gt323,kmul(gtu23,ToReal(-3))),kmadd(At23L,kmul(Gt233,kmul(gtu33,ToReal(-2))),kmadd(At33L,kmul(Gt333,kmul(gtu33,ToReal(-2))),knmsub(At11L,kmadd(Gt113,gtu11,kmadd(Gt123,gtu12,kmul(Gt133,gtu13))),kmadd(gtu12,kmsub(At23L,kmul(Gt212,ToReal(-2)),kmul(At12L,Gt223)),knmsub(gtu11,kmadd(At23L,Gt211,kmadd(At33L,Gt311,kmul(At12L,Gt213))),kmadd(At33L,kmsub(Gt312,kmul(gtu12,ToReal(-2)),kmul(Gt322,gtu22)),kmadd(At23L,kmsub(Gt213,kmul(gtu13,ToReal(-3)),kmadd(Gt333,gtu23,kmul(Gt222,gtu22))),kmadd(JacPDstandardNth3trK,ToReal(-0.666666666666666666666666666667),kmadd(At33L,kmul(cdphi3,kmul(gtu33,ToReal(6))),kmadd(gtu13,kmsub(At33L,kmul(cdphi1,ToReal(6)),kmul(At12L,Gt233)),kmadd(gtu23,kmsub(At33L,kmul(cdphi2,ToReal(6)),kmul(At12L,Gt133)),kmadd(gtu23,kmsub(At23L,kmul(cdphi3,ToReal(6)),kmul(At22L,Gt233)),kmadd(gtu12,knmsub(At12L,Gt113,kmsub(At23L,kmsub(cdphi1,ToReal(6),Gt313),kmul(At22L,Gt213))),kmadd(gtu22,knmsub(At12L,Gt123,kmsub(At23L,kmsub(cdphi2,ToReal(6),Gt323),kmul(At22L,Gt223))),kmadd(At13L,kmadd(Gt113,kmul(gtu13,ToReal(-3)),kmadd(Gt123,kmul(gtu23,ToReal(-3)),kmadd(Gt112,kmul(gtu12,ToReal(-2)),kmadd(Gt133,kmul(gtu33,ToReal(-2)),knmsub(Gt122,gtu22,kmadd(gtu11,ksub(kmsub(cdphi1,ToReal(6),Gt313),Gt111),kmadd(gtu12,kmsub(cdphi2,ToReal(6),Gt323),kmul(gtu13,kmsub(cdphi3,ToReal(6),Gt333))))))))),kmul(S3,kmul(ToReal(-8),ToReal(Pi)))))))))))))))))))))))))))))); CCTK_REAL_VEC cSL = klog(detgt); @@ -930,64 +932,18 @@ static void ML_BSSN_O8_constraints2_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC cAL = kmadd(At11L,gtu11,kmadd(At22L,gtu22,kmadd(At33L,gtu33,kmul(kmadd(At12L,gtu12,kmadd(At13L,gtu13,kmul(At23L,gtu23))),ToReal(2))))); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(cA[index],cAL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cS[index],cSL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cXt1[index],cXt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cXt2[index],cXt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cXt3[index],cXt3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(M1[index],M1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(M2[index],M2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(M3[index],M3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(cA[index],cAL,elt_count); - vec_store_nta_partial_hi(cS[index],cSL,elt_count); - vec_store_nta_partial_hi(cXt1[index],cXt1L,elt_count); - vec_store_nta_partial_hi(cXt2[index],cXt2L,elt_count); - vec_store_nta_partial_hi(cXt3[index],cXt3L,elt_count); - vec_store_nta_partial_hi(M1[index],M1L,elt_count); - vec_store_nta_partial_hi(M2[index],M2L,elt_count); - vec_store_nta_partial_hi(M3[index],M3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(cA[index],cAL,elt_count); - vec_store_nta_partial_lo(cS[index],cSL,elt_count); - vec_store_nta_partial_lo(cXt1[index],cXt1L,elt_count); - vec_store_nta_partial_lo(cXt2[index],cXt2L,elt_count); - vec_store_nta_partial_lo(cXt3[index],cXt3L,elt_count); - vec_store_nta_partial_lo(M1[index],M1L,elt_count); - vec_store_nta_partial_lo(M2[index],M2L,elt_count); - vec_store_nta_partial_lo(M3[index],M3L,elt_count); - break; - } - vec_store_nta(cA[index],cAL); - vec_store_nta(cS[index],cSL); - vec_store_nta(cXt1[index],cXt1L); - vec_store_nta(cXt2[index],cXt2L); - vec_store_nta(cXt3[index],cXt3L); - vec_store_nta(M1[index],M1L); - vec_store_nta(M2[index],M2L); - vec_store_nta(M3[index],M3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(cA[index],cAL); + vec_store_nta_partial(cS[index],cSL); + vec_store_nta_partial(cXt1[index],cXt1L); + vec_store_nta_partial(cXt2[index],cXt2L); + vec_store_nta_partial(cXt3[index],cXt3L); + vec_store_nta_partial(M1[index],M1L); + vec_store_nta_partial(M2[index],M2L); + vec_store_nta_partial(M3[index],M3L); } - LC_ENDLOOP3VEC (ML_BSSN_O8_constraints2); + LC_ENDLOOP3VEC(ML_BSSN_O8_constraints2); } extern "C" void ML_BSSN_O8_constraints2(CCTK_ARGUMENTS) @@ -1006,7 +962,18 @@ extern "C" void ML_BSSN_O8_constraints2(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O8::ML_cons_detg","ML_BSSN_O8::ML_cons_Gamma","ML_BSSN_O8::ML_cons_traceA","ML_BSSN_O8::ML_curv","ML_BSSN_O8::ML_Gamma","ML_BSSN_O8::ML_lapse","ML_BSSN_O8::ML_log_confac","ML_BSSN_O8::ML_metric","ML_BSSN_O8::ML_mom","ML_BSSN_O8::ML_shift","ML_BSSN_O8::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_O8::ML_cons_detg", + "ML_BSSN_O8::ML_cons_Gamma", + "ML_BSSN_O8::ML_cons_traceA", + "ML_BSSN_O8::ML_curv", + "ML_BSSN_O8::ML_Gamma", + "ML_BSSN_O8::ML_lapse", + "ML_BSSN_O8::ML_log_confac", + "ML_BSSN_O8::ML_metric", + "ML_BSSN_O8::ML_mom", + "ML_BSSN_O8::ML_shift", + "ML_BSSN_O8::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O8_constraints2", 11, groups); switch(fdOrder) @@ -1028,7 +995,7 @@ extern "C" void ML_BSSN_O8_constraints2(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_O8_constraints2_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_O8_constraints2_Body); if (verbose > 1) { diff --git a/ML_BSSN_O8/src/ML_BSSN_O8_convertFromADMBase.cc b/ML_BSSN_O8/src/ML_BSSN_O8_convertFromADMBase.cc index 1ce1c33..5203f5b 100644 --- a/ML_BSSN_O8/src/ML_BSSN_O8_convertFromADMBase.cc +++ b/ML_BSSN_O8/src/ML_BSSN_O8_convertFromADMBase.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_O8_convertFromADMBase_Body(cGH const * restrict const cctkGH DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_O8_convertFromADMBase_Body(cGH const * restrict const cctkGH CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_O8_convertFromADMBase_Body(cGH const * restrict const cctkGH CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_O8_convertFromADMBase_Body(cGH const * restrict const cctkGH CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_O8_convertFromADMBase_Body(cGH const * restrict const cctkGH /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O8_convertFromADMBase, + LC_LOOP3VEC(ML_BSSN_O8_convertFromADMBase, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -296,25 +295,25 @@ static void ML_BSSN_O8_convertFromADMBase_Body(cGH const * restrict const cctkGH CCTK_REAL_VEC gt33L = kmul(em4phi,g33); trKL = - kmadd(gu11,kxxL,kmadd(gu22,kyyL,kmadd(gu33,kzzL,kmul(kmadd(gu12,kxyL,kmadd(gu13,kxzL,kmul(gu23,kyzL))),ToReal(2))))); + kmadd(kxxL,gu11,kmadd(kyyL,gu22,kmadd(kzzL,gu33,kmul(kmadd(kxyL,gu12,kmadd(kxzL,gu13,kmul(kyzL,gu23))),ToReal(2))))); CCTK_REAL_VEC At11L = - kmul(em4phi,kmadd(g11,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kxxL)); + kmul(em4phi,kmadd(trKL,kmul(g11,ToReal(-0.333333333333333333333333333333)),kxxL)); CCTK_REAL_VEC At12L = - kmul(em4phi,kmadd(g12,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kxyL)); + kmul(em4phi,kmadd(trKL,kmul(g12,ToReal(-0.333333333333333333333333333333)),kxyL)); CCTK_REAL_VEC At13L = - kmul(em4phi,kmadd(g13,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kxzL)); + kmul(em4phi,kmadd(trKL,kmul(g13,ToReal(-0.333333333333333333333333333333)),kxzL)); CCTK_REAL_VEC At22L = - kmul(em4phi,kmadd(g22,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kyyL)); + kmul(em4phi,kmadd(trKL,kmul(g22,ToReal(-0.333333333333333333333333333333)),kyyL)); CCTK_REAL_VEC At23L = - kmul(em4phi,kmadd(g23,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kyzL)); + kmul(em4phi,kmadd(trKL,kmul(g23,ToReal(-0.333333333333333333333333333333)),kyzL)); CCTK_REAL_VEC At33L = - kmul(em4phi,kmadd(g33,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kzzL)); + kmul(em4phi,kmadd(trKL,kmul(g33,ToReal(-0.333333333333333333333333333333)),kzzL)); CCTK_REAL_VEC alphaL = alpL; @@ -324,104 +323,28 @@ static void ML_BSSN_O8_convertFromADMBase_Body(cGH const * restrict const cctkGH CCTK_REAL_VEC beta3L = betazL; - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - vec_store_nta_partial_hi(beta1[index],beta1L,elt_count); - vec_store_nta_partial_hi(beta2[index],beta2L,elt_count); - vec_store_nta_partial_hi(beta3[index],beta3L,elt_count); - vec_store_nta_partial_hi(gt11[index],gt11L,elt_count); - vec_store_nta_partial_hi(gt12[index],gt12L,elt_count); - vec_store_nta_partial_hi(gt13[index],gt13L,elt_count); - vec_store_nta_partial_hi(gt22[index],gt22L,elt_count); - vec_store_nta_partial_hi(gt23[index],gt23L,elt_count); - vec_store_nta_partial_hi(gt33[index],gt33L,elt_count); - vec_store_nta_partial_hi(phi[index],phiL,elt_count); - vec_store_nta_partial_hi(trK[index],trKL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - vec_store_nta_partial_lo(beta1[index],beta1L,elt_count); - vec_store_nta_partial_lo(beta2[index],beta2L,elt_count); - vec_store_nta_partial_lo(beta3[index],beta3L,elt_count); - vec_store_nta_partial_lo(gt11[index],gt11L,elt_count); - vec_store_nta_partial_lo(gt12[index],gt12L,elt_count); - vec_store_nta_partial_lo(gt13[index],gt13L,elt_count); - vec_store_nta_partial_lo(gt22[index],gt22L,elt_count); - vec_store_nta_partial_lo(gt23[index],gt23L,elt_count); - vec_store_nta_partial_lo(gt33[index],gt33L,elt_count); - vec_store_nta_partial_lo(phi[index],phiL,elt_count); - vec_store_nta_partial_lo(trK[index],trKL,elt_count); - break; - } - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); - vec_store_nta(beta1[index],beta1L); - vec_store_nta(beta2[index],beta2L); - vec_store_nta(beta3[index],beta3L); - vec_store_nta(gt11[index],gt11L); - vec_store_nta(gt12[index],gt12L); - vec_store_nta(gt13[index],gt13L); - vec_store_nta(gt22[index],gt22L); - vec_store_nta(gt23[index],gt23L); - vec_store_nta(gt33[index],gt33L); - vec_store_nta(phi[index],phiL); - vec_store_nta(trK[index],trKL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); + vec_store_nta_partial(beta1[index],beta1L); + vec_store_nta_partial(beta2[index],beta2L); + vec_store_nta_partial(beta3[index],beta3L); + vec_store_nta_partial(gt11[index],gt11L); + vec_store_nta_partial(gt12[index],gt12L); + vec_store_nta_partial(gt13[index],gt13L); + vec_store_nta_partial(gt22[index],gt22L); + vec_store_nta_partial(gt23[index],gt23L); + vec_store_nta_partial(gt33[index],gt33L); + vec_store_nta_partial(phi[index],phiL); + vec_store_nta_partial(trK[index],trKL); } - LC_ENDLOOP3VEC (ML_BSSN_O8_convertFromADMBase); + LC_ENDLOOP3VEC(ML_BSSN_O8_convertFromADMBase); } extern "C" void ML_BSSN_O8_convertFromADMBase(CCTK_ARGUMENTS) @@ -440,7 +363,17 @@ extern "C" void ML_BSSN_O8_convertFromADMBase(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::curv","ADMBase::lapse","ADMBase::metric","ADMBase::shift","ML_BSSN_O8::ML_curv","ML_BSSN_O8::ML_lapse","ML_BSSN_O8::ML_log_confac","ML_BSSN_O8::ML_metric","ML_BSSN_O8::ML_shift","ML_BSSN_O8::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::curv", + "ADMBase::lapse", + "ADMBase::metric", + "ADMBase::shift", + "ML_BSSN_O8::ML_curv", + "ML_BSSN_O8::ML_lapse", + "ML_BSSN_O8::ML_log_confac", + "ML_BSSN_O8::ML_metric", + "ML_BSSN_O8::ML_shift", + "ML_BSSN_O8::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O8_convertFromADMBase", 10, groups); switch(fdOrder) @@ -458,7 +391,7 @@ extern "C" void ML_BSSN_O8_convertFromADMBase(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_O8_convertFromADMBase_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_O8_convertFromADMBase_Body); if (verbose > 1) { diff --git a/ML_BSSN_O8/src/ML_BSSN_O8_convertFromADMBaseGamma.cc b/ML_BSSN_O8/src/ML_BSSN_O8_convertFromADMBaseGamma.cc index 24cf91f..8386f9b 100644 --- a/ML_BSSN_O8/src/ML_BSSN_O8_convertFromADMBaseGamma.cc +++ b/ML_BSSN_O8/src/ML_BSSN_O8_convertFromADMBaseGamma.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -46,8 +47,6 @@ static void ML_BSSN_O8_convertFromADMBaseGamma_Body(cGH const * restrict const c DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -84,9 +83,9 @@ static void ML_BSSN_O8_convertFromADMBaseGamma_Body(cGH const * restrict const c CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -105,14 +104,14 @@ static void ML_BSSN_O8_convertFromADMBaseGamma_Body(cGH const * restrict const c CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -126,9 +125,9 @@ static void ML_BSSN_O8_convertFromADMBaseGamma_Body(cGH const * restrict const c CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -209,7 +208,7 @@ static void ML_BSSN_O8_convertFromADMBaseGamma_Body(cGH const * restrict const c /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O8_convertFromADMBaseGamma, + LC_LOOP3VEC(ML_BSSN_O8_convertFromADMBaseGamma, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -746,7 +745,8 @@ static void ML_BSSN_O8_convertFromADMBaseGamma_Body(cGH const * restrict const c CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -754,12 +754,14 @@ static void ML_BSSN_O8_convertFromADMBaseGamma_Body(cGH const * restrict const c CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC Gt111 = kmul(ToReal(0.5),kmadd(gtu11,JacPDstandardNth1gt11,knmsub(gtu12,JacPDstandardNth2gt11,kmsub(kmadd(gtu12,JacPDstandardNth1gt12,kmul(gtu13,JacPDstandardNth1gt13)),ToReal(2),kmul(gtu13,JacPDstandardNth3gt11))))); @@ -837,13 +839,13 @@ static void ML_BSSN_O8_convertFromADMBaseGamma_Body(cGH const * restrict const c if (ShiftBCoeff*ShiftGammaCoeff != 0) { B1L = - kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta1,kmadd(beta2L,JacPDupwindNthAnti2beta1,kmadd(beta3L,JacPDupwindNthAnti3beta1,kmadd(JacPDupwindNthSymm1beta1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta1,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetaxL))); + kmul(INV(kmul(theta,ToReal(ShiftGammaCoeff))),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta1,kmadd(beta2L,JacPDupwindNthAnti2beta1,kmadd(beta3L,JacPDupwindNthAnti3beta1,kmadd(JacPDupwindNthSymm1beta1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta1,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetaxL)); B2L = - kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta2,kmadd(beta2L,JacPDupwindNthAnti2beta2,kmadd(beta3L,JacPDupwindNthAnti3beta2,kmadd(JacPDupwindNthSymm1beta2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta2,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetayL))); + kmul(INV(kmul(theta,ToReal(ShiftGammaCoeff))),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta2,kmadd(beta2L,JacPDupwindNthAnti2beta2,kmadd(beta3L,JacPDupwindNthAnti3beta2,kmadd(JacPDupwindNthSymm1beta2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta2,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetayL)); B3L = - kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetazL))); + kmul(INV(kmul(theta,ToReal(ShiftGammaCoeff))),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetazL)); } else { @@ -854,60 +856,17 @@ static void ML_BSSN_O8_convertFromADMBaseGamma_Body(cGH const * restrict const c B3L = ToReal(0); } - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_O8_convertFromADMBaseGamma); + LC_ENDLOOP3VEC(ML_BSSN_O8_convertFromADMBaseGamma); } extern "C" void ML_BSSN_O8_convertFromADMBaseGamma(CCTK_ARGUMENTS) @@ -926,7 +885,17 @@ extern "C" void ML_BSSN_O8_convertFromADMBaseGamma(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN_O8::ML_dtlapse","ML_BSSN_O8::ML_dtshift","ML_BSSN_O8::ML_Gamma","ML_BSSN_O8::ML_lapse","ML_BSSN_O8::ML_metric","ML_BSSN_O8::ML_shift"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_O8::ML_dtlapse", + "ML_BSSN_O8::ML_dtshift", + "ML_BSSN_O8::ML_Gamma", + "ML_BSSN_O8::ML_lapse", + "ML_BSSN_O8::ML_metric", + "ML_BSSN_O8::ML_shift"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O8_convertFromADMBaseGamma", 10, groups); switch(fdOrder) @@ -948,7 +917,7 @@ extern "C" void ML_BSSN_O8_convertFromADMBaseGamma(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_O8_convertFromADMBaseGamma_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_O8_convertFromADMBaseGamma_Body); if (verbose > 1) { diff --git a/ML_BSSN_O8/src/ML_BSSN_O8_convertToADMBase.cc b/ML_BSSN_O8/src/ML_BSSN_O8_convertToADMBase.cc index 8a7e3a3..98afb4e 100644 --- a/ML_BSSN_O8/src/ML_BSSN_O8_convertToADMBase.cc +++ b/ML_BSSN_O8/src/ML_BSSN_O8_convertToADMBase.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_O8_convertToADMBase_Body(cGH const * restrict const cctkGH, DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_O8_convertToADMBase_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_O8_convertToADMBase_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_O8_convertToADMBase_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_O8_convertToADMBase_Body(cGH const * restrict const cctkGH, /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O8_convertToADMBase, + LC_LOOP3VEC(ML_BSSN_O8_convertToADMBase, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -250,17 +249,17 @@ static void ML_BSSN_O8_convertToADMBase_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC e4phi = IfThen(conformalMethod,INV(SQR(phiL)),kexp(kmul(phiL,ToReal(4)))); - gxxL = kmul(e4phi,gt11L); + gxxL = kmul(gt11L,e4phi); - gxyL = kmul(e4phi,gt12L); + gxyL = kmul(gt12L,e4phi); - gxzL = kmul(e4phi,gt13L); + gxzL = kmul(gt13L,e4phi); - gyyL = kmul(e4phi,gt22L); + gyyL = kmul(gt22L,e4phi); - gyzL = kmul(e4phi,gt23L); + gyzL = kmul(gt23L,e4phi); - gzzL = kmul(e4phi,gt33L); + gzzL = kmul(gt33L,e4phi); CCTK_REAL_VEC kxxL = kmadd(At11L,e4phi,kmul(gxxL,kmul(trKL,ToReal(0.333333333333333333333333333333)))); @@ -288,96 +287,26 @@ static void ML_BSSN_O8_convertToADMBase_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC betazL = beta3L; - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alp[index],alpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(betax[index],betaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(betay[index],betayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(betaz[index],betazL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gxx[index],gxxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gxy[index],gxyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gxz[index],gxzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gyy[index],gyyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gyz[index],gyzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gzz[index],gzzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kxx[index],kxxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kxy[index],kxyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kxz[index],kxzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kyy[index],kyyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kyz[index],kyzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kzz[index],kzzL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alp[index],alpL,elt_count); - vec_store_nta_partial_hi(betax[index],betaxL,elt_count); - vec_store_nta_partial_hi(betay[index],betayL,elt_count); - vec_store_nta_partial_hi(betaz[index],betazL,elt_count); - vec_store_nta_partial_hi(gxx[index],gxxL,elt_count); - vec_store_nta_partial_hi(gxy[index],gxyL,elt_count); - vec_store_nta_partial_hi(gxz[index],gxzL,elt_count); - vec_store_nta_partial_hi(gyy[index],gyyL,elt_count); - vec_store_nta_partial_hi(gyz[index],gyzL,elt_count); - vec_store_nta_partial_hi(gzz[index],gzzL,elt_count); - vec_store_nta_partial_hi(kxx[index],kxxL,elt_count); - vec_store_nta_partial_hi(kxy[index],kxyL,elt_count); - vec_store_nta_partial_hi(kxz[index],kxzL,elt_count); - vec_store_nta_partial_hi(kyy[index],kyyL,elt_count); - vec_store_nta_partial_hi(kyz[index],kyzL,elt_count); - vec_store_nta_partial_hi(kzz[index],kzzL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alp[index],alpL,elt_count); - vec_store_nta_partial_lo(betax[index],betaxL,elt_count); - vec_store_nta_partial_lo(betay[index],betayL,elt_count); - vec_store_nta_partial_lo(betaz[index],betazL,elt_count); - vec_store_nta_partial_lo(gxx[index],gxxL,elt_count); - vec_store_nta_partial_lo(gxy[index],gxyL,elt_count); - vec_store_nta_partial_lo(gxz[index],gxzL,elt_count); - vec_store_nta_partial_lo(gyy[index],gyyL,elt_count); - vec_store_nta_partial_lo(gyz[index],gyzL,elt_count); - vec_store_nta_partial_lo(gzz[index],gzzL,elt_count); - vec_store_nta_partial_lo(kxx[index],kxxL,elt_count); - vec_store_nta_partial_lo(kxy[index],kxyL,elt_count); - vec_store_nta_partial_lo(kxz[index],kxzL,elt_count); - vec_store_nta_partial_lo(kyy[index],kyyL,elt_count); - vec_store_nta_partial_lo(kyz[index],kyzL,elt_count); - vec_store_nta_partial_lo(kzz[index],kzzL,elt_count); - break; - } - vec_store_nta(alp[index],alpL); - vec_store_nta(betax[index],betaxL); - vec_store_nta(betay[index],betayL); - vec_store_nta(betaz[index],betazL); - vec_store_nta(gxx[index],gxxL); - vec_store_nta(gxy[index],gxyL); - vec_store_nta(gxz[index],gxzL); - vec_store_nta(gyy[index],gyyL); - vec_store_nta(gyz[index],gyzL); - vec_store_nta(gzz[index],gzzL); - vec_store_nta(kxx[index],kxxL); - vec_store_nta(kxy[index],kxyL); - vec_store_nta(kxz[index],kxzL); - vec_store_nta(kyy[index],kyyL); - vec_store_nta(kyz[index],kyzL); - vec_store_nta(kzz[index],kzzL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alp[index],alpL); + vec_store_nta_partial(betax[index],betaxL); + vec_store_nta_partial(betay[index],betayL); + vec_store_nta_partial(betaz[index],betazL); + vec_store_nta_partial(gxx[index],gxxL); + vec_store_nta_partial(gxy[index],gxyL); + vec_store_nta_partial(gxz[index],gxzL); + vec_store_nta_partial(gyy[index],gyyL); + vec_store_nta_partial(gyz[index],gyzL); + vec_store_nta_partial(gzz[index],gzzL); + vec_store_nta_partial(kxx[index],kxxL); + vec_store_nta_partial(kxy[index],kxyL); + vec_store_nta_partial(kxz[index],kxzL); + vec_store_nta_partial(kyy[index],kyyL); + vec_store_nta_partial(kyz[index],kyzL); + vec_store_nta_partial(kzz[index],kzzL); } - LC_ENDLOOP3VEC (ML_BSSN_O8_convertToADMBase); + LC_ENDLOOP3VEC(ML_BSSN_O8_convertToADMBase); } extern "C" void ML_BSSN_O8_convertToADMBase(CCTK_ARGUMENTS) @@ -396,7 +325,17 @@ extern "C" void ML_BSSN_O8_convertToADMBase(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::curv","ADMBase::lapse","ADMBase::metric","ADMBase::shift","ML_BSSN_O8::ML_curv","ML_BSSN_O8::ML_lapse","ML_BSSN_O8::ML_log_confac","ML_BSSN_O8::ML_metric","ML_BSSN_O8::ML_shift","ML_BSSN_O8::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::curv", + "ADMBase::lapse", + "ADMBase::metric", + "ADMBase::shift", + "ML_BSSN_O8::ML_curv", + "ML_BSSN_O8::ML_lapse", + "ML_BSSN_O8::ML_log_confac", + "ML_BSSN_O8::ML_metric", + "ML_BSSN_O8::ML_shift", + "ML_BSSN_O8::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O8_convertToADMBase", 10, groups); switch(fdOrder) @@ -414,7 +353,7 @@ extern "C" void ML_BSSN_O8_convertToADMBase(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_O8_convertToADMBase_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_O8_convertToADMBase_Body); if (verbose > 1) { diff --git a/ML_BSSN_O8/src/ML_BSSN_O8_convertToADMBaseDtLapseShift.cc b/ML_BSSN_O8/src/ML_BSSN_O8_convertToADMBaseDtLapseShift.cc index 5fc4a66..3d8e868 100644 --- a/ML_BSSN_O8/src/ML_BSSN_O8_convertToADMBaseDtLapseShift.cc +++ b/ML_BSSN_O8/src/ML_BSSN_O8_convertToADMBaseDtLapseShift.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -43,8 +44,6 @@ static void ML_BSSN_O8_convertToADMBaseDtLapseShift_Body(cGH const * restrict co DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -81,9 +80,9 @@ static void ML_BSSN_O8_convertToADMBaseDtLapseShift_Body(cGH const * restrict co CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -102,14 +101,14 @@ static void ML_BSSN_O8_convertToADMBaseDtLapseShift_Body(cGH const * restrict co CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -123,9 +122,9 @@ static void ML_BSSN_O8_convertToADMBaseDtLapseShift_Body(cGH const * restrict co CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -206,7 +205,7 @@ static void ML_BSSN_O8_convertToADMBaseDtLapseShift_Body(cGH const * restrict co /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O8_convertToADMBaseDtLapseShift, + LC_LOOP3VEC(ML_BSSN_O8_convertToADMBaseDtLapseShift, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -814,7 +813,8 @@ static void ML_BSSN_O8_convertToADMBaseDtLapseShift_Body(cGH const * restrict co CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -822,15 +822,17 @@ static void ML_BSSN_O8_convertToADMBaseDtLapseShift_Body(cGH const * restrict co CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC eta = - kfmin(ToReal(1),kmul(INV(rL),ToReal(SpatialBetaDriverRadius))); + kmul(INV(kfmax(rL,ToReal(SpatialBetaDriverRadius))),ToReal(SpatialBetaDriverRadius)); CCTK_REAL_VEC theta = kfmin(ToReal(1),kexp(knmsub(rL,INV(ToReal(SpatialShiftGammaCoeffRadius)),ToReal(1)))); @@ -839,62 +841,22 @@ static void ML_BSSN_O8_convertToADMBaseDtLapseShift_Body(cGH const * restrict co kmsub(kmadd(beta1L,JacPDupwindNthAnti1alpha,kmadd(beta2L,JacPDupwindNthAnti2alpha,kmadd(beta3L,JacPDupwindNthAnti3alpha,kmadd(JacPDupwindNthSymm1alpha,kfabs(beta1L),kmadd(JacPDupwindNthSymm2alpha,kfabs(beta2L),kmul(JacPDupwindNthSymm3alpha,kfabs(beta3L))))))),ToReal(LapseAdvectionCoeff),kmul(kpow(alphaL,harmonicN),kmul(ToReal(harmonicF),kmadd(ksub(AL,trKL),ToReal(LapseACoeff),trKL)))); CCTK_REAL_VEC dtbetaxL = - kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta1,kmadd(beta2L,JacPDupwindNthAnti2beta1,kmadd(beta3L,JacPDupwindNthAnti3beta1,kmadd(JacPDupwindNthSymm1beta1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta1,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(kmadd(gtu11,JacPDstandardNth1alpha,kmadd(gtu12,JacPDstandardNth2alpha,kmul(gtu13,JacPDstandardNth3alpha))),kmul(phiL,ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth1gt11,SQR(gtu11),kmul(JacPDstandardNth1gt22,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu13,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu13,JacPDstandardNth1gt33,kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu22,JacPDstandardNth3gt22))))),kmadd(gtu11,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu12,JacPDstandardNth2gt11,kmadd(gtu13,JacPDstandardNth3gt11,kmadd(gtu23,kmul(JacPDstandardNth1gt23,ToReal(-2)),knmsub(gtu22,JacPDstandardNth1gt22,kmadd(kmadd(gtu12,JacPDstandardNth1gt12,kmadd(gtu13,JacPDstandardNth1gt13,kmul(gtu22,JacPDstandardNth2gt12))),ToReal(2),kmadd(gtu23,kmul(JacPDstandardNth2gt13,ToReal(2)),kmadd(gtu23,kmul(JacPDstandardNth3gt12,ToReal(2)),kmul(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33))))))))))),kmul(gtu12,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu23,kmul(JacPDstandardNth3gt22,ToReal(2)),kmadd(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33),kmul(gtu13,kmul(JacPDstandardNth1gt23,ToReal(4))))))))))))))))),kmul(theta,kmul(kadd(Xt1L,kmadd(beta1L,kmul(eta,ToReal(BetaDriver*(-1 - + - ShiftBCoeff))),kmul(ksub(B1L,Xt1L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))))); + kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta1,kmadd(beta2L,JacPDupwindNthAnti2beta1,kmadd(beta3L,JacPDupwindNthAnti3beta1,kmadd(JacPDupwindNthSymm1beta1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta1,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu11,JacPDstandardNth1alpha,kmadd(gtu12,JacPDstandardNth2alpha,kmul(gtu13,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth1gt11,SQR(gtu11),kmul(JacPDstandardNth1gt22,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu13,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu13,JacPDstandardNth1gt33,kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu22,JacPDstandardNth3gt22))))),kmadd(gtu11,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu12,JacPDstandardNth2gt11,kmadd(gtu13,JacPDstandardNth3gt11,kmadd(gtu23,kmul(JacPDstandardNth1gt23,ToReal(-2)),knmsub(gtu22,JacPDstandardNth1gt22,kmadd(kmadd(gtu12,JacPDstandardNth1gt12,kmadd(gtu13,JacPDstandardNth1gt13,kmul(gtu22,JacPDstandardNth2gt12))),ToReal(2),kmadd(gtu23,kmul(JacPDstandardNth2gt13,ToReal(2)),kmadd(gtu23,kmul(JacPDstandardNth3gt12,ToReal(2)),kmul(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33))))))))))),kmul(gtu12,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu23,kmul(JacPDstandardNth3gt22,ToReal(2)),kmadd(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33),kmul(gtu13,kmul(JacPDstandardNth1gt23,ToReal(4))))))))))))))))),kmul(theta,kmul(kadd(Xt1L,kmadd(ksub(B1L,Xt1L),ToReal(ShiftBCoeff),kmul(beta1L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))))); CCTK_REAL_VEC dtbetayL = - kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta2,kmadd(beta2L,JacPDupwindNthAnti2beta2,kmadd(beta3L,JacPDupwindNthAnti3beta2,kmadd(JacPDupwindNthSymm1beta2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta2,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(kmadd(gtu12,JacPDstandardNth1alpha,kmadd(gtu22,JacPDstandardNth2alpha,kmul(gtu23,JacPDstandardNth3alpha))),kmul(phiL,ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth2gt22,SQR(gtu22),kmul(JacPDstandardNth2gt11,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu23,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt13,kmadd(gtu13,JacPDstandardNth1gt33,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu11,JacPDstandardNth3gt11))))),kmadd(gtu22,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu23,JacPDstandardNth3gt22,kmadd(kmadd(gtu23,JacPDstandardNth2gt23,kmul(gtu13,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth3gt12,JacPDstandardNth2gt13)))),ToReal(2),kmadd(gtu11,kmsub(JacPDstandardNth1gt12,ToReal(2),JacPDstandardNth2gt11),kmul(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33))))))),kmul(gtu12,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu13,kmul(JacPDstandardNth3gt11,ToReal(2)),kmadd(gtu22,kmadd(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth2gt13,ToReal(4)))))))))))))))))),kmul(theta,kmul(kadd(Xt2L,kmadd(beta2L,kmul(eta,ToReal(BetaDriver*(-1 - + - ShiftBCoeff))),kmul(ksub(B2L,Xt2L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))))); + kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta2,kmadd(beta2L,JacPDupwindNthAnti2beta2,kmadd(beta3L,JacPDupwindNthAnti3beta2,kmadd(JacPDupwindNthSymm1beta2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta2,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu12,JacPDstandardNth1alpha,kmadd(gtu22,JacPDstandardNth2alpha,kmul(gtu23,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth2gt22,SQR(gtu22),kmul(JacPDstandardNth2gt11,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu23,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt13,kmadd(gtu13,JacPDstandardNth1gt33,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu11,JacPDstandardNth3gt11))))),kmadd(gtu22,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu23,JacPDstandardNth3gt22,kmadd(kmadd(gtu23,JacPDstandardNth2gt23,kmul(gtu13,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth3gt12,JacPDstandardNth2gt13)))),ToReal(2),kmadd(gtu11,kmsub(JacPDstandardNth1gt12,ToReal(2),JacPDstandardNth2gt11),kmul(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33))))))),kmul(gtu12,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu13,kmul(JacPDstandardNth3gt11,ToReal(2)),kmadd(gtu22,kmadd(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth2gt13,ToReal(4)))))))))))))))))),kmul(theta,kmul(kadd(Xt2L,kmadd(ksub(B2L,Xt2L),ToReal(ShiftBCoeff),kmul(beta2L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))))); CCTK_REAL_VEC dtbetazL = - kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(kmadd(gtu13,JacPDstandardNth1alpha,kmadd(gtu23,JacPDstandardNth2alpha,kmul(gtu33,JacPDstandardNth3alpha))),kmul(phiL,ToReal(-2)),kmul(alphaL,kmadd(phiL,kmul(kmadd(JacPDstandardNth3gt11,SQR(gtu13),kmul(JacPDstandardNth3gt22,SQR(gtu23))),ToReal(2)),kmadd(gtu23,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu33,JacPDstandardNth2gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt12,kmadd(gtu12,JacPDstandardNth1gt22,kmul(gtu33,JacPDstandardNth3gt23))),ToReal(2),kmul(gtu11,JacPDstandardNth2gt11)))))),kmadd(gtu33,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,knmsub(gtu22,JacPDstandardNth3gt22,kmadd(kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu12,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth2gt13,JacPDstandardNth3gt12)))),ToReal(2),kmul(gtu11,kmsub(JacPDstandardNth1gt13,ToReal(2),JacPDstandardNth3gt11))))))),kmul(gtu13,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu12,kmul(JacPDstandardNth2gt11,ToReal(2)),kmadd(gtu22,kmsub(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmadd(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth3gt12,ToReal(4)))))))))))))))))),kmul(theta,kmul(kadd(Xt3L,kmadd(beta3L,kmul(eta,ToReal(BetaDriver*(-1 - + - ShiftBCoeff))),kmul(ksub(B3L,Xt3L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))))); - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count); - break; - } - vec_store_nta(dtalp[index],dtalpL); - vec_store_nta(dtbetax[index],dtbetaxL); - vec_store_nta(dtbetay[index],dtbetayL); - vec_store_nta(dtbetaz[index],dtbetazL); + kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu13,JacPDstandardNth1alpha,kmadd(gtu23,JacPDstandardNth2alpha,kmul(gtu33,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmul(kmadd(JacPDstandardNth3gt11,SQR(gtu13),kmul(JacPDstandardNth3gt22,SQR(gtu23))),ToReal(2)),kmadd(gtu23,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu33,JacPDstandardNth2gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt12,kmadd(gtu12,JacPDstandardNth1gt22,kmul(gtu33,JacPDstandardNth3gt23))),ToReal(2),kmul(gtu11,JacPDstandardNth2gt11)))))),kmadd(gtu33,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,knmsub(gtu22,JacPDstandardNth3gt22,kmadd(kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu12,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth2gt13,JacPDstandardNth3gt12)))),ToReal(2),kmul(gtu11,kmsub(JacPDstandardNth1gt13,ToReal(2),JacPDstandardNth3gt11))))))),kmul(gtu13,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu12,kmul(JacPDstandardNth2gt11,ToReal(2)),kmadd(gtu22,kmsub(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmadd(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth3gt12,ToReal(4)))))))))))))))))),kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))))); + + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(dtalp[index],dtalpL); + vec_store_nta_partial(dtbetax[index],dtbetaxL); + vec_store_nta_partial(dtbetay[index],dtbetayL); + vec_store_nta_partial(dtbetaz[index],dtbetazL); } - LC_ENDLOOP3VEC (ML_BSSN_O8_convertToADMBaseDtLapseShift); + LC_ENDLOOP3VEC(ML_BSSN_O8_convertToADMBaseDtLapseShift); } extern "C" void ML_BSSN_O8_convertToADMBaseDtLapseShift(CCTK_ARGUMENTS) @@ -913,7 +875,19 @@ extern "C" void ML_BSSN_O8_convertToADMBaseDtLapseShift(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN_O8::ML_dtlapse","ML_BSSN_O8::ML_dtshift","ML_BSSN_O8::ML_Gamma","ML_BSSN_O8::ML_lapse","ML_BSSN_O8::ML_log_confac","ML_BSSN_O8::ML_metric","ML_BSSN_O8::ML_shift","ML_BSSN_O8::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_O8::ML_dtlapse", + "ML_BSSN_O8::ML_dtshift", + "ML_BSSN_O8::ML_Gamma", + "ML_BSSN_O8::ML_lapse", + "ML_BSSN_O8::ML_log_confac", + "ML_BSSN_O8::ML_metric", + "ML_BSSN_O8::ML_shift", + "ML_BSSN_O8::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O8_convertToADMBaseDtLapseShift", 12, groups); switch(fdOrder) @@ -935,7 +909,7 @@ extern "C" void ML_BSSN_O8_convertToADMBaseDtLapseShift(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_O8_convertToADMBaseDtLapseShift_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_O8_convertToADMBaseDtLapseShift_Body); if (verbose > 1) { diff --git a/ML_BSSN_O8/src/ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary.cc b/ML_BSSN_O8/src/ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary.cc index 1bf55af..54fba2b 100644 --- a/ML_BSSN_O8/src/ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary.cc +++ b/ML_BSSN_O8/src/ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -43,8 +44,6 @@ static void ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * res DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -81,9 +80,9 @@ static void ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * res CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -102,14 +101,14 @@ static void ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * res CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -123,9 +122,9 @@ static void ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * res CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -206,7 +205,7 @@ static void ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * res /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary, + LC_LOOP3VEC(ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -252,7 +251,7 @@ static void ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * res /* Calculate temporaries and grid functions */ CCTK_REAL_VEC eta = - kfmin(ToReal(1),kmul(INV(rL),ToReal(SpatialBetaDriverRadius))); + kmul(INV(kfmax(rL,ToReal(SpatialBetaDriverRadius))),ToReal(SpatialBetaDriverRadius)); CCTK_REAL_VEC theta = kfmin(ToReal(1),kexp(knmsub(rL,INV(ToReal(SpatialShiftGammaCoeffRadius)),ToReal(1)))); @@ -275,60 +274,23 @@ static void ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * res else { dtbetaxL = - kmul(theta,kmul(kadd(Xt1L,kmadd(beta1L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B1L,Xt1L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt1L,kmadd(ksub(B1L,Xt1L),ToReal(ShiftBCoeff),kmul(beta1L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); dtbetayL = - kmul(theta,kmul(kadd(Xt2L,kmadd(beta2L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B2L,Xt2L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt2L,kmadd(ksub(B2L,Xt2L),ToReal(ShiftBCoeff),kmul(beta2L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); dtbetazL = - kmul(theta,kmul(kadd(Xt3L,kmadd(beta3L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B3L,Xt3L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count); - continue; + kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); } - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count); - break; - } - vec_store_nta(dtalp[index],dtalpL); - vec_store_nta(dtbetax[index],dtbetaxL); - vec_store_nta(dtbetay[index],dtbetayL); - vec_store_nta(dtbetaz[index],dtbetazL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(dtalp[index],dtalpL); + vec_store_nta_partial(dtbetax[index],dtbetaxL); + vec_store_nta_partial(dtbetay[index],dtbetayL); + vec_store_nta_partial(dtbetaz[index],dtbetazL); } - LC_ENDLOOP3VEC (ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary); + LC_ENDLOOP3VEC(ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary); } extern "C" void ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary(CCTK_ARGUMENTS) @@ -347,7 +309,17 @@ extern "C" void ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN_O8::ML_dtlapse","ML_BSSN_O8::ML_dtshift","ML_BSSN_O8::ML_Gamma","ML_BSSN_O8::ML_lapse","ML_BSSN_O8::ML_shift","ML_BSSN_O8::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_O8::ML_dtlapse", + "ML_BSSN_O8::ML_dtshift", + "ML_BSSN_O8::ML_Gamma", + "ML_BSSN_O8::ML_lapse", + "ML_BSSN_O8::ML_shift", + "ML_BSSN_O8::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary", 10, groups); switch(fdOrder) @@ -365,7 +337,7 @@ extern "C" void ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverBoundaryWithGhosts(cctkGH, &ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary_Body); + GenericFD_LoopOverBoundaryWithGhosts(cctkGH, ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary_Body); if (verbose > 1) { diff --git a/ML_BSSN_O8/src/ML_BSSN_O8_convertToADMBaseFakeDtLapseShift.cc b/ML_BSSN_O8/src/ML_BSSN_O8_convertToADMBaseFakeDtLapseShift.cc index ba27c1b..c6f5687 100644 --- a/ML_BSSN_O8/src/ML_BSSN_O8_convertToADMBaseFakeDtLapseShift.cc +++ b/ML_BSSN_O8/src/ML_BSSN_O8_convertToADMBaseFakeDtLapseShift.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_O8_convertToADMBaseFakeDtLapseShift_Body(cGH const * restric DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_O8_convertToADMBaseFakeDtLapseShift_Body(cGH const * restric CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_O8_convertToADMBaseFakeDtLapseShift_Body(cGH const * restric CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_O8_convertToADMBaseFakeDtLapseShift_Body(cGH const * restric CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_O8_convertToADMBaseFakeDtLapseShift_Body(cGH const * restric /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O8_convertToADMBaseFakeDtLapseShift, + LC_LOOP3VEC(ML_BSSN_O8_convertToADMBaseFakeDtLapseShift, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -237,7 +236,7 @@ static void ML_BSSN_O8_convertToADMBaseFakeDtLapseShift_Body(cGH const * restric /* Calculate temporaries and grid functions */ CCTK_REAL_VEC eta = - kfmin(ToReal(1),kmul(INV(rL),ToReal(SpatialBetaDriverRadius))); + kmul(INV(kfmax(rL,ToReal(SpatialBetaDriverRadius))),ToReal(SpatialBetaDriverRadius)); CCTK_REAL_VEC theta = kfmin(ToReal(1),kexp(knmsub(rL,INV(ToReal(SpatialShiftGammaCoeffRadius)),ToReal(1)))); @@ -260,60 +259,23 @@ static void ML_BSSN_O8_convertToADMBaseFakeDtLapseShift_Body(cGH const * restric else { dtbetaxL = - kmul(theta,kmul(kadd(Xt1L,kmadd(beta1L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B1L,Xt1L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt1L,kmadd(ksub(B1L,Xt1L),ToReal(ShiftBCoeff),kmul(beta1L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); dtbetayL = - kmul(theta,kmul(kadd(Xt2L,kmadd(beta2L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B2L,Xt2L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt2L,kmadd(ksub(B2L,Xt2L),ToReal(ShiftBCoeff),kmul(beta2L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); dtbetazL = - kmul(theta,kmul(kadd(Xt3L,kmadd(beta3L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B3L,Xt3L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count); - continue; + kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); } - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count); - break; - } - vec_store_nta(dtalp[index],dtalpL); - vec_store_nta(dtbetax[index],dtbetaxL); - vec_store_nta(dtbetay[index],dtbetayL); - vec_store_nta(dtbetaz[index],dtbetazL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(dtalp[index],dtalpL); + vec_store_nta_partial(dtbetax[index],dtbetaxL); + vec_store_nta_partial(dtbetay[index],dtbetayL); + vec_store_nta_partial(dtbetaz[index],dtbetazL); } - LC_ENDLOOP3VEC (ML_BSSN_O8_convertToADMBaseFakeDtLapseShift); + LC_ENDLOOP3VEC(ML_BSSN_O8_convertToADMBaseFakeDtLapseShift); } extern "C" void ML_BSSN_O8_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS) @@ -332,7 +294,17 @@ extern "C" void ML_BSSN_O8_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN_O8::ML_dtlapse","ML_BSSN_O8::ML_dtshift","ML_BSSN_O8::ML_Gamma","ML_BSSN_O8::ML_lapse","ML_BSSN_O8::ML_shift","ML_BSSN_O8::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_O8::ML_dtlapse", + "ML_BSSN_O8::ML_dtshift", + "ML_BSSN_O8::ML_Gamma", + "ML_BSSN_O8::ML_lapse", + "ML_BSSN_O8::ML_shift", + "ML_BSSN_O8::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O8_convertToADMBaseFakeDtLapseShift", 10, groups); switch(fdOrder) @@ -350,7 +322,7 @@ extern "C" void ML_BSSN_O8_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_O8_convertToADMBaseFakeDtLapseShift_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_O8_convertToADMBaseFakeDtLapseShift_Body); if (verbose > 1) { diff --git a/ML_BSSN_O8/src/ML_BSSN_O8_enforce.cc b/ML_BSSN_O8/src/ML_BSSN_O8_enforce.cc index 4ed51fe..df3de65 100644 --- a/ML_BSSN_O8/src/ML_BSSN_O8_enforce.cc +++ b/ML_BSSN_O8/src/ML_BSSN_O8_enforce.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_O8_enforce_Body(cGH const * restrict const cctkGH, int const DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_O8_enforce_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_O8_enforce_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_O8_enforce_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_O8_enforce_Body(cGH const * restrict const cctkGH, int const /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O8_enforce, + LC_LOOP3VEC(ML_BSSN_O8_enforce, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -238,7 +237,8 @@ static void ML_BSSN_O8_enforce_Body(cGH const * restrict const cctkGH, int const /* Calculate temporaries and grid functions */ CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -246,12 +246,14 @@ static void ML_BSSN_O8_enforce_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC trAt = kmadd(At11L,gtu11,kmadd(At22L,gtu22,kmadd(At33L,gtu33,kmul(kmadd(At12L,gtu12,kmadd(At13L,gtu13,kmul(At23L,gtu23))),ToReal(2))))); @@ -276,60 +278,17 @@ static void ML_BSSN_O8_enforce_Body(cGH const * restrict const cctkGH, int const alphaL = kfmax(alphaL,ToReal(MinimumLapse)); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - break; - } - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); } - LC_ENDLOOP3VEC (ML_BSSN_O8_enforce); + LC_ENDLOOP3VEC(ML_BSSN_O8_enforce); } extern "C" void ML_BSSN_O8_enforce(CCTK_ARGUMENTS) @@ -348,7 +307,10 @@ extern "C" void ML_BSSN_O8_enforce(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O8::ML_curv","ML_BSSN_O8::ML_lapse","ML_BSSN_O8::ML_metric"}; + const char *const groups[] = { + "ML_BSSN_O8::ML_curv", + "ML_BSSN_O8::ML_lapse", + "ML_BSSN_O8::ML_metric"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O8_enforce", 3, groups); switch(fdOrder) @@ -366,7 +328,7 @@ extern "C" void ML_BSSN_O8_enforce(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_O8_enforce_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_O8_enforce_Body); if (verbose > 1) { diff --git a/ML_BSSN_O8/src/make.code.defn b/ML_BSSN_O8/src/make.code.defn index bd625ef..0cf766f 100644 --- a/ML_BSSN_O8/src/make.code.defn +++ b/ML_BSSN_O8/src/make.code.defn @@ -1,3 +1,3 @@ # File produced by Kranc -SRCS = Startup.cc RegisterMoL.cc RegisterSymmetries.cc ML_BSSN_O8_Minkowski.cc ML_BSSN_O8_convertFromADMBase.cc ML_BSSN_O8_InitGamma.cc ML_BSSN_O8_convertFromADMBaseGamma.cc ML_BSSN_O8_RHS1.cc ML_BSSN_O8_RHS2.cc ML_BSSN_O8_Dissipation.cc ML_BSSN_O8_Advect.cc ML_BSSN_O8_InitRHS.cc ML_BSSN_O8_RHSStaticBoundary.cc ML_BSSN_O8_enforce.cc ML_BSSN_O8_boundary.cc ML_BSSN_O8_convertToADMBase.cc ML_BSSN_O8_convertToADMBaseDtLapseShift.cc ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary.cc ML_BSSN_O8_convertToADMBaseFakeDtLapseShift.cc ML_BSSN_O8_constraints1.cc ML_BSSN_O8_constraints2.cc Boundaries.cc +SRCS = Startup.cc RegisterSymmetries.cc RegisterMoL.cc ML_BSSN_O8_Minkowski.cc ML_BSSN_O8_convertFromADMBase.cc ML_BSSN_O8_InitGamma.cc ML_BSSN_O8_convertFromADMBaseGamma.cc ML_BSSN_O8_RHS1.cc ML_BSSN_O8_RHS2.cc ML_BSSN_O8_Dissipation.cc ML_BSSN_O8_Advect.cc ML_BSSN_O8_InitRHS.cc ML_BSSN_O8_RHSStaticBoundary.cc ML_BSSN_O8_enforce.cc ML_BSSN_O8_boundary.cc ML_BSSN_O8_convertToADMBase.cc ML_BSSN_O8_convertToADMBaseDtLapseShift.cc ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary.cc ML_BSSN_O8_convertToADMBaseFakeDtLapseShift.cc ML_BSSN_O8_constraints1.cc ML_BSSN_O8_constraints2.cc Boundaries.cc diff --git a/ML_BSSN_UPW/param.ccl b/ML_BSSN_UPW/param.ccl index 180d131..6df5b1f 100644 --- a/ML_BSSN_UPW/param.ccl +++ b/ML_BSSN_UPW/param.ccl @@ -90,7 +90,7 @@ CCTK_REAL BetaDriver "BetaDriver" } 0 restricted: -CCTK_REAL LapseAdvectionCoeff "Factor in front of the shift advection terms in 1+log" +CCTK_REAL LapseAdvectionCoeff "Factor in front of the lapse advection terms in 1+log" { "*:*" :: "" } 1 @@ -237,6 +237,12 @@ CCTK_INT rhs_timelevels "Number of active RHS timelevels" STEERABLE=RECOVER } 1 restricted: +CCTK_INT other_timelevels "Number of active timelevels for non-evolved grid functions" STEERABLE=RECOVER +{ + 0:3 :: "" +} 1 + +restricted: CCTK_INT ML_BSSN_UPW_Minkowski_calc_every "ML_BSSN_UPW_Minkowski_calc_every" STEERABLE=ALWAYS { *:* :: "" diff --git a/ML_BSSN_UPW/schedule.ccl b/ML_BSSN_UPW/schedule.ccl index a19a7de..6b69c93 100644 --- a/ML_BSSN_UPW/schedule.ccl +++ b/ML_BSSN_UPW/schedule.ccl @@ -1,15 +1,30 @@ # File produced by Kranc -STORAGE: ML_cons_detg[1] +if (other_timelevels == 1) +{ + STORAGE: ML_cons_detg[1] +} -STORAGE: ML_cons_Gamma[1] +if (other_timelevels == 1) +{ + STORAGE: ML_cons_Gamma[1] +} -STORAGE: ML_cons_traceA[1] +if (other_timelevels == 1) +{ + STORAGE: ML_cons_traceA[1] +} -STORAGE: ML_Ham[1] +if (other_timelevels == 1) +{ + STORAGE: ML_Ham[1] +} -STORAGE: ML_mom[1] +if (other_timelevels == 1) +{ + STORAGE: ML_mom[1] +} if (timelevels == 1) { @@ -251,12 +266,6 @@ schedule ML_BSSN_UPW_Startup at STARTUP OPTIONS: meta } "create banner" -schedule ML_BSSN_UPW_RegisterVars in MoL_Register -{ - LANG: C - OPTIONS: meta -} "Register Variables for MoL" - schedule ML_BSSN_UPW_RegisterSymmetries in SymmetryRegister { LANG: C @@ -269,6 +278,15 @@ if (CCTK_EQUALS(my_initial_data, "Minkowski")) schedule ML_BSSN_UPW_Minkowski IN ADMBase_InitialData { LANG: C + WRITES: ML_BSSN_UPW::ML_curv + WRITES: ML_BSSN_UPW::ML_dtlapse + WRITES: ML_BSSN_UPW::ML_dtshift + WRITES: ML_BSSN_UPW::ML_Gamma + WRITES: ML_BSSN_UPW::ML_lapse + WRITES: ML_BSSN_UPW::ML_log_confac + WRITES: ML_BSSN_UPW::ML_metric + WRITES: ML_BSSN_UPW::ML_shift + WRITES: ML_BSSN_UPW::ML_trace_curv } "ML_BSSN_UPW_Minkowski" } @@ -278,6 +296,18 @@ if (CCTK_EQUALS(my_initial_data, "ADMBase")) schedule ML_BSSN_UPW_convertFromADMBase AT initial AFTER ADMBase_PostInitial { LANG: C + READS: ADMBase::curv + READS: ADMBase::lapse + READS: ADMBase::metric + READS: ADMBase::shift + READS: ML_BSSN_UPW::ML_log_confac + READS: ML_BSSN_UPW::ML_trace_curv + WRITES: ML_BSSN_UPW::ML_curv + WRITES: ML_BSSN_UPW::ML_lapse + WRITES: ML_BSSN_UPW::ML_log_confac + WRITES: ML_BSSN_UPW::ML_metric + WRITES: ML_BSSN_UPW::ML_shift + WRITES: ML_BSSN_UPW::ML_trace_curv } "ML_BSSN_UPW_convertFromADMBase" } @@ -287,6 +317,9 @@ if (CCTK_EQUALS(my_initial_data, "ADMBase")) schedule ML_BSSN_UPW_InitGamma AT initial BEFORE ML_BSSN_UPW_convertFromADMBaseGamma { LANG: C + WRITES: ML_BSSN_UPW::ML_dtlapse + WRITES: ML_BSSN_UPW::ML_dtshift + WRITES: ML_BSSN_UPW::ML_Gamma } "ML_BSSN_UPW_InitGamma" } @@ -299,17 +332,62 @@ if (CCTK_EQUALS(my_initial_data, "ADMBase")) SYNC: ML_dtlapse SYNC: ML_dtshift SYNC: ML_Gamma + READS: ADMBase::dtlapse + READS: ADMBase::dtshift + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_UPW::ML_lapse + READS: ML_BSSN_UPW::ML_metric + READS: ML_BSSN_UPW::ML_shift + WRITES: ML_BSSN_UPW::ML_dtlapse + WRITES: ML_BSSN_UPW::ML_dtshift + WRITES: ML_BSSN_UPW::ML_Gamma } "ML_BSSN_UPW_convertFromADMBaseGamma" } schedule ML_BSSN_UPW_RHS1 IN ML_BSSN_UPW_evolCalcGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_UPW::ML_curv + READS: ML_BSSN_UPW::ML_dtlapse + READS: ML_BSSN_UPW::ML_dtshift + READS: ML_BSSN_UPW::ML_Gamma + READS: ML_BSSN_UPW::ML_lapse + READS: ML_BSSN_UPW::ML_log_confac + READS: ML_BSSN_UPW::ML_metric + READS: ML_BSSN_UPW::ML_shift + READS: ML_BSSN_UPW::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN_UPW::ML_dtlapserhs + WRITES: ML_BSSN_UPW::ML_dtshiftrhs + WRITES: ML_BSSN_UPW::ML_Gammarhs + WRITES: ML_BSSN_UPW::ML_lapserhs + WRITES: ML_BSSN_UPW::ML_log_confacrhs + WRITES: ML_BSSN_UPW::ML_metricrhs + WRITES: ML_BSSN_UPW::ML_shiftrhs + WRITES: ML_BSSN_UPW::ML_trace_curvrhs } "ML_BSSN_UPW_RHS1" schedule ML_BSSN_UPW_RHS2 IN ML_BSSN_UPW_evolCalcGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_UPW::ML_curv + READS: ML_BSSN_UPW::ML_Gamma + READS: ML_BSSN_UPW::ML_lapse + READS: ML_BSSN_UPW::ML_log_confac + READS: ML_BSSN_UPW::ML_metric + READS: ML_BSSN_UPW::ML_shift + READS: ML_BSSN_UPW::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN_UPW::ML_curvrhs } "ML_BSSN_UPW_RHS2" @@ -318,17 +396,80 @@ if (CCTK_EQUALS(apply_dissipation, "always")) schedule ML_BSSN_UPW_Dissipation IN ML_BSSN_UPW_evolCalcGroup AFTER (ML_BSSN_UPW_RHS1 ML_BSSN_UPW_RHS2) { LANG: C + READS: ML_BSSN_UPW::ML_curv + READS: ML_BSSN_UPW::ML_curvrhs + READS: ML_BSSN_UPW::ML_dtlapse + READS: ML_BSSN_UPW::ML_dtlapserhs + READS: ML_BSSN_UPW::ML_dtshift + READS: ML_BSSN_UPW::ML_dtshiftrhs + READS: ML_BSSN_UPW::ML_Gamma + READS: ML_BSSN_UPW::ML_Gammarhs + READS: ML_BSSN_UPW::ML_lapse + READS: ML_BSSN_UPW::ML_lapserhs + READS: ML_BSSN_UPW::ML_log_confac + READS: ML_BSSN_UPW::ML_log_confacrhs + READS: ML_BSSN_UPW::ML_metric + READS: ML_BSSN_UPW::ML_metricrhs + READS: ML_BSSN_UPW::ML_shift + READS: ML_BSSN_UPW::ML_shiftrhs + READS: ML_BSSN_UPW::ML_trace_curv + READS: ML_BSSN_UPW::ML_trace_curvrhs + WRITES: ML_BSSN_UPW::ML_curvrhs + WRITES: ML_BSSN_UPW::ML_dtlapserhs + WRITES: ML_BSSN_UPW::ML_dtshiftrhs + WRITES: ML_BSSN_UPW::ML_Gammarhs + WRITES: ML_BSSN_UPW::ML_lapserhs + WRITES: ML_BSSN_UPW::ML_log_confacrhs + WRITES: ML_BSSN_UPW::ML_metricrhs + WRITES: ML_BSSN_UPW::ML_shiftrhs + WRITES: ML_BSSN_UPW::ML_trace_curvrhs } "ML_BSSN_UPW_Dissipation" } schedule ML_BSSN_UPW_Advect IN ML_BSSN_UPW_evolCalcGroup AFTER (ML_BSSN_UPW_RHS1 ML_BSSN_UPW_RHS2) { LANG: C + READS: ML_BSSN_UPW::ML_curv + READS: ML_BSSN_UPW::ML_curvrhs + READS: ML_BSSN_UPW::ML_dtlapse + READS: ML_BSSN_UPW::ML_dtlapserhs + READS: ML_BSSN_UPW::ML_dtshift + READS: ML_BSSN_UPW::ML_dtshiftrhs + READS: ML_BSSN_UPW::ML_Gamma + READS: ML_BSSN_UPW::ML_Gammarhs + READS: ML_BSSN_UPW::ML_lapse + READS: ML_BSSN_UPW::ML_lapserhs + READS: ML_BSSN_UPW::ML_log_confac + READS: ML_BSSN_UPW::ML_log_confacrhs + READS: ML_BSSN_UPW::ML_metric + READS: ML_BSSN_UPW::ML_metricrhs + READS: ML_BSSN_UPW::ML_shift + READS: ML_BSSN_UPW::ML_shiftrhs + READS: ML_BSSN_UPW::ML_trace_curv + READS: ML_BSSN_UPW::ML_trace_curvrhs + WRITES: ML_BSSN_UPW::ML_curvrhs + WRITES: ML_BSSN_UPW::ML_dtlapserhs + WRITES: ML_BSSN_UPW::ML_dtshiftrhs + WRITES: ML_BSSN_UPW::ML_Gammarhs + WRITES: ML_BSSN_UPW::ML_lapserhs + WRITES: ML_BSSN_UPW::ML_log_confacrhs + WRITES: ML_BSSN_UPW::ML_metricrhs + WRITES: ML_BSSN_UPW::ML_shiftrhs + WRITES: ML_BSSN_UPW::ML_trace_curvrhs } "ML_BSSN_UPW_Advect" schedule ML_BSSN_UPW_InitRHS AT analysis BEFORE ML_BSSN_UPW_evolCalcGroup { LANG: C + WRITES: ML_BSSN_UPW::ML_curvrhs + WRITES: ML_BSSN_UPW::ML_dtlapserhs + WRITES: ML_BSSN_UPW::ML_dtshiftrhs + WRITES: ML_BSSN_UPW::ML_Gammarhs + WRITES: ML_BSSN_UPW::ML_lapserhs + WRITES: ML_BSSN_UPW::ML_log_confacrhs + WRITES: ML_BSSN_UPW::ML_metricrhs + WRITES: ML_BSSN_UPW::ML_shiftrhs + WRITES: ML_BSSN_UPW::ML_trace_curvrhs } "ML_BSSN_UPW_InitRHS" @@ -337,12 +478,26 @@ if (CCTK_EQUALS(my_rhs_boundary_condition, "static")) schedule ML_BSSN_UPW_RHSStaticBoundary IN MoL_CalcRHS { LANG: C + WRITES: ML_BSSN_UPW::ML_curvrhs + WRITES: ML_BSSN_UPW::ML_dtlapserhs + WRITES: ML_BSSN_UPW::ML_dtshiftrhs + WRITES: ML_BSSN_UPW::ML_Gammarhs + WRITES: ML_BSSN_UPW::ML_lapserhs + WRITES: ML_BSSN_UPW::ML_log_confacrhs + WRITES: ML_BSSN_UPW::ML_metricrhs + WRITES: ML_BSSN_UPW::ML_shiftrhs + WRITES: ML_BSSN_UPW::ML_trace_curvrhs } "ML_BSSN_UPW_RHSStaticBoundary" } schedule ML_BSSN_UPW_enforce IN MoL_PostStepModify { LANG: C + READS: ML_BSSN_UPW::ML_curv + READS: ML_BSSN_UPW::ML_lapse + READS: ML_BSSN_UPW::ML_metric + WRITES: ML_BSSN_UPW::ML_curv + WRITES: ML_BSSN_UPW::ML_lapse } "ML_BSSN_UPW_enforce" @@ -351,12 +506,32 @@ if (CCTK_EQUALS(my_boundary_condition, "Minkowski")) schedule ML_BSSN_UPW_boundary IN MoL_PostStep { LANG: C + WRITES: ML_BSSN_UPW::ML_curv + WRITES: ML_BSSN_UPW::ML_dtlapse + WRITES: ML_BSSN_UPW::ML_dtshift + WRITES: ML_BSSN_UPW::ML_Gamma + WRITES: ML_BSSN_UPW::ML_lapse + WRITES: ML_BSSN_UPW::ML_log_confac + WRITES: ML_BSSN_UPW::ML_metric + WRITES: ML_BSSN_UPW::ML_shift + WRITES: ML_BSSN_UPW::ML_trace_curv } "ML_BSSN_UPW_boundary" } schedule ML_BSSN_UPW_convertToADMBase IN ML_BSSN_UPW_convertToADMBaseGroup { LANG: C + READS: ADMBase::metric + READS: ML_BSSN_UPW::ML_curv + READS: ML_BSSN_UPW::ML_lapse + READS: ML_BSSN_UPW::ML_log_confac + READS: ML_BSSN_UPW::ML_metric + READS: ML_BSSN_UPW::ML_shift + READS: ML_BSSN_UPW::ML_trace_curv + WRITES: ADMBase::curv + WRITES: ADMBase::lapse + WRITES: ADMBase::metric + WRITES: ADMBase::shift } "ML_BSSN_UPW_convertToADMBase" @@ -367,6 +542,18 @@ if (CCTK_EQUALS(dt_lapse_shift_method, "correct")) LANG: C SYNC: ADMBase::dtlapse SYNC: ADMBase::dtshift + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_UPW::ML_dtlapse + READS: ML_BSSN_UPW::ML_dtshift + READS: ML_BSSN_UPW::ML_Gamma + READS: ML_BSSN_UPW::ML_lapse + READS: ML_BSSN_UPW::ML_log_confac + READS: ML_BSSN_UPW::ML_metric + READS: ML_BSSN_UPW::ML_shift + READS: ML_BSSN_UPW::ML_trace_curv + WRITES: ADMBase::dtlapse + WRITES: ADMBase::dtshift } "ML_BSSN_UPW_convertToADMBaseDtLapseShift" } @@ -376,6 +563,17 @@ if (CCTK_EQUALS(dt_lapse_shift_method, "correct")) schedule ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary IN ML_BSSN_UPW_convertToADMBaseGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_UPW::ML_dtlapse + READS: ML_BSSN_UPW::ML_dtshift + READS: ML_BSSN_UPW::ML_Gamma + READS: ML_BSSN_UPW::ML_lapse + READS: ML_BSSN_UPW::ML_metric + READS: ML_BSSN_UPW::ML_shift + READS: ML_BSSN_UPW::ML_trace_curv + WRITES: ADMBase::dtlapse + WRITES: ADMBase::dtshift } "ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary" } @@ -385,6 +583,17 @@ if (CCTK_EQUALS(dt_lapse_shift_method, "noLapseShiftAdvection")) schedule ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift IN ML_BSSN_UPW_convertToADMBaseGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_UPW::ML_dtlapse + READS: ML_BSSN_UPW::ML_dtshift + READS: ML_BSSN_UPW::ML_Gamma + READS: ML_BSSN_UPW::ML_lapse + READS: ML_BSSN_UPW::ML_metric + READS: ML_BSSN_UPW::ML_shift + READS: ML_BSSN_UPW::ML_trace_curv + WRITES: ADMBase::dtlapse + WRITES: ADMBase::dtshift } "ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift" } @@ -396,6 +605,17 @@ schedule group ML_BSSN_UPW_constraints1_group in MoL_PseudoEvolution after MoL_P schedule ML_BSSN_UPW_constraints1 in ML_BSSN_UPW_constraints1_group { LANG: C + READS: ML_BSSN_UPW::ML_curv + READS: ML_BSSN_UPW::ML_Gamma + READS: ML_BSSN_UPW::ML_lapse + READS: ML_BSSN_UPW::ML_log_confac + READS: ML_BSSN_UPW::ML_metric + READS: ML_BSSN_UPW::ML_shift + READS: ML_BSSN_UPW::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN_UPW::ML_Ham } "ML_BSSN_UPW_constraints1" schedule ML_BSSN_UPW_constraints1_SelectBCs in ML_BSSN_UPW_constraints1_bc_group @@ -428,6 +648,20 @@ schedule group ML_BSSN_UPW_constraints2_group in MoL_PseudoEvolution after MoL_P schedule ML_BSSN_UPW_constraints2 in ML_BSSN_UPW_constraints2_group { LANG: C + READS: ML_BSSN_UPW::ML_curv + READS: ML_BSSN_UPW::ML_Gamma + READS: ML_BSSN_UPW::ML_lapse + READS: ML_BSSN_UPW::ML_log_confac + READS: ML_BSSN_UPW::ML_metric + READS: ML_BSSN_UPW::ML_shift + READS: ML_BSSN_UPW::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN_UPW::ML_cons_detg + WRITES: ML_BSSN_UPW::ML_cons_Gamma + WRITES: ML_BSSN_UPW::ML_cons_traceA + WRITES: ML_BSSN_UPW::ML_mom } "ML_BSSN_UPW_constraints2" schedule ML_BSSN_UPW_constraints2_SelectBCs in ML_BSSN_UPW_constraints2_bc_group @@ -476,6 +710,12 @@ schedule ML_BSSN_UPW_CheckBoundaries at BASEGRID OPTIONS: meta } "check boundaries treatment" +schedule ML_BSSN_UPW_RegisterVars in MoL_Register +{ + LANG: C + OPTIONS: meta +} "Register Variables for MoL" + schedule group ApplyBCs as ML_BSSN_UPW_ApplyBCs in MoL_PostStep after ML_BSSN_UPW_SelectBoundConds { # no language specified diff --git a/ML_BSSN_UPW/src/ML_BSSN_UPW_Advect.cc b/ML_BSSN_UPW/src/ML_BSSN_UPW_Advect.cc index 132f6d2..1fc6d96 100644 --- a/ML_BSSN_UPW/src/ML_BSSN_UPW_Advect.cc +++ b/ML_BSSN_UPW/src/ML_BSSN_UPW_Advect.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -64,8 +65,6 @@ static void ML_BSSN_UPW_Advect_Body(cGH const * restrict const cctkGH, int const DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -102,9 +101,9 @@ static void ML_BSSN_UPW_Advect_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -123,14 +122,14 @@ static void ML_BSSN_UPW_Advect_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -144,9 +143,9 @@ static void ML_BSSN_UPW_Advect_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -227,7 +226,7 @@ static void ML_BSSN_UPW_Advect_Body(cGH const * restrict const cctkGH, int const /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_UPW_Advect, + LC_LOOP3VEC(ML_BSSN_UPW_Advect, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1161,7 +1160,7 @@ static void ML_BSSN_UPW_Advect_Body(cGH const * restrict const cctkGH, int const } phirhsL = - kmadd(beta1L,JacPDupwindNth1phi,kmadd(beta2L,JacPDupwindNth2phi,kmadd(beta3L,JacPDupwindNth3phi,phirhsL))); + kadd(phirhsL,kmadd(beta1L,JacPDupwindNth1phi,kmadd(beta2L,JacPDupwindNth2phi,kmul(beta3L,JacPDupwindNth3phi)))); gt11rhsL = kadd(gt11rhsL,kmadd(beta1L,JacPDupwindNth1gt11,kmadd(beta2L,JacPDupwindNth2gt11,kmul(beta3L,JacPDupwindNth3gt11)))); @@ -1182,16 +1181,16 @@ static void ML_BSSN_UPW_Advect_Body(cGH const * restrict const cctkGH, int const kadd(gt33rhsL,kmadd(beta1L,JacPDupwindNth1gt33,kmadd(beta2L,JacPDupwindNth2gt33,kmul(beta3L,JacPDupwindNth3gt33)))); Xt1rhsL = - kmadd(beta1L,JacPDupwindNth1Xt1,kmadd(beta2L,JacPDupwindNth2Xt1,kmadd(beta3L,JacPDupwindNth3Xt1,Xt1rhsL))); + kadd(Xt1rhsL,kmadd(beta1L,JacPDupwindNth1Xt1,kmadd(beta2L,JacPDupwindNth2Xt1,kmul(beta3L,JacPDupwindNth3Xt1)))); Xt2rhsL = - kmadd(beta1L,JacPDupwindNth1Xt2,kmadd(beta2L,JacPDupwindNth2Xt2,kmadd(beta3L,JacPDupwindNth3Xt2,Xt2rhsL))); + kadd(Xt2rhsL,kmadd(beta1L,JacPDupwindNth1Xt2,kmadd(beta2L,JacPDupwindNth2Xt2,kmul(beta3L,JacPDupwindNth3Xt2)))); Xt3rhsL = - kmadd(beta1L,JacPDupwindNth1Xt3,kmadd(beta2L,JacPDupwindNth2Xt3,kmadd(beta3L,JacPDupwindNth3Xt3,Xt3rhsL))); + kadd(Xt3rhsL,kmadd(beta1L,JacPDupwindNth1Xt3,kmadd(beta2L,JacPDupwindNth2Xt3,kmul(beta3L,JacPDupwindNth3Xt3)))); trKrhsL = - kmadd(beta1L,JacPDupwindNth1trK,kmadd(beta2L,JacPDupwindNth2trK,kmadd(beta3L,JacPDupwindNth3trK,trKrhsL))); + kadd(trKrhsL,kmadd(beta1L,JacPDupwindNth1trK,kmadd(beta2L,JacPDupwindNth2trK,kmul(beta3L,JacPDupwindNth3trK)))); At11rhsL = kadd(At11rhsL,kmadd(beta1L,JacPDupwindNth1At11,kmadd(beta2L,JacPDupwindNth2At11,kmul(beta3L,JacPDupwindNth3At11)))); @@ -1215,7 +1214,7 @@ static void ML_BSSN_UPW_Advect_Body(cGH const * restrict const cctkGH, int const kmadd(kmadd(beta1L,JacPDupwindNth1alpha,kmadd(beta2L,JacPDupwindNth2alpha,kmul(beta3L,JacPDupwindNth3alpha))),ToReal(LapseAdvectionCoeff),alpharhsL); ArhsL = - kmadd(kmadd(beta1L,JacPDupwindNth1A,kmadd(beta2L,JacPDupwindNth2A,kmul(beta3L,JacPDupwindNth3A))),ToReal(LapseAdvectionCoeff),ArhsL); + kmadd(ToReal(LapseACoeff),kmadd(beta1L,kmadd(ksub(JacPDupwindNth1A,JacPDupwindNth1trK),ToReal(LapseAdvectionCoeff),JacPDupwindNth1trK),kmadd(beta2L,kmadd(ksub(JacPDupwindNth2A,JacPDupwindNth2trK),ToReal(LapseAdvectionCoeff),JacPDupwindNth2trK),kmul(beta3L,kmadd(ksub(JacPDupwindNth3A,JacPDupwindNth3trK),ToReal(LapseAdvectionCoeff),JacPDupwindNth3trK)))),ArhsL); beta1rhsL = kmadd(kmadd(beta1L,JacPDupwindNth1beta1,kmadd(beta2L,JacPDupwindNth2beta1,kmul(beta3L,JacPDupwindNth3beta1))),ToReal(ShiftAdvectionCoeff),beta1rhsL); @@ -1227,140 +1226,43 @@ static void ML_BSSN_UPW_Advect_Body(cGH const * restrict const cctkGH, int const kmadd(kmadd(beta1L,JacPDupwindNth1beta3,kmadd(beta2L,JacPDupwindNth2beta3,kmul(beta3L,JacPDupwindNth3beta3))),ToReal(ShiftAdvectionCoeff),beta3rhsL); B1rhsL = - kadd(B1rhsL,kmadd(beta1L,kmadd(ksub(JacPDupwindNth1B1,JacPDupwindNth1Xt1),ToReal(ShiftAdvectionCoeff),kmul(JacPDupwindNth1Xt1,ToReal(ShiftBCoeff))),kmadd(beta2L,kmadd(ksub(JacPDupwindNth2B1,JacPDupwindNth2Xt1),ToReal(ShiftAdvectionCoeff),kmul(JacPDupwindNth2Xt1,ToReal(ShiftBCoeff))),kmul(beta3L,kmadd(ksub(JacPDupwindNth3B1,JacPDupwindNth3Xt1),ToReal(ShiftAdvectionCoeff),kmul(JacPDupwindNth3Xt1,ToReal(ShiftBCoeff))))))); + kmadd(kmadd(beta1L,kmadd(ksub(JacPDupwindNth1B1,JacPDupwindNth1Xt1),ToReal(ShiftAdvectionCoeff),JacPDupwindNth1Xt1),kmadd(beta2L,kmadd(ksub(JacPDupwindNth2B1,JacPDupwindNth2Xt1),ToReal(ShiftAdvectionCoeff),JacPDupwindNth2Xt1),kmul(beta3L,kmadd(ksub(JacPDupwindNth3B1,JacPDupwindNth3Xt1),ToReal(ShiftAdvectionCoeff),JacPDupwindNth3Xt1)))),ToReal(ShiftBCoeff),B1rhsL); B2rhsL = - kadd(B2rhsL,kmadd(beta1L,kmadd(ksub(JacPDupwindNth1B2,JacPDupwindNth1Xt2),ToReal(ShiftAdvectionCoeff),kmul(JacPDupwindNth1Xt2,ToReal(ShiftBCoeff))),kmadd(beta2L,kmadd(ksub(JacPDupwindNth2B2,JacPDupwindNth2Xt2),ToReal(ShiftAdvectionCoeff),kmul(JacPDupwindNth2Xt2,ToReal(ShiftBCoeff))),kmul(beta3L,kmadd(ksub(JacPDupwindNth3B2,JacPDupwindNth3Xt2),ToReal(ShiftAdvectionCoeff),kmul(JacPDupwindNth3Xt2,ToReal(ShiftBCoeff))))))); + kmadd(kmadd(beta1L,kmadd(ksub(JacPDupwindNth1B2,JacPDupwindNth1Xt2),ToReal(ShiftAdvectionCoeff),JacPDupwindNth1Xt2),kmadd(beta2L,kmadd(ksub(JacPDupwindNth2B2,JacPDupwindNth2Xt2),ToReal(ShiftAdvectionCoeff),JacPDupwindNth2Xt2),kmul(beta3L,kmadd(ksub(JacPDupwindNth3B2,JacPDupwindNth3Xt2),ToReal(ShiftAdvectionCoeff),JacPDupwindNth3Xt2)))),ToReal(ShiftBCoeff),B2rhsL); B3rhsL = - kadd(B3rhsL,kmadd(beta1L,kmadd(ksub(JacPDupwindNth1B3,JacPDupwindNth1Xt3),ToReal(ShiftAdvectionCoeff),kmul(JacPDupwindNth1Xt3,ToReal(ShiftBCoeff))),kmadd(beta2L,kmadd(ksub(JacPDupwindNth2B3,JacPDupwindNth2Xt3),ToReal(ShiftAdvectionCoeff),kmul(JacPDupwindNth2Xt3,ToReal(ShiftBCoeff))),kmul(beta3L,kmadd(ksub(JacPDupwindNth3B3,JacPDupwindNth3Xt3),ToReal(ShiftAdvectionCoeff),kmul(JacPDupwindNth3Xt3,ToReal(ShiftBCoeff))))))); - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ + kmadd(kmadd(beta1L,kmadd(ksub(JacPDupwindNth1B3,JacPDupwindNth1Xt3),ToReal(ShiftAdvectionCoeff),JacPDupwindNth1Xt3),kmadd(beta2L,kmadd(ksub(JacPDupwindNth2B3,JacPDupwindNth2Xt3),ToReal(ShiftAdvectionCoeff),JacPDupwindNth2Xt3),kmul(beta3L,kmadd(ksub(JacPDupwindNth3B3,JacPDupwindNth3Xt3),ToReal(ShiftAdvectionCoeff),JacPDupwindNth3Xt3)))),ToReal(ShiftBCoeff),B3rhsL); - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_UPW_Advect); + LC_ENDLOOP3VEC(ML_BSSN_UPW_Advect); } extern "C" void ML_BSSN_UPW_Advect(CCTK_ARGUMENTS) @@ -1379,7 +1281,25 @@ extern "C" void ML_BSSN_UPW_Advect(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_UPW::ML_curv","ML_BSSN_UPW::ML_curvrhs","ML_BSSN_UPW::ML_dtlapse","ML_BSSN_UPW::ML_dtlapserhs","ML_BSSN_UPW::ML_dtshift","ML_BSSN_UPW::ML_dtshiftrhs","ML_BSSN_UPW::ML_Gamma","ML_BSSN_UPW::ML_Gammarhs","ML_BSSN_UPW::ML_lapse","ML_BSSN_UPW::ML_lapserhs","ML_BSSN_UPW::ML_log_confac","ML_BSSN_UPW::ML_log_confacrhs","ML_BSSN_UPW::ML_metric","ML_BSSN_UPW::ML_metricrhs","ML_BSSN_UPW::ML_shift","ML_BSSN_UPW::ML_shiftrhs","ML_BSSN_UPW::ML_trace_curv","ML_BSSN_UPW::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN_UPW::ML_curv", + "ML_BSSN_UPW::ML_curvrhs", + "ML_BSSN_UPW::ML_dtlapse", + "ML_BSSN_UPW::ML_dtlapserhs", + "ML_BSSN_UPW::ML_dtshift", + "ML_BSSN_UPW::ML_dtshiftrhs", + "ML_BSSN_UPW::ML_Gamma", + "ML_BSSN_UPW::ML_Gammarhs", + "ML_BSSN_UPW::ML_lapse", + "ML_BSSN_UPW::ML_lapserhs", + "ML_BSSN_UPW::ML_log_confac", + "ML_BSSN_UPW::ML_log_confacrhs", + "ML_BSSN_UPW::ML_metric", + "ML_BSSN_UPW::ML_metricrhs", + "ML_BSSN_UPW::ML_shift", + "ML_BSSN_UPW::ML_shiftrhs", + "ML_BSSN_UPW::ML_trace_curv", + "ML_BSSN_UPW::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_UPW_Advect", 18, groups); switch(fdOrder) @@ -1401,7 +1321,7 @@ extern "C" void ML_BSSN_UPW_Advect(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_UPW_Advect_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_UPW_Advect_Body); if (verbose > 1) { diff --git a/ML_BSSN_UPW/src/ML_BSSN_UPW_Dissipation.cc b/ML_BSSN_UPW/src/ML_BSSN_UPW_Dissipation.cc index c2a54b5..f0b0b60 100644 --- a/ML_BSSN_UPW/src/ML_BSSN_UPW_Dissipation.cc +++ b/ML_BSSN_UPW/src/ML_BSSN_UPW_Dissipation.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -64,8 +65,6 @@ static void ML_BSSN_UPW_Dissipation_Body(cGH const * restrict const cctkGH, int DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -102,9 +101,9 @@ static void ML_BSSN_UPW_Dissipation_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -123,14 +122,14 @@ static void ML_BSSN_UPW_Dissipation_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -144,9 +143,9 @@ static void ML_BSSN_UPW_Dissipation_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -227,7 +226,7 @@ static void ML_BSSN_UPW_Dissipation_Body(cGH const * restrict const cctkGH, int /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_UPW_Dissipation, + LC_LOOP3VEC(ML_BSSN_UPW_Dissipation, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1161,7 +1160,7 @@ static void ML_BSSN_UPW_Dissipation_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC epsdiss3 = ToReal(EpsDiss); phirhsL = - kmadd(epsdiss1,JacPDdissipationNth1phi,kmadd(epsdiss2,JacPDdissipationNth2phi,kmadd(epsdiss3,JacPDdissipationNth3phi,phirhsL))); + kadd(phirhsL,kmadd(epsdiss1,JacPDdissipationNth1phi,kmadd(epsdiss2,JacPDdissipationNth2phi,kmul(epsdiss3,JacPDdissipationNth3phi)))); gt11rhsL = kadd(gt11rhsL,kmadd(epsdiss1,JacPDdissipationNth1gt11,kmadd(epsdiss2,JacPDdissipationNth2gt11,kmul(epsdiss3,JacPDdissipationNth3gt11)))); @@ -1182,16 +1181,16 @@ static void ML_BSSN_UPW_Dissipation_Body(cGH const * restrict const cctkGH, int kadd(gt33rhsL,kmadd(epsdiss1,JacPDdissipationNth1gt33,kmadd(epsdiss2,JacPDdissipationNth2gt33,kmul(epsdiss3,JacPDdissipationNth3gt33)))); Xt1rhsL = - kmadd(epsdiss1,JacPDdissipationNth1Xt1,kmadd(epsdiss2,JacPDdissipationNth2Xt1,kmadd(epsdiss3,JacPDdissipationNth3Xt1,Xt1rhsL))); + kadd(Xt1rhsL,kmadd(epsdiss1,JacPDdissipationNth1Xt1,kmadd(epsdiss2,JacPDdissipationNth2Xt1,kmul(epsdiss3,JacPDdissipationNth3Xt1)))); Xt2rhsL = - kmadd(epsdiss1,JacPDdissipationNth1Xt2,kmadd(epsdiss2,JacPDdissipationNth2Xt2,kmadd(epsdiss3,JacPDdissipationNth3Xt2,Xt2rhsL))); + kadd(Xt2rhsL,kmadd(epsdiss1,JacPDdissipationNth1Xt2,kmadd(epsdiss2,JacPDdissipationNth2Xt2,kmul(epsdiss3,JacPDdissipationNth3Xt2)))); Xt3rhsL = - kmadd(epsdiss1,JacPDdissipationNth1Xt3,kmadd(epsdiss2,JacPDdissipationNth2Xt3,kmadd(epsdiss3,JacPDdissipationNth3Xt3,Xt3rhsL))); + kadd(Xt3rhsL,kmadd(epsdiss1,JacPDdissipationNth1Xt3,kmadd(epsdiss2,JacPDdissipationNth2Xt3,kmul(epsdiss3,JacPDdissipationNth3Xt3)))); trKrhsL = - kmadd(epsdiss1,JacPDdissipationNth1trK,kmadd(epsdiss2,JacPDdissipationNth2trK,kmadd(epsdiss3,JacPDdissipationNth3trK,trKrhsL))); + kadd(trKrhsL,kmadd(epsdiss1,JacPDdissipationNth1trK,kmadd(epsdiss2,JacPDdissipationNth2trK,kmul(epsdiss3,JacPDdissipationNth3trK)))); At11rhsL = kadd(At11rhsL,kmadd(epsdiss1,JacPDdissipationNth1At11,kmadd(epsdiss2,JacPDdissipationNth2At11,kmul(epsdiss3,JacPDdissipationNth3At11)))); @@ -1235,132 +1234,35 @@ static void ML_BSSN_UPW_Dissipation_Body(cGH const * restrict const cctkGH, int B3rhsL = kadd(B3rhsL,kmadd(epsdiss1,JacPDdissipationNth1B3,kmadd(epsdiss2,JacPDdissipationNth2B3,kmul(epsdiss3,JacPDdissipationNth3B3)))); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_UPW_Dissipation); + LC_ENDLOOP3VEC(ML_BSSN_UPW_Dissipation); } extern "C" void ML_BSSN_UPW_Dissipation(CCTK_ARGUMENTS) @@ -1379,7 +1281,25 @@ extern "C" void ML_BSSN_UPW_Dissipation(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_UPW::ML_curv","ML_BSSN_UPW::ML_curvrhs","ML_BSSN_UPW::ML_dtlapse","ML_BSSN_UPW::ML_dtlapserhs","ML_BSSN_UPW::ML_dtshift","ML_BSSN_UPW::ML_dtshiftrhs","ML_BSSN_UPW::ML_Gamma","ML_BSSN_UPW::ML_Gammarhs","ML_BSSN_UPW::ML_lapse","ML_BSSN_UPW::ML_lapserhs","ML_BSSN_UPW::ML_log_confac","ML_BSSN_UPW::ML_log_confacrhs","ML_BSSN_UPW::ML_metric","ML_BSSN_UPW::ML_metricrhs","ML_BSSN_UPW::ML_shift","ML_BSSN_UPW::ML_shiftrhs","ML_BSSN_UPW::ML_trace_curv","ML_BSSN_UPW::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN_UPW::ML_curv", + "ML_BSSN_UPW::ML_curvrhs", + "ML_BSSN_UPW::ML_dtlapse", + "ML_BSSN_UPW::ML_dtlapserhs", + "ML_BSSN_UPW::ML_dtshift", + "ML_BSSN_UPW::ML_dtshiftrhs", + "ML_BSSN_UPW::ML_Gamma", + "ML_BSSN_UPW::ML_Gammarhs", + "ML_BSSN_UPW::ML_lapse", + "ML_BSSN_UPW::ML_lapserhs", + "ML_BSSN_UPW::ML_log_confac", + "ML_BSSN_UPW::ML_log_confacrhs", + "ML_BSSN_UPW::ML_metric", + "ML_BSSN_UPW::ML_metricrhs", + "ML_BSSN_UPW::ML_shift", + "ML_BSSN_UPW::ML_shiftrhs", + "ML_BSSN_UPW::ML_trace_curv", + "ML_BSSN_UPW::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_UPW_Dissipation", 18, groups); switch(fdOrder) @@ -1401,7 +1321,7 @@ extern "C" void ML_BSSN_UPW_Dissipation(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_UPW_Dissipation_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_UPW_Dissipation_Body); if (verbose > 1) { diff --git a/ML_BSSN_UPW/src/ML_BSSN_UPW_InitGamma.cc b/ML_BSSN_UPW/src/ML_BSSN_UPW_InitGamma.cc index 88908e9..eb95c47 100644 --- a/ML_BSSN_UPW/src/ML_BSSN_UPW_InitGamma.cc +++ b/ML_BSSN_UPW/src/ML_BSSN_UPW_InitGamma.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_UPW_InitGamma_Body(cGH const * restrict const cctkGH, int co DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_UPW_InitGamma_Body(cGH const * restrict const cctkGH, int co CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_UPW_InitGamma_Body(cGH const * restrict const cctkGH, int co CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_UPW_InitGamma_Body(cGH const * restrict const cctkGH, int co CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_UPW_InitGamma_Body(cGH const * restrict const cctkGH, int co /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_UPW_InitGamma, + LC_LOOP3VEC(ML_BSSN_UPW_InitGamma, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -237,60 +236,17 @@ static void ML_BSSN_UPW_InitGamma_Body(cGH const * restrict const cctkGH, int co CCTK_REAL_VEC B3L = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_UPW_InitGamma); + LC_ENDLOOP3VEC(ML_BSSN_UPW_InitGamma); } extern "C" void ML_BSSN_UPW_InitGamma(CCTK_ARGUMENTS) @@ -309,7 +265,10 @@ extern "C" void ML_BSSN_UPW_InitGamma(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_UPW::ML_dtlapse","ML_BSSN_UPW::ML_dtshift","ML_BSSN_UPW::ML_Gamma"}; + const char *const groups[] = { + "ML_BSSN_UPW::ML_dtlapse", + "ML_BSSN_UPW::ML_dtshift", + "ML_BSSN_UPW::ML_Gamma"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_UPW_InitGamma", 3, groups); switch(fdOrder) @@ -327,7 +286,7 @@ extern "C" void ML_BSSN_UPW_InitGamma(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_UPW_InitGamma_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_UPW_InitGamma_Body); if (verbose > 1) { diff --git a/ML_BSSN_UPW/src/ML_BSSN_UPW_InitRHS.cc b/ML_BSSN_UPW/src/ML_BSSN_UPW_InitRHS.cc index 385b62e..dc02a4a 100644 --- a/ML_BSSN_UPW/src/ML_BSSN_UPW_InitRHS.cc +++ b/ML_BSSN_UPW/src/ML_BSSN_UPW_InitRHS.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_UPW_InitRHS_Body(cGH const * restrict const cctkGH, int cons DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_UPW_InitRHS_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_UPW_InitRHS_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_UPW_InitRHS_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_UPW_InitRHS_Body(cGH const * restrict const cctkGH, int cons /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_UPW_InitRHS, + LC_LOOP3VEC(ML_BSSN_UPW_InitRHS, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -273,132 +272,35 @@ static void ML_BSSN_UPW_InitRHS_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC B3rhsL = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_UPW_InitRHS); + LC_ENDLOOP3VEC(ML_BSSN_UPW_InitRHS); } extern "C" void ML_BSSN_UPW_InitRHS(CCTK_ARGUMENTS) @@ -417,7 +319,16 @@ extern "C" void ML_BSSN_UPW_InitRHS(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_UPW::ML_curvrhs","ML_BSSN_UPW::ML_dtlapserhs","ML_BSSN_UPW::ML_dtshiftrhs","ML_BSSN_UPW::ML_Gammarhs","ML_BSSN_UPW::ML_lapserhs","ML_BSSN_UPW::ML_log_confacrhs","ML_BSSN_UPW::ML_metricrhs","ML_BSSN_UPW::ML_shiftrhs","ML_BSSN_UPW::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN_UPW::ML_curvrhs", + "ML_BSSN_UPW::ML_dtlapserhs", + "ML_BSSN_UPW::ML_dtshiftrhs", + "ML_BSSN_UPW::ML_Gammarhs", + "ML_BSSN_UPW::ML_lapserhs", + "ML_BSSN_UPW::ML_log_confacrhs", + "ML_BSSN_UPW::ML_metricrhs", + "ML_BSSN_UPW::ML_shiftrhs", + "ML_BSSN_UPW::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_UPW_InitRHS", 9, groups); switch(fdOrder) @@ -435,7 +346,7 @@ extern "C" void ML_BSSN_UPW_InitRHS(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_UPW_InitRHS_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_UPW_InitRHS_Body); if (verbose > 1) { diff --git a/ML_BSSN_UPW/src/ML_BSSN_UPW_Minkowski.cc b/ML_BSSN_UPW/src/ML_BSSN_UPW_Minkowski.cc index 91ef782..a77438e 100644 --- a/ML_BSSN_UPW/src/ML_BSSN_UPW_Minkowski.cc +++ b/ML_BSSN_UPW/src/ML_BSSN_UPW_Minkowski.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_UPW_Minkowski_Body(cGH const * restrict const cctkGH, int co DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_UPW_Minkowski_Body(cGH const * restrict const cctkGH, int co CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_UPW_Minkowski_Body(cGH const * restrict const cctkGH, int co CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_UPW_Minkowski_Body(cGH const * restrict const cctkGH, int co CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_UPW_Minkowski_Body(cGH const * restrict const cctkGH, int co /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_UPW_Minkowski, + LC_LOOP3VEC(ML_BSSN_UPW_Minkowski, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -273,132 +272,35 @@ static void ML_BSSN_UPW_Minkowski_Body(cGH const * restrict const cctkGH, int co CCTK_REAL_VEC B3L = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(beta1[index],beta1L,elt_count); - vec_store_nta_partial_hi(beta2[index],beta2L,elt_count); - vec_store_nta_partial_hi(beta3[index],beta3L,elt_count); - vec_store_nta_partial_hi(gt11[index],gt11L,elt_count); - vec_store_nta_partial_hi(gt12[index],gt12L,elt_count); - vec_store_nta_partial_hi(gt13[index],gt13L,elt_count); - vec_store_nta_partial_hi(gt22[index],gt22L,elt_count); - vec_store_nta_partial_hi(gt23[index],gt23L,elt_count); - vec_store_nta_partial_hi(gt33[index],gt33L,elt_count); - vec_store_nta_partial_hi(phi[index],phiL,elt_count); - vec_store_nta_partial_hi(trK[index],trKL,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(beta1[index],beta1L,elt_count); - vec_store_nta_partial_lo(beta2[index],beta2L,elt_count); - vec_store_nta_partial_lo(beta3[index],beta3L,elt_count); - vec_store_nta_partial_lo(gt11[index],gt11L,elt_count); - vec_store_nta_partial_lo(gt12[index],gt12L,elt_count); - vec_store_nta_partial_lo(gt13[index],gt13L,elt_count); - vec_store_nta_partial_lo(gt22[index],gt22L,elt_count); - vec_store_nta_partial_lo(gt23[index],gt23L,elt_count); - vec_store_nta_partial_lo(gt33[index],gt33L,elt_count); - vec_store_nta_partial_lo(phi[index],phiL,elt_count); - vec_store_nta_partial_lo(trK[index],trKL,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(beta1[index],beta1L); - vec_store_nta(beta2[index],beta2L); - vec_store_nta(beta3[index],beta3L); - vec_store_nta(gt11[index],gt11L); - vec_store_nta(gt12[index],gt12L); - vec_store_nta(gt13[index],gt13L); - vec_store_nta(gt22[index],gt22L); - vec_store_nta(gt23[index],gt23L); - vec_store_nta(gt33[index],gt33L); - vec_store_nta(phi[index],phiL); - vec_store_nta(trK[index],trKL); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(beta1[index],beta1L); + vec_store_nta_partial(beta2[index],beta2L); + vec_store_nta_partial(beta3[index],beta3L); + vec_store_nta_partial(gt11[index],gt11L); + vec_store_nta_partial(gt12[index],gt12L); + vec_store_nta_partial(gt13[index],gt13L); + vec_store_nta_partial(gt22[index],gt22L); + vec_store_nta_partial(gt23[index],gt23L); + vec_store_nta_partial(gt33[index],gt33L); + vec_store_nta_partial(phi[index],phiL); + vec_store_nta_partial(trK[index],trKL); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_UPW_Minkowski); + LC_ENDLOOP3VEC(ML_BSSN_UPW_Minkowski); } extern "C" void ML_BSSN_UPW_Minkowski(CCTK_ARGUMENTS) @@ -417,7 +319,16 @@ extern "C" void ML_BSSN_UPW_Minkowski(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_UPW::ML_curv","ML_BSSN_UPW::ML_dtlapse","ML_BSSN_UPW::ML_dtshift","ML_BSSN_UPW::ML_Gamma","ML_BSSN_UPW::ML_lapse","ML_BSSN_UPW::ML_log_confac","ML_BSSN_UPW::ML_metric","ML_BSSN_UPW::ML_shift","ML_BSSN_UPW::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_UPW::ML_curv", + "ML_BSSN_UPW::ML_dtlapse", + "ML_BSSN_UPW::ML_dtshift", + "ML_BSSN_UPW::ML_Gamma", + "ML_BSSN_UPW::ML_lapse", + "ML_BSSN_UPW::ML_log_confac", + "ML_BSSN_UPW::ML_metric", + "ML_BSSN_UPW::ML_shift", + "ML_BSSN_UPW::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_UPW_Minkowski", 9, groups); switch(fdOrder) @@ -435,7 +346,7 @@ extern "C" void ML_BSSN_UPW_Minkowski(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_UPW_Minkowski_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_UPW_Minkowski_Body); if (verbose > 1) { diff --git a/ML_BSSN_UPW/src/ML_BSSN_UPW_RHS1.cc b/ML_BSSN_UPW/src/ML_BSSN_UPW_RHS1.cc index a9c43d2..be7f83d 100644 --- a/ML_BSSN_UPW/src/ML_BSSN_UPW_RHS1.cc +++ b/ML_BSSN_UPW/src/ML_BSSN_UPW_RHS1.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -61,8 +62,6 @@ static void ML_BSSN_UPW_RHS1_Body(cGH const * restrict const cctkGH, int const d DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -99,9 +98,9 @@ static void ML_BSSN_UPW_RHS1_Body(cGH const * restrict const cctkGH, int const d CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -120,14 +119,14 @@ static void ML_BSSN_UPW_RHS1_Body(cGH const * restrict const cctkGH, int const d CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -141,9 +140,9 @@ static void ML_BSSN_UPW_RHS1_Body(cGH const * restrict const cctkGH, int const d CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -224,7 +223,7 @@ static void ML_BSSN_UPW_RHS1_Body(cGH const * restrict const cctkGH, int const d /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_UPW_RHS1, + LC_LOOP3VEC(ML_BSSN_UPW_RHS1, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1088,7 +1087,8 @@ static void ML_BSSN_UPW_RHS1_Body(cGH const * restrict const cctkGH, int const d CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -1096,12 +1096,14 @@ static void ML_BSSN_UPW_RHS1_Body(cGH const * restrict const cctkGH, int const d CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC Gtl111 = kmul(JacPDstandardNth1gt11,ToReal(0.5)); @@ -1307,13 +1309,13 @@ static void ML_BSSN_UPW_RHS1_Body(cGH const * restrict const cctkGH, int const d kmul(ToReal(-0.666666666666666666666666666667),kmadd(kmadd(gt13L,JacPDstandardNth3beta1,kmul(gt23L,JacPDstandardNth3beta2)),ToReal(-3),kmadd(gt33L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth3beta3,ToReal(-2),JacPDstandardNth2beta2)),kmul(alphaL,kmul(At33L,ToReal(3)))))); CCTK_REAL_VEC dotXt1 = - kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu11,JacPDstandardNth1alpha,kmadd(Atu12,JacPDstandardNth2alpha,kmul(Atu13,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth2beta1,Xtn2,kmul(JacPDstandardNth3beta1,Xtn3)),ToReal(-3),kmadd(Xtn1,kmsub(kadd(JacPDstandardNth2beta2,JacPDstandardNth3beta3),ToReal(2),JacPDstandardNth1beta1),kmadd(kmadd(gtu12,JacPDstandardNth12beta1,kmadd(gtu13,JacPDstandardNth13beta1,kmadd(gtu22,JacPDstandardNth22beta1,kmadd(gtu23,kadd(JacPDstandardNth23beta1,JacPDstandardNth32beta1),kmul(gtu33,JacPDstandardNth33beta1))))),ToReal(3),kmadd(gtu11,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth11beta1,ToReal(4),JacPDstandardNth13beta3)),kmadd(gtu12,kadd(JacPDstandardNth22beta2,kmadd(JacPDstandardNth21beta1,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu13,kadd(JacPDstandardNth32beta2,kmadd(JacPDstandardNth31beta1,ToReal(4),JacPDstandardNth33beta3)),kmul(alphaL,kmadd(kmadd(gtu11,S1,kmadd(gtu12,S2,kmul(gtu13,S3))),ToReal(-150.7964473723100754462068823974161384415),kmadd(kmadd(gtu11,JacPDstandardNth1trK,kmadd(gtu12,JacPDstandardNth2trK,kmul(gtu13,JacPDstandardNth3trK))),ToReal(-4),kmadd(ToReal(6),kmadd(Atu22,Gt122,kmadd(Atu33,Gt133,kmul(Atu11,kmadd(cdphi1,ToReal(6),Gt111)))),kmadd(kmadd(Atu23,Gt123,kmul(Atu12,kmadd(cdphi2,ToReal(3),Gt112))),ToReal(12),kmul(Atu13,kmadd(Gt113,ToReal(12),kmul(cdphi3,ToReal(36))))))))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu11,JacPDstandardNth1alpha,kmadd(Atu12,JacPDstandardNth2alpha,kmul(Atu13,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth2beta1,Xtn2,kmul(JacPDstandardNth3beta1,Xtn3)),ToReal(-3),kmadd(Xtn1,kmsub(JacPDstandardNth3beta3,ToReal(2),JacPDstandardNth1beta1),kmadd(kmadd(gtu12,JacPDstandardNth12beta1,kmadd(gtu13,JacPDstandardNth13beta1,kmadd(gtu22,JacPDstandardNth22beta1,kmadd(gtu23,kadd(JacPDstandardNth23beta1,JacPDstandardNth32beta1),kmul(gtu33,JacPDstandardNth33beta1))))),ToReal(3),kmadd(gtu11,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth11beta1,ToReal(4),JacPDstandardNth13beta3)),kmadd(gtu12,kadd(JacPDstandardNth22beta2,kmadd(JacPDstandardNth21beta1,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu13,kadd(JacPDstandardNth32beta2,kmadd(JacPDstandardNth31beta1,ToReal(4),JacPDstandardNth33beta3)),kmul(ToReal(2),kmadd(JacPDstandardNth2beta2,Xtn1,kmul(alphaL,kmadd(kmadd(gtu11,JacPDstandardNth1trK,kmadd(gtu12,JacPDstandardNth2trK,kmul(gtu13,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu23,Gt123,kmul(Atu12,kmadd(cdphi2,ToReal(3),Gt112))),ToReal(6),kmadd(ToReal(3),kmadd(Atu22,Gt122,kmadd(Atu33,Gt133,kmul(Atu11,kmadd(cdphi1,ToReal(6),Gt111)))),kmadd(Atu13,kmadd(Gt113,ToReal(6),kmul(cdphi3,ToReal(18))),kmul(kmadd(gtu11,S1,kmadd(gtu12,S2,kmul(gtu13,S3))),kmul(ToReal(-24),ToReal(Pi)))))))))))))))))); CCTK_REAL_VEC dotXt2 = - kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu12,JacPDstandardNth1alpha,kmadd(Atu22,JacPDstandardNth2alpha,kmul(Atu23,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta2,Xtn1,kmul(JacPDstandardNth3beta2,Xtn3)),ToReal(-3),kmadd(Xtn2,kmsub(kadd(JacPDstandardNth1beta1,JacPDstandardNth3beta3),ToReal(2),JacPDstandardNth2beta2),kmadd(kmadd(gtu11,JacPDstandardNth11beta2,kmadd(gtu23,JacPDstandardNth23beta2,kmadd(gtu13,kadd(JacPDstandardNth13beta2,JacPDstandardNth31beta2),kmul(gtu33,JacPDstandardNth33beta2)))),ToReal(3),kmadd(gtu12,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth13beta3,kmadd(JacPDstandardNth21beta2,ToReal(3),kmul(JacPDstandardNth12beta2,ToReal(4))))),kmadd(gtu22,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth22beta2,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu23,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth32beta2,ToReal(4),JacPDstandardNth33beta3)),kmul(alphaL,kmadd(kmadd(gtu12,S1,kmadd(gtu22,S2,kmul(gtu23,S3))),ToReal(-150.7964473723100754462068823974161384415),kmadd(kmadd(gtu12,JacPDstandardNth1trK,kmadd(gtu22,JacPDstandardNth2trK,kmul(gtu23,JacPDstandardNth3trK))),ToReal(-4),kmadd(ToReal(6),kmadd(Atu11,Gt211,kmadd(Atu33,Gt233,kmul(Atu22,kmadd(cdphi2,ToReal(6),Gt222)))),kmadd(kmadd(Atu13,Gt213,kmul(Atu12,kmadd(cdphi1,ToReal(3),Gt212))),ToReal(12),kmul(Atu23,kmadd(Gt223,ToReal(12),kmul(cdphi3,ToReal(36))))))))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu12,JacPDstandardNth1alpha,kmadd(Atu22,JacPDstandardNth2alpha,kmul(Atu23,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta2,Xtn1,kmul(JacPDstandardNth3beta2,Xtn3)),ToReal(-3),kmadd(Xtn2,kmsub(JacPDstandardNth3beta3,ToReal(2),JacPDstandardNth2beta2),kmadd(kmadd(gtu11,JacPDstandardNth11beta2,kmadd(gtu23,JacPDstandardNth23beta2,kmadd(gtu13,kadd(JacPDstandardNth13beta2,JacPDstandardNth31beta2),kmul(gtu33,JacPDstandardNth33beta2)))),ToReal(3),kmadd(gtu12,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth13beta3,kmadd(JacPDstandardNth21beta2,ToReal(3),kmul(JacPDstandardNth12beta2,ToReal(4))))),kmadd(gtu22,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth22beta2,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu23,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth32beta2,ToReal(4),JacPDstandardNth33beta3)),kmul(ToReal(2),kmadd(JacPDstandardNth1beta1,Xtn2,kmul(alphaL,kmadd(kmadd(gtu12,JacPDstandardNth1trK,kmadd(gtu22,JacPDstandardNth2trK,kmul(gtu23,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu13,Gt213,kmul(Atu12,kmadd(cdphi1,ToReal(3),Gt212))),ToReal(6),kmadd(ToReal(3),kmadd(Atu11,Gt211,kmadd(Atu33,Gt233,kmul(Atu22,kmadd(cdphi2,ToReal(6),Gt222)))),kmadd(Atu23,kmadd(Gt223,ToReal(6),kmul(cdphi3,ToReal(18))),kmul(kmadd(gtu12,S1,kmadd(gtu22,S2,kmul(gtu23,S3))),kmul(ToReal(-24),ToReal(Pi)))))))))))))))))); CCTK_REAL_VEC dotXt3 = - kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu13,JacPDstandardNth1alpha,kmadd(Atu23,JacPDstandardNth2alpha,kmul(Atu33,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta3,Xtn1,kmul(JacPDstandardNth2beta3,Xtn2)),ToReal(-3),kmadd(Xtn3,kmsub(kadd(JacPDstandardNth1beta1,JacPDstandardNth2beta2),ToReal(2),JacPDstandardNth3beta3),kmadd(kmadd(gtu11,JacPDstandardNth11beta3,kmadd(gtu12,kadd(JacPDstandardNth12beta3,JacPDstandardNth21beta3),kmadd(gtu22,JacPDstandardNth22beta3,kmul(gtu23,JacPDstandardNth32beta3)))),ToReal(3),kmadd(gtu13,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth31beta3,ToReal(3),kmul(JacPDstandardNth13beta3,ToReal(4))))),kmadd(gtu23,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth23beta3,ToReal(4),JacPDstandardNth22beta2)),kmadd(gtu33,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth33beta3,ToReal(4),JacPDstandardNth32beta2)),kmul(alphaL,kmadd(kmadd(gtu13,S1,kmadd(gtu23,S2,kmul(gtu33,S3))),ToReal(-150.7964473723100754462068823974161384415),kmadd(kmadd(gtu13,JacPDstandardNth1trK,kmadd(gtu23,JacPDstandardNth2trK,kmul(gtu33,JacPDstandardNth3trK))),ToReal(-4),kmadd(kmadd(Atu11,Gt311,kmul(Atu22,Gt322)),ToReal(6),kmadd(kmadd(Atu12,Gt312,kmadd(Atu13,kmadd(cdphi1,ToReal(3),Gt313),kmul(Atu23,kmadd(cdphi2,ToReal(3),Gt323)))),ToReal(12),kmul(Atu33,kmadd(Gt333,ToReal(6),kmul(cdphi3,ToReal(36))))))))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu13,JacPDstandardNth1alpha,kmadd(Atu23,JacPDstandardNth2alpha,kmul(Atu33,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta3,Xtn1,kmul(JacPDstandardNth2beta3,Xtn2)),ToReal(-3),kmadd(Xtn3,kmsub(JacPDstandardNth2beta2,ToReal(2),JacPDstandardNth3beta3),kmadd(kmadd(gtu11,JacPDstandardNth11beta3,kmadd(gtu12,kadd(JacPDstandardNth12beta3,JacPDstandardNth21beta3),kmadd(gtu22,JacPDstandardNth22beta3,kmul(gtu23,JacPDstandardNth32beta3)))),ToReal(3),kmadd(gtu13,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth31beta3,ToReal(3),kmul(JacPDstandardNth13beta3,ToReal(4))))),kmadd(gtu23,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth23beta3,ToReal(4),JacPDstandardNth22beta2)),kmadd(gtu33,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth33beta3,ToReal(4),JacPDstandardNth32beta2)),kmul(ToReal(2),kmadd(JacPDstandardNth1beta1,Xtn3,kmul(alphaL,kmadd(kmadd(gtu13,JacPDstandardNth1trK,kmadd(gtu23,JacPDstandardNth2trK,kmul(gtu33,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu11,Gt311,kmul(Atu22,Gt322)),ToReal(3),kmadd(kmadd(Atu12,Gt312,kmadd(Atu13,kmadd(cdphi1,ToReal(3),Gt313),kmul(Atu23,kmadd(cdphi2,ToReal(3),Gt323)))),ToReal(6),kmadd(Atu33,kmadd(Gt333,ToReal(3),kmul(cdphi3,ToReal(18))),kmul(kmadd(gtu13,S1,kmadd(gtu23,S2,kmul(gtu33,S3))),kmul(ToReal(-24),ToReal(Pi)))))))))))))))))); CCTK_REAL_VEC Xt1rhsL = dotXt1; @@ -1322,18 +1324,18 @@ static void ML_BSSN_UPW_RHS1_Body(cGH const * restrict const cctkGH, int const d CCTK_REAL_VEC Xt3rhsL = dotXt3; CCTK_REAL_VEC dottrK = - kmsub(alphaL,kadd(SQR(Atm11),kadd(SQR(Atm22),kadd(SQR(Atm33),kmadd(SQR(trKL),ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(2),kmul(kadd(rho,trS),ToReal(12.56637061435917295385057353311801153679))))))),kmul(em4phi,knmsub(JacPDstandardNth1alpha,Xtn1,knmsub(JacPDstandardNth2alpha,Xtn2,knmsub(JacPDstandardNth3alpha,Xtn3,kmadd(gtu11,kmadd(cdphi1,kmul(JacPDstandardNth1alpha,ToReal(2)),JacPDstandardNth11alpha),kmadd(gtu12,kadd(JacPDstandardNth12alpha,kadd(JacPDstandardNth21alpha,kmadd(cdphi2,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth2alpha,ToReal(2)))))),kmadd(gtu22,kmadd(cdphi2,kmul(JacPDstandardNth2alpha,ToReal(2)),JacPDstandardNth22alpha),kmadd(gtu13,kadd(JacPDstandardNth13alpha,kadd(JacPDstandardNth31alpha,kmadd(cdphi3,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmadd(gtu23,kadd(JacPDstandardNth23alpha,kadd(JacPDstandardNth32alpha,kmadd(cdphi3,kmul(JacPDstandardNth2alpha,ToReal(2)),kmul(cdphi2,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmul(gtu33,kmadd(cdphi3,kmul(JacPDstandardNth3alpha,ToReal(2)),JacPDstandardNth33alpha)))))))))))); + kmsub(alphaL,kadd(SQR(Atm11),kadd(SQR(Atm22),kadd(SQR(Atm33),kmadd(SQR(trKL),ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(2),kmul(kadd(rho,trS),kmul(ToReal(4),ToReal(Pi)))))))),kmul(em4phi,knmsub(JacPDstandardNth1alpha,Xtn1,knmsub(JacPDstandardNth2alpha,Xtn2,knmsub(JacPDstandardNth3alpha,Xtn3,kmadd(gtu11,kmadd(cdphi1,kmul(JacPDstandardNth1alpha,ToReal(2)),JacPDstandardNth11alpha),kmadd(gtu12,kadd(JacPDstandardNth12alpha,kadd(JacPDstandardNth21alpha,kmadd(cdphi2,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth2alpha,ToReal(2)))))),kmadd(gtu22,kmadd(cdphi2,kmul(JacPDstandardNth2alpha,ToReal(2)),JacPDstandardNth22alpha),kmadd(gtu13,kadd(JacPDstandardNth13alpha,kadd(JacPDstandardNth31alpha,kmadd(cdphi3,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmadd(gtu23,kadd(JacPDstandardNth23alpha,kadd(JacPDstandardNth32alpha,kmadd(cdphi3,kmul(JacPDstandardNth2alpha,ToReal(2)),kmul(cdphi2,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmul(gtu33,kmadd(cdphi3,kmul(JacPDstandardNth3alpha,ToReal(2)),JacPDstandardNth33alpha)))))))))))); CCTK_REAL_VEC trKrhsL = dottrK; CCTK_REAL_VEC alpharhsL = - kneg(kmul(kpow(alphaL,harmonicN),kmul(ToReal(harmonicF),kmadd(ksub(AL,trKL),ToReal(LapseACoeff),trKL)))); + kneg(kmul(kpow(alphaL,harmonicN),kmul(ToReal(harmonicF),kmadd(AL,ToReal(LapseACoeff),kmul(kmadd(kadd(alphaL,ToReal(-1)),ToReal(AlphaDriver),trKL),ksub(ToReal(1),ToReal(LapseACoeff))))))); CCTK_REAL_VEC ArhsL = kmul(knmsub(AL,ToReal(AlphaDriver),dottrK),ToReal(LapseACoeff)); CCTK_REAL_VEC eta = - kfmin(ToReal(1),kmul(INV(rL),ToReal(SpatialBetaDriverRadius))); + kmul(INV(kfmax(rL,ToReal(SpatialBetaDriverRadius))),ToReal(SpatialBetaDriverRadius)); CCTK_REAL_VEC theta = kfmin(ToReal(1),kexp(knmsub(rL,INV(ToReal(SpatialShiftGammaCoeffRadius)),ToReal(1)))); @@ -1345,27 +1347,24 @@ static void ML_BSSN_UPW_RHS1_Body(cGH const * restrict const cctkGH, int const d if (harmonicShift) { beta1rhsL = - kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(kmadd(gtu11,JacPDstandardNth1alpha,kmadd(gtu12,JacPDstandardNth2alpha,kmul(gtu13,JacPDstandardNth3alpha))),kmul(phiL,ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth1gt11,SQR(gtu11),kmul(JacPDstandardNth1gt22,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu13,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu13,JacPDstandardNth1gt33,kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu22,JacPDstandardNth3gt22))))),kmadd(gtu11,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu12,JacPDstandardNth2gt11,kmadd(gtu13,JacPDstandardNth3gt11,kmadd(gtu23,kmul(JacPDstandardNth1gt23,ToReal(-2)),knmsub(gtu22,JacPDstandardNth1gt22,kmadd(kmadd(gtu12,JacPDstandardNth1gt12,kmadd(gtu13,JacPDstandardNth1gt13,kmul(gtu22,JacPDstandardNth2gt12))),ToReal(2),kmadd(gtu23,kmul(JacPDstandardNth2gt13,ToReal(2)),kmadd(gtu23,kmul(JacPDstandardNth3gt12,ToReal(2)),kmul(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33))))))))))),kmul(gtu12,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu23,kmul(JacPDstandardNth3gt22,ToReal(2)),kmadd(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33),kmul(gtu13,kmul(JacPDstandardNth1gt23,ToReal(4))))))))))))))))); + kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu11,JacPDstandardNth1alpha,kmadd(gtu12,JacPDstandardNth2alpha,kmul(gtu13,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth1gt11,SQR(gtu11),kmul(JacPDstandardNth1gt22,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu13,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu13,JacPDstandardNth1gt33,kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu22,JacPDstandardNth3gt22))))),kmadd(gtu11,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu12,JacPDstandardNth2gt11,kmadd(gtu13,JacPDstandardNth3gt11,kmadd(gtu23,kmul(JacPDstandardNth1gt23,ToReal(-2)),knmsub(gtu22,JacPDstandardNth1gt22,kmadd(kmadd(gtu12,JacPDstandardNth1gt12,kmadd(gtu13,JacPDstandardNth1gt13,kmul(gtu22,JacPDstandardNth2gt12))),ToReal(2),kmadd(gtu23,kmul(JacPDstandardNth2gt13,ToReal(2)),kmadd(gtu23,kmul(JacPDstandardNth3gt12,ToReal(2)),kmul(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33))))))))))),kmul(gtu12,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu23,kmul(JacPDstandardNth3gt22,ToReal(2)),kmadd(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33),kmul(gtu13,kmul(JacPDstandardNth1gt23,ToReal(4))))))))))))))))); beta2rhsL = - kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(kmadd(gtu12,JacPDstandardNth1alpha,kmadd(gtu22,JacPDstandardNth2alpha,kmul(gtu23,JacPDstandardNth3alpha))),kmul(phiL,ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth2gt22,SQR(gtu22),kmul(JacPDstandardNth2gt11,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu23,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt13,kmadd(gtu13,JacPDstandardNth1gt33,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu11,JacPDstandardNth3gt11))))),kmadd(gtu22,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu23,JacPDstandardNth3gt22,kmadd(kmadd(gtu23,JacPDstandardNth2gt23,kmul(gtu13,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth3gt12,JacPDstandardNth2gt13)))),ToReal(2),kmadd(gtu11,kmsub(JacPDstandardNth1gt12,ToReal(2),JacPDstandardNth2gt11),kmul(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33))))))),kmul(gtu12,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu13,kmul(JacPDstandardNth3gt11,ToReal(2)),kmadd(gtu22,kmadd(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth2gt13,ToReal(4)))))))))))))))))); + kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu12,JacPDstandardNth1alpha,kmadd(gtu22,JacPDstandardNth2alpha,kmul(gtu23,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth2gt22,SQR(gtu22),kmul(JacPDstandardNth2gt11,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu23,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt13,kmadd(gtu13,JacPDstandardNth1gt33,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu11,JacPDstandardNth3gt11))))),kmadd(gtu22,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu23,JacPDstandardNth3gt22,kmadd(kmadd(gtu23,JacPDstandardNth2gt23,kmul(gtu13,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth3gt12,JacPDstandardNth2gt13)))),ToReal(2),kmadd(gtu11,kmsub(JacPDstandardNth1gt12,ToReal(2),JacPDstandardNth2gt11),kmul(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33))))))),kmul(gtu12,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu13,kmul(JacPDstandardNth3gt11,ToReal(2)),kmadd(gtu22,kmadd(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth2gt13,ToReal(4)))))))))))))))))); beta3rhsL = - kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(kmadd(gtu13,JacPDstandardNth1alpha,kmadd(gtu23,JacPDstandardNth2alpha,kmul(gtu33,JacPDstandardNth3alpha))),kmul(phiL,ToReal(-2)),kmul(alphaL,kmadd(phiL,kmul(kmadd(JacPDstandardNth3gt11,SQR(gtu13),kmul(JacPDstandardNth3gt22,SQR(gtu23))),ToReal(2)),kmadd(gtu23,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu33,JacPDstandardNth2gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt12,kmadd(gtu12,JacPDstandardNth1gt22,kmul(gtu33,JacPDstandardNth3gt23))),ToReal(2),kmul(gtu11,JacPDstandardNth2gt11)))))),kmadd(gtu33,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,knmsub(gtu22,JacPDstandardNth3gt22,kmadd(kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu12,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth2gt13,JacPDstandardNth3gt12)))),ToReal(2),kmul(gtu11,kmsub(JacPDstandardNth1gt13,ToReal(2),JacPDstandardNth3gt11))))))),kmul(gtu13,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu12,kmul(JacPDstandardNth2gt11,ToReal(2)),kmadd(gtu22,kmsub(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmadd(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth3gt12,ToReal(4)))))))))))))))))); + kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu13,JacPDstandardNth1alpha,kmadd(gtu23,JacPDstandardNth2alpha,kmul(gtu33,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmul(kmadd(JacPDstandardNth3gt11,SQR(gtu13),kmul(JacPDstandardNth3gt22,SQR(gtu23))),ToReal(2)),kmadd(gtu23,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu33,JacPDstandardNth2gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt12,kmadd(gtu12,JacPDstandardNth1gt22,kmul(gtu33,JacPDstandardNth3gt23))),ToReal(2),kmul(gtu11,JacPDstandardNth2gt11)))))),kmadd(gtu33,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,knmsub(gtu22,JacPDstandardNth3gt22,kmadd(kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu12,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth2gt13,JacPDstandardNth3gt12)))),ToReal(2),kmul(gtu11,kmsub(JacPDstandardNth1gt13,ToReal(2),JacPDstandardNth3gt11))))))),kmul(gtu13,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu12,kmul(JacPDstandardNth2gt11,ToReal(2)),kmadd(gtu22,kmsub(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmadd(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth3gt12,ToReal(4)))))))))))))))))); } else { beta1rhsL = - kmul(theta,kmul(kadd(Xt1L,kmadd(beta1L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B1L,Xt1L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt1L,kmadd(ksub(B1L,Xt1L),ToReal(ShiftBCoeff),kmul(beta1L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); beta2rhsL = - kmul(theta,kmul(kadd(Xt2L,kmadd(beta2L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B2L,Xt2L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt2L,kmadd(ksub(B2L,Xt2L),ToReal(ShiftBCoeff),kmul(beta2L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); beta3rhsL = - kmul(theta,kmul(kadd(Xt3L,kmadd(beta3L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B3L,Xt3L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); } CCTK_REAL_VEC B1rhsL = @@ -1377,108 +1376,29 @@ static void ML_BSSN_UPW_RHS1_Body(cGH const * restrict const cctkGH, int const d CCTK_REAL_VEC B3rhsL = kmul(knmsub(B3L,kmul(eta,ToReal(BetaDriver)),dotXt3),ToReal(ShiftBCoeff)); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_UPW_RHS1); + LC_ENDLOOP3VEC(ML_BSSN_UPW_RHS1); } extern "C" void ML_BSSN_UPW_RHS1(CCTK_ARGUMENTS) @@ -1497,7 +1417,26 @@ extern "C" void ML_BSSN_UPW_RHS1(CCTK_ARGUMENTS) return; } - const char *groups[] = {"grid::coordinates","Grid::coordinates","ML_BSSN_UPW::ML_curv","ML_BSSN_UPW::ML_dtlapse","ML_BSSN_UPW::ML_dtlapserhs","ML_BSSN_UPW::ML_dtshift","ML_BSSN_UPW::ML_dtshiftrhs","ML_BSSN_UPW::ML_Gamma","ML_BSSN_UPW::ML_Gammarhs","ML_BSSN_UPW::ML_lapse","ML_BSSN_UPW::ML_lapserhs","ML_BSSN_UPW::ML_log_confac","ML_BSSN_UPW::ML_log_confacrhs","ML_BSSN_UPW::ML_metric","ML_BSSN_UPW::ML_metricrhs","ML_BSSN_UPW::ML_shift","ML_BSSN_UPW::ML_shiftrhs","ML_BSSN_UPW::ML_trace_curv","ML_BSSN_UPW::ML_trace_curvrhs"}; + const char *const groups[] = { + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_UPW::ML_curv", + "ML_BSSN_UPW::ML_dtlapse", + "ML_BSSN_UPW::ML_dtlapserhs", + "ML_BSSN_UPW::ML_dtshift", + "ML_BSSN_UPW::ML_dtshiftrhs", + "ML_BSSN_UPW::ML_Gamma", + "ML_BSSN_UPW::ML_Gammarhs", + "ML_BSSN_UPW::ML_lapse", + "ML_BSSN_UPW::ML_lapserhs", + "ML_BSSN_UPW::ML_log_confac", + "ML_BSSN_UPW::ML_log_confacrhs", + "ML_BSSN_UPW::ML_metric", + "ML_BSSN_UPW::ML_metricrhs", + "ML_BSSN_UPW::ML_shift", + "ML_BSSN_UPW::ML_shiftrhs", + "ML_BSSN_UPW::ML_trace_curv", + "ML_BSSN_UPW::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_UPW_RHS1", 19, groups); switch(fdOrder) @@ -1519,7 +1458,7 @@ extern "C" void ML_BSSN_UPW_RHS1(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_UPW_RHS1_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_UPW_RHS1_Body); if (verbose > 1) { diff --git a/ML_BSSN_UPW/src/ML_BSSN_UPW_RHS2.cc b/ML_BSSN_UPW/src/ML_BSSN_UPW_RHS2.cc index f3529a3..4178637 100644 --- a/ML_BSSN_UPW/src/ML_BSSN_UPW_RHS2.cc +++ b/ML_BSSN_UPW/src/ML_BSSN_UPW_RHS2.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -40,8 +41,6 @@ static void ML_BSSN_UPW_RHS2_Body(cGH const * restrict const cctkGH, int const d DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -78,9 +77,9 @@ static void ML_BSSN_UPW_RHS2_Body(cGH const * restrict const cctkGH, int const d CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -99,14 +98,14 @@ static void ML_BSSN_UPW_RHS2_Body(cGH const * restrict const cctkGH, int const d CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -120,9 +119,9 @@ static void ML_BSSN_UPW_RHS2_Body(cGH const * restrict const cctkGH, int const d CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -203,7 +202,7 @@ static void ML_BSSN_UPW_RHS2_Body(cGH const * restrict const cctkGH, int const d /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_UPW_RHS2, + LC_LOOP3VEC(ML_BSSN_UPW_RHS2, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1420,7 +1419,8 @@ static void ML_BSSN_UPW_RHS2_Body(cGH const * restrict const cctkGH, int const d CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -1428,12 +1428,14 @@ static void ML_BSSN_UPW_RHS2_Body(cGH const * restrict const cctkGH, int const d CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC Gtl111 = kmul(JacPDstandardNth1gt11,ToReal(0.5)); @@ -1676,16 +1678,16 @@ static void ML_BSSN_UPW_RHS2_Body(cGH const * restrict const cctkGH, int const d kmul(ToReal(-2),kadd(cdphi211,kmadd(SQR(cdphi1),kmul(kmadd(gt11L,gtu11,ToReal(-1)),ToReal(2)),kmul(gt11L,kmadd(cdphi211,gtu11,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu33,SQR(cdphi3))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmul(kmadd(cdphi1,kmadd(cdphi2,gtu12,kmul(cdphi3,gtu13)),kmul(cdphi2,kmul(cdphi3,gtu23))),ToReal(4)))))))))); CCTK_REAL_VEC Rphi12 = - kmul(ToReal(-2),kadd(cdphi212,kmadd(gt12L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(cdphi3,kmul(gt12L,kmul(gtu13,ToReal(4))),kmul(cdphi2,kmadd(gt12L,kmul(gtu12,ToReal(4)),ToReal(-2)))))))); + kmul(ToReal(-2),kadd(cdphi212,kmadd(gt12L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(gt12L,kmul(cdphi3,kmul(gtu13,ToReal(4))),kmul(cdphi2,kmadd(gt12L,kmul(gtu12,ToReal(4)),ToReal(-2)))))))); CCTK_REAL_VEC Rphi13 = - kmul(ToReal(-2),kadd(cdphi213,kmadd(gt13L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(cdphi2,kmul(gt13L,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt13L,kmul(gtu13,ToReal(4)),ToReal(-2)))))))); + kmul(ToReal(-2),kadd(cdphi213,kmadd(gt13L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(gt13L,kmul(cdphi2,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt13L,kmul(gtu13,ToReal(4)),ToReal(-2)))))))); CCTK_REAL_VEC Rphi22 = kmul(ToReal(-2),kadd(cdphi222,kmadd(SQR(cdphi2),kmul(kmadd(gt22L,gtu22,ToReal(-1)),ToReal(2)),kmul(gt22L,kmadd(cdphi222,gtu22,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu33,SQR(cdphi3))))),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmul(kmadd(cdphi1,kmul(cdphi3,gtu13),kmul(cdphi2,kmadd(cdphi1,gtu12,kmul(cdphi3,gtu23)))),ToReal(4)))))))))); CCTK_REAL_VEC Rphi23 = - kmul(ToReal(-2),kadd(cdphi223,kmadd(gt23L,kmadd(cdphi222,gtu22,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu22,SQR(cdphi2))))),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi1,kmul(cdphi3,kmul(gtu13,ToReal(4)))))))),kmul(cdphi2,kmadd(cdphi1,kmul(gt23L,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt23L,kmul(gtu23,ToReal(4)),ToReal(-2)))))))); + kmul(ToReal(-2),kadd(cdphi223,kmadd(gt23L,kmadd(cdphi222,gtu22,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu22,SQR(cdphi2))))),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi1,kmul(cdphi3,kmul(gtu13,ToReal(4)))))))),kmul(cdphi2,kmadd(gt23L,kmul(cdphi1,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt23L,kmul(gtu23,ToReal(4)),ToReal(-2)))))))); CCTK_REAL_VEC Rphi33 = kmul(ToReal(-2),kadd(cdphi233,kmadd(SQR(cdphi3),kmul(kmadd(gt33L,gtu33,ToReal(-1)),ToReal(2)),kmul(gt33L,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi213,gtu13,kmul(cdphi223,gtu23)),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(cdphi3,kmul(kmadd(cdphi1,gtu13,kmul(cdphi2,gtu23)),ToReal(4)),kmul(gtu12,kmadd(cdphi212,ToReal(2),kmul(cdphi1,kmul(cdphi2,ToReal(4)))))))))))))); @@ -1722,17 +1724,17 @@ static void ML_BSSN_UPW_RHS2_Body(cGH const * restrict const cctkGH, int const d CCTK_REAL_VEC em4phi = INV(e4phi); - CCTK_REAL_VEC g11 = kmul(e4phi,gt11L); + CCTK_REAL_VEC g11 = kmul(gt11L,e4phi); - CCTK_REAL_VEC g12 = kmul(e4phi,gt12L); + CCTK_REAL_VEC g12 = kmul(gt12L,e4phi); - CCTK_REAL_VEC g13 = kmul(e4phi,gt13L); + CCTK_REAL_VEC g13 = kmul(gt13L,e4phi); - CCTK_REAL_VEC g22 = kmul(e4phi,gt22L); + CCTK_REAL_VEC g22 = kmul(gt22L,e4phi); - CCTK_REAL_VEC g23 = kmul(e4phi,gt23L); + CCTK_REAL_VEC g23 = kmul(gt23L,e4phi); - CCTK_REAL_VEC g33 = kmul(e4phi,gt33L); + CCTK_REAL_VEC g33 = kmul(gt33L,e4phi); CCTK_REAL_VEC gu11 = kmul(em4phi,gtu11); @@ -1783,73 +1785,33 @@ static void ML_BSSN_UPW_RHS2_Body(cGH const * restrict const cctkGH, int const d kmadd(Ats11,gu11,kmadd(Ats22,gu22,kmadd(Ats33,gu33,kmul(kmadd(Ats12,gu12,kmadd(Ats13,gu13,kmul(Ats23,gu23))),ToReal(2))))); CCTK_REAL_VEC At11rhsL = - kmadd(em4phi,kmadd(g11,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),Ats11),kmadd(At11L,kmadd(kadd(JacPDstandardNth2beta2,JacPDstandardNth3beta3),ToReal(-0.6666666666666666666666666666666666666667),kmul(JacPDstandardNth1beta1,ToReal(1.333333333333333333333333333333333333333))),kmadd(kmadd(At12L,JacPDstandardNth1beta2,kmul(At13L,JacPDstandardNth1beta3)),ToReal(2.),kmul(alphaL,kmadd(kmadd(At12L,Atm21,kmul(At13L,Atm31)),ToReal(-2.),kmadd(At11L,kmadd(Atm11,ToReal(-2.),trKL),kmul(em4phi,kmadd(eTxxL,ToReal(-25.13274122871834590770114706623602307358),kmul(g11,kmul(trS,ToReal(8.377580409572781969233715688745341024526))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats11,ToReal(3),kmul(g11,trAts)),kmadd(At11L,kmadd(kadd(JacPDstandardNth2beta2,JacPDstandardNth3beta3),ToReal(-2),kmul(JacPDstandardNth1beta1,ToReal(4))),kmsub(kmadd(At12L,JacPDstandardNth1beta2,kmul(At13L,JacPDstandardNth1beta3)),ToReal(6),kmul(alphaL,kmadd(kmadd(At12L,Atm21,kmul(At13L,Atm31)),ToReal(6),kmadd(At11L,kmadd(trKL,ToReal(-3),kmul(Atm11,ToReal(6))),kmul(em4phi,kmul(kmadd(g11,kmul(trS,ToReal(-8)),kmul(eTxxL,ToReal(24))),ToReal(Pi)))))))))); CCTK_REAL_VEC At12rhsL = - kmadd(At22L,JacPDstandardNth1beta2,kmadd(At23L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth2beta1,kmadd(At13L,JacPDstandardNth2beta3,kmadd(em4phi,kmadd(g12,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),Ats12),kmadd(At12L,kmadd(JacPDstandardNth3beta3,ToReal(-0.6666666666666666666666666666666666666667),kmul(kadd(JacPDstandardNth1beta1,JacPDstandardNth2beta2),ToReal(0.3333333333333333333333333333333333333333))),kmul(alphaL,kmadd(At12L,trKL,kmadd(kmadd(At11L,Atm12,kmadd(At12L,Atm22,kmul(At13L,Atm32))),ToReal(-2.),kmul(em4phi,kmadd(eTxyL,ToReal(-25.13274122871834590770114706623602307358),kmul(g12,kmul(trS,ToReal(8.377580409572781969233715688745341024526)))))))))))))); + kmadd(ToReal(0.333333333333333333333333333333),kmadd(At12L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth3beta3,ToReal(-2),JacPDstandardNth2beta2)),kmsub(kmadd(Ats12,em4phi,kmadd(At22L,JacPDstandardNth1beta2,kmadd(At23L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth2beta1,kmul(At13L,JacPDstandardNth2beta3))))),ToReal(3),kmul(em4phi,kmul(g12,trAts)))),kmul(alphaL,kmadd(kmadd(At11L,Atm12,kmul(At13L,Atm32)),ToReal(-2),kmadd(At12L,kmadd(Atm22,ToReal(-2),trKL),kmul(em4phi,kmadd(g12,kmul(trS,ToReal(8.37758040957278196923371568875)),kmul(eTxyL,kmul(ToReal(-8),ToReal(Pi))))))))); CCTK_REAL_VEC At13rhsL = - kmadd(At23L,JacPDstandardNth1beta2,kmadd(At33L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth3beta1,kmadd(At12L,JacPDstandardNth3beta2,kmadd(em4phi,kmadd(g13,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),Ats13),kmadd(At13L,kmadd(JacPDstandardNth2beta2,ToReal(-0.6666666666666666666666666666666666666667),kmul(kadd(JacPDstandardNth1beta1,JacPDstandardNth3beta3),ToReal(0.3333333333333333333333333333333333333333))),kmul(alphaL,kmadd(At13L,trKL,kmadd(kmadd(At11L,Atm13,kmadd(At12L,Atm23,kmul(At13L,Atm33))),ToReal(-2.),kmul(em4phi,kmadd(eTxzL,ToReal(-25.13274122871834590770114706623602307358),kmul(g13,kmul(trS,ToReal(8.377580409572781969233715688745341024526)))))))))))))); + kmadd(ToReal(0.333333333333333333333333333333),kmadd(At13L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth2beta2,ToReal(-2),JacPDstandardNth3beta3)),kmsub(kmadd(Ats13,em4phi,kmadd(At23L,JacPDstandardNth1beta2,kmadd(At33L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth3beta1,kmul(At12L,JacPDstandardNth3beta2))))),ToReal(3),kmul(em4phi,kmul(g13,trAts)))),kmul(alphaL,kmadd(kmadd(At11L,Atm13,kmul(At12L,Atm23)),ToReal(-2),kmadd(At13L,kmadd(Atm33,ToReal(-2),trKL),kmul(em4phi,kmadd(g13,kmul(trS,ToReal(8.37758040957278196923371568875)),kmul(eTxzL,kmul(ToReal(-8),ToReal(Pi))))))))); CCTK_REAL_VEC At22rhsL = - kmadd(em4phi,kmadd(g22,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),Ats22),kmadd(At22L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth3beta3),ToReal(-0.6666666666666666666666666666666666666667),kmul(JacPDstandardNth2beta2,ToReal(1.333333333333333333333333333333333333333))),kmadd(kmadd(At12L,JacPDstandardNth2beta1,kmul(At23L,JacPDstandardNth2beta3)),ToReal(2.),kmul(alphaL,kmadd(At22L,trKL,kmadd(kmadd(At12L,Atm12,kmadd(At22L,Atm22,kmul(At23L,Atm32))),ToReal(-2.),kmul(em4phi,kmadd(eTyyL,ToReal(-25.13274122871834590770114706623602307358),kmul(g22,kmul(trS,ToReal(8.377580409572781969233715688745341024526))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats22,ToReal(3),kmul(g22,trAts)),kmadd(At22L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth3beta3),ToReal(-2),kmul(JacPDstandardNth2beta2,ToReal(4))),kmsub(kmadd(At12L,JacPDstandardNth2beta1,kmul(At23L,JacPDstandardNth2beta3)),ToReal(6),kmul(alphaL,kmadd(kmadd(At12L,Atm12,kmul(At23L,Atm32)),ToReal(6),kmadd(At22L,kmadd(trKL,ToReal(-3),kmul(Atm22,ToReal(6))),kmul(em4phi,kmul(kmadd(g22,kmul(trS,ToReal(-8)),kmul(eTyyL,ToReal(24))),ToReal(Pi)))))))))); CCTK_REAL_VEC At23rhsL = - kmadd(At13L,JacPDstandardNth2beta1,kmadd(At33L,JacPDstandardNth2beta3,kmadd(At12L,JacPDstandardNth3beta1,kmadd(At22L,JacPDstandardNth3beta2,kmadd(em4phi,kmadd(g23,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),Ats23),kmadd(At23L,kmadd(JacPDstandardNth1beta1,ToReal(-0.6666666666666666666666666666666666666667),kmul(kadd(JacPDstandardNth2beta2,JacPDstandardNth3beta3),ToReal(0.3333333333333333333333333333333333333333))),kmul(alphaL,kmadd(At23L,trKL,kmadd(kmadd(At12L,Atm13,kmadd(At22L,Atm23,kmul(At23L,Atm33))),ToReal(-2.),kmul(em4phi,kmadd(eTyzL,ToReal(-25.13274122871834590770114706623602307358),kmul(g23,kmul(trS,ToReal(8.377580409572781969233715688745341024526)))))))))))))); + kmadd(ToReal(0.333333333333333333333333333333),kmadd(At23L,kadd(JacPDstandardNth2beta2,kmadd(JacPDstandardNth1beta1,ToReal(-2),JacPDstandardNth3beta3)),kmsub(kmadd(Ats23,em4phi,kmadd(At13L,JacPDstandardNth2beta1,kmadd(At33L,JacPDstandardNth2beta3,kmadd(At12L,JacPDstandardNth3beta1,kmul(At22L,JacPDstandardNth3beta2))))),ToReal(3),kmul(em4phi,kmul(g23,trAts)))),kmul(alphaL,kmadd(kmadd(At12L,Atm13,kmul(At22L,Atm23)),ToReal(-2),kmadd(At23L,kmadd(Atm33,ToReal(-2),trKL),kmul(em4phi,kmadd(g23,kmul(trS,ToReal(8.37758040957278196923371568875)),kmul(eTyzL,kmul(ToReal(-8),ToReal(Pi))))))))); CCTK_REAL_VEC At33rhsL = - kmadd(em4phi,kmadd(g33,kmul(trAts,ToReal(-0.3333333333333333333333333333333333333333)),Ats33),kmadd(At33L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth2beta2),ToReal(-0.6666666666666666666666666666666666666667),kmul(JacPDstandardNth3beta3,ToReal(1.333333333333333333333333333333333333333))),kmadd(kmadd(At13L,JacPDstandardNth3beta1,kmul(At23L,JacPDstandardNth3beta2)),ToReal(2.),kmul(alphaL,kmadd(At33L,trKL,kmadd(kmadd(At13L,Atm13,kmadd(At23L,Atm23,kmul(At33L,Atm33))),ToReal(-2.),kmul(em4phi,kmadd(eTzzL,ToReal(-25.13274122871834590770114706623602307358),kmul(g33,kmul(trS,ToReal(8.377580409572781969233715688745341024526))))))))))); - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - break; - } - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); + kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats33,ToReal(3),kmul(g33,trAts)),kmadd(At33L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth2beta2),ToReal(-2),kmul(JacPDstandardNth3beta3,ToReal(4))),kmsub(kmadd(At13L,JacPDstandardNth3beta1,kmul(At23L,JacPDstandardNth3beta2)),ToReal(6),kmul(alphaL,kmadd(kmadd(At13L,Atm13,kmul(At23L,Atm23)),ToReal(6),kmadd(At33L,kmadd(trKL,ToReal(-3),kmul(Atm33,ToReal(6))),kmul(em4phi,kmul(kmadd(g33,kmul(trS,ToReal(-8)),kmul(eTzzL,ToReal(24))),ToReal(Pi)))))))))); + + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_UPW_RHS2); + LC_ENDLOOP3VEC(ML_BSSN_UPW_RHS2); } extern "C" void ML_BSSN_UPW_RHS2(CCTK_ARGUMENTS) @@ -1868,7 +1830,15 @@ extern "C" void ML_BSSN_UPW_RHS2(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_UPW::ML_curv","ML_BSSN_UPW::ML_curvrhs","ML_BSSN_UPW::ML_Gamma","ML_BSSN_UPW::ML_lapse","ML_BSSN_UPW::ML_log_confac","ML_BSSN_UPW::ML_metric","ML_BSSN_UPW::ML_shift","ML_BSSN_UPW::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_UPW::ML_curv", + "ML_BSSN_UPW::ML_curvrhs", + "ML_BSSN_UPW::ML_Gamma", + "ML_BSSN_UPW::ML_lapse", + "ML_BSSN_UPW::ML_log_confac", + "ML_BSSN_UPW::ML_metric", + "ML_BSSN_UPW::ML_shift", + "ML_BSSN_UPW::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_UPW_RHS2", 8, groups); switch(fdOrder) @@ -1890,7 +1860,7 @@ extern "C" void ML_BSSN_UPW_RHS2(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_UPW_RHS2_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_UPW_RHS2_Body); if (verbose > 1) { diff --git a/ML_BSSN_UPW/src/ML_BSSN_UPW_RHSStaticBoundary.cc b/ML_BSSN_UPW/src/ML_BSSN_UPW_RHSStaticBoundary.cc index 107fe0d..1e24ff9 100644 --- a/ML_BSSN_UPW/src/ML_BSSN_UPW_RHSStaticBoundary.cc +++ b/ML_BSSN_UPW/src/ML_BSSN_UPW_RHSStaticBoundary.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -64,8 +65,6 @@ static void ML_BSSN_UPW_RHSStaticBoundary_Body(cGH const * restrict const cctkGH DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -102,9 +101,9 @@ static void ML_BSSN_UPW_RHSStaticBoundary_Body(cGH const * restrict const cctkGH CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -123,14 +122,14 @@ static void ML_BSSN_UPW_RHSStaticBoundary_Body(cGH const * restrict const cctkGH CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -144,9 +143,9 @@ static void ML_BSSN_UPW_RHSStaticBoundary_Body(cGH const * restrict const cctkGH CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -227,7 +226,7 @@ static void ML_BSSN_UPW_RHSStaticBoundary_Body(cGH const * restrict const cctkGH /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_UPW_RHSStaticBoundary, + LC_LOOP3VEC(ML_BSSN_UPW_RHSStaticBoundary, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -309,132 +308,35 @@ static void ML_BSSN_UPW_RHSStaticBoundary_Body(cGH const * restrict const cctkGH CCTK_REAL_VEC B3rhsL = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_UPW_RHSStaticBoundary); + LC_ENDLOOP3VEC(ML_BSSN_UPW_RHSStaticBoundary); } extern "C" void ML_BSSN_UPW_RHSStaticBoundary(CCTK_ARGUMENTS) @@ -453,7 +355,16 @@ extern "C" void ML_BSSN_UPW_RHSStaticBoundary(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_UPW::ML_curvrhs","ML_BSSN_UPW::ML_dtlapserhs","ML_BSSN_UPW::ML_dtshiftrhs","ML_BSSN_UPW::ML_Gammarhs","ML_BSSN_UPW::ML_lapserhs","ML_BSSN_UPW::ML_log_confacrhs","ML_BSSN_UPW::ML_metricrhs","ML_BSSN_UPW::ML_shiftrhs","ML_BSSN_UPW::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN_UPW::ML_curvrhs", + "ML_BSSN_UPW::ML_dtlapserhs", + "ML_BSSN_UPW::ML_dtshiftrhs", + "ML_BSSN_UPW::ML_Gammarhs", + "ML_BSSN_UPW::ML_lapserhs", + "ML_BSSN_UPW::ML_log_confacrhs", + "ML_BSSN_UPW::ML_metricrhs", + "ML_BSSN_UPW::ML_shiftrhs", + "ML_BSSN_UPW::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_UPW_RHSStaticBoundary", 9, groups); switch(fdOrder) @@ -471,7 +382,7 @@ extern "C" void ML_BSSN_UPW_RHSStaticBoundary(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverBoundary(cctkGH, &ML_BSSN_UPW_RHSStaticBoundary_Body); + GenericFD_LoopOverBoundary(cctkGH, ML_BSSN_UPW_RHSStaticBoundary_Body); if (verbose > 1) { diff --git a/ML_BSSN_UPW/src/ML_BSSN_UPW_boundary.cc b/ML_BSSN_UPW/src/ML_BSSN_UPW_boundary.cc index f964149..4d6f0de 100644 --- a/ML_BSSN_UPW/src/ML_BSSN_UPW_boundary.cc +++ b/ML_BSSN_UPW/src/ML_BSSN_UPW_boundary.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -64,8 +65,6 @@ static void ML_BSSN_UPW_boundary_Body(cGH const * restrict const cctkGH, int con DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -102,9 +101,9 @@ static void ML_BSSN_UPW_boundary_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -123,14 +122,14 @@ static void ML_BSSN_UPW_boundary_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -144,9 +143,9 @@ static void ML_BSSN_UPW_boundary_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -227,7 +226,7 @@ static void ML_BSSN_UPW_boundary_Body(cGH const * restrict const cctkGH, int con /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_UPW_boundary, + LC_LOOP3VEC(ML_BSSN_UPW_boundary, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -309,132 +308,35 @@ static void ML_BSSN_UPW_boundary_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC B3L = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(beta1[index],beta1L,elt_count); - vec_store_nta_partial_hi(beta2[index],beta2L,elt_count); - vec_store_nta_partial_hi(beta3[index],beta3L,elt_count); - vec_store_nta_partial_hi(gt11[index],gt11L,elt_count); - vec_store_nta_partial_hi(gt12[index],gt12L,elt_count); - vec_store_nta_partial_hi(gt13[index],gt13L,elt_count); - vec_store_nta_partial_hi(gt22[index],gt22L,elt_count); - vec_store_nta_partial_hi(gt23[index],gt23L,elt_count); - vec_store_nta_partial_hi(gt33[index],gt33L,elt_count); - vec_store_nta_partial_hi(phi[index],phiL,elt_count); - vec_store_nta_partial_hi(trK[index],trKL,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(beta1[index],beta1L,elt_count); - vec_store_nta_partial_lo(beta2[index],beta2L,elt_count); - vec_store_nta_partial_lo(beta3[index],beta3L,elt_count); - vec_store_nta_partial_lo(gt11[index],gt11L,elt_count); - vec_store_nta_partial_lo(gt12[index],gt12L,elt_count); - vec_store_nta_partial_lo(gt13[index],gt13L,elt_count); - vec_store_nta_partial_lo(gt22[index],gt22L,elt_count); - vec_store_nta_partial_lo(gt23[index],gt23L,elt_count); - vec_store_nta_partial_lo(gt33[index],gt33L,elt_count); - vec_store_nta_partial_lo(phi[index],phiL,elt_count); - vec_store_nta_partial_lo(trK[index],trKL,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(beta1[index],beta1L); - vec_store_nta(beta2[index],beta2L); - vec_store_nta(beta3[index],beta3L); - vec_store_nta(gt11[index],gt11L); - vec_store_nta(gt12[index],gt12L); - vec_store_nta(gt13[index],gt13L); - vec_store_nta(gt22[index],gt22L); - vec_store_nta(gt23[index],gt23L); - vec_store_nta(gt33[index],gt33L); - vec_store_nta(phi[index],phiL); - vec_store_nta(trK[index],trKL); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(beta1[index],beta1L); + vec_store_nta_partial(beta2[index],beta2L); + vec_store_nta_partial(beta3[index],beta3L); + vec_store_nta_partial(gt11[index],gt11L); + vec_store_nta_partial(gt12[index],gt12L); + vec_store_nta_partial(gt13[index],gt13L); + vec_store_nta_partial(gt22[index],gt22L); + vec_store_nta_partial(gt23[index],gt23L); + vec_store_nta_partial(gt33[index],gt33L); + vec_store_nta_partial(phi[index],phiL); + vec_store_nta_partial(trK[index],trKL); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_UPW_boundary); + LC_ENDLOOP3VEC(ML_BSSN_UPW_boundary); } extern "C" void ML_BSSN_UPW_boundary(CCTK_ARGUMENTS) @@ -453,7 +355,16 @@ extern "C" void ML_BSSN_UPW_boundary(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_UPW::ML_curv","ML_BSSN_UPW::ML_dtlapse","ML_BSSN_UPW::ML_dtshift","ML_BSSN_UPW::ML_Gamma","ML_BSSN_UPW::ML_lapse","ML_BSSN_UPW::ML_log_confac","ML_BSSN_UPW::ML_metric","ML_BSSN_UPW::ML_shift","ML_BSSN_UPW::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_UPW::ML_curv", + "ML_BSSN_UPW::ML_dtlapse", + "ML_BSSN_UPW::ML_dtshift", + "ML_BSSN_UPW::ML_Gamma", + "ML_BSSN_UPW::ML_lapse", + "ML_BSSN_UPW::ML_log_confac", + "ML_BSSN_UPW::ML_metric", + "ML_BSSN_UPW::ML_shift", + "ML_BSSN_UPW::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_UPW_boundary", 9, groups); switch(fdOrder) @@ -471,7 +382,7 @@ extern "C" void ML_BSSN_UPW_boundary(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverBoundaryWithGhosts(cctkGH, &ML_BSSN_UPW_boundary_Body); + GenericFD_LoopOverBoundaryWithGhosts(cctkGH, ML_BSSN_UPW_boundary_Body); if (verbose > 1) { diff --git a/ML_BSSN_UPW/src/ML_BSSN_UPW_constraints1.cc b/ML_BSSN_UPW/src/ML_BSSN_UPW_constraints1.cc index efa9216..929992f 100644 --- a/ML_BSSN_UPW/src/ML_BSSN_UPW_constraints1.cc +++ b/ML_BSSN_UPW/src/ML_BSSN_UPW_constraints1.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -40,8 +41,6 @@ static void ML_BSSN_UPW_constraints1_Body(cGH const * restrict const cctkGH, int DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -78,9 +77,9 @@ static void ML_BSSN_UPW_constraints1_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -99,14 +98,14 @@ static void ML_BSSN_UPW_constraints1_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -120,9 +119,9 @@ static void ML_BSSN_UPW_constraints1_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -203,7 +202,7 @@ static void ML_BSSN_UPW_constraints1_Body(cGH const * restrict const cctkGH, int /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_UPW_constraints1, + LC_LOOP3VEC(ML_BSSN_UPW_constraints1, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1224,7 +1223,8 @@ static void ML_BSSN_UPW_constraints1_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -1232,12 +1232,14 @@ static void ML_BSSN_UPW_constraints1_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC Gtl111 = kmul(JacPDstandardNth1gt11,ToReal(0.5)); @@ -1480,16 +1482,16 @@ static void ML_BSSN_UPW_constraints1_Body(cGH const * restrict const cctkGH, int kmul(ToReal(-2),kadd(cdphi211,kmadd(SQR(cdphi1),kmul(kmadd(gt11L,gtu11,ToReal(-1)),ToReal(2)),kmul(gt11L,kmadd(cdphi211,gtu11,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu33,SQR(cdphi3))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmul(kmadd(cdphi1,kmadd(cdphi2,gtu12,kmul(cdphi3,gtu13)),kmul(cdphi2,kmul(cdphi3,gtu23))),ToReal(4)))))))))); CCTK_REAL_VEC Rphi12 = - kmul(ToReal(-2),kadd(cdphi212,kmadd(gt12L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(cdphi3,kmul(gt12L,kmul(gtu13,ToReal(4))),kmul(cdphi2,kmadd(gt12L,kmul(gtu12,ToReal(4)),ToReal(-2)))))))); + kmul(ToReal(-2),kadd(cdphi212,kmadd(gt12L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(gt12L,kmul(cdphi3,kmul(gtu13,ToReal(4))),kmul(cdphi2,kmadd(gt12L,kmul(gtu12,ToReal(4)),ToReal(-2)))))))); CCTK_REAL_VEC Rphi13 = - kmul(ToReal(-2),kadd(cdphi213,kmadd(gt13L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(cdphi2,kmul(gt13L,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt13L,kmul(gtu13,ToReal(4)),ToReal(-2)))))))); + kmul(ToReal(-2),kadd(cdphi213,kmadd(gt13L,kmadd(cdphi211,gtu11,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu11,SQR(cdphi1))))),ToReal(2),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi2,kmul(cdphi3,kmul(gtu23,ToReal(4)))))))),kmul(cdphi1,kmadd(gt13L,kmul(cdphi2,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt13L,kmul(gtu13,ToReal(4)),ToReal(-2)))))))); CCTK_REAL_VEC Rphi22 = kmul(ToReal(-2),kadd(cdphi222,kmadd(SQR(cdphi2),kmul(kmadd(gt22L,gtu22,ToReal(-1)),ToReal(2)),kmul(gt22L,kmadd(cdphi222,gtu22,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu33,SQR(cdphi3))))),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmul(kmadd(cdphi1,kmul(cdphi3,gtu13),kmul(cdphi2,kmadd(cdphi1,gtu12,kmul(cdphi3,gtu23)))),ToReal(4)))))))))); CCTK_REAL_VEC Rphi23 = - kmul(ToReal(-2),kadd(cdphi223,kmadd(gt23L,kmadd(cdphi222,gtu22,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu22,SQR(cdphi2))))),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi1,kmul(cdphi3,kmul(gtu13,ToReal(4)))))))),kmul(cdphi2,kmadd(cdphi1,kmul(gt23L,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt23L,kmul(gtu23,ToReal(4)),ToReal(-2)))))))); + kmul(ToReal(-2),kadd(cdphi223,kmadd(gt23L,kmadd(cdphi222,gtu22,kmadd(kmadd(cdphi212,gtu12,kmadd(cdphi213,gtu13,kmadd(cdphi223,gtu23,kmul(gtu22,SQR(cdphi2))))),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmadd(gtu33,kmadd(SQR(cdphi3),ToReal(2),cdphi233),kmul(cdphi1,kmul(cdphi3,kmul(gtu13,ToReal(4)))))))),kmul(cdphi2,kmadd(gt23L,kmul(cdphi1,kmul(gtu12,ToReal(4))),kmul(cdphi3,kmadd(gt23L,kmul(gtu23,ToReal(4)),ToReal(-2)))))))); CCTK_REAL_VEC Rphi33 = kmul(ToReal(-2),kadd(cdphi233,kmadd(SQR(cdphi3),kmul(kmadd(gt33L,gtu33,ToReal(-1)),ToReal(2)),kmul(gt33L,kmadd(cdphi233,gtu33,kmadd(kmadd(cdphi213,gtu13,kmul(cdphi223,gtu23)),ToReal(2),kmadd(gtu11,kmadd(SQR(cdphi1),ToReal(2),cdphi211),kmadd(gtu22,kmadd(SQR(cdphi2),ToReal(2),cdphi222),kmadd(cdphi3,kmul(kmadd(cdphi1,gtu13,kmul(cdphi2,gtu23)),ToReal(4)),kmul(gtu12,kmadd(cdphi212,ToReal(2),kmul(cdphi1,kmul(cdphi2,ToReal(4)))))))))))))); @@ -1557,38 +1559,13 @@ static void ML_BSSN_UPW_constraints1_Body(cGH const * restrict const cctkGH, int kmul(INV(SQR(alphaL)),kadd(eTttL,kmadd(eTxxL,SQR(beta1L),kmadd(eTyyL,SQR(beta2L),kmadd(eTzzL,SQR(beta3L),kmadd(kmadd(beta2L,eTtyL,kmul(beta3L,eTtzL)),ToReal(-2),kmul(kmadd(beta2L,kmul(beta3L,eTyzL),kmul(beta1L,kmadd(beta2L,eTxyL,kmsub(beta3L,eTxzL,eTtxL)))),ToReal(2)))))))); CCTK_REAL_VEC HL = - kadd(trR,kmadd(rho,ToReal(-50.26548245743669181540229413247204614715),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(-2.),kmadd(kadd(SQR(Atm11),kadd(SQR(Atm22),SQR(Atm33))),ToReal(-1.),kmul(SQR(trKL),ToReal(0.6666666666666666666666666666666666666667)))))); - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(H[index],HL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ + kadd(trR,kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(-2),ksub(ksub(ksub(kmadd(SQR(trKL),ToReal(0.666666666666666666666666666667),kmul(rho,kmul(ToReal(-16),ToReal(Pi)))),SQR(Atm33)),SQR(Atm22)),SQR(Atm11)))); - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(H[index],HL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(H[index],HL,elt_count); - break; - } - vec_store_nta(H[index],HL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(H[index],HL); } - LC_ENDLOOP3VEC (ML_BSSN_UPW_constraints1); + LC_ENDLOOP3VEC(ML_BSSN_UPW_constraints1); } extern "C" void ML_BSSN_UPW_constraints1(CCTK_ARGUMENTS) @@ -1607,7 +1584,15 @@ extern "C" void ML_BSSN_UPW_constraints1(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_UPW::ML_curv","ML_BSSN_UPW::ML_Gamma","ML_BSSN_UPW::ML_Ham","ML_BSSN_UPW::ML_lapse","ML_BSSN_UPW::ML_log_confac","ML_BSSN_UPW::ML_metric","ML_BSSN_UPW::ML_shift","ML_BSSN_UPW::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_UPW::ML_curv", + "ML_BSSN_UPW::ML_Gamma", + "ML_BSSN_UPW::ML_Ham", + "ML_BSSN_UPW::ML_lapse", + "ML_BSSN_UPW::ML_log_confac", + "ML_BSSN_UPW::ML_metric", + "ML_BSSN_UPW::ML_shift", + "ML_BSSN_UPW::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_UPW_constraints1", 8, groups); switch(fdOrder) @@ -1629,7 +1614,7 @@ extern "C" void ML_BSSN_UPW_constraints1(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_UPW_constraints1_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_UPW_constraints1_Body); if (verbose > 1) { diff --git a/ML_BSSN_UPW/src/ML_BSSN_UPW_constraints2.cc b/ML_BSSN_UPW/src/ML_BSSN_UPW_constraints2.cc index 5a27be0..e15db36 100644 --- a/ML_BSSN_UPW/src/ML_BSSN_UPW_constraints2.cc +++ b/ML_BSSN_UPW/src/ML_BSSN_UPW_constraints2.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -49,8 +50,6 @@ static void ML_BSSN_UPW_constraints2_Body(cGH const * restrict const cctkGH, int DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -87,9 +86,9 @@ static void ML_BSSN_UPW_constraints2_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -108,14 +107,14 @@ static void ML_BSSN_UPW_constraints2_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -129,9 +128,9 @@ static void ML_BSSN_UPW_constraints2_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -212,7 +211,7 @@ static void ML_BSSN_UPW_constraints2_Body(cGH const * restrict const cctkGH, int /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_UPW_constraints2, + LC_LOOP3VEC(ML_BSSN_UPW_constraints2, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -775,7 +774,8 @@ static void ML_BSSN_UPW_constraints2_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -783,12 +783,14 @@ static void ML_BSSN_UPW_constraints2_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC Gtl111 = kmul(JacPDstandardNth1gt11,ToReal(0.5)); @@ -908,13 +910,13 @@ static void ML_BSSN_UPW_constraints2_Body(cGH const * restrict const cctkGH, int kmul(INV(alphaL),kmadd(beta1L,eTxzL,kmadd(beta2L,eTyzL,kmsub(beta3L,eTzzL,eTtzL)))); CCTK_REAL_VEC M1L = - kmadd(gtu11,JacPDstandardNth1At11,kmadd(gtu22,JacPDstandardNth2At12,kmadd(gtu23,kadd(JacPDstandardNth2At13,JacPDstandardNth3At12),kmadd(gtu33,JacPDstandardNth3At13,kmadd(S1,ToReal(-25.13274122871834590770114706623602307358),kmadd(kmadd(kmadd(At22L,Gt212,kmul(At23L,Gt312)),gtu22,kmadd(kmadd(At13L,Gt112,kmadd(At22L,Gt213,kmadd(At33L,Gt312,kmul(At23L,kadd(Gt212,Gt313))))),gtu23,kmul(kmadd(At13L,Gt113,kmadd(At23L,Gt213,kmul(At33L,Gt313))),gtu33))),ToReal(-1.),kmadd(gtu12,kadd(JacPDstandardNth1At12,kadd(JacPDstandardNth2At11,kmadd(At13L,kmul(Gt312,ToReal(-3.)),kmul(At22L,kmul(Gt211,ToReal(-1.)))))),kmadd(gtu13,kadd(JacPDstandardNth1At13,kadd(JacPDstandardNth3At11,kmadd(At13L,kmul(Gt313,ToReal(-3.)),kmul(At23L,kmul(Gt211,ToReal(-1.)))))),kmadd(Gt311,kmadd(At13L,kmul(gtu11,ToReal(-2.)),kmul(kmadd(At23L,gtu12,kmul(At33L,gtu13)),ToReal(-1.))),kmadd(JacPDstandardNth1trK,ToReal(-0.6666666666666666666666666666666666666667),kmadd(At13L,kmadd(kmadd(Gt322,gtu22,kmul(Gt333,gtu33)),ToReal(-1.),kmadd(cdphi3,kmul(gtu33,ToReal(6.)),kmadd(gtu13,kmadd(Gt111,ToReal(-1.),kmul(cdphi1,ToReal(6.))),kmul(gtu23,kmadd(Gt323,ToReal(-2.),kmul(cdphi2,ToReal(6.))))))),kmadd(At11L,kmadd(Gt123,kmul(gtu23,ToReal(-2.)),kmadd(kmadd(Gt122,gtu22,kmul(Gt133,gtu33)),ToReal(-1.),kmadd(gtu11,kmadd(Gt111,ToReal(-2.),kmul(cdphi1,ToReal(6.))),kmadd(gtu12,kmadd(Gt112,ToReal(-3.),kmul(cdphi2,ToReal(6.))),kmul(gtu13,kmadd(Gt113,ToReal(-3.),kmul(cdphi3,ToReal(6.)))))))),kmul(At12L,kmadd(Gt213,kmul(gtu13,ToReal(-3.)),kmadd(kmadd(Gt211,gtu11,kmul(Gt223,gtu23)),ToReal(-2.),kmadd(Gt233,kmul(gtu33,ToReal(-1.)),kmadd(gtu12,kmadd(Gt212,ToReal(-3.),kmadd(Gt111,ToReal(-1.),kmul(cdphi1,ToReal(6.)))),kmadd(gtu22,kmadd(kadd(Gt112,Gt222),ToReal(-1.),kmul(cdphi2,ToReal(6.))),kmul(gtu23,kmadd(Gt113,ToReal(-1.),kmul(cdphi3,ToReal(6.)))))))))))))))))))))); + kmadd(gtu11,JacPDstandardNth1At11,kmadd(gtu12,JacPDstandardNth2At11,kmadd(gtu22,JacPDstandardNth2At12,kmadd(gtu23,JacPDstandardNth2At13,kmadd(gtu13,JacPDstandardNth3At11,kmadd(gtu23,JacPDstandardNth3At12,kmadd(gtu33,JacPDstandardNth3At13,kmadd(gtu12,kadd(JacPDstandardNth1At12,kmsub(At13L,kmul(Gt312,ToReal(-3)),kmul(At22L,Gt211))),kmadd(gtu13,kadd(JacPDstandardNth1At13,kmsub(At13L,kmul(Gt313,ToReal(-3)),kmul(At23L,Gt211))),knmsub(gtu22,kmadd(At23L,Gt312,kmul(At22L,Gt212)),kmadd(Gt311,kmsub(At13L,kmul(gtu11,ToReal(-2)),kmadd(At33L,gtu13,kmul(At23L,gtu12))),kmadd(At13L,kmsub(Gt323,kmul(gtu23,ToReal(-2)),kmul(Gt113,gtu33)),kmadd(JacPDstandardNth1trK,ToReal(-0.666666666666666666666666666667),knmsub(At12L,kmadd(Gt111,gtu12,kmadd(Gt112,gtu22,kmadd(Gt222,gtu22,kmadd(Gt113,gtu23,kmadd(Gt233,gtu33,kmadd(cdphi1,kmul(gtu12,ToReal(-6)),kmadd(cdphi2,kmul(gtu22,ToReal(-6)),kmadd(cdphi3,kmul(gtu23,ToReal(-6)),kmadd(Gt211,kmul(gtu11,ToReal(2)),kmadd(Gt223,kmul(gtu23,ToReal(2)),kmadd(Gt212,kmul(gtu12,ToReal(3)),kmul(Gt213,kmul(gtu13,ToReal(3)))))))))))))),kmadd(gtu23,knmsub(At22L,Gt213,knmsub(At33L,Gt312,kmsub(At13L,kmul(cdphi2,ToReal(6)),kmul(At23L,kadd(Gt313,Gt212))))),kmadd(gtu33,knmsub(At23L,Gt213,kmsub(At13L,kmul(cdphi3,ToReal(6)),kmul(At33L,Gt313))),kmadd(At11L,kmadd(Gt112,kmul(gtu12,ToReal(-3)),kmadd(Gt113,kmul(gtu13,ToReal(-3)),kmadd(Gt111,kmul(gtu11,ToReal(-2)),kmadd(Gt123,kmul(gtu23,ToReal(-2)),knmsub(Gt122,gtu22,knmsub(Gt133,gtu33,kmadd(cdphi1,kmul(gtu11,ToReal(6)),kmadd(cdphi2,kmul(gtu12,ToReal(6)),kmul(cdphi3,kmul(gtu13,ToReal(6))))))))))),kmadd(At13L,knmsub(Gt322,gtu22,knmsub(Gt112,gtu23,kmsub(gtu13,kmsub(cdphi1,ToReal(6),Gt111),kmul(Gt333,gtu33)))),kmul(S1,kmul(ToReal(-8),ToReal(Pi))))))))))))))))))))); CCTK_REAL_VEC M2L = - kmadd(gtu11,JacPDstandardNth1At12,kmadd(gtu12,kadd(JacPDstandardNth1At22,JacPDstandardNth2At12),kmadd(gtu22,JacPDstandardNth2At22,kmadd(gtu33,JacPDstandardNth3At23,kmadd(S2,ToReal(-25.13274122871834590770114706623602307358),kmadd(kmadd(kmadd(At22L,Gt211,kmadd(At23L,Gt311,kmul(At13L,Gt312))),gtu11,kmadd(kmadd(At23L,Gt212,kmul(At33L,Gt312)),gtu13,kmadd(At11L,kmadd(Gt112,gtu11,kmadd(Gt122,gtu12,kmul(Gt123,gtu13))),kmadd(kmadd(At23L,Gt223,kmul(At33L,Gt323)),gtu33,kmul(At13L,kmadd(Gt322,gtu12,kmadd(Gt112,gtu13,kmadd(Gt122,gtu23,kmul(Gt123,gtu33))))))))),ToReal(-1.),kmadd(gtu23,kadd(JacPDstandardNth2At23,kadd(JacPDstandardNth3At22,kmadd(kmadd(At22L,Gt223,kmul(At23L,Gt323)),ToReal(-3.),kmul(kmadd(At23L,Gt222,kmul(At33L,Gt322)),ToReal(-1.))))),kmadd(gtu13,kadd(JacPDstandardNth1At23,kadd(JacPDstandardNth3At12,kmadd(At23L,kmul(Gt313,ToReal(-2.)),kmul(At13L,kmul(Gt323,ToReal(-1.)))))),kmadd(JacPDstandardNth2trK,ToReal(-0.6666666666666666666666666666666666666667),kmadd(At23L,kmadd(Gt312,kmul(gtu12,ToReal(-3.)),kmadd(Gt322,kmul(gtu22,ToReal(-2.)),kmadd(Gt333,kmul(gtu33,ToReal(-1.)),kmul(kmadd(cdphi1,gtu13,kmadd(cdphi2,gtu23,kmul(cdphi3,gtu33))),ToReal(6.))))),kmadd(At22L,kmadd(kmadd(Gt213,gtu13,kmul(Gt222,gtu22)),ToReal(-2.),kmadd(Gt233,kmul(gtu33,ToReal(-1.)),kmadd(kmadd(cdphi2,gtu22,kmul(cdphi3,gtu23)),ToReal(6.),kmul(gtu12,kmadd(Gt212,ToReal(-3.),kmul(cdphi1,ToReal(6.))))))),kmul(At12L,kmadd(Gt123,kmul(gtu23,ToReal(-3.)),kmadd(Gt122,kmul(gtu22,ToReal(-2.)),kmadd(Gt133,kmul(gtu33,ToReal(-1.)),kmadd(gtu11,kmadd(kadd(Gt111,Gt212),ToReal(-1.),kmul(cdphi1,ToReal(6.))),kmadd(gtu12,kmadd(Gt112,ToReal(-3.),kmadd(Gt222,ToReal(-1.),kmul(cdphi2,ToReal(6.)))),kmul(gtu13,kmadd(Gt113,ToReal(-2.),kmadd(Gt223,ToReal(-1.),kmul(cdphi3,ToReal(6.)))))))))))))))))))))); + kmadd(gtu11,JacPDstandardNth1At12,kmadd(gtu12,JacPDstandardNth1At22,kmadd(gtu13,JacPDstandardNth1At23,kmadd(gtu12,JacPDstandardNth2At12,kmadd(gtu22,JacPDstandardNth2At22,kmadd(gtu23,JacPDstandardNth2At23,kmadd(gtu13,JacPDstandardNth3At12,kmadd(gtu23,JacPDstandardNth3At22,kmadd(gtu33,JacPDstandardNth3At23,kmadd(At22L,kmul(Gt222,kmul(gtu22,ToReal(-2))),knmsub(At11L,kmadd(Gt112,gtu11,kmadd(Gt122,gtu12,kmul(Gt123,gtu13))),knmsub(gtu11,kmadd(At22L,Gt211,kmadd(At13L,Gt312,kmul(At23L,Gt311))),kmadd(Gt212,kmsub(At22L,kmul(gtu12,ToReal(-3)),kmul(At23L,gtu13)),kmadd(Gt312,kmsub(At23L,kmul(gtu12,ToReal(-3)),kmul(At33L,gtu13)),kmadd(Gt322,kmsub(At23L,kmul(gtu22,ToReal(-2)),kmul(At33L,gtu23)),kmadd(Gt223,kmsub(At22L,kmul(gtu23,ToReal(-3)),kmul(At23L,gtu33)),kmadd(Gt323,kmsub(At23L,kmul(gtu23,ToReal(-3)),kmul(At33L,gtu33)),kmadd(At22L,kmsub(Gt213,kmul(gtu13,ToReal(-2)),kmul(Gt233,gtu33)),kmadd(At23L,kmsub(Gt313,kmul(gtu13,ToReal(-2)),kmadd(Gt333,gtu33,kmul(Gt222,gtu23))),kmadd(JacPDstandardNth2trK,ToReal(-0.666666666666666666666666666667),kmadd(At22L,kmul(cdphi2,kmul(gtu22,ToReal(6))),kmadd(At22L,kmul(cdphi3,kmul(gtu23,ToReal(6))),kmadd(gtu12,kmsub(At22L,kmul(cdphi1,ToReal(6)),kmul(At13L,Gt322)),kmadd(gtu13,kmsub(At23L,kmul(cdphi1,ToReal(6)),kmul(At13L,kadd(Gt323,Gt112))),kmadd(gtu23,kmsub(At23L,kmul(cdphi2,ToReal(6)),kmul(At13L,Gt122)),kmadd(gtu33,kmsub(At23L,kmul(cdphi3,ToReal(6)),kmul(At13L,Gt123)),kmadd(At12L,kmadd(Gt112,kmul(gtu12,ToReal(-3)),kmadd(Gt123,kmul(gtu23,ToReal(-3)),kmadd(Gt113,kmul(gtu13,ToReal(-2)),kmadd(Gt122,kmul(gtu22,ToReal(-2)),knmsub(Gt133,gtu33,kmadd(gtu11,ksub(kmsub(cdphi1,ToReal(6),Gt212),Gt111),kmadd(gtu12,kmsub(cdphi2,ToReal(6),Gt222),kmul(gtu13,kmsub(cdphi3,ToReal(6),Gt223))))))))),kmul(S2,kmul(ToReal(-8),ToReal(Pi)))))))))))))))))))))))))))))); CCTK_REAL_VEC M3L = - kmadd(gtu11,JacPDstandardNth1At13,kmadd(gtu22,JacPDstandardNth2At23,kmadd(gtu13,kadd(JacPDstandardNth1At33,JacPDstandardNth3At13),kmadd(gtu33,JacPDstandardNth3At33,kmadd(S3,ToReal(-25.13274122871834590770114706623602307358),kmadd(kmadd(kmadd(At23L,Gt211,kmadd(At12L,Gt213,kmul(At33L,Gt311))),gtu11,kmadd(kmadd(At22L,Gt213,kmul(At12L,kadd(Gt113,Gt223))),gtu12,kmadd(At11L,kmadd(Gt113,gtu11,kmadd(Gt123,gtu12,kmul(Gt133,gtu13))),kmadd(kmadd(At23L,Gt222,kmul(At22L,Gt223)),gtu22,kmul(At12L,kmadd(Gt233,gtu13,kmadd(Gt123,gtu22,kmul(Gt133,gtu23)))))))),ToReal(-1.),kmadd(gtu12,kadd(JacPDstandardNth1At23,kadd(JacPDstandardNth2At13,kmadd(At33L,kmul(Gt312,ToReal(-2.)),kmul(At23L,kmul(Gt313,ToReal(-1.)))))),kmadd(gtu23,kadd(JacPDstandardNth2At33,kadd(JacPDstandardNth3At23,kmadd(kmadd(At23L,Gt223,kmul(At33L,Gt323)),ToReal(-3.),kmul(kmadd(At22L,Gt233,kmul(At23L,Gt333)),ToReal(-1.))))),kmadd(JacPDstandardNth3trK,ToReal(-0.6666666666666666666666666666666666666667),kmadd(At33L,kmadd(Gt333,kmul(gtu33,ToReal(-2.)),kmadd(Gt322,kmul(gtu22,ToReal(-1.)),kmadd(kmadd(cdphi2,gtu23,kmul(cdphi3,gtu33)),ToReal(6.),kmul(gtu13,kmadd(Gt313,ToReal(-3.),kmul(cdphi1,ToReal(6.))))))),kmadd(At23L,kmadd(Gt213,kmul(gtu13,ToReal(-3.)),kmadd(Gt233,kmul(gtu33,ToReal(-2.)),kmadd(cdphi3,kmul(gtu23,ToReal(6.)),kmadd(gtu12,kmadd(Gt212,ToReal(-2.),kmul(cdphi1,ToReal(6.))),kmul(gtu22,kmadd(Gt323,ToReal(-1.),kmul(cdphi2,ToReal(6.)))))))),kmul(At13L,kmadd(Gt123,kmul(gtu23,ToReal(-3.)),kmadd(Gt133,kmul(gtu33,ToReal(-2.)),kmadd(Gt122,kmul(gtu22,ToReal(-1.)),kmadd(gtu11,kmadd(kadd(Gt111,Gt313),ToReal(-1.),kmul(cdphi1,ToReal(6.))),kmadd(gtu12,kmadd(Gt112,ToReal(-2.),kmadd(Gt323,ToReal(-1.),kmul(cdphi2,ToReal(6.)))),kmul(gtu13,kmadd(Gt113,ToReal(-3.),kmadd(Gt333,ToReal(-1.),kmul(cdphi3,ToReal(6.)))))))))))))))))))))); + kmadd(gtu11,JacPDstandardNth1At13,kmadd(gtu12,JacPDstandardNth1At23,kmadd(gtu13,JacPDstandardNth1At33,kmadd(gtu12,JacPDstandardNth2At13,kmadd(gtu22,JacPDstandardNth2At23,kmadd(gtu23,JacPDstandardNth2At33,kmadd(gtu13,JacPDstandardNth3At13,kmadd(gtu23,JacPDstandardNth3At23,kmadd(gtu33,JacPDstandardNth3At33,kmadd(At33L,kmul(Gt313,kmul(gtu13,ToReal(-3))),kmadd(At23L,kmul(Gt223,kmul(gtu23,ToReal(-3))),kmadd(At33L,kmul(Gt323,kmul(gtu23,ToReal(-3))),kmadd(At23L,kmul(Gt233,kmul(gtu33,ToReal(-2))),kmadd(At33L,kmul(Gt333,kmul(gtu33,ToReal(-2))),knmsub(At11L,kmadd(Gt113,gtu11,kmadd(Gt123,gtu12,kmul(Gt133,gtu13))),kmadd(gtu12,kmsub(At23L,kmul(Gt212,ToReal(-2)),kmul(At12L,Gt223)),knmsub(gtu11,kmadd(At23L,Gt211,kmadd(At33L,Gt311,kmul(At12L,Gt213))),kmadd(At33L,kmsub(Gt312,kmul(gtu12,ToReal(-2)),kmul(Gt322,gtu22)),kmadd(At23L,kmsub(Gt213,kmul(gtu13,ToReal(-3)),kmadd(Gt333,gtu23,kmul(Gt222,gtu22))),kmadd(JacPDstandardNth3trK,ToReal(-0.666666666666666666666666666667),kmadd(At33L,kmul(cdphi3,kmul(gtu33,ToReal(6))),kmadd(gtu13,kmsub(At33L,kmul(cdphi1,ToReal(6)),kmul(At12L,Gt233)),kmadd(gtu23,kmsub(At33L,kmul(cdphi2,ToReal(6)),kmul(At12L,Gt133)),kmadd(gtu23,kmsub(At23L,kmul(cdphi3,ToReal(6)),kmul(At22L,Gt233)),kmadd(gtu12,knmsub(At12L,Gt113,kmsub(At23L,kmsub(cdphi1,ToReal(6),Gt313),kmul(At22L,Gt213))),kmadd(gtu22,knmsub(At12L,Gt123,kmsub(At23L,kmsub(cdphi2,ToReal(6),Gt323),kmul(At22L,Gt223))),kmadd(At13L,kmadd(Gt113,kmul(gtu13,ToReal(-3)),kmadd(Gt123,kmul(gtu23,ToReal(-3)),kmadd(Gt112,kmul(gtu12,ToReal(-2)),kmadd(Gt133,kmul(gtu33,ToReal(-2)),knmsub(Gt122,gtu22,kmadd(gtu11,ksub(kmsub(cdphi1,ToReal(6),Gt313),Gt111),kmadd(gtu12,kmsub(cdphi2,ToReal(6),Gt323),kmul(gtu13,kmsub(cdphi3,ToReal(6),Gt333))))))))),kmul(S3,kmul(ToReal(-8),ToReal(Pi)))))))))))))))))))))))))))))); CCTK_REAL_VEC cSL = klog(detgt); @@ -930,64 +932,18 @@ static void ML_BSSN_UPW_constraints2_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC cAL = kmadd(At11L,gtu11,kmadd(At22L,gtu22,kmadd(At33L,gtu33,kmul(kmadd(At12L,gtu12,kmadd(At13L,gtu13,kmul(At23L,gtu23))),ToReal(2))))); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(cA[index],cAL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cS[index],cSL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cXt1[index],cXt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cXt2[index],cXt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cXt3[index],cXt3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(M1[index],M1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(M2[index],M2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(M3[index],M3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(cA[index],cAL,elt_count); - vec_store_nta_partial_hi(cS[index],cSL,elt_count); - vec_store_nta_partial_hi(cXt1[index],cXt1L,elt_count); - vec_store_nta_partial_hi(cXt2[index],cXt2L,elt_count); - vec_store_nta_partial_hi(cXt3[index],cXt3L,elt_count); - vec_store_nta_partial_hi(M1[index],M1L,elt_count); - vec_store_nta_partial_hi(M2[index],M2L,elt_count); - vec_store_nta_partial_hi(M3[index],M3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(cA[index],cAL,elt_count); - vec_store_nta_partial_lo(cS[index],cSL,elt_count); - vec_store_nta_partial_lo(cXt1[index],cXt1L,elt_count); - vec_store_nta_partial_lo(cXt2[index],cXt2L,elt_count); - vec_store_nta_partial_lo(cXt3[index],cXt3L,elt_count); - vec_store_nta_partial_lo(M1[index],M1L,elt_count); - vec_store_nta_partial_lo(M2[index],M2L,elt_count); - vec_store_nta_partial_lo(M3[index],M3L,elt_count); - break; - } - vec_store_nta(cA[index],cAL); - vec_store_nta(cS[index],cSL); - vec_store_nta(cXt1[index],cXt1L); - vec_store_nta(cXt2[index],cXt2L); - vec_store_nta(cXt3[index],cXt3L); - vec_store_nta(M1[index],M1L); - vec_store_nta(M2[index],M2L); - vec_store_nta(M3[index],M3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(cA[index],cAL); + vec_store_nta_partial(cS[index],cSL); + vec_store_nta_partial(cXt1[index],cXt1L); + vec_store_nta_partial(cXt2[index],cXt2L); + vec_store_nta_partial(cXt3[index],cXt3L); + vec_store_nta_partial(M1[index],M1L); + vec_store_nta_partial(M2[index],M2L); + vec_store_nta_partial(M3[index],M3L); } - LC_ENDLOOP3VEC (ML_BSSN_UPW_constraints2); + LC_ENDLOOP3VEC(ML_BSSN_UPW_constraints2); } extern "C" void ML_BSSN_UPW_constraints2(CCTK_ARGUMENTS) @@ -1006,7 +962,18 @@ extern "C" void ML_BSSN_UPW_constraints2(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_UPW::ML_cons_detg","ML_BSSN_UPW::ML_cons_Gamma","ML_BSSN_UPW::ML_cons_traceA","ML_BSSN_UPW::ML_curv","ML_BSSN_UPW::ML_Gamma","ML_BSSN_UPW::ML_lapse","ML_BSSN_UPW::ML_log_confac","ML_BSSN_UPW::ML_metric","ML_BSSN_UPW::ML_mom","ML_BSSN_UPW::ML_shift","ML_BSSN_UPW::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_UPW::ML_cons_detg", + "ML_BSSN_UPW::ML_cons_Gamma", + "ML_BSSN_UPW::ML_cons_traceA", + "ML_BSSN_UPW::ML_curv", + "ML_BSSN_UPW::ML_Gamma", + "ML_BSSN_UPW::ML_lapse", + "ML_BSSN_UPW::ML_log_confac", + "ML_BSSN_UPW::ML_metric", + "ML_BSSN_UPW::ML_mom", + "ML_BSSN_UPW::ML_shift", + "ML_BSSN_UPW::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_UPW_constraints2", 11, groups); switch(fdOrder) @@ -1028,7 +995,7 @@ extern "C" void ML_BSSN_UPW_constraints2(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_UPW_constraints2_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_UPW_constraints2_Body); if (verbose > 1) { diff --git a/ML_BSSN_UPW/src/ML_BSSN_UPW_convertFromADMBase.cc b/ML_BSSN_UPW/src/ML_BSSN_UPW_convertFromADMBase.cc index 5c3413c..acfc692 100644 --- a/ML_BSSN_UPW/src/ML_BSSN_UPW_convertFromADMBase.cc +++ b/ML_BSSN_UPW/src/ML_BSSN_UPW_convertFromADMBase.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_UPW_convertFromADMBase_Body(cGH const * restrict const cctkG DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_UPW_convertFromADMBase_Body(cGH const * restrict const cctkG CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_UPW_convertFromADMBase_Body(cGH const * restrict const cctkG CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_UPW_convertFromADMBase_Body(cGH const * restrict const cctkG CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_UPW_convertFromADMBase_Body(cGH const * restrict const cctkG /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_UPW_convertFromADMBase, + LC_LOOP3VEC(ML_BSSN_UPW_convertFromADMBase, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -296,25 +295,25 @@ static void ML_BSSN_UPW_convertFromADMBase_Body(cGH const * restrict const cctkG CCTK_REAL_VEC gt33L = kmul(em4phi,g33); trKL = - kmadd(gu11,kxxL,kmadd(gu22,kyyL,kmadd(gu33,kzzL,kmul(kmadd(gu12,kxyL,kmadd(gu13,kxzL,kmul(gu23,kyzL))),ToReal(2))))); + kmadd(kxxL,gu11,kmadd(kyyL,gu22,kmadd(kzzL,gu33,kmul(kmadd(kxyL,gu12,kmadd(kxzL,gu13,kmul(kyzL,gu23))),ToReal(2))))); CCTK_REAL_VEC At11L = - kmul(em4phi,kmadd(g11,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kxxL)); + kmul(em4phi,kmadd(trKL,kmul(g11,ToReal(-0.333333333333333333333333333333)),kxxL)); CCTK_REAL_VEC At12L = - kmul(em4phi,kmadd(g12,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kxyL)); + kmul(em4phi,kmadd(trKL,kmul(g12,ToReal(-0.333333333333333333333333333333)),kxyL)); CCTK_REAL_VEC At13L = - kmul(em4phi,kmadd(g13,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kxzL)); + kmul(em4phi,kmadd(trKL,kmul(g13,ToReal(-0.333333333333333333333333333333)),kxzL)); CCTK_REAL_VEC At22L = - kmul(em4phi,kmadd(g22,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kyyL)); + kmul(em4phi,kmadd(trKL,kmul(g22,ToReal(-0.333333333333333333333333333333)),kyyL)); CCTK_REAL_VEC At23L = - kmul(em4phi,kmadd(g23,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kyzL)); + kmul(em4phi,kmadd(trKL,kmul(g23,ToReal(-0.333333333333333333333333333333)),kyzL)); CCTK_REAL_VEC At33L = - kmul(em4phi,kmadd(g33,kmul(trKL,ToReal(-0.333333333333333333333333333333)),kzzL)); + kmul(em4phi,kmadd(trKL,kmul(g33,ToReal(-0.333333333333333333333333333333)),kzzL)); CCTK_REAL_VEC alphaL = alpL; @@ -324,104 +323,28 @@ static void ML_BSSN_UPW_convertFromADMBase_Body(cGH const * restrict const cctkG CCTK_REAL_VEC beta3L = betazL; - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - vec_store_nta_partial_hi(beta1[index],beta1L,elt_count); - vec_store_nta_partial_hi(beta2[index],beta2L,elt_count); - vec_store_nta_partial_hi(beta3[index],beta3L,elt_count); - vec_store_nta_partial_hi(gt11[index],gt11L,elt_count); - vec_store_nta_partial_hi(gt12[index],gt12L,elt_count); - vec_store_nta_partial_hi(gt13[index],gt13L,elt_count); - vec_store_nta_partial_hi(gt22[index],gt22L,elt_count); - vec_store_nta_partial_hi(gt23[index],gt23L,elt_count); - vec_store_nta_partial_hi(gt33[index],gt33L,elt_count); - vec_store_nta_partial_hi(phi[index],phiL,elt_count); - vec_store_nta_partial_hi(trK[index],trKL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - vec_store_nta_partial_lo(beta1[index],beta1L,elt_count); - vec_store_nta_partial_lo(beta2[index],beta2L,elt_count); - vec_store_nta_partial_lo(beta3[index],beta3L,elt_count); - vec_store_nta_partial_lo(gt11[index],gt11L,elt_count); - vec_store_nta_partial_lo(gt12[index],gt12L,elt_count); - vec_store_nta_partial_lo(gt13[index],gt13L,elt_count); - vec_store_nta_partial_lo(gt22[index],gt22L,elt_count); - vec_store_nta_partial_lo(gt23[index],gt23L,elt_count); - vec_store_nta_partial_lo(gt33[index],gt33L,elt_count); - vec_store_nta_partial_lo(phi[index],phiL,elt_count); - vec_store_nta_partial_lo(trK[index],trKL,elt_count); - break; - } - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); - vec_store_nta(beta1[index],beta1L); - vec_store_nta(beta2[index],beta2L); - vec_store_nta(beta3[index],beta3L); - vec_store_nta(gt11[index],gt11L); - vec_store_nta(gt12[index],gt12L); - vec_store_nta(gt13[index],gt13L); - vec_store_nta(gt22[index],gt22L); - vec_store_nta(gt23[index],gt23L); - vec_store_nta(gt33[index],gt33L); - vec_store_nta(phi[index],phiL); - vec_store_nta(trK[index],trKL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); + vec_store_nta_partial(beta1[index],beta1L); + vec_store_nta_partial(beta2[index],beta2L); + vec_store_nta_partial(beta3[index],beta3L); + vec_store_nta_partial(gt11[index],gt11L); + vec_store_nta_partial(gt12[index],gt12L); + vec_store_nta_partial(gt13[index],gt13L); + vec_store_nta_partial(gt22[index],gt22L); + vec_store_nta_partial(gt23[index],gt23L); + vec_store_nta_partial(gt33[index],gt33L); + vec_store_nta_partial(phi[index],phiL); + vec_store_nta_partial(trK[index],trKL); } - LC_ENDLOOP3VEC (ML_BSSN_UPW_convertFromADMBase); + LC_ENDLOOP3VEC(ML_BSSN_UPW_convertFromADMBase); } extern "C" void ML_BSSN_UPW_convertFromADMBase(CCTK_ARGUMENTS) @@ -440,7 +363,17 @@ extern "C" void ML_BSSN_UPW_convertFromADMBase(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::curv","ADMBase::lapse","ADMBase::metric","ADMBase::shift","ML_BSSN_UPW::ML_curv","ML_BSSN_UPW::ML_lapse","ML_BSSN_UPW::ML_log_confac","ML_BSSN_UPW::ML_metric","ML_BSSN_UPW::ML_shift","ML_BSSN_UPW::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::curv", + "ADMBase::lapse", + "ADMBase::metric", + "ADMBase::shift", + "ML_BSSN_UPW::ML_curv", + "ML_BSSN_UPW::ML_lapse", + "ML_BSSN_UPW::ML_log_confac", + "ML_BSSN_UPW::ML_metric", + "ML_BSSN_UPW::ML_shift", + "ML_BSSN_UPW::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_UPW_convertFromADMBase", 10, groups); switch(fdOrder) @@ -458,7 +391,7 @@ extern "C" void ML_BSSN_UPW_convertFromADMBase(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_UPW_convertFromADMBase_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_UPW_convertFromADMBase_Body); if (verbose > 1) { diff --git a/ML_BSSN_UPW/src/ML_BSSN_UPW_convertFromADMBaseGamma.cc b/ML_BSSN_UPW/src/ML_BSSN_UPW_convertFromADMBaseGamma.cc index d546630..332de3c 100644 --- a/ML_BSSN_UPW/src/ML_BSSN_UPW_convertFromADMBaseGamma.cc +++ b/ML_BSSN_UPW/src/ML_BSSN_UPW_convertFromADMBaseGamma.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -46,8 +47,6 @@ static void ML_BSSN_UPW_convertFromADMBaseGamma_Body(cGH const * restrict const DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -84,9 +83,9 @@ static void ML_BSSN_UPW_convertFromADMBaseGamma_Body(cGH const * restrict const CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -105,14 +104,14 @@ static void ML_BSSN_UPW_convertFromADMBaseGamma_Body(cGH const * restrict const CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -126,9 +125,9 @@ static void ML_BSSN_UPW_convertFromADMBaseGamma_Body(cGH const * restrict const CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -209,7 +208,7 @@ static void ML_BSSN_UPW_convertFromADMBaseGamma_Body(cGH const * restrict const /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_UPW_convertFromADMBaseGamma, + LC_LOOP3VEC(ML_BSSN_UPW_convertFromADMBaseGamma, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -614,7 +613,8 @@ static void ML_BSSN_UPW_convertFromADMBaseGamma_Body(cGH const * restrict const CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -622,12 +622,14 @@ static void ML_BSSN_UPW_convertFromADMBaseGamma_Body(cGH const * restrict const CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC Gt111 = kmul(ToReal(0.5),kmadd(gtu11,JacPDstandardNth1gt11,knmsub(gtu12,JacPDstandardNth2gt11,kmsub(kmadd(gtu12,JacPDstandardNth1gt12,kmul(gtu13,JacPDstandardNth1gt13)),ToReal(2),kmul(gtu13,JacPDstandardNth3gt11))))); @@ -705,13 +707,13 @@ static void ML_BSSN_UPW_convertFromADMBaseGamma_Body(cGH const * restrict const if (ShiftBCoeff*ShiftGammaCoeff != 0) { B1L = - kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,JacPDupwindNth1beta1,kmadd(beta2L,JacPDupwindNth2beta1,kmul(beta3L,JacPDupwindNth3beta1))),ToReal(ShiftAdvectionCoeff),dtbetaxL))); + kmul(INV(kmul(theta,ToReal(ShiftGammaCoeff))),knmsub(kmadd(beta1L,JacPDupwindNth1beta1,kmadd(beta2L,JacPDupwindNth2beta1,kmul(beta3L,JacPDupwindNth3beta1))),ToReal(ShiftAdvectionCoeff),dtbetaxL)); B2L = - kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,JacPDupwindNth1beta2,kmadd(beta2L,JacPDupwindNth2beta2,kmul(beta3L,JacPDupwindNth3beta2))),ToReal(ShiftAdvectionCoeff),dtbetayL))); + kmul(INV(kmul(theta,ToReal(ShiftGammaCoeff))),knmsub(kmadd(beta1L,JacPDupwindNth1beta2,kmadd(beta2L,JacPDupwindNth2beta2,kmul(beta3L,JacPDupwindNth3beta2))),ToReal(ShiftAdvectionCoeff),dtbetayL)); B3L = - kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,JacPDupwindNth1beta3,kmadd(beta2L,JacPDupwindNth2beta3,kmul(beta3L,JacPDupwindNth3beta3))),ToReal(ShiftAdvectionCoeff),dtbetazL))); + kmul(INV(kmul(theta,ToReal(ShiftGammaCoeff))),knmsub(kmadd(beta1L,JacPDupwindNth1beta3,kmadd(beta2L,JacPDupwindNth2beta3,kmul(beta3L,JacPDupwindNth3beta3))),ToReal(ShiftAdvectionCoeff),dtbetazL)); } else { @@ -722,60 +724,17 @@ static void ML_BSSN_UPW_convertFromADMBaseGamma_Body(cGH const * restrict const B3L = ToReal(0); } - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_UPW_convertFromADMBaseGamma); + LC_ENDLOOP3VEC(ML_BSSN_UPW_convertFromADMBaseGamma); } extern "C" void ML_BSSN_UPW_convertFromADMBaseGamma(CCTK_ARGUMENTS) @@ -794,7 +753,17 @@ extern "C" void ML_BSSN_UPW_convertFromADMBaseGamma(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN_UPW::ML_dtlapse","ML_BSSN_UPW::ML_dtshift","ML_BSSN_UPW::ML_Gamma","ML_BSSN_UPW::ML_lapse","ML_BSSN_UPW::ML_metric","ML_BSSN_UPW::ML_shift"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_UPW::ML_dtlapse", + "ML_BSSN_UPW::ML_dtshift", + "ML_BSSN_UPW::ML_Gamma", + "ML_BSSN_UPW::ML_lapse", + "ML_BSSN_UPW::ML_metric", + "ML_BSSN_UPW::ML_shift"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_UPW_convertFromADMBaseGamma", 10, groups); switch(fdOrder) @@ -816,7 +785,7 @@ extern "C" void ML_BSSN_UPW_convertFromADMBaseGamma(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_UPW_convertFromADMBaseGamma_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_UPW_convertFromADMBaseGamma_Body); if (verbose > 1) { diff --git a/ML_BSSN_UPW/src/ML_BSSN_UPW_convertToADMBase.cc b/ML_BSSN_UPW/src/ML_BSSN_UPW_convertToADMBase.cc index c7ecab4..af49a26 100644 --- a/ML_BSSN_UPW/src/ML_BSSN_UPW_convertToADMBase.cc +++ b/ML_BSSN_UPW/src/ML_BSSN_UPW_convertToADMBase.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_UPW_convertToADMBase_Body(cGH const * restrict const cctkGH, DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_UPW_convertToADMBase_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_UPW_convertToADMBase_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_UPW_convertToADMBase_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_UPW_convertToADMBase_Body(cGH const * restrict const cctkGH, /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_UPW_convertToADMBase, + LC_LOOP3VEC(ML_BSSN_UPW_convertToADMBase, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -250,17 +249,17 @@ static void ML_BSSN_UPW_convertToADMBase_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC e4phi = IfThen(conformalMethod,INV(SQR(phiL)),kexp(kmul(phiL,ToReal(4)))); - gxxL = kmul(e4phi,gt11L); + gxxL = kmul(gt11L,e4phi); - gxyL = kmul(e4phi,gt12L); + gxyL = kmul(gt12L,e4phi); - gxzL = kmul(e4phi,gt13L); + gxzL = kmul(gt13L,e4phi); - gyyL = kmul(e4phi,gt22L); + gyyL = kmul(gt22L,e4phi); - gyzL = kmul(e4phi,gt23L); + gyzL = kmul(gt23L,e4phi); - gzzL = kmul(e4phi,gt33L); + gzzL = kmul(gt33L,e4phi); CCTK_REAL_VEC kxxL = kmadd(At11L,e4phi,kmul(gxxL,kmul(trKL,ToReal(0.333333333333333333333333333333)))); @@ -288,96 +287,26 @@ static void ML_BSSN_UPW_convertToADMBase_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC betazL = beta3L; - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alp[index],alpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(betax[index],betaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(betay[index],betayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(betaz[index],betazL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gxx[index],gxxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gxy[index],gxyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gxz[index],gxzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gyy[index],gyyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gyz[index],gyzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gzz[index],gzzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kxx[index],kxxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kxy[index],kxyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kxz[index],kxzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kyy[index],kyyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kyz[index],kyzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kzz[index],kzzL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alp[index],alpL,elt_count); - vec_store_nta_partial_hi(betax[index],betaxL,elt_count); - vec_store_nta_partial_hi(betay[index],betayL,elt_count); - vec_store_nta_partial_hi(betaz[index],betazL,elt_count); - vec_store_nta_partial_hi(gxx[index],gxxL,elt_count); - vec_store_nta_partial_hi(gxy[index],gxyL,elt_count); - vec_store_nta_partial_hi(gxz[index],gxzL,elt_count); - vec_store_nta_partial_hi(gyy[index],gyyL,elt_count); - vec_store_nta_partial_hi(gyz[index],gyzL,elt_count); - vec_store_nta_partial_hi(gzz[index],gzzL,elt_count); - vec_store_nta_partial_hi(kxx[index],kxxL,elt_count); - vec_store_nta_partial_hi(kxy[index],kxyL,elt_count); - vec_store_nta_partial_hi(kxz[index],kxzL,elt_count); - vec_store_nta_partial_hi(kyy[index],kyyL,elt_count); - vec_store_nta_partial_hi(kyz[index],kyzL,elt_count); - vec_store_nta_partial_hi(kzz[index],kzzL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alp[index],alpL,elt_count); - vec_store_nta_partial_lo(betax[index],betaxL,elt_count); - vec_store_nta_partial_lo(betay[index],betayL,elt_count); - vec_store_nta_partial_lo(betaz[index],betazL,elt_count); - vec_store_nta_partial_lo(gxx[index],gxxL,elt_count); - vec_store_nta_partial_lo(gxy[index],gxyL,elt_count); - vec_store_nta_partial_lo(gxz[index],gxzL,elt_count); - vec_store_nta_partial_lo(gyy[index],gyyL,elt_count); - vec_store_nta_partial_lo(gyz[index],gyzL,elt_count); - vec_store_nta_partial_lo(gzz[index],gzzL,elt_count); - vec_store_nta_partial_lo(kxx[index],kxxL,elt_count); - vec_store_nta_partial_lo(kxy[index],kxyL,elt_count); - vec_store_nta_partial_lo(kxz[index],kxzL,elt_count); - vec_store_nta_partial_lo(kyy[index],kyyL,elt_count); - vec_store_nta_partial_lo(kyz[index],kyzL,elt_count); - vec_store_nta_partial_lo(kzz[index],kzzL,elt_count); - break; - } - vec_store_nta(alp[index],alpL); - vec_store_nta(betax[index],betaxL); - vec_store_nta(betay[index],betayL); - vec_store_nta(betaz[index],betazL); - vec_store_nta(gxx[index],gxxL); - vec_store_nta(gxy[index],gxyL); - vec_store_nta(gxz[index],gxzL); - vec_store_nta(gyy[index],gyyL); - vec_store_nta(gyz[index],gyzL); - vec_store_nta(gzz[index],gzzL); - vec_store_nta(kxx[index],kxxL); - vec_store_nta(kxy[index],kxyL); - vec_store_nta(kxz[index],kxzL); - vec_store_nta(kyy[index],kyyL); - vec_store_nta(kyz[index],kyzL); - vec_store_nta(kzz[index],kzzL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alp[index],alpL); + vec_store_nta_partial(betax[index],betaxL); + vec_store_nta_partial(betay[index],betayL); + vec_store_nta_partial(betaz[index],betazL); + vec_store_nta_partial(gxx[index],gxxL); + vec_store_nta_partial(gxy[index],gxyL); + vec_store_nta_partial(gxz[index],gxzL); + vec_store_nta_partial(gyy[index],gyyL); + vec_store_nta_partial(gyz[index],gyzL); + vec_store_nta_partial(gzz[index],gzzL); + vec_store_nta_partial(kxx[index],kxxL); + vec_store_nta_partial(kxy[index],kxyL); + vec_store_nta_partial(kxz[index],kxzL); + vec_store_nta_partial(kyy[index],kyyL); + vec_store_nta_partial(kyz[index],kyzL); + vec_store_nta_partial(kzz[index],kzzL); } - LC_ENDLOOP3VEC (ML_BSSN_UPW_convertToADMBase); + LC_ENDLOOP3VEC(ML_BSSN_UPW_convertToADMBase); } extern "C" void ML_BSSN_UPW_convertToADMBase(CCTK_ARGUMENTS) @@ -396,7 +325,17 @@ extern "C" void ML_BSSN_UPW_convertToADMBase(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::curv","ADMBase::lapse","ADMBase::metric","ADMBase::shift","ML_BSSN_UPW::ML_curv","ML_BSSN_UPW::ML_lapse","ML_BSSN_UPW::ML_log_confac","ML_BSSN_UPW::ML_metric","ML_BSSN_UPW::ML_shift","ML_BSSN_UPW::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::curv", + "ADMBase::lapse", + "ADMBase::metric", + "ADMBase::shift", + "ML_BSSN_UPW::ML_curv", + "ML_BSSN_UPW::ML_lapse", + "ML_BSSN_UPW::ML_log_confac", + "ML_BSSN_UPW::ML_metric", + "ML_BSSN_UPW::ML_shift", + "ML_BSSN_UPW::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_UPW_convertToADMBase", 10, groups); switch(fdOrder) @@ -414,7 +353,7 @@ extern "C" void ML_BSSN_UPW_convertToADMBase(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_UPW_convertToADMBase_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_UPW_convertToADMBase_Body); if (verbose > 1) { diff --git a/ML_BSSN_UPW/src/ML_BSSN_UPW_convertToADMBaseDtLapseShift.cc b/ML_BSSN_UPW/src/ML_BSSN_UPW_convertToADMBaseDtLapseShift.cc index df5e612..4671a53 100644 --- a/ML_BSSN_UPW/src/ML_BSSN_UPW_convertToADMBaseDtLapseShift.cc +++ b/ML_BSSN_UPW/src/ML_BSSN_UPW_convertToADMBaseDtLapseShift.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -43,8 +44,6 @@ static void ML_BSSN_UPW_convertToADMBaseDtLapseShift_Body(cGH const * restrict c DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -81,9 +80,9 @@ static void ML_BSSN_UPW_convertToADMBaseDtLapseShift_Body(cGH const * restrict c CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -102,14 +101,14 @@ static void ML_BSSN_UPW_convertToADMBaseDtLapseShift_Body(cGH const * restrict c CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -123,9 +122,9 @@ static void ML_BSSN_UPW_convertToADMBaseDtLapseShift_Body(cGH const * restrict c CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -206,7 +205,7 @@ static void ML_BSSN_UPW_convertToADMBaseDtLapseShift_Body(cGH const * restrict c /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_UPW_convertToADMBaseDtLapseShift, + LC_LOOP3VEC(ML_BSSN_UPW_convertToADMBaseDtLapseShift, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -682,7 +681,8 @@ static void ML_BSSN_UPW_convertToADMBaseDtLapseShift_Body(cGH const * restrict c CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -690,15 +690,17 @@ static void ML_BSSN_UPW_convertToADMBaseDtLapseShift_Body(cGH const * restrict c CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC eta = - kfmin(ToReal(1),kmul(INV(rL),ToReal(SpatialBetaDriverRadius))); + kmul(INV(kfmax(rL,ToReal(SpatialBetaDriverRadius))),ToReal(SpatialBetaDriverRadius)); CCTK_REAL_VEC theta = kfmin(ToReal(1),kexp(knmsub(rL,INV(ToReal(SpatialShiftGammaCoeffRadius)),ToReal(1)))); @@ -707,62 +709,22 @@ static void ML_BSSN_UPW_convertToADMBaseDtLapseShift_Body(cGH const * restrict c kmsub(kmadd(beta1L,JacPDupwindNth1alpha,kmadd(beta2L,JacPDupwindNth2alpha,kmul(beta3L,JacPDupwindNth3alpha))),ToReal(LapseAdvectionCoeff),kmul(kpow(alphaL,harmonicN),kmul(ToReal(harmonicF),kmadd(ksub(AL,trKL),ToReal(LapseACoeff),trKL)))); CCTK_REAL_VEC dtbetaxL = - kmadd(kmadd(beta1L,JacPDupwindNth1beta1,kmadd(beta2L,JacPDupwindNth2beta1,kmul(beta3L,JacPDupwindNth3beta1))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(kmadd(gtu11,JacPDstandardNth1alpha,kmadd(gtu12,JacPDstandardNth2alpha,kmul(gtu13,JacPDstandardNth3alpha))),kmul(phiL,ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth1gt11,SQR(gtu11),kmul(JacPDstandardNth1gt22,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu13,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu13,JacPDstandardNth1gt33,kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu22,JacPDstandardNth3gt22))))),kmadd(gtu11,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu12,JacPDstandardNth2gt11,kmadd(gtu13,JacPDstandardNth3gt11,kmadd(gtu23,kmul(JacPDstandardNth1gt23,ToReal(-2)),knmsub(gtu22,JacPDstandardNth1gt22,kmadd(kmadd(gtu12,JacPDstandardNth1gt12,kmadd(gtu13,JacPDstandardNth1gt13,kmul(gtu22,JacPDstandardNth2gt12))),ToReal(2),kmadd(gtu23,kmul(JacPDstandardNth2gt13,ToReal(2)),kmadd(gtu23,kmul(JacPDstandardNth3gt12,ToReal(2)),kmul(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33))))))))))),kmul(gtu12,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu23,kmul(JacPDstandardNth3gt22,ToReal(2)),kmadd(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33),kmul(gtu13,kmul(JacPDstandardNth1gt23,ToReal(4))))))))))))))))),kmul(theta,kmul(kadd(Xt1L,kmadd(beta1L,kmul(eta,ToReal(BetaDriver*(-1 - + - ShiftBCoeff))),kmul(ksub(B1L,Xt1L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))))); + kmadd(kmadd(beta1L,JacPDupwindNth1beta1,kmadd(beta2L,JacPDupwindNth2beta1,kmul(beta3L,JacPDupwindNth3beta1))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu11,JacPDstandardNth1alpha,kmadd(gtu12,JacPDstandardNth2alpha,kmul(gtu13,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth1gt11,SQR(gtu11),kmul(JacPDstandardNth1gt22,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu13,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu13,JacPDstandardNth1gt33,kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu22,JacPDstandardNth3gt22))))),kmadd(gtu11,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu12,JacPDstandardNth2gt11,kmadd(gtu13,JacPDstandardNth3gt11,kmadd(gtu23,kmul(JacPDstandardNth1gt23,ToReal(-2)),knmsub(gtu22,JacPDstandardNth1gt22,kmadd(kmadd(gtu12,JacPDstandardNth1gt12,kmadd(gtu13,JacPDstandardNth1gt13,kmul(gtu22,JacPDstandardNth2gt12))),ToReal(2),kmadd(gtu23,kmul(JacPDstandardNth2gt13,ToReal(2)),kmadd(gtu23,kmul(JacPDstandardNth3gt12,ToReal(2)),kmul(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33))))))))))),kmul(gtu12,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu23,kmul(JacPDstandardNth3gt22,ToReal(2)),kmadd(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33),kmul(gtu13,kmul(JacPDstandardNth1gt23,ToReal(4))))))))))))))))),kmul(theta,kmul(kadd(Xt1L,kmadd(ksub(B1L,Xt1L),ToReal(ShiftBCoeff),kmul(beta1L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))))); CCTK_REAL_VEC dtbetayL = - kmadd(kmadd(beta1L,JacPDupwindNth1beta2,kmadd(beta2L,JacPDupwindNth2beta2,kmul(beta3L,JacPDupwindNth3beta2))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(kmadd(gtu12,JacPDstandardNth1alpha,kmadd(gtu22,JacPDstandardNth2alpha,kmul(gtu23,JacPDstandardNth3alpha))),kmul(phiL,ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth2gt22,SQR(gtu22),kmul(JacPDstandardNth2gt11,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu23,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt13,kmadd(gtu13,JacPDstandardNth1gt33,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu11,JacPDstandardNth3gt11))))),kmadd(gtu22,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu23,JacPDstandardNth3gt22,kmadd(kmadd(gtu23,JacPDstandardNth2gt23,kmul(gtu13,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth3gt12,JacPDstandardNth2gt13)))),ToReal(2),kmadd(gtu11,kmsub(JacPDstandardNth1gt12,ToReal(2),JacPDstandardNth2gt11),kmul(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33))))))),kmul(gtu12,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu13,kmul(JacPDstandardNth3gt11,ToReal(2)),kmadd(gtu22,kmadd(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth2gt13,ToReal(4)))))))))))))))))),kmul(theta,kmul(kadd(Xt2L,kmadd(beta2L,kmul(eta,ToReal(BetaDriver*(-1 - + - ShiftBCoeff))),kmul(ksub(B2L,Xt2L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))))); + kmadd(kmadd(beta1L,JacPDupwindNth1beta2,kmadd(beta2L,JacPDupwindNth2beta2,kmul(beta3L,JacPDupwindNth3beta2))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu12,JacPDstandardNth1alpha,kmadd(gtu22,JacPDstandardNth2alpha,kmul(gtu23,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmadd(JacPDstandardNth2gt22,SQR(gtu22),kmul(JacPDstandardNth2gt11,kmul(SQR(gtu12),ToReal(2)))),kmadd(gtu23,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt13,kmadd(gtu13,JacPDstandardNth1gt33,kmul(gtu23,JacPDstandardNth2gt33))),ToReal(2),kmul(gtu11,JacPDstandardNth3gt11))))),kmadd(gtu22,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu23,JacPDstandardNth3gt22,kmadd(kmadd(gtu23,JacPDstandardNth2gt23,kmul(gtu13,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth3gt12,JacPDstandardNth2gt13)))),ToReal(2),kmadd(gtu11,kmsub(JacPDstandardNth1gt12,ToReal(2),JacPDstandardNth2gt11),kmul(gtu33,kmsub(JacPDstandardNth3gt23,ToReal(2),JacPDstandardNth2gt33))))))),kmul(gtu12,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu13,kmul(JacPDstandardNth3gt11,ToReal(2)),kmadd(gtu22,kmadd(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmsub(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth2gt13,ToReal(4)))))))))))))))))),kmul(theta,kmul(kadd(Xt2L,kmadd(ksub(B2L,Xt2L),ToReal(ShiftBCoeff),kmul(beta2L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))))); CCTK_REAL_VEC dtbetazL = - kmadd(kmadd(beta1L,JacPDupwindNth1beta3,kmadd(beta2L,JacPDupwindNth2beta3,kmul(beta3L,JacPDupwindNth3beta3))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(kmadd(gtu13,JacPDstandardNth1alpha,kmadd(gtu23,JacPDstandardNth2alpha,kmul(gtu33,JacPDstandardNth3alpha))),kmul(phiL,ToReal(-2)),kmul(alphaL,kmadd(phiL,kmul(kmadd(JacPDstandardNth3gt11,SQR(gtu13),kmul(JacPDstandardNth3gt22,SQR(gtu23))),ToReal(2)),kmadd(gtu23,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu33,JacPDstandardNth2gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt12,kmadd(gtu12,JacPDstandardNth1gt22,kmul(gtu33,JacPDstandardNth3gt23))),ToReal(2),kmul(gtu11,JacPDstandardNth2gt11)))))),kmadd(gtu33,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,knmsub(gtu22,JacPDstandardNth3gt22,kmadd(kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu12,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth2gt13,JacPDstandardNth3gt12)))),ToReal(2),kmul(gtu11,kmsub(JacPDstandardNth1gt13,ToReal(2),JacPDstandardNth3gt11))))))),kmul(gtu13,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu12,kmul(JacPDstandardNth2gt11,ToReal(2)),kmadd(gtu22,kmsub(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmadd(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth3gt12,ToReal(4)))))))))))))))))),kmul(theta,kmul(kadd(Xt3L,kmadd(beta3L,kmul(eta,ToReal(BetaDriver*(-1 - + - ShiftBCoeff))),kmul(ksub(B3L,Xt3L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))))); - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count); - break; - } - vec_store_nta(dtalp[index],dtalpL); - vec_store_nta(dtbetax[index],dtbetaxL); - vec_store_nta(dtbetay[index],dtbetayL); - vec_store_nta(dtbetaz[index],dtbetazL); + kmadd(kmadd(beta1L,JacPDupwindNth1beta3,kmadd(beta2L,JacPDupwindNth2beta3,kmul(beta3L,JacPDupwindNth3beta3))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu13,JacPDstandardNth1alpha,kmadd(gtu23,JacPDstandardNth2alpha,kmul(gtu33,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmul(kmadd(JacPDstandardNth3gt11,SQR(gtu13),kmul(JacPDstandardNth3gt22,SQR(gtu23))),ToReal(2)),kmadd(gtu23,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu33,JacPDstandardNth2gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt12,kmadd(gtu12,JacPDstandardNth1gt22,kmul(gtu33,JacPDstandardNth3gt23))),ToReal(2),kmul(gtu11,JacPDstandardNth2gt11)))))),kmadd(gtu33,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,knmsub(gtu22,JacPDstandardNth3gt22,kmadd(kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu12,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth2gt13,JacPDstandardNth3gt12)))),ToReal(2),kmul(gtu11,kmsub(JacPDstandardNth1gt13,ToReal(2),JacPDstandardNth3gt11))))))),kmul(gtu13,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu12,kmul(JacPDstandardNth2gt11,ToReal(2)),kmadd(gtu22,kmsub(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmadd(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth3gt12,ToReal(4)))))))))))))))))),kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))))); + + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(dtalp[index],dtalpL); + vec_store_nta_partial(dtbetax[index],dtbetaxL); + vec_store_nta_partial(dtbetay[index],dtbetayL); + vec_store_nta_partial(dtbetaz[index],dtbetazL); } - LC_ENDLOOP3VEC (ML_BSSN_UPW_convertToADMBaseDtLapseShift); + LC_ENDLOOP3VEC(ML_BSSN_UPW_convertToADMBaseDtLapseShift); } extern "C" void ML_BSSN_UPW_convertToADMBaseDtLapseShift(CCTK_ARGUMENTS) @@ -781,7 +743,19 @@ extern "C" void ML_BSSN_UPW_convertToADMBaseDtLapseShift(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN_UPW::ML_dtlapse","ML_BSSN_UPW::ML_dtshift","ML_BSSN_UPW::ML_Gamma","ML_BSSN_UPW::ML_lapse","ML_BSSN_UPW::ML_log_confac","ML_BSSN_UPW::ML_metric","ML_BSSN_UPW::ML_shift","ML_BSSN_UPW::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_UPW::ML_dtlapse", + "ML_BSSN_UPW::ML_dtshift", + "ML_BSSN_UPW::ML_Gamma", + "ML_BSSN_UPW::ML_lapse", + "ML_BSSN_UPW::ML_log_confac", + "ML_BSSN_UPW::ML_metric", + "ML_BSSN_UPW::ML_shift", + "ML_BSSN_UPW::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_UPW_convertToADMBaseDtLapseShift", 12, groups); switch(fdOrder) @@ -803,7 +777,7 @@ extern "C" void ML_BSSN_UPW_convertToADMBaseDtLapseShift(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_UPW_convertToADMBaseDtLapseShift_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_UPW_convertToADMBaseDtLapseShift_Body); if (verbose > 1) { diff --git a/ML_BSSN_UPW/src/ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary.cc b/ML_BSSN_UPW/src/ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary.cc index aa1c738..723f9b0 100644 --- a/ML_BSSN_UPW/src/ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary.cc +++ b/ML_BSSN_UPW/src/ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -43,8 +44,6 @@ static void ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * re DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -81,9 +80,9 @@ static void ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * re CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -102,14 +101,14 @@ static void ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * re CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -123,9 +122,9 @@ static void ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * re CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -206,7 +205,7 @@ static void ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * re /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary, + LC_LOOP3VEC(ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -252,7 +251,7 @@ static void ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * re /* Calculate temporaries and grid functions */ CCTK_REAL_VEC eta = - kfmin(ToReal(1),kmul(INV(rL),ToReal(SpatialBetaDriverRadius))); + kmul(INV(kfmax(rL,ToReal(SpatialBetaDriverRadius))),ToReal(SpatialBetaDriverRadius)); CCTK_REAL_VEC theta = kfmin(ToReal(1),kexp(knmsub(rL,INV(ToReal(SpatialShiftGammaCoeffRadius)),ToReal(1)))); @@ -275,60 +274,23 @@ static void ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * re else { dtbetaxL = - kmul(theta,kmul(kadd(Xt1L,kmadd(beta1L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B1L,Xt1L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt1L,kmadd(ksub(B1L,Xt1L),ToReal(ShiftBCoeff),kmul(beta1L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); dtbetayL = - kmul(theta,kmul(kadd(Xt2L,kmadd(beta2L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B2L,Xt2L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt2L,kmadd(ksub(B2L,Xt2L),ToReal(ShiftBCoeff),kmul(beta2L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); dtbetazL = - kmul(theta,kmul(kadd(Xt3L,kmadd(beta3L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B3L,Xt3L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count); - continue; + kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); } - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count); - break; - } - vec_store_nta(dtalp[index],dtalpL); - vec_store_nta(dtbetax[index],dtbetaxL); - vec_store_nta(dtbetay[index],dtbetayL); - vec_store_nta(dtbetaz[index],dtbetazL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(dtalp[index],dtalpL); + vec_store_nta_partial(dtbetax[index],dtbetaxL); + vec_store_nta_partial(dtbetay[index],dtbetayL); + vec_store_nta_partial(dtbetaz[index],dtbetazL); } - LC_ENDLOOP3VEC (ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary); + LC_ENDLOOP3VEC(ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary); } extern "C" void ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary(CCTK_ARGUMENTS) @@ -347,7 +309,17 @@ extern "C" void ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN_UPW::ML_dtlapse","ML_BSSN_UPW::ML_dtshift","ML_BSSN_UPW::ML_Gamma","ML_BSSN_UPW::ML_lapse","ML_BSSN_UPW::ML_shift","ML_BSSN_UPW::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_UPW::ML_dtlapse", + "ML_BSSN_UPW::ML_dtshift", + "ML_BSSN_UPW::ML_Gamma", + "ML_BSSN_UPW::ML_lapse", + "ML_BSSN_UPW::ML_shift", + "ML_BSSN_UPW::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary", 10, groups); switch(fdOrder) @@ -365,7 +337,7 @@ extern "C" void ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverBoundaryWithGhosts(cctkGH, &ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary_Body); + GenericFD_LoopOverBoundaryWithGhosts(cctkGH, ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary_Body); if (verbose > 1) { diff --git a/ML_BSSN_UPW/src/ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift.cc b/ML_BSSN_UPW/src/ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift.cc index 2b951cf..d33767f 100644 --- a/ML_BSSN_UPW/src/ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift.cc +++ b/ML_BSSN_UPW/src/ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift_Body(cGH const * restri DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift_Body(cGH const * restri CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift_Body(cGH const * restri CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift_Body(cGH const * restri CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift_Body(cGH const * restri /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift, + LC_LOOP3VEC(ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -237,7 +236,7 @@ static void ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift_Body(cGH const * restri /* Calculate temporaries and grid functions */ CCTK_REAL_VEC eta = - kfmin(ToReal(1),kmul(INV(rL),ToReal(SpatialBetaDriverRadius))); + kmul(INV(kfmax(rL,ToReal(SpatialBetaDriverRadius))),ToReal(SpatialBetaDriverRadius)); CCTK_REAL_VEC theta = kfmin(ToReal(1),kexp(knmsub(rL,INV(ToReal(SpatialShiftGammaCoeffRadius)),ToReal(1)))); @@ -260,60 +259,23 @@ static void ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift_Body(cGH const * restri else { dtbetaxL = - kmul(theta,kmul(kadd(Xt1L,kmadd(beta1L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B1L,Xt1L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt1L,kmadd(ksub(B1L,Xt1L),ToReal(ShiftBCoeff),kmul(beta1L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); dtbetayL = - kmul(theta,kmul(kadd(Xt2L,kmadd(beta2L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B2L,Xt2L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + kmul(theta,kmul(kadd(Xt2L,kmadd(ksub(B2L,Xt2L),ToReal(ShiftBCoeff),kmul(beta2L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); dtbetazL = - kmul(theta,kmul(kadd(Xt3L,kmadd(beta3L,kmul(eta,ToReal(BetaDriver*(-1 + - ShiftBCoeff))),kmul(ksub(B3L,Xt3L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count); - continue; + kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); } - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count); - break; - } - vec_store_nta(dtalp[index],dtalpL); - vec_store_nta(dtbetax[index],dtbetaxL); - vec_store_nta(dtbetay[index],dtbetayL); - vec_store_nta(dtbetaz[index],dtbetazL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(dtalp[index],dtalpL); + vec_store_nta_partial(dtbetax[index],dtbetaxL); + vec_store_nta_partial(dtbetay[index],dtbetayL); + vec_store_nta_partial(dtbetaz[index],dtbetazL); } - LC_ENDLOOP3VEC (ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift); + LC_ENDLOOP3VEC(ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift); } extern "C" void ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS) @@ -332,7 +294,17 @@ extern "C" void ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN_UPW::ML_dtlapse","ML_BSSN_UPW::ML_dtshift","ML_BSSN_UPW::ML_Gamma","ML_BSSN_UPW::ML_lapse","ML_BSSN_UPW::ML_shift","ML_BSSN_UPW::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_UPW::ML_dtlapse", + "ML_BSSN_UPW::ML_dtshift", + "ML_BSSN_UPW::ML_Gamma", + "ML_BSSN_UPW::ML_lapse", + "ML_BSSN_UPW::ML_shift", + "ML_BSSN_UPW::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift", 10, groups); switch(fdOrder) @@ -350,7 +322,7 @@ extern "C" void ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift_Body); if (verbose > 1) { diff --git a/ML_BSSN_UPW/src/ML_BSSN_UPW_enforce.cc b/ML_BSSN_UPW/src/ML_BSSN_UPW_enforce.cc index 83275eb..2f87e25 100644 --- a/ML_BSSN_UPW/src/ML_BSSN_UPW_enforce.cc +++ b/ML_BSSN_UPW/src/ML_BSSN_UPW_enforce.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" #include "vectors.h" @@ -28,8 +29,6 @@ static void ML_BSSN_UPW_enforce_Body(cGH const * restrict const cctkGH, int cons DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -66,9 +65,9 @@ static void ML_BSSN_UPW_enforce_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -87,14 +86,14 @@ static void ML_BSSN_UPW_enforce_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -108,9 +107,9 @@ static void ML_BSSN_UPW_enforce_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -191,7 +190,7 @@ static void ML_BSSN_UPW_enforce_Body(cGH const * restrict const cctkGH, int cons /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_UPW_enforce, + LC_LOOP3VEC(ML_BSSN_UPW_enforce, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -238,7 +237,8 @@ static void ML_BSSN_UPW_enforce_Body(cGH const * restrict const cctkGH, int cons /* Calculate temporaries and grid functions */ CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); + CCTK_REAL_VEC gtu11 = + kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); CCTK_REAL_VEC gtu12 = kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); @@ -246,12 +246,14 @@ static void ML_BSSN_UPW_enforce_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC gtu13 = kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); + CCTK_REAL_VEC gtu22 = + kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); CCTK_REAL_VEC gtu23 = kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); + CCTK_REAL_VEC gtu33 = + kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); CCTK_REAL_VEC trAt = kmadd(At11L,gtu11,kmadd(At22L,gtu22,kmadd(At33L,gtu33,kmul(kmadd(At12L,gtu12,kmadd(At13L,gtu13,kmul(At23L,gtu23))),ToReal(2))))); @@ -276,60 +278,17 @@ static void ML_BSSN_UPW_enforce_Body(cGH const * restrict const cctkGH, int cons alphaL = kfmax(alphaL,ToReal(MinimumLapse)); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - break; - } - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); } - LC_ENDLOOP3VEC (ML_BSSN_UPW_enforce); + LC_ENDLOOP3VEC(ML_BSSN_UPW_enforce); } extern "C" void ML_BSSN_UPW_enforce(CCTK_ARGUMENTS) @@ -348,7 +307,10 @@ extern "C" void ML_BSSN_UPW_enforce(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_UPW::ML_curv","ML_BSSN_UPW::ML_lapse","ML_BSSN_UPW::ML_metric"}; + const char *const groups[] = { + "ML_BSSN_UPW::ML_curv", + "ML_BSSN_UPW::ML_lapse", + "ML_BSSN_UPW::ML_metric"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_UPW_enforce", 3, groups); switch(fdOrder) @@ -366,7 +328,7 @@ extern "C" void ML_BSSN_UPW_enforce(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_UPW_enforce_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_UPW_enforce_Body); if (verbose > 1) { diff --git a/ML_BSSN_UPW/src/make.code.defn b/ML_BSSN_UPW/src/make.code.defn index ba8a337..f858947 100644 --- a/ML_BSSN_UPW/src/make.code.defn +++ b/ML_BSSN_UPW/src/make.code.defn @@ -1,3 +1,3 @@ # File produced by Kranc -SRCS = Startup.cc RegisterMoL.cc RegisterSymmetries.cc ML_BSSN_UPW_Minkowski.cc ML_BSSN_UPW_convertFromADMBase.cc ML_BSSN_UPW_InitGamma.cc ML_BSSN_UPW_convertFromADMBaseGamma.cc ML_BSSN_UPW_RHS1.cc ML_BSSN_UPW_RHS2.cc ML_BSSN_UPW_Dissipation.cc ML_BSSN_UPW_Advect.cc ML_BSSN_UPW_InitRHS.cc ML_BSSN_UPW_RHSStaticBoundary.cc ML_BSSN_UPW_enforce.cc ML_BSSN_UPW_boundary.cc ML_BSSN_UPW_convertToADMBase.cc ML_BSSN_UPW_convertToADMBaseDtLapseShift.cc ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary.cc ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift.cc ML_BSSN_UPW_constraints1.cc ML_BSSN_UPW_constraints2.cc Boundaries.cc +SRCS = Startup.cc RegisterSymmetries.cc RegisterMoL.cc ML_BSSN_UPW_Minkowski.cc ML_BSSN_UPW_convertFromADMBase.cc ML_BSSN_UPW_InitGamma.cc ML_BSSN_UPW_convertFromADMBaseGamma.cc ML_BSSN_UPW_RHS1.cc ML_BSSN_UPW_RHS2.cc ML_BSSN_UPW_Dissipation.cc ML_BSSN_UPW_Advect.cc ML_BSSN_UPW_InitRHS.cc ML_BSSN_UPW_RHSStaticBoundary.cc ML_BSSN_UPW_enforce.cc ML_BSSN_UPW_boundary.cc ML_BSSN_UPW_convertToADMBase.cc ML_BSSN_UPW_convertToADMBaseDtLapseShift.cc ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary.cc ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift.cc ML_BSSN_UPW_constraints1.cc ML_BSSN_UPW_constraints2.cc Boundaries.cc diff --git a/ML_WaveToy/configuration.ccl b/ML_WaveToy/configuration.ccl index 8e2c3c5..0a66ec2 100644 --- a/ML_WaveToy/configuration.ccl +++ b/ML_WaveToy/configuration.ccl @@ -1,4 +1,6 @@ # File produced by Kranc REQUIRES GenericFD -REQUIRES LoopControl +OPTIONAL LoopControl +{ +} diff --git a/ML_WaveToy/param.ccl b/ML_WaveToy/param.ccl index 34e3124..dec4281 100644 --- a/ML_WaveToy/param.ccl +++ b/ML_WaveToy/param.ccl @@ -41,6 +41,12 @@ CCTK_INT rhs_timelevels "Number of active RHS timelevels" STEERABLE=RECOVER } 1 restricted: +CCTK_INT other_timelevels "Number of active timelevels for non-evolved grid functions" STEERABLE=RECOVER +{ + 0:2 :: "" +} 1 + +restricted: CCTK_INT WT_Gaussian_calc_every "WT_Gaussian_calc_every" STEERABLE=ALWAYS { *:* :: "" diff --git a/ML_WaveToy/schedule.ccl b/ML_WaveToy/schedule.ccl index 9517af0..20b055e 100644 --- a/ML_WaveToy/schedule.ccl +++ b/ML_WaveToy/schedule.ccl @@ -43,12 +43,6 @@ schedule ML_WaveToy_Startup at STARTUP OPTIONS: meta } "create banner" -schedule ML_WaveToy_RegisterVars in MoL_Register -{ - LANG: C - OPTIONS: meta -} "Register Variables for MoL" - schedule ML_WaveToy_RegisterSymmetries in SymmetryRegister { LANG: C @@ -58,11 +52,17 @@ schedule ML_WaveToy_RegisterSymmetries in SymmetryRegister schedule WT_Gaussian AT initial { LANG: C + WRITES: ML_WaveToy::WT_rho + WRITES: ML_WaveToy::WT_u } "WT_Gaussian" schedule WT_RHS IN MoL_CalcRHS { LANG: C + READS: ML_WaveToy::WT_rho + READS: ML_WaveToy::WT_u + WRITES: ML_WaveToy::WT_rhorhs + WRITES: ML_WaveToy::WT_urhs } "WT_RHS" schedule WT_RHS AT analysis @@ -70,6 +70,10 @@ schedule WT_RHS AT analysis LANG: C SYNC: WT_rhorhs SYNC: WT_urhs + READS: ML_WaveToy::WT_rho + READS: ML_WaveToy::WT_u + WRITES: ML_WaveToy::WT_rhorhs + WRITES: ML_WaveToy::WT_urhs } "WT_RHS" schedule ML_WaveToy_SelectBoundConds in MoL_PostStep @@ -86,6 +90,12 @@ schedule ML_WaveToy_CheckBoundaries at BASEGRID OPTIONS: meta } "check boundaries treatment" +schedule ML_WaveToy_RegisterVars in MoL_Register +{ + LANG: C + OPTIONS: meta +} "Register Variables for MoL" + schedule group ApplyBCs as ML_WaveToy_ApplyBCs in MoL_PostStep after ML_WaveToy_SelectBoundConds { # no language specified diff --git a/ML_WaveToy/src/WT_Gaussian.cc b/ML_WaveToy/src/WT_Gaussian.cc index e6faf2f..c88259e 100644 --- a/ML_WaveToy/src/WT_Gaussian.cc +++ b/ML_WaveToy/src/WT_Gaussian.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" /* Define macros used in calculations */ @@ -27,8 +28,6 @@ static void WT_Gaussian_Body(cGH const * restrict const cctkGH, int const dir, i DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -59,9 +58,9 @@ static void WT_Gaussian_Body(cGH const * restrict const cctkGH, int const dir, i CCTK_REAL const p1o12dx = 0.0833333333333333333333333333333*INV(dx); CCTK_REAL const p1o12dy = 0.0833333333333333333333333333333*INV(dy); CCTK_REAL const p1o12dz = 0.0833333333333333333333333333333*INV(dz); - CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx)*INV(dy); - CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx)*INV(dz); - CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy)*INV(dz); + CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx*dy); + CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx*dz); + CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy*dz); CCTK_REAL const pm1o12dx2 = -0.0833333333333333333333333333333*INV(SQR(dx)); CCTK_REAL const pm1o12dy2 = -0.0833333333333333333333333333333*INV(SQR(dy)); CCTK_REAL const pm1o12dz2 = -0.0833333333333333333333333333333*INV(SQR(dz)); @@ -76,7 +75,7 @@ static void WT_Gaussian_Body(cGH const * restrict const cctkGH, int const dir, i /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3 (WT_Gaussian, + CCTK_LOOP3(WT_Gaussian, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2]) { @@ -99,7 +98,7 @@ static void WT_Gaussian_Body(cGH const * restrict const cctkGH, int const dir, i rho[index] = rhoL; u[index] = uL; } - LC_ENDLOOP3 (WT_Gaussian); + CCTK_ENDLOOP3(WT_Gaussian); } extern "C" void WT_Gaussian(CCTK_ARGUMENTS) @@ -118,11 +117,13 @@ extern "C" void WT_Gaussian(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_WaveToy::WT_rho","ML_WaveToy::WT_u"}; + const char *const groups[] = { + "ML_WaveToy::WT_rho", + "ML_WaveToy::WT_u"}; GenericFD_AssertGroupStorage(cctkGH, "WT_Gaussian", 2, groups); - GenericFD_LoopOverEverything(cctkGH, &WT_Gaussian_Body); + GenericFD_LoopOverEverything(cctkGH, WT_Gaussian_Body); if (verbose > 1) { diff --git a/ML_WaveToy/src/WT_RHS.cc b/ML_WaveToy/src/WT_RHS.cc index 01cb50e..2dd39f9 100644 --- a/ML_WaveToy/src/WT_RHS.cc +++ b/ML_WaveToy/src/WT_RHS.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" /* Define macros used in calculations */ @@ -42,8 +43,6 @@ static void WT_RHS_Body(cGH const * restrict const cctkGH, int const dir, int co DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -74,9 +73,9 @@ static void WT_RHS_Body(cGH const * restrict const cctkGH, int const dir, int co CCTK_REAL const p1o12dx = 0.0833333333333333333333333333333*INV(dx); CCTK_REAL const p1o12dy = 0.0833333333333333333333333333333*INV(dy); CCTK_REAL const p1o12dz = 0.0833333333333333333333333333333*INV(dz); - CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx)*INV(dy); - CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx)*INV(dz); - CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy)*INV(dz); + CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx*dy); + CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx*dz); + CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy*dz); CCTK_REAL const pm1o12dx2 = -0.0833333333333333333333333333333*INV(SQR(dx)); CCTK_REAL const pm1o12dy2 = -0.0833333333333333333333333333333*INV(SQR(dy)); CCTK_REAL const pm1o12dz2 = -0.0833333333333333333333333333333*INV(SQR(dz)); @@ -91,7 +90,7 @@ static void WT_RHS_Body(cGH const * restrict const cctkGH, int const dir, int co /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3 (WT_RHS, + CCTK_LOOP3(WT_RHS, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2]) { @@ -120,7 +119,7 @@ static void WT_RHS_Body(cGH const * restrict const cctkGH, int const dir, int co rhorhs[index] = rhorhsL; urhs[index] = urhsL; } - LC_ENDLOOP3 (WT_RHS); + CCTK_ENDLOOP3(WT_RHS); } extern "C" void WT_RHS(CCTK_ARGUMENTS) @@ -139,12 +138,16 @@ extern "C" void WT_RHS(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_WaveToy::WT_rho","ML_WaveToy::WT_rhorhs","ML_WaveToy::WT_u","ML_WaveToy::WT_urhs"}; + const char *const groups[] = { + "ML_WaveToy::WT_rho", + "ML_WaveToy::WT_rhorhs", + "ML_WaveToy::WT_u", + "ML_WaveToy::WT_urhs"}; GenericFD_AssertGroupStorage(cctkGH, "WT_RHS", 4, groups); GenericFD_EnsureStencilFits(cctkGH, "WT_RHS", 2, 2, 2); - GenericFD_LoopOverInterior(cctkGH, &WT_RHS_Body); + GenericFD_LoopOverInterior(cctkGH, WT_RHS_Body); if (verbose > 1) { diff --git a/ML_WaveToy/src/make.code.defn b/ML_WaveToy/src/make.code.defn index fbd9e15..a36e2d9 100644 --- a/ML_WaveToy/src/make.code.defn +++ b/ML_WaveToy/src/make.code.defn @@ -1,3 +1,3 @@ # File produced by Kranc -SRCS = Startup.cc RegisterMoL.cc RegisterSymmetries.cc WT_Gaussian.cc WT_RHS.cc Boundaries.cc +SRCS = Startup.cc RegisterSymmetries.cc RegisterMoL.cc WT_Gaussian.cc WT_RHS.cc Boundaries.cc diff --git a/ML_hydro/configuration.ccl b/ML_hydro/configuration.ccl index 8e2c3c5..0a66ec2 100644 --- a/ML_hydro/configuration.ccl +++ b/ML_hydro/configuration.ccl @@ -1,4 +1,6 @@ # File produced by Kranc REQUIRES GenericFD -REQUIRES LoopControl +OPTIONAL LoopControl +{ +} diff --git a/ML_hydro/param.ccl b/ML_hydro/param.ccl index 957d11a..be6c2a8 100644 --- a/ML_hydro/param.ccl +++ b/ML_hydro/param.ccl @@ -78,6 +78,12 @@ CCTK_INT rhs_timelevels "Number of active RHS timelevels" STEERABLE=RECOVER } 1 restricted: +CCTK_INT other_timelevels "Number of active timelevels for non-evolved grid functions" STEERABLE=RECOVER +{ + 0:3 :: "" +} 1 + +restricted: CCTK_INT hydro_vacuum_calc_every "hydro_vacuum_calc_every" STEERABLE=ALWAYS { *:* :: "" diff --git a/ML_hydro/schedule.ccl b/ML_hydro/schedule.ccl index 78099ec..055b0ff 100644 --- a/ML_hydro/schedule.ccl +++ b/ML_hydro/schedule.ccl @@ -1,21 +1,45 @@ # File produced by Kranc -STORAGE: eneflux_group[1] +if (other_timelevels == 1) +{ + STORAGE: eneflux_group[1] +} -STORAGE: eps_group[1] +if (other_timelevels == 1) +{ + STORAGE: eps_group[1] +} -STORAGE: massflux_group[1] +if (other_timelevels == 1) +{ + STORAGE: massflux_group[1] +} -STORAGE: momflux_group[1] +if (other_timelevels == 1) +{ + STORAGE: momflux_group[1] +} -STORAGE: press_group[1] +if (other_timelevels == 1) +{ + STORAGE: press_group[1] +} -STORAGE: rho_group[1] +if (other_timelevels == 1) +{ + STORAGE: rho_group[1] +} -STORAGE: vel_group[1] +if (other_timelevels == 1) +{ + STORAGE: vel_group[1] +} -STORAGE: vol_group[1] +if (other_timelevels == 1) +{ + STORAGE: vol_group[1] +} if (timelevels == 1) { @@ -101,12 +125,6 @@ schedule ML_hydro_Startup at STARTUP OPTIONS: meta } "create banner" -schedule ML_hydro_RegisterVars in MoL_Register -{ - LANG: C - OPTIONS: meta -} "Register Variables for MoL" - schedule ML_hydro_RegisterSymmetries in SymmetryRegister { LANG: C @@ -119,6 +137,9 @@ if (CCTK_EQUALS(initial_data, "vacuum")) schedule hydro_vacuum IN ADMBase_InitialData { LANG: C + WRITES: ML_hydro::eps_group + WRITES: ML_hydro::rho_group + WRITES: ML_hydro::vel_group } "hydro_vacuum" } @@ -128,17 +149,41 @@ if (CCTK_EQUALS(initial_data, "sound wave")) schedule hydro_soundWave IN ADMBase_InitialData { LANG: C + READS: grid::coordinates + WRITES: ML_hydro::eps_group + WRITES: ML_hydro::rho_group + WRITES: ML_hydro::vel_group } "hydro_soundWave" } schedule hydro_prim2con AT initial AFTER ADMBase_PostInitial { LANG: C + READS: ML_hydro::eps_group + READS: ML_hydro::mass_group + READS: ML_hydro::rho_group + READS: ML_hydro::vel_group + READS: ML_hydro::vol_group + WRITES: ML_hydro::ene_group + WRITES: ML_hydro::mass_group + WRITES: ML_hydro::mom_group + WRITES: ML_hydro::vol_group } "hydro_prim2con" schedule hydro_con2prim IN hydro_con2primGroup { LANG: C + READS: ML_hydro::ene_group + READS: ML_hydro::eps_group + READS: ML_hydro::mass_group + READS: ML_hydro::mom_group + READS: ML_hydro::rho_group + READS: ML_hydro::vel_group + READS: ML_hydro::vol_group + WRITES: ML_hydro::eps_group + WRITES: ML_hydro::press_group + WRITES: ML_hydro::rho_group + WRITES: ML_hydro::vel_group } "hydro_con2prim" schedule hydro_RHS IN hydro_evolCalcGroup AFTER hydro_fluxes @@ -147,6 +192,12 @@ schedule hydro_RHS IN hydro_evolCalcGroup AFTER hydro_fluxes SYNC: ene_grouprhs SYNC: mass_grouprhs SYNC: mom_grouprhs + READS: ML_hydro::eneflux_group + READS: ML_hydro::massflux_group + READS: ML_hydro::momflux_group + WRITES: ML_hydro::ene_grouprhs + WRITES: ML_hydro::mass_grouprhs + WRITES: ML_hydro::mom_grouprhs } "hydro_RHS" schedule ML_hydro_SelectBoundConds in MoL_PostStep @@ -164,6 +215,12 @@ schedule ML_hydro_CheckBoundaries at BASEGRID OPTIONS: meta } "check boundaries treatment" +schedule ML_hydro_RegisterVars in MoL_Register +{ + LANG: C + OPTIONS: meta +} "Register Variables for MoL" + schedule group ApplyBCs as ML_hydro_ApplyBCs in MoL_PostStep after ML_hydro_SelectBoundConds { # no language specified diff --git a/ML_hydro/src/hydro_RHS.cc b/ML_hydro/src/hydro_RHS.cc index c4d92bf..3f41d0f 100644 --- a/ML_hydro/src/hydro_RHS.cc +++ b/ML_hydro/src/hydro_RHS.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" /* Define macros used in calculations */ @@ -45,8 +46,6 @@ static void hydro_RHS_Body(cGH const * restrict const cctkGH, int const dir, int DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -77,9 +76,9 @@ static void hydro_RHS_Body(cGH const * restrict const cctkGH, int const dir, int CCTK_REAL const p1o2dx = 0.5*INV(dx); CCTK_REAL const p1o2dy = 0.5*INV(dy); CCTK_REAL const p1o2dz = 0.5*INV(dz); - CCTK_REAL const p1o4dxdy = 0.25*INV(dx)*INV(dy); - CCTK_REAL const p1o4dxdz = 0.25*INV(dx)*INV(dz); - CCTK_REAL const p1o4dydz = 0.25*INV(dy)*INV(dz); + CCTK_REAL const p1o4dxdy = 0.25*INV(dx*dy); + CCTK_REAL const p1o4dxdz = 0.25*INV(dx*dz); + CCTK_REAL const p1o4dydz = 0.25*INV(dy*dz); CCTK_REAL const p1odx2 = INV(SQR(dx)); CCTK_REAL const p1ody2 = INV(SQR(dy)); CCTK_REAL const p1odz2 = INV(SQR(dz)); @@ -94,7 +93,7 @@ static void hydro_RHS_Body(cGH const * restrict const cctkGH, int const dir, int /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3 (hydro_RHS, + CCTK_LOOP3(hydro_RHS, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2]) { @@ -161,7 +160,7 @@ static void hydro_RHS_Body(cGH const * restrict const cctkGH, int const dir, int mom2rhs[index] = mom2rhsL; mom3rhs[index] = mom3rhsL; } - LC_ENDLOOP3 (hydro_RHS); + CCTK_ENDLOOP3(hydro_RHS); } extern "C" void hydro_RHS(CCTK_ARGUMENTS) @@ -180,12 +179,18 @@ extern "C" void hydro_RHS(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_hydro::eneflux_group","ML_hydro::ene_grouprhs","ML_hydro::massflux_group","ML_hydro::mass_grouprhs","ML_hydro::momflux_group","ML_hydro::mom_grouprhs"}; + const char *const groups[] = { + "ML_hydro::eneflux_group", + "ML_hydro::ene_grouprhs", + "ML_hydro::massflux_group", + "ML_hydro::mass_grouprhs", + "ML_hydro::momflux_group", + "ML_hydro::mom_grouprhs"}; GenericFD_AssertGroupStorage(cctkGH, "hydro_RHS", 6, groups); GenericFD_EnsureStencilFits(cctkGH, "hydro_RHS", 1, 1, 1); - GenericFD_LoopOverInterior(cctkGH, &hydro_RHS_Body); + GenericFD_LoopOverInterior(cctkGH, hydro_RHS_Body); if (verbose > 1) { diff --git a/ML_hydro/src/hydro_con2prim.cc b/ML_hydro/src/hydro_con2prim.cc index f9e9188..716321d 100644 --- a/ML_hydro/src/hydro_con2prim.cc +++ b/ML_hydro/src/hydro_con2prim.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" /* Define macros used in calculations */ @@ -27,8 +28,6 @@ static void hydro_con2prim_Body(cGH const * restrict const cctkGH, int const dir DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -59,9 +58,9 @@ static void hydro_con2prim_Body(cGH const * restrict const cctkGH, int const dir CCTK_REAL const p1o2dx = 0.5*INV(dx); CCTK_REAL const p1o2dy = 0.5*INV(dy); CCTK_REAL const p1o2dz = 0.5*INV(dz); - CCTK_REAL const p1o4dxdy = 0.25*INV(dx)*INV(dy); - CCTK_REAL const p1o4dxdz = 0.25*INV(dx)*INV(dz); - CCTK_REAL const p1o4dydz = 0.25*INV(dy)*INV(dz); + CCTK_REAL const p1o4dxdy = 0.25*INV(dx*dy); + CCTK_REAL const p1o4dxdz = 0.25*INV(dx*dz); + CCTK_REAL const p1o4dydz = 0.25*INV(dy*dz); CCTK_REAL const p1odx2 = INV(SQR(dx)); CCTK_REAL const p1ody2 = INV(SQR(dy)); CCTK_REAL const p1odz2 = INV(SQR(dz)); @@ -76,7 +75,7 @@ static void hydro_con2prim_Body(cGH const * restrict const cctkGH, int const dir /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3 (hydro_con2prim, + CCTK_LOOP3(hydro_con2prim, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2]) { @@ -110,8 +109,8 @@ static void hydro_con2prim_Body(cGH const * restrict const cctkGH, int const dir vel3L = mom3L*INV(massL); - epsL = 0.5*INV(massL)*(2*eneL - massL*(SQR(vel1L) + SQR(vel2L) + - SQR(vel3L))); + epsL = 0.5*INV(massL)*(2*eneL - massL*(SQR(vel1L) + + SQR(vel2L) + SQR(vel3L))); CCTK_REAL pressL = epsL*rhoL*ToReal(Gamma); @@ -123,7 +122,7 @@ static void hydro_con2prim_Body(cGH const * restrict const cctkGH, int const dir vel2[index] = vel2L; vel3[index] = vel3L; } - LC_ENDLOOP3 (hydro_con2prim); + CCTK_ENDLOOP3(hydro_con2prim); } extern "C" void hydro_con2prim(CCTK_ARGUMENTS) @@ -142,11 +141,19 @@ extern "C" void hydro_con2prim(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_hydro::ene_group","ML_hydro::eps_group","ML_hydro::mass_group","ML_hydro::mom_group","ML_hydro::press_group","ML_hydro::rho_group","ML_hydro::vel_group","ML_hydro::vol_group"}; + const char *const groups[] = { + "ML_hydro::ene_group", + "ML_hydro::eps_group", + "ML_hydro::mass_group", + "ML_hydro::mom_group", + "ML_hydro::press_group", + "ML_hydro::rho_group", + "ML_hydro::vel_group", + "ML_hydro::vol_group"}; GenericFD_AssertGroupStorage(cctkGH, "hydro_con2prim", 8, groups); - GenericFD_LoopOverEverything(cctkGH, &hydro_con2prim_Body); + GenericFD_LoopOverEverything(cctkGH, hydro_con2prim_Body); if (verbose > 1) { diff --git a/ML_hydro/src/hydro_prim2con.cc b/ML_hydro/src/hydro_prim2con.cc index 7c33d60..d9aa747 100644 --- a/ML_hydro/src/hydro_prim2con.cc +++ b/ML_hydro/src/hydro_prim2con.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" /* Define macros used in calculations */ @@ -27,8 +28,6 @@ static void hydro_prim2con_Body(cGH const * restrict const cctkGH, int const dir DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -59,9 +58,9 @@ static void hydro_prim2con_Body(cGH const * restrict const cctkGH, int const dir CCTK_REAL const p1o2dx = 0.5*INV(dx); CCTK_REAL const p1o2dy = 0.5*INV(dy); CCTK_REAL const p1o2dz = 0.5*INV(dz); - CCTK_REAL const p1o4dxdy = 0.25*INV(dx)*INV(dy); - CCTK_REAL const p1o4dxdz = 0.25*INV(dx)*INV(dz); - CCTK_REAL const p1o4dydz = 0.25*INV(dy)*INV(dz); + CCTK_REAL const p1o4dxdy = 0.25*INV(dx*dy); + CCTK_REAL const p1o4dxdz = 0.25*INV(dx*dz); + CCTK_REAL const p1o4dydz = 0.25*INV(dy*dz); CCTK_REAL const p1odx2 = INV(SQR(dx)); CCTK_REAL const p1ody2 = INV(SQR(dy)); CCTK_REAL const p1odz2 = INV(SQR(dz)); @@ -76,7 +75,7 @@ static void hydro_prim2con_Body(cGH const * restrict const cctkGH, int const dir /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3 (hydro_prim2con, + CCTK_LOOP3(hydro_prim2con, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2]) { @@ -119,7 +118,7 @@ static void hydro_prim2con_Body(cGH const * restrict const cctkGH, int const dir mom3[index] = mom3L; vol[index] = volL; } - LC_ENDLOOP3 (hydro_prim2con); + CCTK_ENDLOOP3(hydro_prim2con); } extern "C" void hydro_prim2con(CCTK_ARGUMENTS) @@ -138,11 +137,18 @@ extern "C" void hydro_prim2con(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_hydro::ene_group","ML_hydro::eps_group","ML_hydro::mass_group","ML_hydro::mom_group","ML_hydro::rho_group","ML_hydro::vel_group","ML_hydro::vol_group"}; + const char *const groups[] = { + "ML_hydro::ene_group", + "ML_hydro::eps_group", + "ML_hydro::mass_group", + "ML_hydro::mom_group", + "ML_hydro::rho_group", + "ML_hydro::vel_group", + "ML_hydro::vol_group"}; GenericFD_AssertGroupStorage(cctkGH, "hydro_prim2con", 7, groups); - GenericFD_LoopOverEverything(cctkGH, &hydro_prim2con_Body); + GenericFD_LoopOverEverything(cctkGH, hydro_prim2con_Body); if (verbose > 1) { diff --git a/ML_hydro/src/hydro_soundWave.cc b/ML_hydro/src/hydro_soundWave.cc index ca8be3d..64f5d15 100644 --- a/ML_hydro/src/hydro_soundWave.cc +++ b/ML_hydro/src/hydro_soundWave.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" /* Define macros used in calculations */ @@ -27,8 +28,6 @@ static void hydro_soundWave_Body(cGH const * restrict const cctkGH, int const di DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -59,9 +58,9 @@ static void hydro_soundWave_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL const p1o2dx = 0.5*INV(dx); CCTK_REAL const p1o2dy = 0.5*INV(dy); CCTK_REAL const p1o2dz = 0.5*INV(dz); - CCTK_REAL const p1o4dxdy = 0.25*INV(dx)*INV(dy); - CCTK_REAL const p1o4dxdz = 0.25*INV(dx)*INV(dz); - CCTK_REAL const p1o4dydz = 0.25*INV(dy)*INV(dz); + CCTK_REAL const p1o4dxdy = 0.25*INV(dx*dy); + CCTK_REAL const p1o4dxdz = 0.25*INV(dx*dz); + CCTK_REAL const p1o4dydz = 0.25*INV(dy*dz); CCTK_REAL const p1odx2 = INV(SQR(dx)); CCTK_REAL const p1ody2 = INV(SQR(dy)); CCTK_REAL const p1odz2 = INV(SQR(dz)); @@ -76,7 +75,7 @@ static void hydro_soundWave_Body(cGH const * restrict const cctkGH, int const di /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3 (hydro_soundWave, + CCTK_LOOP3(hydro_soundWave, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2]) { @@ -94,11 +93,11 @@ static void hydro_soundWave_Body(cGH const * restrict const cctkGH, int const di /* Calculate temporaries and grid functions */ CCTK_REAL rhoL = 1.; - CCTK_REAL vel1L = Sin(2*Pi*xL*INV(ToReal(L)))*ToReal(A); + CCTK_REAL vel1L = sin(2*xL*Pi*INV(ToReal(L)))*ToReal(A); - CCTK_REAL vel2L = Sin(2*Pi*xL*INV(ToReal(L)))*ToReal(A); + CCTK_REAL vel2L = sin(2*xL*Pi*INV(ToReal(L)))*ToReal(A); - CCTK_REAL vel3L = Sin(2*Pi*xL*INV(ToReal(L)))*ToReal(A); + CCTK_REAL vel3L = sin(2*xL*Pi*INV(ToReal(L)))*ToReal(A); CCTK_REAL epsL = 1.; @@ -109,7 +108,7 @@ static void hydro_soundWave_Body(cGH const * restrict const cctkGH, int const di vel2[index] = vel2L; vel3[index] = vel3L; } - LC_ENDLOOP3 (hydro_soundWave); + CCTK_ENDLOOP3(hydro_soundWave); } extern "C" void hydro_soundWave(CCTK_ARGUMENTS) @@ -128,11 +127,15 @@ extern "C" void hydro_soundWave(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_hydro::eps_group","grid::coordinates","ML_hydro::rho_group","ML_hydro::vel_group"}; + const char *const groups[] = { + "ML_hydro::eps_group", + "grid::coordinates", + "ML_hydro::rho_group", + "ML_hydro::vel_group"}; GenericFD_AssertGroupStorage(cctkGH, "hydro_soundWave", 4, groups); - GenericFD_LoopOverEverything(cctkGH, &hydro_soundWave_Body); + GenericFD_LoopOverEverything(cctkGH, hydro_soundWave_Body); if (verbose > 1) { diff --git a/ML_hydro/src/hydro_vacuum.cc b/ML_hydro/src/hydro_vacuum.cc index f2299c6..5878b58 100644 --- a/ML_hydro/src/hydro_vacuum.cc +++ b/ML_hydro/src/hydro_vacuum.cc @@ -12,6 +12,7 @@ #include "cctk_Parameters.h" #include "GenericFD.h" #include "Differencing.h" +#include "cctk_Loop.h" #include "loopcontrol.h" /* Define macros used in calculations */ @@ -27,8 +28,6 @@ static void hydro_vacuum_Body(cGH const * restrict const cctkGH, int const dir, DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -59,9 +58,9 @@ static void hydro_vacuum_Body(cGH const * restrict const cctkGH, int const dir, CCTK_REAL const p1o2dx = 0.5*INV(dx); CCTK_REAL const p1o2dy = 0.5*INV(dy); CCTK_REAL const p1o2dz = 0.5*INV(dz); - CCTK_REAL const p1o4dxdy = 0.25*INV(dx)*INV(dy); - CCTK_REAL const p1o4dxdz = 0.25*INV(dx)*INV(dz); - CCTK_REAL const p1o4dydz = 0.25*INV(dy)*INV(dz); + CCTK_REAL const p1o4dxdy = 0.25*INV(dx*dy); + CCTK_REAL const p1o4dxdz = 0.25*INV(dx*dz); + CCTK_REAL const p1o4dydz = 0.25*INV(dy*dz); CCTK_REAL const p1odx2 = INV(SQR(dx)); CCTK_REAL const p1ody2 = INV(SQR(dy)); CCTK_REAL const p1odz2 = INV(SQR(dz)); @@ -76,7 +75,7 @@ static void hydro_vacuum_Body(cGH const * restrict const cctkGH, int const dir, /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3 (hydro_vacuum, + CCTK_LOOP3(hydro_vacuum, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2]) { @@ -108,7 +107,7 @@ static void hydro_vacuum_Body(cGH const * restrict const cctkGH, int const dir, vel2[index] = vel2L; vel3[index] = vel3L; } - LC_ENDLOOP3 (hydro_vacuum); + CCTK_ENDLOOP3(hydro_vacuum); } extern "C" void hydro_vacuum(CCTK_ARGUMENTS) @@ -127,11 +126,14 @@ extern "C" void hydro_vacuum(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_hydro::eps_group","ML_hydro::rho_group","ML_hydro::vel_group"}; + const char *const groups[] = { + "ML_hydro::eps_group", + "ML_hydro::rho_group", + "ML_hydro::vel_group"}; GenericFD_AssertGroupStorage(cctkGH, "hydro_vacuum", 3, groups); - GenericFD_LoopOverEverything(cctkGH, &hydro_vacuum_Body); + GenericFD_LoopOverEverything(cctkGH, hydro_vacuum_Body); if (verbose > 1) { diff --git a/ML_hydro/src/make.code.defn b/ML_hydro/src/make.code.defn index f5bbd24..f7c60fa 100644 --- a/ML_hydro/src/make.code.defn +++ b/ML_hydro/src/make.code.defn @@ -1,3 +1,3 @@ # File produced by Kranc -SRCS = Startup.cc RegisterMoL.cc RegisterSymmetries.cc hydro_vacuum.cc hydro_soundWave.cc hydro_prim2con.cc hydro_con2prim.cc hydro_RHS.cc Boundaries.cc +SRCS = Startup.cc RegisterSymmetries.cc RegisterMoL.cc hydro_vacuum.cc hydro_soundWave.cc hydro_prim2con.cc hydro_con2prim.cc hydro_RHS.cc Boundaries.cc diff --git a/m/McLachlanW.m b/m/McLachlanW.m index 4883dde..e8a329b 100644 --- a/m/McLachlanW.m +++ b/m/McLachlanW.m @@ -107,8 +107,6 @@ ddetgtExpr[la_] = Sum [D[Det[MatrixOfComponents[gt[la, lb]]], X] PD[X, la], {X, Union[Flatten[MatrixOfComponents[gt[la, lb]]]]}]; -pi = N[Pi,40]; - (******************************************************************************) (* Groups *) (******************************************************************************) @@ -481,14 +479,14 @@ evolCalcBSSNW = - Xtn[uj] PD[beta[ui],lj] + (2/3) Xtn[ui] PD[beta[uj],lj] (* Equation (4.28) in Baumgarte & Shapiro (Phys. Rept. 376 (2003) 41-131) *) - + addMatter (- 16 pi alpha gtu[ui,uj] S[lj]), + + addMatter (- 16 Pi alpha gtu[ui,uj] S[lj]), (* PRD 62, 044034 (2000), eqn. (11) *) dot[trK] -> - gu[ua,ub] CD[alpha,la,lb] + alpha (Atm[ua,lb] Atm[ub,la] + (1/3) trK^2) + ( betam[ua] PDm[trK,la] + betap[ua] PDp[trK,la] ) (* Equation (4.21) in Baumgarte & Shapiro (Phys. Rept. 376 (2003) 41-131) *) - + addMatter (4 pi alpha (rho + trS)), + + addMatter (4 Pi alpha (rho + trS)), (* PRD 62, 044034 (2000), eqn. (12) *) (* TODO: use Hamiltonian constraint to make tracefree *) @@ -501,7 +499,7 @@ evolCalcBSSNW = + At[la,lc] PD[beta[uc],lb] + At[lb,lc] PD[beta[uc],la] - (2/3) At[la,lb] PD[beta[uc],lc] (* Equation (4.23) in Baumgarte & Shapiro (Phys. Rept. 376 (2003) 41-131) *) - + addMatter (- W2 alpha 8 pi + + addMatter (- W2 alpha 8 Pi (T[la,lb] - (1/3) g[la,lb] trS)), (* dot[alpha] -> - harmonicF alpha^harmonicN trK, *) @@ -682,7 +680,7 @@ constraintsCalcBSSNW = (* H -> trR - Km[ua,lb] Km[ub,la] + trK^2, *) (* PRD 67, 084023 (2003), eqn. (19) *) - H -> trR - Atm[ua,lb] Atm[ub,la] + (2/3) trK^2 - addMatter 16 pi rho, + H -> trR - Atm[ua,lb] Atm[ub,la] + (2/3) trK^2 - addMatter 16 Pi rho, (* gK[la,lb,lc] -> CD[K[la,lb],lc], *) (* gK[la,lb,lc] -> + 4 e4phi PD[phi,lc] At[la,lb] + e4phi CD[At[la,lb],lc] @@ -692,7 +690,7 @@ constraintsCalcBSSNW = M[li] -> + gtu[uj,uk] (CDt[At[li,lj],lk] + 6 At[li,lj] pdphi[lk]) - (2/3) PD[trK,li] - - addMatter 8 pi S[li], + - addMatter 8 Pi S[li], (* TODO: use PRD 67, 084023 (2003), eqn. (20) *) (* det gamma-tilde *) diff --git a/m/McLachlan_ADM.m b/m/McLachlan_ADM.m index 92f0590..94ab2ae 100644 --- a/m/McLachlan_ADM.m +++ b/m/McLachlan_ADM.m @@ -82,8 +82,6 @@ ddetgExpr[la_] = Sum [D[Det[MatrixOfComponents[g[la, lb]]], X] PD[X, la], {X, Union[Flatten[MatrixOfComponents[g[la, lb]]]]}]; -pi = N[Pi,40]; - (******************************************************************************) (* Groups *) (******************************************************************************) diff --git a/m/McLachlan_ADMConstraints.m b/m/McLachlan_ADMConstraints.m index 19346e7..eaddae6 100644 --- a/m/McLachlan_ADMConstraints.m +++ b/m/McLachlan_ADMConstraints.m @@ -82,14 +82,10 @@ ddetgExpr[la_] = Sum [D[Det[MatrixOfComponents[g[la, lb]]], X] PD[X, la], {X, Union[Flatten[MatrixOfComponents[g[la, lb]]]]}]; -pi = N[Pi,40]; - (******************************************************************************) (* Groups *) (******************************************************************************) -SetGroupTimelevels[g_,tl_] = Join[g, {Timelevels -> tl}]; - evolvedGroups = {}; evaluatedGroups = {SetGroupName [CreateGroupFromTensor [H ], prefix <> "Ham"], @@ -163,9 +159,9 @@ ADMConstraintsCalc = (* ADM constraints *) H -> + trR - Km[ua,lb] Km[ub,la] + trK^2 - - addMatter 16 pi rho, + - addMatter 16 Pi rho, M[la] -> + gu[ub,uc] (CD[K[lc,la], lb] - CD[K[lc,lb], la]) - - addMatter 8 pi S[la] + - addMatter 8 Pi S[la] } }; diff --git a/m/McLachlan_ADMQuantities.m b/m/McLachlan_ADMQuantities.m index 003a9f8..2f78d56 100644 --- a/m/McLachlan_ADMQuantities.m +++ b/m/McLachlan_ADMQuantities.m @@ -149,8 +149,6 @@ T11=eTxx; T12=eTxy; T22=eTyy; T13=eTxz; T23=eTyz; T33=eTzz; detgtExpr = Det [MatrixOfComponents [gt[la,lb]]]; -pi = N[Pi,40]; - (******************************************************************************) (* Groups *) (******************************************************************************) @@ -257,16 +255,16 @@ ADMQuantitiesCalc = (* ADM quantities *) (* See PRD 66, 084026 (2002) *) - Madm -> 1/(16 pi) - (+ ephi^5 (+ 16 pi addMatter rho + Madm -> 1/(16 Pi) + (+ ephi^5 (+ 16 Pi addMatter rho + Atm[ua,lb] Atm[ub,la] - 2/3 trK^2) - gtu[ua,ub] Gt[uc,la,ld] Gtlu[lb,lc,ud] + (1 - ephi) trRt), - Jadm[li] -> 1/(16 pi) Eps[li,lj,uk] ephi^6 + Jadm[li] -> 1/(16 Pi) Eps[li,lj,uk] ephi^6 (+ 2 Atm[uj,lk] - + 16 pi x[uj] S[lk] + + 16 Pi x[uj] S[lk] + 4/3 x[uj] PD[trK,lk] - x[uj] dgtu[ul,um,lk] At[ll,lm]) } diff --git a/m/McLachlan_BSSN.m b/m/McLachlan_BSSN.m index 30cda03..6943a3e 100644 --- a/m/McLachlan_BSSN.m +++ b/m/McLachlan_BSSN.m @@ -159,8 +159,6 @@ T11=eTxx; T12=eTxy; T22=eTyy; T13=eTxz; T23=eTyz; T33=eTzz; (* Expressions *) (******************************************************************************) -pi = N[Pi,40]; - detgExpr = Det [MatrixOfComponents [g [la,lb]]]; ddetgExpr[la_] = Sum [D[Det[MatrixOfComponents[g[la, lb]]], X] PD[X, la], @@ -171,7 +169,7 @@ ddetgtExpr[la_] = Sum [D[Det[MatrixOfComponents[gt[la, lb]]], X] PD[X, la], {X, Union[Flatten[MatrixOfComponents[gt[la, lb]]]]}]; -etaExpr = Min [SpatialBetaDriverRadius / r, 1]; +etaExpr = SpatialBetaDriverRadius / Max [r, SpatialBetaDriverRadius]; thetaExpr = Min [Exp [1 - r / SpatialShiftGammaCoeffRadius], 1]; @@ -180,8 +178,6 @@ thetaExpr = Min [Exp [1 - r / SpatialShiftGammaCoeffRadius], 1]; (* Groups *) (******************************************************************************) -SetGroupTimelevels[g_,tl_] = Join[g, {Timelevels -> tl}]; - evolvedGroups = {SetGroupName [CreateGroupFromTensor [phi ], prefix <> "log_confac"], SetGroupName [CreateGroupFromTensor [gt[la,lb]], prefix <> "metric" ], @@ -642,7 +638,7 @@ evolCalc = + 2 gtu[ui,uj] (alpha PD[Tet,lj] - Tet PD[alpha,lj]) - 2 alpha e4phi dampk1 Zet[ui] (* Equation (4.28) in Baumgarte & Shapiro (Phys. Rept. 376 (2003) 41-131) *) - + addMatter (- 16 pi alpha gtu[ui,uj] S[lj]), + + addMatter (- 16 Pi alpha gtu[ui,uj] S[lj]), dot[Xt[ui]] -> dotXt[ui], (* Equation for Theta *) @@ -665,7 +661,7 @@ evolCalc = + 2 dotTet + 2 PD[alpha,la] Zet[ua] + dampk1 alpha Tet - dampk1k2 alpha Tet (* Equation (4.21) in Baumgarte & Shapiro (Phys. Rept. 376 (2003) 41-131) *) - + addMatter (4 pi alpha (rho + trS)), + + addMatter (4 Pi alpha (rho + trS)), dot[trK] -> dottrK, (* PRD 62, 044034 (2000), eqn. (12) *) @@ -683,7 +679,7 @@ evolCalc = (* damping term in trA, alternative to trA cleaning *) (* - (dampA/3) alpha At[lc,ld] gtu[uc,ud] gt[la,lb] *) (* Equation (4.23) in Baumgarte & Shapiro (Phys. Rept. 376 (2003) 41-131) *) - + addMatter (- em4phi alpha 8 pi + + addMatter (- em4phi alpha 8 Pi (T[la,lb] - (1/3) g[la,lb] trS)), (* dot[alpha] -> - harmonicF alpha^harmonicN trK, *) @@ -700,10 +696,9 @@ evolCalc = *) dot[alpha] -> - harmonicF alpha^harmonicN (+ LapseACoeff A - + (1 - LapseACoeff) (trK - 2 Tet)), + + (1 - LapseACoeff) (trK + AlphaDriver (alpha - 1) - 2 Tet)), dot[A] -> + LapseACoeff (dottrK - 2 dotTet - AlphaDriver A), - eta -> etaExpr, theta -> thetaExpr, @@ -756,16 +751,22 @@ advectCalc = dot[At[la,lb]] -> dot[At[la,lb]] + Upwind[beta[uc], At[la,lb], lc], - dot[alpha] -> dot[alpha] + LapseAdvectionCoeff Upwind[beta[ua], alpha, la], + dot[alpha] -> dot[alpha] + + LapseAdvectionCoeff Upwind[beta[ua], alpha, la], - dot[A] -> dot[A] + LapseAdvectionCoeff Upwind[beta[ua], A, la], + dot[A] -> dot[A] + + LapseACoeff ( + + LapseAdvectionCoeff Upwind[beta[ua], A, la] + + (1 - LapseAdvectionCoeff) Upwind[beta[ua], trK, la]), - dot[beta[ua]] -> dot[beta[ua]] + ShiftAdvectionCoeff Upwind[beta[ub], beta[ua], lb], + dot[beta[ua]] -> dot[beta[ua]] + + ShiftAdvectionCoeff Upwind[beta[ub], beta[ua], lb], dot[B[ua]] -> dot[B[ua]] - + ShiftBCoeff Upwind[beta[uj], Xt[ua], lj] (* take care *) - + ShiftAdvectionCoeff Upwind[beta[ub], B[ua], lb] - - ShiftAdvectionCoeff Upwind[beta[ub], Xt[ua], lb] + + ShiftBCoeff ( + + ShiftAdvectionCoeff Upwind[beta[ub], B[ua], lb] + + ((1 - ShiftAdvectionCoeff) + Upwind[beta[ub], Xt[ua], lb])) } }; @@ -1082,7 +1083,7 @@ constraintsCalc = (* H -> trR - Km[ua,lb] Km[ub,la] + trK^2, *) (* PRD 67, 084023 (2003), eqn. (19) *) - H -> trR - Atm[ua,lb] Atm[ub,la] + (2/3) trK^2 - addMatter 16 pi rho, + H -> trR - Atm[ua,lb] Atm[ub,la] + (2/3) trK^2 - addMatter 16 Pi rho, (* gK[la,lb,lc] -> CD[K[la,lb],lc], *) (* gK[la,lb,lc] -> + 4 e4phi PD[phi,lc] At[la,lb] + e4phi CD[At[la,lb],lc] @@ -1092,7 +1093,7 @@ constraintsCalc = M[li] -> + gtu[uj,uk] (CDt[At[li,lj],lk] + 6 At[li,lj] cdphi[lk]) - (2/3) PD[trK,li] - - addMatter 8 pi S[li], + - addMatter 8 Pi S[li], (* TODO: use PRD 67, 084023 (2003), eqn. (20) *) (* det gamma-tilde *) diff --git a/m/McLachlan_WeylScalars.m b/m/McLachlan_WeylScalars.m index 5e8d805..df10897 100644 --- a/m/McLachlan_WeylScalars.m +++ b/m/McLachlan_WeylScalars.m @@ -105,12 +105,6 @@ Map [DefineTensor, dtbetax, dtbetay, dtbetaz}]; (******************************************************************************) -(* Expressions *) -(******************************************************************************) - -pi = N[Pi,40]; - -(******************************************************************************) (* Groups *) (******************************************************************************) |